-rw-r--r--Documentation/devices.txt1
-rw-r--r--Documentation/filesystems/directory-locking31
-rw-r--r--Documentation/filesystems/f2fs.txt7
-rw-r--r--Documentation/filesystems/porting8
-rw-r--r--arch/arm64/kernel/signal32.c2
-rw-r--r--arch/ia64/kernel/elfcore.c12
-rw-r--r--arch/ia64/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/parisc/kernel/signal32.c2
-rw-r--r--arch/parisc/kernel/signal32.h2
-rw-r--r--arch/powerpc/include/asm/spu.h3
-rw-r--r--arch/powerpc/kernel/signal_32.c2
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c5
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c89
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h3
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/sparc/kernel/signal32.c2
-rw-r--r--arch/tile/kernel/compat_signal.c2
-rw-r--r--arch/x86/ia32/ia32_aout.c86
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/um/elfcore.c15
-rw-r--r--drivers/ata/ahci.c4
-rw-r--r--drivers/ata/ahci.h2
-rw-r--r--drivers/ata/ahci_imx.c101
-rw-r--r--drivers/ata/ahci_platform.c3
-rw-r--r--drivers/ata/ata_piix.c19
-rw-r--r--drivers/ata/libahci.c27
-rw-r--r--drivers/ata/libata-core.c1
-rw-r--r--drivers/ata/libata-eh.c8
-rw-r--r--drivers/ata/libata-transport.c16
-rw-r--r--drivers/ata/sata_highbank.c8
-rw-r--r--drivers/ata/sata_rcar.c10
-rw-r--r--drivers/base/devtmpfs.c6
-rw-r--r--drivers/char/misc.c12
-rw-r--r--drivers/gpu/drm/drm_fops.c17
-rw-r--r--drivers/media/dvb-core/dmxdev.c4
-rw-r--r--drivers/media/dvb-core/dvbdev.c19
-rw-r--r--drivers/mtd/nand/nandsim.c2
-rw-r--r--drivers/mtd/ubi/attach.c11
-rw-r--r--drivers/mtd/ubi/fastmap.c41
-rw-r--r--drivers/mtd/ubi/wl.c4
-rw-r--r--drivers/staging/comedi/comedi_compat32.c3
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_compat25.h4
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c2
-rw-r--r--drivers/staging/lustre/lustre/lvfs/lvfs_linux.c2
-rw-r--r--drivers/staging/rtl8188eu/include/osdep_service.h5
-rw-r--r--drivers/staging/rtl8188eu/os_dep/osdep_service.c208
-rw-r--r--drivers/usb/core/file.c16
-rw-r--r--fs/9p/cache.h12
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/adfs/adfs.h9
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/aio.c63
-rw-r--r--fs/anon_inodes.c114
-rw-r--r--fs/attr.c25
-rw-r--r--fs/autofs4/autofs_i.h3
-rw-r--r--fs/autofs4/dev-ioctl.c6
-rw-r--r--fs/autofs4/inode.c13
-rw-r--r--fs/befs/linuxvfs.c61
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c127
-rw-r--r--fs/binfmt_elf_fdpic.c152
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/cachefiles/namei.c4
-rw-r--r--fs/char_dev.c6
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/link.c7
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/coda/file.c6
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/coredump.c71
-rw-r--r--fs/dcache.c342
-rw-r--r--fs/dlm/lockspace.c4
-rw-r--r--fs/ecryptfs/dentry.c29
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h19
-rw-r--r--fs/ecryptfs/file.c8
-rw-r--r--fs/ecryptfs/inode.c29
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c35
-rw-r--r--fs/exportfs/expfs.c267
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/move_extent.c40
-rw-r--r--fs/f2fs/Kconfig8
-rw-r--r--fs/f2fs/acl.c36
-rw-r--r--fs/f2fs/acl.h9
-rw-r--r--fs/f2fs/checkpoint.c75
-rw-r--r--fs/f2fs/data.c29
-rw-r--r--fs/f2fs/dir.c4
-rw-r--r--fs/f2fs/f2fs.h117
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c31
-rw-r--r--fs/f2fs/inode.c62
-rw-r--r--fs/f2fs/namei.c52
-rw-r--r--fs/f2fs/node.c142
-rw-r--r--fs/f2fs/recovery.c45
-rw-r--r--fs/f2fs/segment.c133
-rw-r--r--fs/f2fs/segment.h38
-rw-r--r--fs/f2fs/super.c143
-rw-r--r--fs/f2fs/xattr.c36
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/inode.c19
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file_table.c129
-rw-r--r--fs/fs-writeback.c1
-rw-r--r--fs/fuse/cuse.c7
-rw-r--r--fs/fuse/dir.c40
-rw-r--r--fs/fuse/file.c361
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/inode.c9
-rw-r--r--fs/hpfs/hpfs_fn.h1
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c28
-rw-r--r--fs/inode.c62
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/inode.c12
-rw-r--r--fs/jbd/transaction.c8
-rw-r--r--fs/libfs.c122
-rw-r--r--fs/locks.c69
-rw-r--r--fs/mount.h20
-rw-r--r--fs/namei.c322
-rw-r--r--fs/namespace.c390
-rw-r--r--fs/ncpfs/dir.c55
-rw-r--r--fs/ncpfs/file.c12
-rw-r--r--fs/ncpfs/inode.c19
-rw-r--r--fs/ncpfs/ncp_fs_sb.h2
-rw-r--r--fs/nfs/dir.c119
-rw-r--r--fs/nfs/direct.c17
-rw-r--r--fs/nfs/file.c117
-rw-r--r--fs/nfs/namespace.c5
-rw-r--r--fs/nfs/nfs3proc.c8
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4namespace.c7
-rw-r--r--fs/nfs/nfs4proc.c5
-rw-r--r--fs/nfs/proc.c8
-rw-r--r--fs/nfs/unlink.c9
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/nfs4recover.c12
-rw-r--r--fs/nfsd/nfs4state.c17
-rw-r--r--fs/nfsd/nfsfh.c28
-rw-r--r--fs/nfsd/nfsfh.h4
-rw-r--r--fs/nfsd/vfs.c23
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/open.c32
-rw-r--r--fs/pnode.c13
-rw-r--r--fs/proc/self.c10
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/qnx4/namei.c4
-rw-r--r--fs/quota/quota.c1
-rw-r--r--fs/read_write.c25
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/select.c4
-rw-r--r--fs/splice.c6
-rw-r--r--fs/stat.c31
-rw-r--r--fs/super.c201
-rw-r--r--fs/sync.c2
-rw-r--r--fs/ubifs/debug.c6
-rw-r--r--fs/ubifs/dir.c41
-rw-r--r--fs/ubifs/gc.c3
-rw-r--r--fs/ubifs/journal.c6
-rw-r--r--fs/ubifs/super.c8
-rw-r--r--fs/ubifs/xattr.c16
-rw-r--r--fs/udf/super.c45
-rw-r--r--fs/utimes.c9
-rw-r--r--include/asm-generic/siginfo.h2
-rw-r--r--include/linux/anon_inodes.h3
-rw-r--r--include/linux/ata.h7
-rw-r--r--include/linux/binfmts.h3
-rw-r--r--include/linux/cgroup.h37
-rw-r--r--include/linux/compat.h2
-rw-r--r--include/linux/coredump.h10
-rw-r--r--include/linux/dcache.h104
-rw-r--r--include/linux/elf.h6
-rw-r--r--include/linux/elfcore.h7
-rw-r--r--include/linux/fs.h106
-rw-r--r--include/linux/lglock.h10
-rw-r--r--include/linux/miscdevice.h1
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/namei.h2
-rw-r--r--include/linux/percpu.h32
-rw-r--r--include/linux/pid_namespace.h1
-rw-r--r--include/trace/events/f2fs.h51
-rw-r--r--ipc/mqueue.c2
-rw-r--r--kernel/cgroup.c248
-rw-r--r--kernel/elfcore.c10
-rw-r--r--kernel/pid_namespace.c8
-rw-r--r--kernel/signal.c2
-rw-r--r--mm/memcontrol.c66
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/percpu.c5
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/sunrpc/rpc_pipe.c12
-rw-r--r--security/device_cgroup.c11
-rw-r--r--sound/core/sound.c22
-rw-r--r--sound/sound_core.c17
213 files changed, 3409 insertions, 3364 deletions
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 23721d3be3e6..80b72419ffd8 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -414,6 +414,7 @@ Your cooperation is appreciated.
 		200 = /dev/net/tun	TAP/TUN network device
 		201 = /dev/button/gulpb	Transmeta GULP-B buttons
 		202 = /dev/emd/ctl	Enhanced Metadisk RAID (EMD) control
+		203 = /dev/cuse		Cuse (character device in user-space)
 		204 = /dev/video/em8300	EM8300 DVD decoder control
 		205 = /dev/video/em8300_mv	EM8300 DVD decoder video
 		206 = /dev/video/em8300_ma	EM8300 DVD decoder audio
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking
index ff7b611abf33..09bbf9a54f80 100644
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -2,6 +2,10 @@
 kinds of locks - per-inode (->i_mutex) and per-filesystem
 (->s_vfs_rename_mutex).
 
+	When taking the i_mutex on multiple non-directory objects, we
+always acquire the locks in order by increasing address. We'll call
+that "inode pointer" order in the following.
+
 	For our purposes all operations fall in 5 classes:
 
 1) read access. Locking rules: caller locks directory we are accessing.
@@ -12,8 +16,9 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem
 locks victim and calls the method.
 
 4) rename() that is _not_ cross-directory. Locking rules: caller locks
-the parent, finds source and target, if target already exists - locks it
-and then calls the method.
+the parent and finds source and target. If target already exists, lock
+it. If source is a non-directory, lock it. If that means we need to
+lock both, lock them in inode pointer order.
 
 5) link creation. Locking rules:
 	* lock parent
@@ -30,7 +35,9 @@ rules:
 	  fail with -ENOTEMPTY
 	* if new parent is equal to or is a descendent of source
 	  fail with -ELOOP
-	* if target exists - lock it.
+	* If target exists, lock it. If source is a non-directory, lock
+	  it. In case that means we need to lock both source and target,
+	  do so in inode pointer order.
 	* call the method.
 
 
@@ -56,9 +63,11 @@ objects - A < B iff A is an ancestor of B.
     renames will be blocked on filesystem lock and we don't start changing
     the order until we had acquired all locks).
 
-(3) any operation holds at most one lock on non-directory object and
-    that lock is acquired after all other locks. (Proof: see descriptions
-    of operations).
+(3) locks on non-directory objects are acquired only after locks on
+    directory objects, and are acquired in inode pointer order.
+    (Proof: all operations but renames take lock on at most one
+    non-directory object, except renames, which take locks on source and
+    target in inode pointer order in the case they are not directories.)
 
 	Now consider the minimal deadlock. Each process is blocked on
 attempt to acquire some lock and already holds at least one lock. Let's
@@ -66,9 +75,13 @@ consider the set of contended locks. First of all, filesystem lock is
 not contended, since any process blocked on it is not holding any locks.
 Thus all processes are blocked on ->i_mutex.
 
-	Non-directory objects are not contended due to (3). Thus link
-creation can't be a part of deadlock - it can't be blocked on source
-and it means that it doesn't hold any locks.
+	By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address. Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+	Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
 
 	Any contended object is either held by cross-directory rename or
 has a child that is also contended. Indeed, suppose that it is held by
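
The "inode pointer" ordering rule introduced above is easy to get wrong in filesystem code. The following is a minimal sketch of the idea only; the helper name is made up for illustration, and the lockdep nesting annotation a real implementation would need on the second lock is omitted:

    #include <linux/fs.h>
    #include <linux/mutex.h>
    #include <linux/kernel.h>	/* swap() */

    /*
     * Take ->i_mutex on one or two non-directory inodes in "inode
     * pointer" order, i.e. by increasing address, per the rules above.
     */
    static void lock_two_nondirs_sketch(struct inode *a, struct inode *b)
    {
    	if (!b || a == b) {
    		mutex_lock(&a->i_mutex);
    		return;
    	}
    	if (a > b)
    		swap(a, b);		/* lower address first */
    	mutex_lock(&a->i_mutex);
    	mutex_lock(&b->i_mutex);	/* real code adds a nesting annotation here */
    }
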
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 3cd27bed6349..a3fe811bbdbc 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -119,6 +119,7 @@ active_logs=%u Support configuring the number of active logs. In the
                        Default number is 6.
 disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                        does not aware of cold files such as media files.
+inline_xattr           Enable the inline xattrs feature.
 
 ================================================================================
 DEBUGFS ENTRIES
@@ -164,6 +165,12 @@ Files in /sys/fs/f2fs/<devname>
                              gc_idle = 1 will select the Cost Benefit approach
                              & setting gc_idle = 2 will select the greedy aproach.
 
+ reclaim_segments            This parameter controls the number of prefree
+                             segments to be reclaimed. If the number of prefree
+                             segments is larger than this number, f2fs tries to
+                             conduct checkpoint to reclaim the prefree segments
+                             to free segments. By default, 100 segments, 200MB.
+
 ================================================================================
 USAGE
 ================================================================================
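
As a brief illustration of the two knobs documented above, a userspace program might mount with inline_xattr and then tune reclaim_segments through sysfs. The device and mount-point names here are examples only, not taken from the patch:

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
    	/* mount an f2fs volume with the inline_xattr option */
    	if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0, "inline_xattr") != 0) {
    		perror("mount");
    		return 1;
    	}

    	/* set the prefree-segment reclaim threshold (default 100 segments) */
    	FILE *f = fopen("/sys/fs/f2fs/sdb1/reclaim_segments", "w");
    	if (f) {
    		fprintf(f, "%d\n", 100);
    		fclose(f);
    	}
    	return 0;
    }
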
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f0890581f7f6..fe2b7ae6f962 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -455,3 +455,11 @@ in your dentry operations instead.
 	vfs_follow_link has been removed.  Filesystems must use nd_set_link
 	from ->follow_link for normal symlinks, or nd_jump_link for magic
 	/proc/<pid> style links.
+--
+[mandatory]
+	iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be
+	called with both ->i_lock and inode_hash_lock held; the former is *not*
+	taken anymore, so verify that your callbacks do not rely on it (none
+	of the in-tree instances did).  inode_hash_lock is still held,
+	of course, so they are still serialized wrt removal from inode hash,
+	as well as wrt set() callback of iget5_locked().
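
For filesystems affected by the iget5_locked() note above, the practical consequence is that a test() callback must not assume ->i_lock is held. A hedged sketch follows; the key structure and the choice of fields compared are invented for illustration, not taken from any in-tree filesystem:

    #include <linux/fs.h>

    /* hypothetical lookup key, for illustration only */
    struct myfs_ikey {
    	unsigned long ino;
    	unsigned int generation;
    };

    /*
     * Runs under inode_hash_lock but *without* ->i_lock, so it may only
     * look at fields that are stable once the inode has been hashed.
     */
    static int myfs_inode_test(struct inode *inode, void *opaque)
    {
    	struct myfs_ikey *key = opaque;

    	/* no spin_lock(&inode->i_lock) here - it is no longer held */
    	return inode->i_ino == key->ino &&
    	       inode->i_generation == key->generation;
    }
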
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e51bbe79f5b5..b3fc9f5ec6d3 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -122,7 +122,7 @@ static inline int get_sigset_t(sigset_t *set,
 	return 0;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index bac1639bc320..04bc8fd5f893 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -11,8 +11,7 @@ Elf64_Half elf_core_extra_phdrs(void)
 	return GATE_EHDR->e_phnum;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -35,15 +34,13 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 			phdr.p_offset += ofs;
 		}
 		phdr.p_paddr = 0; /* match other core phdrs */
-		*size += sizeof(phdr);
-		if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			return 0;
 	}
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -54,8 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 			void *addr = (void *)gate_phdrs[i].p_vaddr;
 			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
 
-			*size += memsz;
-			if (*size > limit || !dump_write(file, addr, memsz))
+			if (!dump_emit(cprm, addr, memsz))
 				return 0;
 			break;
 		}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 3637e03d2282..33cab9a8adff 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -105,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 }
 
 int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
 {
 	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
 		return -EFAULT;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 57de8b751627..1905a419aa46 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -314,7 +314,7 @@ SYSCALL_DEFINE3(32_sigaction, long, sig, const struct compat_sigaction __user *,
 	return ret;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index 6c6a271a6140..984abbee71ca 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -319,7 +319,7 @@ copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 int
-copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	compat_uptr_t addr;
 	compat_int_t val;
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index 72ab41a51f32..af51d4ccee42 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -34,7 +34,7 @@ struct compat_ucontext {
 
 /* ELF32 signal handling */
 
-int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
+int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from);
 int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
 
 /* In a deft move of uber-hackery, we decide to carry the top half of all
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 93f280e23279..37b7ca39ec9f 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -235,6 +235,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
 struct file;
+struct coredump_params;
 struct spufs_calls {
 	long (*create_thread)(const char __user *name,
 					unsigned int flags, umode_t mode,
@@ -242,7 +243,7 @@ struct spufs_calls {
 	long (*spu_run)(struct file *filp, __u32 __user *unpc,
 						__u32 __user *ustatus);
 	int (*coredump_extra_notes_size)(void);
-	int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
+	int (*coredump_extra_notes_write)(struct coredump_params *cprm);
 	void (*notify_spus_active)(void);
 	struct module *owner;
 };
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 1a410aa57fb7..749778e0a69d 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -893,7 +893,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_PPC64
-int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
+int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 {
 	int err;
 
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index db4e638cf408..3844f1397fc3 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
 
 #include <asm/spu.h>
 
@@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void)
 	return ret;
 }
 
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spufs_calls *calls;
 	int ret;
@@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(file, foffset);
+	ret = calls->coredump_extra_notes_write(cprm);
 
 	spufs_calls_put(calls);
 
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c9500ea7be2f..be6212ddbf06 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
 
 #include <asm/uaccess.h>
 
@@ -48,44 +50,6 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
 	return ++ret; /* count trailing NULL */
 }
 
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
-{
-	unsigned long limit = rlimit(RLIMIT_CORE);
-	ssize_t written;
-
-	if (*foffset + nr > limit)
-		return -EIO;
-
-	written = file->f_op->write(file, addr, nr, &file->f_pos);
-	*foffset += written;
-
-	if (written != nr)
-		return -EIO;
-
-	return 0;
-}
-
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-			    loff_t *foffset)
-{
-	int rc, size;
-
-	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-	memset(buf, 0, size);
-
-	rc = 0;
-	while (rc == 0 && new_off > *foffset) {
-		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-		rc = spufs_dump_write(file, buf, size, foffset);
-	}
-
-	return rc;
-}
-
 static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
 {
 	int i, sz, total = 0;
@@ -165,10 +129,10 @@ int spufs_coredump_extra_notes_size(void)
 }
 
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
-				 struct file *file, int dfd, loff_t *foffset)
+				 struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
-	int sz, rc, nread, total = 0;
+	int sz, rc, total = 0;
 	const int bufsz = PAGE_SIZE;
 	char *name;
 	char fullname[80], *buf;
@@ -186,42 +150,39 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		goto Eio;
 
-	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		goto Eio;
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
-	if (rc)
-		goto out;
+	if (!dump_align(cprm, 4))
+		goto Eio;
 
 	do {
-		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
-		if (nread > 0) {
-			rc = spufs_dump_write(file, buf, nread, foffset);
-			if (rc)
-				goto out;
-			total += nread;
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!dump_emit(cprm, buf, rc))
+				goto Eio;
+			total += rc;
 		}
-	} while (nread == bufsz && total < sz);
+	} while (rc == bufsz && total < sz);
 
-	if (nread < 0) {
-		rc = nread;
+	if (rc < 0)
 		goto out;
-	}
-
-	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-			foffset);
 
+	if (!dump_skip(cprm,
+		       roundup(cprm->written - total + sz, 4) - cprm->written))
+		goto Eio;
 out:
 	free_page((unsigned long)buf);
 	return rc;
+Eio:
+	free_page((unsigned long)buf);
+	return -EIO;
 }
 
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spu_context *ctx;
 	int fd, j, rc;
@@ -233,7 +194,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	return rc;
 
 	for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-		rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+		rc = spufs_arch_write_note(ctx, j, cprm, fd);
 		if (rc) {
 			spu_release_saved(ctx);
 			return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 67852ade4c01..0ba3c9598358 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
+struct coredump_params;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			umode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
 
 extern const struct file_operations spufs_context_fops;
 
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 5a3ab5c191fd..6e2442978409 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -49,7 +49,7 @@ typedef struct
 	__u32 gprs_high[NUM_GPRS];
 } rt_sigframe32;
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index b524f91dd0e5..ee789d2ef05d 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -68,7 +68,7 @@ struct rt_signal_frame32 {
 	/* __siginfo_rwin_t * */u32 rwin_save;
 } __attribute__((aligned(8)));
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 85e00b2f39bf..19c04b5ce408 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -49,7 +49,7 @@ struct compat_rt_sigframe {
 	struct compat_ucontext uc;
 };
 
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index bae3aba95b15..d21ff89207cd 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -25,6 +25,7 @@
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/perf_event.h>
28 29
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
@@ -33,14 +34,18 @@
33#include <asm/ia32.h> 34#include <asm/ia32.h>
34 35
35#undef WARN_OLD 36#undef WARN_OLD
36#undef CORE_DUMP /* definitely broken */
37 37
38static int load_aout_binary(struct linux_binprm *); 38static int load_aout_binary(struct linux_binprm *);
39static int load_aout_library(struct file *); 39static int load_aout_library(struct file *);
40 40
41#ifdef CORE_DUMP 41#ifdef CONFIG_COREDUMP
42static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, 42static int aout_core_dump(struct coredump_params *);
43 unsigned long limit); 43
44static unsigned long get_dr(int n)
45{
46 struct perf_event *bp = current->thread.ptrace_bps[n];
47 return bp ? bp->hw.info.address : 0;
48}
44 49
45/* 50/*
46 * fill in the user structure for a core dump.. 51 * fill in the user structure for a core dump..
@@ -48,6 +53,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
48static void dump_thread32(struct pt_regs *regs, struct user32 *dump) 53static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
49{ 54{
50 u32 fs, gs; 55 u32 fs, gs;
56 memset(dump, 0, sizeof(*dump));
51 57
52/* changed the size calculations - should hopefully work better. lbt */ 58/* changed the size calculations - should hopefully work better. lbt */
53 dump->magic = CMAGIC; 59 dump->magic = CMAGIC;
@@ -57,15 +63,12 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
57 dump->u_dsize = ((unsigned long) 63 dump->u_dsize = ((unsigned long)
58 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; 64 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
59 dump->u_dsize -= dump->u_tsize; 65 dump->u_dsize -= dump->u_tsize;
60 dump->u_ssize = 0; 66 dump->u_debugreg[0] = get_dr(0);
61 dump->u_debugreg[0] = current->thread.debugreg0; 67 dump->u_debugreg[1] = get_dr(1);
62 dump->u_debugreg[1] = current->thread.debugreg1; 68 dump->u_debugreg[2] = get_dr(2);
63 dump->u_debugreg[2] = current->thread.debugreg2; 69 dump->u_debugreg[3] = get_dr(3);
64 dump->u_debugreg[3] = current->thread.debugreg3;
65 dump->u_debugreg[4] = 0;
66 dump->u_debugreg[5] = 0;
67 dump->u_debugreg[6] = current->thread.debugreg6; 70 dump->u_debugreg[6] = current->thread.debugreg6;
68 dump->u_debugreg[7] = current->thread.debugreg7; 71 dump->u_debugreg[7] = current->thread.ptrace_dr7;
69 72
70 if (dump->start_stack < 0xc0000000) { 73 if (dump->start_stack < 0xc0000000) {
71 unsigned long tmp; 74 unsigned long tmp;
@@ -74,24 +77,24 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
74 dump->u_ssize = tmp >> PAGE_SHIFT; 77 dump->u_ssize = tmp >> PAGE_SHIFT;
75 } 78 }
76 79
77 dump->regs.bx = regs->bx; 80 dump->regs.ebx = regs->bx;
78 dump->regs.cx = regs->cx; 81 dump->regs.ecx = regs->cx;
79 dump->regs.dx = regs->dx; 82 dump->regs.edx = regs->dx;
80 dump->regs.si = regs->si; 83 dump->regs.esi = regs->si;
81 dump->regs.di = regs->di; 84 dump->regs.edi = regs->di;
82 dump->regs.bp = regs->bp; 85 dump->regs.ebp = regs->bp;
83 dump->regs.ax = regs->ax; 86 dump->regs.eax = regs->ax;
84 dump->regs.ds = current->thread.ds; 87 dump->regs.ds = current->thread.ds;
85 dump->regs.es = current->thread.es; 88 dump->regs.es = current->thread.es;
86 savesegment(fs, fs); 89 savesegment(fs, fs);
87 dump->regs.fs = fs; 90 dump->regs.fs = fs;
88 savesegment(gs, gs); 91 savesegment(gs, gs);
89 dump->regs.gs = gs; 92 dump->regs.gs = gs;
90 dump->regs.orig_ax = regs->orig_ax; 93 dump->regs.orig_eax = regs->orig_ax;
91 dump->regs.ip = regs->ip; 94 dump->regs.eip = regs->ip;
92 dump->regs.cs = regs->cs; 95 dump->regs.cs = regs->cs;
93 dump->regs.flags = regs->flags; 96 dump->regs.eflags = regs->flags;
94 dump->regs.sp = regs->sp; 97 dump->regs.esp = regs->sp;
95 dump->regs.ss = regs->ss; 98 dump->regs.ss = regs->ss;
96 99
97#if 1 /* FIXME */ 100#if 1 /* FIXME */
@@ -107,7 +110,7 @@ static struct linux_binfmt aout_format = {
107 .module = THIS_MODULE, 110 .module = THIS_MODULE,
108 .load_binary = load_aout_binary, 111 .load_binary = load_aout_binary,
109 .load_shlib = load_aout_library, 112 .load_shlib = load_aout_library,
110#ifdef CORE_DUMP 113#ifdef CONFIG_COREDUMP
111 .core_dump = aout_core_dump, 114 .core_dump = aout_core_dump,
112#endif 115#endif
113 .min_coredump = PAGE_SIZE 116 .min_coredump = PAGE_SIZE
@@ -122,7 +125,7 @@ static void set_brk(unsigned long start, unsigned long end)
122 vm_brk(start, end - start); 125 vm_brk(start, end - start);
123} 126}
124 127
125#ifdef CORE_DUMP 128#ifdef CONFIG_COREDUMP
126/* 129/*
127 * These are the only things you should do on a core-file: use only these 130 * These are the only things you should do on a core-file: use only these
128 * macros to write out all the necessary info. 131 * macros to write out all the necessary info.
@@ -130,15 +133,7 @@ static void set_brk(unsigned long start, unsigned long end)
130 133
131#include <linux/coredump.h> 134#include <linux/coredump.h>
132 135
133#define DUMP_WRITE(addr, nr) \ 136#define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
134 if (!dump_write(file, (void *)(addr), (nr))) \
135 goto end_coredump;
136
137#define DUMP_SEEK(offset) \
138 if (!dump_seek(file, offset)) \
139 goto end_coredump;
140
141#define START_DATA() (u.u_tsize << PAGE_SHIFT)
142#define START_STACK(u) (u.start_stack) 137#define START_STACK(u) (u.start_stack)
143 138
144/* 139/*
@@ -151,8 +146,7 @@ static void set_brk(unsigned long start, unsigned long end)
151 * dumping of the process results in another error.. 146 * dumping of the process results in another error..
152 */ 147 */
153 148
154static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, 149static int aout_core_dump(struct coredump_params *cprm)
155 unsigned long limit)
156{ 150{
157 mm_segment_t fs; 151 mm_segment_t fs;
158 int has_dumped = 0; 152 int has_dumped = 0;
@@ -164,19 +158,19 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
164 has_dumped = 1; 158 has_dumped = 1;
165 strncpy(dump.u_comm, current->comm, sizeof(current->comm)); 159 strncpy(dump.u_comm, current->comm, sizeof(current->comm));
166 dump.u_ar0 = offsetof(struct user32, regs); 160 dump.u_ar0 = offsetof(struct user32, regs);
167 dump.signal = signr; 161 dump.signal = cprm->siginfo->si_signo;
168 dump_thread32(regs, &dump); 162 dump_thread32(cprm->regs, &dump);
169 163
170 /* 164 /*
171 * If the size of the dump file exceeds the rlimit, then see 165 * If the size of the dump file exceeds the rlimit, then see
172 * what would happen if we wrote the stack, but not the data 166 * what would happen if we wrote the stack, but not the data
173 * area. 167 * area.
174 */ 168 */
175 if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) 169 if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
176 dump.u_dsize = 0; 170 dump.u_dsize = 0;
177 171
178 /* Make sure we have enough room to write the stack and data areas. */ 172 /* Make sure we have enough room to write the stack and data areas. */
179 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 173 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
180 dump.u_ssize = 0; 174 dump.u_ssize = 0;
181 175
182 /* make sure we actually have a data and stack area to dump */ 176 /* make sure we actually have a data and stack area to dump */
@@ -190,22 +184,26 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
190 184
191 set_fs(KERNEL_DS); 185 set_fs(KERNEL_DS);
192 /* struct user */ 186 /* struct user */
193 DUMP_WRITE(&dump, sizeof(dump)); 187 if (!dump_emit(cprm, &dump, sizeof(dump)))
188 goto end_coredump;
194 /* Now dump all of the user data. Include malloced stuff as well */ 189 /* Now dump all of the user data. Include malloced stuff as well */
195 DUMP_SEEK(PAGE_SIZE - sizeof(dump)); 190 if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
191 goto end_coredump;
196 /* now we start writing out the user space info */ 192 /* now we start writing out the user space info */
197 set_fs(USER_DS); 193 set_fs(USER_DS);
198 /* Dump the data area */ 194 /* Dump the data area */
199 if (dump.u_dsize != 0) { 195 if (dump.u_dsize != 0) {
200 dump_start = START_DATA(dump); 196 dump_start = START_DATA(dump);
201 dump_size = dump.u_dsize << PAGE_SHIFT; 197 dump_size = dump.u_dsize << PAGE_SHIFT;
202 DUMP_WRITE(dump_start, dump_size); 198 if (!dump_emit(cprm, (void *)dump_start, dump_size))
199 goto end_coredump;
203 } 200 }
204 /* Now prepare to dump the stack area */ 201 /* Now prepare to dump the stack area */
205 if (dump.u_ssize != 0) { 202 if (dump.u_ssize != 0) {
206 dump_start = START_STACK(dump); 203 dump_start = START_STACK(dump);
207 dump_size = dump.u_ssize << PAGE_SHIFT; 204 dump_size = dump.u_ssize << PAGE_SHIFT;
208 DUMP_WRITE(dump_start, dump_size); 205 if (!dump_emit(cprm, (void *)dump_start, dump_size))
206 goto end_coredump;
209 } 207 }
210end_coredump: 208end_coredump:
211 set_fs(fs); 209 set_fs(fs);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 665a730307f2..220675795e08 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,7 +34,7 @@
 #include <asm/sys_ia32.h>
 #include <asm/smap.h>
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err = 0;
 	bool ia32 = test_thread_flag(TIF_IA32);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b3e18f800302..94220d14d5cc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -378,9 +378,6 @@ do { \
 #define __this_cpu_or_1(pcp, val)	percpu_to_op("or", (pcp), val)
 #define __this_cpu_or_2(pcp, val)	percpu_to_op("or", (pcp), val)
 #define __this_cpu_or_4(pcp, val)	percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xchg_1(pcp, val)	percpu_xchg_op(pcp, val)
 #define __this_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val)
 #define __this_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val)
@@ -400,9 +397,6 @@ do { \
 #define this_cpu_or_1(pcp, val)		percpu_to_op("or", (pcp), val)
 #define this_cpu_or_2(pcp, val)		percpu_to_op("or", (pcp), val)
 #define this_cpu_or_4(pcp, val)		percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
@@ -447,7 +441,6 @@ do { \
 #define __this_cpu_add_8(pcp, val)		percpu_add_op((pcp), val)
 #define __this_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val)
 #define __this_cpu_or_8(pcp, val)		percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_8(pcp, val)		percpu_to_op("xor", (pcp), val)
 #define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #define __this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)
 #define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
@@ -457,7 +450,6 @@ do { \
 #define this_cpu_add_8(pcp, val)		percpu_add_op((pcp), val)
 #define this_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val)
 #define this_cpu_or_8(pcp, val)			percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_8(pcp, val)		percpu_to_op("xor", (pcp), val)
 #define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 6bb49b687c97..7bb89a27a5e4 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -11,8 +11,7 @@ Elf32_Half elf_core_extra_phdrs(void)
 	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -32,17 +31,14 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 			phdr.p_offset += ofs;
 		}
 		phdr.p_paddr = 0; /* match other core phdrs */
-		*size += sizeof(phdr);
-		if (*size > limit
-		    || !dump_write(file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			return 0;
 		}
 	}
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -55,10 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 		if (phdrp[i].p_type == PT_LOAD) {
 			void *addr = (void *) phdrp[i].p_vaddr;
 			size_t filesz = phdrp[i].p_filesz;
-
-			*size += filesz;
-			if (*size > limit
-			    || !dump_write(file, addr, filesz))
+			if (!dump_emit(cprm, addr, filesz))
 				return 0;
 		}
 	}
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8e28f923cf7f..e2903d03180e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -292,6 +292,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x8d66), board_ahci }, /* Wellsburg RAID */
 	{ PCI_VDEVICE(INTEL, 0x8d6e), board_ahci }, /* Wellsburg RAID */
 	{ PCI_VDEVICE(INTEL, 0x23a3), board_ahci }, /* Coleto Creek AHCI */
+	{ PCI_VDEVICE(INTEL, 0x9c83), board_ahci }, /* Wildcat Point-LP AHCI */
+	{ PCI_VDEVICE(INTEL, 0x9c85), board_ahci }, /* Wildcat Point-LP RAID */
+	{ PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */
+	{ PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 11456371f29b..2289efdf8203 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -339,6 +339,7 @@ extern struct device_attribute *ahci_sdev_attrs[];
 	.sdev_attrs		= ahci_sdev_attrs
 
 extern struct ata_port_operations ahci_ops;
+extern struct ata_port_operations ahci_platform_ops;
 extern struct ata_port_operations ahci_pmp_retry_srst_ops;
 
 unsigned int ahci_dev_classify(struct ata_port *ap);
@@ -368,6 +369,7 @@ irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance);
 irqreturn_t ahci_thread_fn(int irq, void *dev_instance);
 void ahci_print_info(struct ata_host *host, const char *scc_s);
 int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis);
+void ahci_error_handler(struct ata_port *ap);
 
 static inline void __iomem *__ahci_port_base(struct ata_host *host,
 					     unsigned int port_no)
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 58debb0acc3a..ae2d73fe321e 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * copyright (c) 2013 Freescale Semiconductor, Inc.
2 * Freescale IMX AHCI SATA platform driver 3 * Freescale IMX AHCI SATA platform driver
3 * Copyright 2013 Freescale Semiconductor, Inc.
4 * 4 *
5 * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov 5 * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
6 * 6 *
@@ -25,10 +25,13 @@
25#include <linux/of_device.h> 25#include <linux/of_device.h>
26#include <linux/mfd/syscon.h> 26#include <linux/mfd/syscon.h>
27#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> 27#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
28#include <linux/libata.h>
28#include "ahci.h" 29#include "ahci.h"
29 30
30enum { 31enum {
31 HOST_TIMER1MS = 0xe0, /* Timer 1-ms */ 32 PORT_PHY_CTL = 0x178, /* Port0 PHY Control */
33 PORT_PHY_CTL_PDDQ_LOC = 0x100000, /* PORT_PHY_CTL bits */
34 HOST_TIMER1MS = 0xe0, /* Timer 1-ms */
32}; 35};
33 36
34struct imx_ahci_priv { 37struct imx_ahci_priv {
@@ -36,6 +39,56 @@ struct imx_ahci_priv {
36 struct clk *sata_ref_clk; 39 struct clk *sata_ref_clk;
37 struct clk *ahb_clk; 40 struct clk *ahb_clk;
38 struct regmap *gpr; 41 struct regmap *gpr;
42 bool no_device;
43 bool first_time;
44};
45
46static int ahci_imx_hotplug;
47module_param_named(hotplug, ahci_imx_hotplug, int, 0644);
48MODULE_PARM_DESC(hotplug, "AHCI IMX hot-plug support (0=Don't support, 1=support)");
49
50static void ahci_imx_error_handler(struct ata_port *ap)
51{
52 u32 reg_val;
53 struct ata_device *dev;
54 struct ata_host *host = dev_get_drvdata(ap->dev);
55 struct ahci_host_priv *hpriv = host->private_data;
56 void __iomem *mmio = hpriv->mmio;
57 struct imx_ahci_priv *imxpriv = dev_get_drvdata(ap->dev->parent);
58
59 ahci_error_handler(ap);
60
61 if (!(imxpriv->first_time) || ahci_imx_hotplug)
62 return;
63
64 imxpriv->first_time = false;
65
66 ata_for_each_dev(dev, &ap->link, ENABLED)
67 return;
68 /*
69 * Disable link to save power. An imx ahci port can't be recovered
70 * without full reset once the pddq mode is enabled making it
71 * impossible to use as part of libata LPM.
72 */
73 reg_val = readl(mmio + PORT_PHY_CTL);
74 writel(reg_val | PORT_PHY_CTL_PDDQ_LOC, mmio + PORT_PHY_CTL);
75 regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
76 IMX6Q_GPR13_SATA_MPLL_CLK_EN,
77 !IMX6Q_GPR13_SATA_MPLL_CLK_EN);
78 clk_disable_unprepare(imxpriv->sata_ref_clk);
79 imxpriv->no_device = true;
80}
81
82static struct ata_port_operations ahci_imx_ops = {
83 .inherits = &ahci_platform_ops,
84 .error_handler = ahci_imx_error_handler,
85};
86
87static const struct ata_port_info ahci_imx_port_info = {
88 .flags = AHCI_FLAG_COMMON,
89 .pio_mask = ATA_PIO4,
90 .udma_mask = ATA_UDMA6,
91 .port_ops = &ahci_imx_ops,
39}; 92};
40 93
41static int imx6q_sata_init(struct device *dev, void __iomem *mmio) 94static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
@@ -117,9 +170,51 @@ static void imx6q_sata_exit(struct device *dev)
117 clk_disable_unprepare(imxpriv->sata_ref_clk); 170 clk_disable_unprepare(imxpriv->sata_ref_clk);
118} 171}
119 172
173static int imx_ahci_suspend(struct device *dev)
174{
175 struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
176
177 /*
178 * If no_device is set, The CLKs had been gated off in the
179 * initialization so don't do it again here.
180 */
181 if (!imxpriv->no_device) {
182 regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
183 IMX6Q_GPR13_SATA_MPLL_CLK_EN,
184 !IMX6Q_GPR13_SATA_MPLL_CLK_EN);
185 clk_disable_unprepare(imxpriv->sata_ref_clk);
186 }
187
188 return 0;
189}
190
191static int imx_ahci_resume(struct device *dev)
192{
193 struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
194 int ret;
195
196 if (!imxpriv->no_device) {
197 ret = clk_prepare_enable(imxpriv->sata_ref_clk);
198 if (ret < 0) {
199 dev_err(dev, "pre-enable sata_ref clock err:%d\n", ret);
200 return ret;
201 }
202
203 regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
204 IMX6Q_GPR13_SATA_MPLL_CLK_EN,
205 IMX6Q_GPR13_SATA_MPLL_CLK_EN);
206 usleep_range(1000, 2000);
207 }
208
209 return 0;
210}
211
120static struct ahci_platform_data imx6q_sata_pdata = { 212static struct ahci_platform_data imx6q_sata_pdata = {
121 .init = imx6q_sata_init, 213 .init = imx6q_sata_init,
122 .exit = imx6q_sata_exit, 214 .exit = imx6q_sata_exit,
215 .ata_port_info = &ahci_imx_port_info,
216 .suspend = imx_ahci_suspend,
217 .resume = imx_ahci_resume,
123}; 218};
124 219
125static const struct of_device_id imx_ahci_of_match[] = { 220static const struct of_device_id imx_ahci_of_match[] = {
@@ -152,6 +247,8 @@ static int imx_ahci_probe(struct platform_device *pdev)
152 ahci_dev = &ahci_pdev->dev; 247 ahci_dev = &ahci_pdev->dev;
153 ahci_dev->parent = dev; 248 ahci_dev->parent = dev;
154 249
250 imxpriv->no_device = false;
251 imxpriv->first_time = true;
155 imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); 252 imxpriv->ahb_clk = devm_clk_get(dev, "ahb");
156 if (IS_ERR(imxpriv->ahb_clk)) { 253 if (IS_ERR(imxpriv->ahb_clk)) {
157 dev_err(dev, "can't get ahb clock.\n"); 254 dev_err(dev, "can't get ahb clock.\n");
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 7d3b85385bfc..f9554318504f 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -49,10 +49,11 @@ static struct platform_device_id ahci_devtype[] = {
 };
 MODULE_DEVICE_TABLE(platform, ahci_devtype);
 
-static struct ata_port_operations ahci_platform_ops = {
+struct ata_port_operations ahci_platform_ops = {
 	.inherits	= &ahci_ops,
 	.host_stop	= ahci_host_stop,
 };
+EXPORT_SYMBOL_GPL(ahci_platform_ops);
 
 static struct ata_port_operations ahci_platform_retry_srst_ops = {
 	.inherits	= &ahci_pmp_retry_srst_ops,
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 513ad7ed0c99..6334c8d7c3f1 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -100,7 +100,7 @@
100 100
101enum { 101enum {
102 PIIX_IOCFG = 0x54, /* IDE I/O configuration register */ 102 PIIX_IOCFG = 0x54, /* IDE I/O configuration register */
103 ICH5_PMR = 0x90, /* port mapping register */ 103 ICH5_PMR = 0x90, /* address map register */
104 ICH5_PCS = 0x92, /* port control and status */ 104 ICH5_PCS = 0x92, /* port control and status */
105 PIIX_SIDPR_BAR = 5, 105 PIIX_SIDPR_BAR = 5,
106 PIIX_SIDPR_LEN = 16, 106 PIIX_SIDPR_LEN = 16,
@@ -233,7 +233,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
233 PCI_CLASS_STORAGE_IDE << 8, 0xffff00, ich6m_sata }, 233 PCI_CLASS_STORAGE_IDE << 8, 0xffff00, ich6m_sata },
234 /* 82801GB/GR/GH (ICH7, identical to ICH6) */ 234 /* 82801GB/GR/GH (ICH7, identical to ICH6) */
235 { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata }, 235 { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
236 /* 2801GBM/GHM (ICH7M, identical to ICH6M) */ 236 /* 82801GBM/GHM (ICH7M, identical to ICH6M) */
237 { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6m_sata }, 237 { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6m_sata },
238 /* Enterprise Southbridge 2 (631xESB/632xESB) */ 238 /* Enterprise Southbridge 2 (631xESB/632xESB) */
239 { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata }, 239 { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
@@ -517,7 +517,7 @@ static int ich_pata_cable_detect(struct ata_port *ap)
517 const struct ich_laptop *lap = &ich_laptop[0]; 517 const struct ich_laptop *lap = &ich_laptop[0];
518 u8 mask; 518 u8 mask;
519 519
520 /* Check for specials - Acer Aspire 5602WLMi */ 520 /* Check for specials */
521 while (lap->device) { 521 while (lap->device) {
522 if (lap->device == pdev->device && 522 if (lap->device == pdev->device &&
523 lap->subvendor == pdev->subsystem_vendor && 523 lap->subvendor == pdev->subsystem_vendor &&
@@ -1366,38 +1366,39 @@ static const int *piix_init_sata_map(struct pci_dev *pdev,
1366 const int *map; 1366 const int *map;
1367 int i, invalid_map = 0; 1367 int i, invalid_map = 0;
1368 u8 map_value; 1368 u8 map_value;
1369 char buf[32];
1370 char *p = buf, *end = buf + sizeof(buf);
1369 1371
1370 pci_read_config_byte(pdev, ICH5_PMR, &map_value); 1372 pci_read_config_byte(pdev, ICH5_PMR, &map_value);
1371 1373
1372 map = map_db->map[map_value & map_db->mask]; 1374 map = map_db->map[map_value & map_db->mask];
1373 1375
1374 dev_info(&pdev->dev, "MAP [");
1375 for (i = 0; i < 4; i++) { 1376 for (i = 0; i < 4; i++) {
1376 switch (map[i]) { 1377 switch (map[i]) {
1377 case RV: 1378 case RV:
1378 invalid_map = 1; 1379 invalid_map = 1;
1379 pr_cont(" XX"); 1380 p += scnprintf(p, end - p, " XX");
1380 break; 1381 break;
1381 1382
1382 case NA: 1383 case NA:
1383 pr_cont(" --"); 1384 p += scnprintf(p, end - p, " --");
1384 break; 1385 break;
1385 1386
1386 case IDE: 1387 case IDE:
1387 WARN_ON((i & 1) || map[i + 1] != IDE); 1388 WARN_ON((i & 1) || map[i + 1] != IDE);
1388 pinfo[i / 2] = piix_port_info[ich_pata_100]; 1389 pinfo[i / 2] = piix_port_info[ich_pata_100];
1389 i++; 1390 i++;
1390 pr_cont(" IDE IDE"); 1391 p += scnprintf(p, end - p, " IDE IDE");
1391 break; 1392 break;
1392 1393
1393 default: 1394 default:
1394 pr_cont(" P%d", map[i]); 1395 p += scnprintf(p, end - p, " P%d", map[i]);
1395 if (i & 1) 1396 if (i & 1)
1396 pinfo[i / 2].flags |= ATA_FLAG_SLAVE_POSS; 1397 pinfo[i / 2].flags |= ATA_FLAG_SLAVE_POSS;
1397 break; 1398 break;
1398 } 1399 }
1399 } 1400 }
1400 pr_cont(" ]\n"); 1401 dev_info(&pdev->dev, "MAP [%s ]\n", buf);
1401 1402
1402 if (invalid_map) 1403 if (invalid_map)
1403 dev_err(&pdev->dev, "invalid MAP value %u\n", map_value); 1404 dev_err(&pdev->dev, "invalid MAP value %u\n", map_value);
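The rework above replaces a chain of pr_cont() calls, which can interleave with messages from other CPUs, with a single dev_info() built from a stack buffer. scnprintf() suits that accumulation: it never writes past the end pointer and returns the number of characters actually stored, so the running pointer stays in bounds even if the buffer fills. Condensed:

	char buf[32];
	char *p = buf, *end = buf + sizeof(buf);

	p += scnprintf(p, end - p, " P%d", 0);	/* safe even when buf is nearly full */
	p += scnprintf(p, end - p, " --");
	dev_info(&pdev->dev, "MAP [%s ]\n", buf);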
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index aaac4fb0d564..c482f8cadd7a 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -89,7 +89,6 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class,
89static int ahci_hardreset(struct ata_link *link, unsigned int *class, 89static int ahci_hardreset(struct ata_link *link, unsigned int *class,
90 unsigned long deadline); 90 unsigned long deadline);
91static void ahci_postreset(struct ata_link *link, unsigned int *class); 91static void ahci_postreset(struct ata_link *link, unsigned int *class);
92static void ahci_error_handler(struct ata_port *ap);
93static void ahci_post_internal_cmd(struct ata_queued_cmd *qc); 92static void ahci_post_internal_cmd(struct ata_queued_cmd *qc);
94static void ahci_dev_config(struct ata_device *dev); 93static void ahci_dev_config(struct ata_device *dev);
95#ifdef CONFIG_PM 94#ifdef CONFIG_PM
@@ -189,14 +188,15 @@ struct ata_port_operations ahci_pmp_retry_srst_ops = {
189}; 188};
190EXPORT_SYMBOL_GPL(ahci_pmp_retry_srst_ops); 189EXPORT_SYMBOL_GPL(ahci_pmp_retry_srst_ops);
191 190
192int ahci_em_messages = 1; 191static bool ahci_em_messages __read_mostly = true;
193EXPORT_SYMBOL_GPL(ahci_em_messages); 192EXPORT_SYMBOL_GPL(ahci_em_messages);
194module_param(ahci_em_messages, int, 0444); 193module_param(ahci_em_messages, bool, 0444);
195/* add other LED protocol types when they become supported */ 194/* add other LED protocol types when they become supported */
196MODULE_PARM_DESC(ahci_em_messages, 195MODULE_PARM_DESC(ahci_em_messages,
197 "AHCI Enclosure Management Message control (0 = off, 1 = on)"); 196 "AHCI Enclosure Management Message control (0 = off, 1 = on)");
198 197
199int devslp_idle_timeout = 1000; /* device sleep idle timeout in ms */ 198/* device sleep idle timeout in ms */
199static int devslp_idle_timeout __read_mostly = 1000;
200module_param(devslp_idle_timeout, int, 0644); 200module_param(devslp_idle_timeout, int, 0644);
201MODULE_PARM_DESC(devslp_idle_timeout, "device sleep idle timeout"); 201MODULE_PARM_DESC(devslp_idle_timeout, "device sleep idle timeout");
202 202
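The parameter conversion above is the standard shape for a read-only boolean module parameter; a minimal sketch with a placeholder name:

static bool my_flag __read_mostly = true;
module_param(my_flag, bool, 0444);	/* readable in sysfs, not writable */
MODULE_PARM_DESC(my_flag, "example boolean switch (0 = off, 1 = on)");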
@@ -1275,9 +1275,11 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1275{ 1275{
1276 struct ata_port *ap = link->ap; 1276 struct ata_port *ap = link->ap;
1277 struct ahci_host_priv *hpriv = ap->host->private_data; 1277 struct ahci_host_priv *hpriv = ap->host->private_data;
1278 struct ahci_port_priv *pp = ap->private_data;
1278 const char *reason = NULL; 1279 const char *reason = NULL;
1279 unsigned long now, msecs; 1280 unsigned long now, msecs;
1280 struct ata_taskfile tf; 1281 struct ata_taskfile tf;
1282 bool fbs_disabled = false;
1281 int rc; 1283 int rc;
1282 1284
1283 DPRINTK("ENTER\n"); 1285 DPRINTK("ENTER\n");
@@ -1287,6 +1289,16 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1287 if (rc && rc != -EOPNOTSUPP) 1289 if (rc && rc != -EOPNOTSUPP)
1288 ata_link_warn(link, "failed to reset engine (errno=%d)\n", rc); 1290 ata_link_warn(link, "failed to reset engine (errno=%d)\n", rc);
1289 1291
1292 /*
 1293 * According to AHCI-1.2 9.3.9: if FBS is enabled, software shall
 1294 * clear PxFBS.EN to '0' prior to issuing a software reset to devices
 1295 * attached to a port multiplier.
1296 */
1297 if (!ata_is_host_link(link) && pp->fbs_enabled) {
1298 ahci_disable_fbs(ap);
1299 fbs_disabled = true;
1300 }
1301
1290 ata_tf_init(link->device, &tf); 1302 ata_tf_init(link->device, &tf);
1291 1303
1292 /* issue the first D2H Register FIS */ 1304 /* issue the first D2H Register FIS */
@@ -1327,6 +1339,10 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1327 } else 1339 } else
1328 *class = ahci_dev_classify(ap); 1340 *class = ahci_dev_classify(ap);
1329 1341
1342 /* re-enable FBS if disabled before */
1343 if (fbs_disabled)
1344 ahci_enable_fbs(ap);
1345
1330 DPRINTK("EXIT, class=%u\n", *class); 1346 DPRINTK("EXIT, class=%u\n", *class);
1331 return 0; 1347 return 0;
1332 1348
@@ -1989,7 +2005,7 @@ static void ahci_thaw(struct ata_port *ap)
1989 writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); 2005 writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
1990} 2006}
1991 2007
1992static void ahci_error_handler(struct ata_port *ap) 2008void ahci_error_handler(struct ata_port *ap)
1993{ 2009{
1994 if (!(ap->pflags & ATA_PFLAG_FROZEN)) { 2010 if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
1995 /* restart engine */ 2011 /* restart engine */
@@ -2002,6 +2018,7 @@ static void ahci_error_handler(struct ata_port *ap)
2002 if (!ata_dev_enabled(ap->link.device)) 2018 if (!ata_dev_enabled(ap->link.device))
2003 ahci_stop_engine(ap); 2019 ahci_stop_engine(ap);
2004} 2020}
2021EXPORT_SYMBOL_GPL(ahci_error_handler);
2005 2022
2006static void ahci_post_internal_cmd(struct ata_queued_cmd *qc) 2023static void ahci_post_internal_cmd(struct ata_queued_cmd *qc)
2007{ 2024{
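Making ahci_error_handler() non-static and exporting it lets platform glue wrap the generic error handler rather than duplicate it; a hedged sketch, with imx-style names used only for illustration:

static void my_ahci_error_handler(struct ata_port *ap)
{
	ahci_error_handler(ap);		/* run the generic AHCI EH first */

	/*
	 * Then perform platform-specific recovery, e.g. gate clocks or
	 * power the PHY down when no device is attached (illustrative).
	 */
}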
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 83b1a9fb2d44..81a94a3919db 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4126,6 +4126,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
4126 { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, 4126 { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 },
4127 { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, 4127 { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA },
4128 { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, 4128 { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 },
4129 { "Slimtype DVD A DS8A9SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 },
4129 4130
4130 /* Devices we expect to fail diagnostics */ 4131 /* Devices we expect to fail diagnostics */
4131 4132
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 370462fa8e01..77bbc8266883 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2293,6 +2293,7 @@ const char *ata_get_cmd_descript(u8 command)
2293 { ATA_CMD_IDLE, "IDLE" }, 2293 { ATA_CMD_IDLE, "IDLE" },
2294 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2294 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2295 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2295 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2296 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2296 { ATA_CMD_NOP, "NOP" }, 2297 { ATA_CMD_NOP, "NOP" },
2297 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2298 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2298 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2299 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
@@ -2313,6 +2314,8 @@ const char *ata_get_cmd_descript(u8 command)
2313 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2314 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2314 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2315 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2315 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2316 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2317 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2318 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2316 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2319 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2317 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2320 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2318 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2321 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
@@ -2339,12 +2342,15 @@ const char *ata_get_cmd_descript(u8 command)
2339 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2342 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2340 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2343 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2341 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2344 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2345 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2342 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2346 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2343 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2347 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2344 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2348 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2345 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2349 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2346 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2350 { ATA_CMD_PMP_READ, "READ BUFFER" },
2351 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2347 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2352 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2353 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2348 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2354 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2349 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, 2355 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2350 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2356 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
@@ -2363,6 +2369,8 @@ const char *ata_get_cmd_descript(u8 command)
2363 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2369 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2364 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2370 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2365 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2371 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2372 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2373 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2366 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2374 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2367 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2375 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2368 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2376 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 150a917f0c3c..e37413228228 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -321,25 +321,25 @@ int ata_tport_add(struct device *parent,
321/* 321/*
322 * ATA link attributes 322 * ATA link attributes
323 */ 323 */
324static int noop(int x) { return x; }
324 325
325 326#define ata_link_show_linkspeed(field, format) \
326#define ata_link_show_linkspeed(field) \
327static ssize_t \ 327static ssize_t \
328show_ata_link_##field(struct device *dev, \ 328show_ata_link_##field(struct device *dev, \
329 struct device_attribute *attr, char *buf) \ 329 struct device_attribute *attr, char *buf) \
330{ \ 330{ \
331 struct ata_link *link = transport_class_to_link(dev); \ 331 struct ata_link *link = transport_class_to_link(dev); \
332 \ 332 \
333 return sprintf(buf,"%s\n", sata_spd_string(fls(link->field))); \ 333 return sprintf(buf, "%s\n", sata_spd_string(format(link->field))); \
334} 334}
335 335
336#define ata_link_linkspeed_attr(field) \ 336#define ata_link_linkspeed_attr(field, format) \
337 ata_link_show_linkspeed(field) \ 337 ata_link_show_linkspeed(field, format) \
338static DEVICE_ATTR(field, S_IRUGO, show_ata_link_##field, NULL) 338static DEVICE_ATTR(field, S_IRUGO, show_ata_link_##field, NULL)
339 339
340ata_link_linkspeed_attr(hw_sata_spd_limit); 340ata_link_linkspeed_attr(hw_sata_spd_limit, fls);
341ata_link_linkspeed_attr(sata_spd_limit); 341ata_link_linkspeed_attr(sata_spd_limit, fls);
342ata_link_linkspeed_attr(sata_spd); 342ata_link_linkspeed_attr(sata_spd, noop);
343 343
344 344
345static DECLARE_TRANSPORT_CLASS(ata_link_class, 345static DECLARE_TRANSPORT_CLASS(ata_link_class,
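The new format argument separates the two kinds of fields: hw_sata_spd_limit and sata_spd_limit are bit masks of allowed speeds, so fls() converts the mask to the index of the fastest permitted generation, while sata_spd already stores the negotiated speed as an index and only needs the pass-through noop(). Previously fls() was applied to sata_spd as well, which mis-reported a 6.0 Gbps link (spd == 3, fls(3) == 2) as 3.0 Gbps. Illustrative values:

	/* limit mask 0x3 (gen1|gen2): fls(0x3) == 2 -> sata_spd_string(2) == "3.0 Gbps" */
	/* current speed sata_spd == 2 is already an index -> "3.0 Gbps" as well */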
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index 7f5e5d96327f..ea3b3dc10f33 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -343,13 +343,11 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr)
343{ 343{
344 struct device_node *sata_node = dev->of_node; 344 struct device_node *sata_node = dev->of_node;
345 int phy_count = 0, phy, port = 0, i; 345 int phy_count = 0, phy, port = 0, i;
346 void __iomem *cphy_base[CPHY_PHY_COUNT]; 346 void __iomem *cphy_base[CPHY_PHY_COUNT] = {};
347 struct device_node *phy_nodes[CPHY_PHY_COUNT]; 347 struct device_node *phy_nodes[CPHY_PHY_COUNT] = {};
348 u32 tx_atten[CPHY_PORT_COUNT]; 348 u32 tx_atten[CPHY_PORT_COUNT] = {};
349 349
350 memset(port_data, 0, sizeof(struct phy_lane_info) * CPHY_PORT_COUNT); 350 memset(port_data, 0, sizeof(struct phy_lane_info) * CPHY_PORT_COUNT);
351 memset(phy_nodes, 0, sizeof(struct device_node*) * CPHY_PHY_COUNT);
352 memset(tx_atten, 0xff, CPHY_PORT_COUNT);
353 351
354 do { 352 do {
355 u32 tmp; 353 u32 tmp;
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index c2d95e9fb971..1dae9a9009f7 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -792,7 +792,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
792 dev_err(&pdev->dev, "failed to get access to sata clock\n"); 792 dev_err(&pdev->dev, "failed to get access to sata clock\n");
793 return PTR_ERR(priv->clk); 793 return PTR_ERR(priv->clk);
794 } 794 }
795 clk_enable(priv->clk); 795 clk_prepare_enable(priv->clk);
796 796
797 host = ata_host_alloc(&pdev->dev, 1); 797 host = ata_host_alloc(&pdev->dev, 1);
798 if (!host) { 798 if (!host) {
@@ -822,7 +822,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
822 return 0; 822 return 0;
823 823
824cleanup: 824cleanup:
825 clk_disable(priv->clk); 825 clk_disable_unprepare(priv->clk);
826 826
827 return ret; 827 return ret;
828} 828}
@@ -841,7 +841,7 @@ static int sata_rcar_remove(struct platform_device *pdev)
841 iowrite32(0, base + SATAINTSTAT_REG); 841 iowrite32(0, base + SATAINTSTAT_REG);
842 iowrite32(0x7ff, base + SATAINTMASK_REG); 842 iowrite32(0x7ff, base + SATAINTMASK_REG);
843 843
844 clk_disable(priv->clk); 844 clk_disable_unprepare(priv->clk);
845 845
846 return 0; 846 return 0;
847} 847}
@@ -861,7 +861,7 @@ static int sata_rcar_suspend(struct device *dev)
861 /* mask */ 861 /* mask */
862 iowrite32(0x7ff, base + SATAINTMASK_REG); 862 iowrite32(0x7ff, base + SATAINTMASK_REG);
863 863
864 clk_disable(priv->clk); 864 clk_disable_unprepare(priv->clk);
865 } 865 }
866 866
867 return ret; 867 return ret;
@@ -873,7 +873,7 @@ static int sata_rcar_resume(struct device *dev)
873 struct sata_rcar_priv *priv = host->private_data; 873 struct sata_rcar_priv *priv = host->private_data;
874 void __iomem *base = priv->base; 874 void __iomem *base = priv->base;
875 875
876 clk_enable(priv->clk); 876 clk_prepare_enable(priv->clk);
877 877
878 /* ack and mask */ 878 /* ack and mask */
879 iowrite32(0, base + SATAINTSTAT_REG); 879 iowrite32(0, base + SATAINTSTAT_REG);
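On a common-clock-framework platform a clock must be prepared as well as enabled, so the driver now uses the combined helpers and keeps them balanced across probe/remove and suspend/resume. The general shape, with priv as an illustrative placeholder:

	int ret;

	ret = clk_prepare_enable(priv->clk);	/* prepare + enable in one call */
	if (ret)
		return ret;

	/* ... use the hardware ... */

	clk_disable_unprepare(priv->clk);	/* matching disable + unprepare */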
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 7413d065906b..0f3820121e02 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -216,7 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
216 newattrs.ia_gid = gid; 216 newattrs.ia_gid = gid;
217 newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID; 217 newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
218 mutex_lock(&dentry->d_inode->i_mutex); 218 mutex_lock(&dentry->d_inode->i_mutex);
219 notify_change(dentry, &newattrs); 219 notify_change(dentry, &newattrs, NULL);
220 mutex_unlock(&dentry->d_inode->i_mutex); 220 mutex_unlock(&dentry->d_inode->i_mutex);
221 221
222 /* mark as kernel-created inode */ 222 /* mark as kernel-created inode */
@@ -322,9 +322,9 @@ static int handle_remove(const char *nodename, struct device *dev)
322 newattrs.ia_valid = 322 newattrs.ia_valid =
323 ATTR_UID|ATTR_GID|ATTR_MODE; 323 ATTR_UID|ATTR_GID|ATTR_MODE;
324 mutex_lock(&dentry->d_inode->i_mutex); 324 mutex_lock(&dentry->d_inode->i_mutex);
325 notify_change(dentry, &newattrs); 325 notify_change(dentry, &newattrs, NULL);
326 mutex_unlock(&dentry->d_inode->i_mutex); 326 mutex_unlock(&dentry->d_inode->i_mutex);
327 err = vfs_unlink(parent.dentry->d_inode, dentry); 327 err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
328 if (!err || err == -ENOENT) 328 if (!err || err == -ENOENT)
329 deleted = 1; 329 deleted = 1;
330 } 330 }
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 2f685f6eda48..ffa97d261cf3 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -114,7 +114,7 @@ static int misc_open(struct inode * inode, struct file * file)
114 int minor = iminor(inode); 114 int minor = iminor(inode);
115 struct miscdevice *c; 115 struct miscdevice *c;
116 int err = -ENODEV; 116 int err = -ENODEV;
117 const struct file_operations *old_fops, *new_fops = NULL; 117 const struct file_operations *new_fops = NULL;
118 118
119 mutex_lock(&misc_mtx); 119 mutex_lock(&misc_mtx);
120 120
@@ -141,17 +141,11 @@ static int misc_open(struct inode * inode, struct file * file)
141 } 141 }
142 142
143 err = 0; 143 err = 0;
144 old_fops = file->f_op; 144 replace_fops(file, new_fops);
145 file->f_op = new_fops;
146 if (file->f_op->open) { 145 if (file->f_op->open) {
147 file->private_data = c; 146 file->private_data = c;
148 err=file->f_op->open(inode,file); 147 err = file->f_op->open(inode,file);
149 if (err) {
150 fops_put(file->f_op);
151 file->f_op = fops_get(old_fops);
152 }
153 } 148 }
154 fops_put(old_fops);
155fail: 149fail:
156 mutex_unlock(&misc_mtx); 150 mutex_unlock(&misc_mtx);
157 return err; 151 return err;
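misc_open() now follows the same pattern the drm, dvb and usb open paths below are converted to: take a reference on the registered fops, install it with replace_fops(), and keep it installed even if ->open() fails, since the final fput() releases whatever ->f_op the file ends up with. A condensed sketch, with registered_fops as a placeholder:

	new_fops = fops_get(registered_fops);	/* NULL if the owning module is gone */
	if (!new_fops)
		goto fail;

	replace_fops(file, new_fops);		/* consumes the reference */
	if (file->f_op->open)
		err = file->f_op->open(inode, file);
	/* no rollback of ->f_op on error; __fput() cleans up */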
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3f84277d7036..22d14ecbd3ec 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -148,7 +148,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
148 struct drm_minor *minor; 148 struct drm_minor *minor;
149 int minor_id = iminor(inode); 149 int minor_id = iminor(inode);
150 int err = -ENODEV; 150 int err = -ENODEV;
151 const struct file_operations *old_fops; 151 const struct file_operations *new_fops;
152 152
153 DRM_DEBUG("\n"); 153 DRM_DEBUG("\n");
154 154
@@ -163,18 +163,13 @@ int drm_stub_open(struct inode *inode, struct file *filp)
163 if (drm_device_is_unplugged(dev)) 163 if (drm_device_is_unplugged(dev))
164 goto out; 164 goto out;
165 165
166 old_fops = filp->f_op; 166 new_fops = fops_get(dev->driver->fops);
167 filp->f_op = fops_get(dev->driver->fops); 167 if (!new_fops)
168 if (filp->f_op == NULL) {
169 filp->f_op = old_fops;
170 goto out; 168 goto out;
171 }
172 if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
173 fops_put(filp->f_op);
174 filp->f_op = fops_get(old_fops);
175 }
176 fops_put(old_fops);
177 169
170 replace_fops(filp, new_fops);
171 if (filp->f_op->open)
172 err = filp->f_op->open(inode, filp);
178out: 173out:
179 mutex_unlock(&drm_global_mutex); 174 mutex_unlock(&drm_global_mutex);
180 return err; 175 return err;
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 0b4616b87195..c0363f1b6c90 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -206,8 +206,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
206 /* TODO */ 206 /* TODO */
207 dvbdev->users--; 207 dvbdev->users--;
208 if (dvbdev->users == 1 && dmxdev->exit == 1) { 208 if (dvbdev->users == 1 && dmxdev->exit == 1) {
209 fops_put(file->f_op);
210 file->f_op = NULL;
211 mutex_unlock(&dmxdev->mutex); 209 mutex_unlock(&dmxdev->mutex);
212 wake_up(&dvbdev->wait_queue); 210 wake_up(&dvbdev->wait_queue);
213 } else 211 } else
@@ -1120,8 +1118,6 @@ static int dvb_demux_release(struct inode *inode, struct file *file)
1120 mutex_lock(&dmxdev->mutex); 1118 mutex_lock(&dmxdev->mutex);
1121 dmxdev->dvbdev->users--; 1119 dmxdev->dvbdev->users--;
1122 if(dmxdev->dvbdev->users==1 && dmxdev->exit==1) { 1120 if(dmxdev->dvbdev->users==1 && dmxdev->exit==1) {
1123 fops_put(file->f_op);
1124 file->f_op = NULL;
1125 mutex_unlock(&dmxdev->mutex); 1121 mutex_unlock(&dmxdev->mutex);
1126 wake_up(&dmxdev->dvbdev->wait_queue); 1122 wake_up(&dmxdev->dvbdev->wait_queue);
1127 } else 1123 } else
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 401ef64f92c6..983db75de350 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -74,22 +74,15 @@ static int dvb_device_open(struct inode *inode, struct file *file)
74 74
75 if (dvbdev && dvbdev->fops) { 75 if (dvbdev && dvbdev->fops) {
76 int err = 0; 76 int err = 0;
77 const struct file_operations *old_fops; 77 const struct file_operations *new_fops;
78 78
79 file->private_data = dvbdev; 79 new_fops = fops_get(dvbdev->fops);
80 old_fops = file->f_op; 80 if (!new_fops)
81 file->f_op = fops_get(dvbdev->fops);
82 if (file->f_op == NULL) {
83 file->f_op = old_fops;
84 goto fail; 81 goto fail;
85 } 82 file->private_data = dvbdev;
86 if(file->f_op->open) 83 replace_fops(file, new_fops);
84 if (file->f_op->open)
87 err = file->f_op->open(inode,file); 85 err = file->f_op->open(inode,file);
88 if (err) {
89 fops_put(file->f_op);
90 file->f_op = fops_get(old_fops);
91 }
92 fops_put(old_fops);
93 up_read(&minor_rwsem); 86 up_read(&minor_rwsem);
94 mutex_unlock(&dvbdev_mutex); 87 mutex_unlock(&dvbdev_mutex);
95 return err; 88 return err;
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index bdc1d15369f8..d1c7de92cfdf 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -575,7 +575,7 @@ static int alloc_device(struct nandsim *ns)
575 cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600); 575 cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
576 if (IS_ERR(cfile)) 576 if (IS_ERR(cfile))
577 return PTR_ERR(cfile); 577 return PTR_ERR(cfile);
578 if (!cfile->f_op || (!cfile->f_op->read && !cfile->f_op->aio_read)) { 578 if (!cfile->f_op->read && !cfile->f_op->aio_read) {
579 NS_ERR("alloc_device: cache file not readable\n"); 579 NS_ERR("alloc_device: cache file not readable\n");
580 err = -EINVAL; 580 err = -EINVAL;
581 goto err_close; 581 goto err_close;
diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index c071d410488f..33bb1f2b63e4 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -900,10 +900,9 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
900 * number. 900 * number.
901 */ 901 */
902 image_seq = be32_to_cpu(ech->image_seq); 902 image_seq = be32_to_cpu(ech->image_seq);
903 if (!ubi->image_seq && image_seq) 903 if (!ubi->image_seq)
904 ubi->image_seq = image_seq; 904 ubi->image_seq = image_seq;
905 if (ubi->image_seq && image_seq && 905 if (image_seq && ubi->image_seq != image_seq) {
906 ubi->image_seq != image_seq) {
907 ubi_err("bad image sequence number %d in PEB %d, expected %d", 906 ubi_err("bad image sequence number %d in PEB %d, expected %d",
908 image_seq, pnum, ubi->image_seq); 907 image_seq, pnum, ubi->image_seq);
909 ubi_dump_ec_hdr(ech); 908 ubi_dump_ec_hdr(ech);
@@ -1417,9 +1416,11 @@ int ubi_attach(struct ubi_device *ubi, int force_scan)
1417 ai = alloc_ai("ubi_aeb_slab_cache2"); 1416 ai = alloc_ai("ubi_aeb_slab_cache2");
1418 if (!ai) 1417 if (!ai)
1419 return -ENOMEM; 1418 return -ENOMEM;
1420 }
1421 1419
1422 err = scan_all(ubi, ai, UBI_FM_MAX_START); 1420 err = scan_all(ubi, ai, 0);
1421 } else {
1422 err = scan_all(ubi, ai, UBI_FM_MAX_START);
1423 }
1423 } 1424 }
1424 } 1425 }
1425#else 1426#else
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index f5aa4b02cfa6..ead861307b3c 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -407,6 +407,7 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai,
407 */ 407 */
408 for (i = 0; i < pool_size; i++) { 408 for (i = 0; i < pool_size; i++) {
409 int scrub = 0; 409 int scrub = 0;
410 int image_seq;
410 411
411 pnum = be32_to_cpu(pebs[i]); 412 pnum = be32_to_cpu(pebs[i]);
412 413
@@ -425,10 +426,16 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai,
425 } else if (ret == UBI_IO_BITFLIPS) 426 } else if (ret == UBI_IO_BITFLIPS)
426 scrub = 1; 427 scrub = 1;
427 428
428 if (be32_to_cpu(ech->image_seq) != ubi->image_seq) { 429 /*
430 * Older UBI implementations have image_seq set to zero, so
431 * we shouldn't fail if image_seq == 0.
432 */
433 image_seq = be32_to_cpu(ech->image_seq);
434
435 if (image_seq && (image_seq != ubi->image_seq)) {
429 ubi_err("bad image seq: 0x%x, expected: 0x%x", 436 ubi_err("bad image seq: 0x%x, expected: 0x%x",
430 be32_to_cpu(ech->image_seq), ubi->image_seq); 437 be32_to_cpu(ech->image_seq), ubi->image_seq);
431 err = UBI_BAD_FASTMAP; 438 ret = UBI_BAD_FASTMAP;
432 goto out; 439 goto out;
433 } 440 }
434 441
@@ -819,6 +826,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
819 list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) 826 list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list)
820 list_move_tail(&tmp_aeb->u.list, &ai->free); 827 list_move_tail(&tmp_aeb->u.list, &ai->free);
821 828
829 ubi_assert(list_empty(&used));
830 ubi_assert(list_empty(&eba_orphans));
831 ubi_assert(list_empty(&free));
832
822 /* 833 /*
823 * If fastmap is leaking PEBs (must not happen), raise a 834 * If fastmap is leaking PEBs (must not happen), raise a
824 * fat warning and fall back to scanning mode. 835 * fat warning and fall back to scanning mode.
@@ -834,6 +845,19 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
834fail_bad: 845fail_bad:
835 ret = UBI_BAD_FASTMAP; 846 ret = UBI_BAD_FASTMAP;
836fail: 847fail:
848 list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) {
849 kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
850 list_del(&tmp_aeb->u.list);
851 }
852 list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &eba_orphans, u.list) {
853 kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
854 list_del(&tmp_aeb->u.list);
855 }
856 list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) {
857 kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
858 list_del(&tmp_aeb->u.list);
859 }
860
837 return ret; 861 return ret;
838} 862}
839 863
@@ -923,6 +947,8 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
923 } 947 }
924 948
925 for (i = 0; i < used_blocks; i++) { 949 for (i = 0; i < used_blocks; i++) {
950 int image_seq;
951
926 pnum = be32_to_cpu(fmsb->block_loc[i]); 952 pnum = be32_to_cpu(fmsb->block_loc[i]);
927 953
928 if (ubi_io_is_bad(ubi, pnum)) { 954 if (ubi_io_is_bad(ubi, pnum)) {
@@ -940,10 +966,17 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
940 } else if (ret == UBI_IO_BITFLIPS) 966 } else if (ret == UBI_IO_BITFLIPS)
941 fm->to_be_tortured[i] = 1; 967 fm->to_be_tortured[i] = 1;
942 968
969 image_seq = be32_to_cpu(ech->image_seq);
943 if (!ubi->image_seq) 970 if (!ubi->image_seq)
944 ubi->image_seq = be32_to_cpu(ech->image_seq); 971 ubi->image_seq = image_seq;
945 972
946 if (be32_to_cpu(ech->image_seq) != ubi->image_seq) { 973 /*
974 * Older UBI implementations have image_seq set to zero, so
975 * we shouldn't fail if image_seq == 0.
976 */
977 if (image_seq && (image_seq != ubi->image_seq)) {
978 ubi_err("wrong image seq:%d instead of %d",
979 be32_to_cpu(ech->image_seq), ubi->image_seq);
947 ret = UBI_BAD_FASTMAP; 980 ret = UBI_BAD_FASTMAP;
948 goto free_hdr; 981 goto free_hdr;
949 } 982 }
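Both fastmap paths now read image_seq once and treat zero as "unknown" rather than as a mismatch, because images written by older UBI versions carry image_seq == 0. The check effectively reduces to:

	image_seq = be32_to_cpu(ech->image_seq);
	if (!ubi->image_seq)
		ubi->image_seq = image_seq;	/* adopt the first value seen */
	if (image_seq && image_seq != ubi->image_seq)
		ret = UBI_BAD_FASTMAP;		/* only a real conflict is fatal */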
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index c95bfb183c62..02317c1c0238 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -599,10 +599,6 @@ static void refill_wl_user_pool(struct ubi_device *ubi)
599 return_unused_pool_pebs(ubi, pool); 599 return_unused_pool_pebs(ubi, pool);
600 600
601 for (pool->size = 0; pool->size < pool->max_size; pool->size++) { 601 for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
602 if (!ubi->free.rb_node ||
603 (ubi->free_count - ubi->beb_rsvd_pebs < 1))
604 break;
605
606 pool->pebs[pool->size] = __wl_get_peb(ubi); 602 pool->pebs[pool->size] = __wl_get_peb(ubi);
607 if (pool->pebs[pool->size] < 0) 603 if (pool->pebs[pool->size] < 0)
608 break; 604 break;
diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c
index 2dfb06aedb15..1e9da405d833 100644
--- a/drivers/staging/comedi/comedi_compat32.c
+++ b/drivers/staging/comedi/comedi_compat32.c
@@ -86,9 +86,6 @@ struct comedi32_insnlist_struct {
86static int translated_ioctl(struct file *file, unsigned int cmd, 86static int translated_ioctl(struct file *file, unsigned int cmd,
87 unsigned long arg) 87 unsigned long arg)
88{ 88{
89 if (!file->f_op)
90 return -ENOTTY;
91
92 if (file->f_op->unlocked_ioctl) 89 if (file->f_op->unlocked_ioctl)
93 return file->f_op->unlocked_ioctl(file, cmd, arg); 90 return file->f_op->unlocked_ioctl(file, cmd, arg);
94 91
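This is one instance of a theme running through the series (see also the autofs, dvb and nandsim hunks): every open path now installs a valid ->f_op and nothing resets it to NULL afterwards, so the defensive NULL checks can go. The pattern shrinks from

	if (!file->f_op || !file->f_op->unlocked_ioctl)	/* old, defensive form */
		return -ENOTTY;

to

	if (!file->f_op->unlocked_ioctl)		/* ->f_op is never NULL now */
		return -ENOTTY;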
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 359c6c1ba68d..eefdb8d061b1 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
105#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry) 105#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
106#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev) 106#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
107#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry) 107#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
108#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \ 108#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \
109 vfs_rename(old,old_dir,new,new_dir) 109 vfs_rename(old,old_dir,new,new_dir,delegated_inode)
110 110
111#define cfs_bio_io_error(a,b) bio_io_error((a)) 111#define cfs_bio_io_error(a,b) bio_io_error((a))
112#define cfs_bio_endio(a,b,c) bio_endio((a),(c)) 112#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 34815b550e71..90bbdae824ac 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -83,7 +83,7 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
83} 83}
84 84
85 85
86/* called from iget5_locked->find_inode() under inode_lock spinlock */ 86/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
87static int ll_test_inode(struct inode *inode, void *opaque) 87static int ll_test_inode(struct inode *inode, void *opaque)
88{ 88{
89 struct ll_inode_info *lli = ll_i2info(inode); 89 struct ll_inode_info *lli = ll_i2info(inode);
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
index 1ef06fea793b..09474e7553dd 100644
--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -224,7 +224,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
224 GOTO(put_old, err = PTR_ERR(dchild_new)); 224 GOTO(put_old, err = PTR_ERR(dchild_new));
225 225
226 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, 226 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
227 dir->d_inode, dchild_new, mnt); 227 dir->d_inode, dchild_new, mnt, NULL);
228 228
229 dput(dchild_new); 229 dput(dchild_new);
230put_old: 230put_old:
diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h
index 44f24fa31a34..36523edf6a71 100644
--- a/drivers/staging/rtl8188eu/include/osdep_service.h
+++ b/drivers/staging/rtl8188eu/include/osdep_service.h
@@ -430,11 +430,6 @@ int ATOMIC_SUB_RETURN(ATOMIC_T *v, int i);
430int ATOMIC_INC_RETURN(ATOMIC_T *v); 430int ATOMIC_INC_RETURN(ATOMIC_T *v);
431int ATOMIC_DEC_RETURN(ATOMIC_T *v); 431int ATOMIC_DEC_RETURN(ATOMIC_T *v);
432 432
433/* File operation APIs, just for linux now */
434int rtw_is_file_readable(char *path);
435int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz);
436int rtw_store_to_file(char *path, u8 __user *buf, u32 sz);
437
438struct rtw_netdev_priv_indicator { 433struct rtw_netdev_priv_indicator {
439 void *priv; 434 void *priv;
440 u32 sizeof_priv; 435 u32 sizeof_priv;
diff --git a/drivers/staging/rtl8188eu/os_dep/osdep_service.c b/drivers/staging/rtl8188eu/os_dep/osdep_service.c
index 5a9e9e4558aa..a1ae72772c5f 100644
--- a/drivers/staging/rtl8188eu/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8188eu/os_dep/osdep_service.c
@@ -356,214 +356,6 @@ inline int ATOMIC_DEC_RETURN(ATOMIC_T *v)
356 return atomic_dec_return(v); 356 return atomic_dec_return(v);
357} 357}
358 358
359/* Open a file with the specific @param path, @param flag, @param mode
360 * @param fpp the pointer of struct file pointer to get struct file pointer while file opening is success
361 * @param path the path of the file to open
362 * @param flag file operation flags, please refer to linux document
363 * @param mode please refer to linux document
364 * @return Linux specific error code
365 */
366static int openfile(struct file **fpp, char *path, int flag, int mode)
367{
368 struct file *fp;
369
370 fp = filp_open(path, flag, mode);
371 if (IS_ERR(fp)) {
372 *fpp = NULL;
373 return PTR_ERR(fp);
374 } else {
375 *fpp = fp;
376 return 0;
377 }
378}
379
380/* Close the file with the specific @param fp
381 * @param fp the pointer of struct file to close
382 * @return always 0
383 */
384static int closefile(struct file *fp)
385{
386 filp_close(fp, NULL);
387 return 0;
388}
389
390static int readfile(struct file *fp, char __user *buf, int len)
391{
392 int rlen = 0, sum = 0;
393
394 if (!fp->f_op || !fp->f_op->read)
395 return -EPERM;
396
397 while (sum < len) {
398 rlen = fp->f_op->read(fp, buf+sum, len-sum, &fp->f_pos);
399 if (rlen > 0)
400 sum += rlen;
401 else if (0 != rlen)
402 return rlen;
403 else
404 break;
405 }
406 return sum;
407}
408
409static int writefile(struct file *fp, char __user *buf, int len)
410{
411 int wlen = 0, sum = 0;
412
413 if (!fp->f_op || !fp->f_op->write)
414 return -EPERM;
415
416 while (sum < len) {
417 wlen = fp->f_op->write(fp, buf+sum, len-sum, &fp->f_pos);
418 if (wlen > 0)
419 sum += wlen;
420 else if (0 != wlen)
421 return wlen;
422 else
423 break;
424 }
425 return sum;
426}
427
428/* Test if the specifi @param path is a file and readable
429 * @param path the path of the file to test
430 * @return Linux specific error code
431 */
432static int isfilereadable(char *path)
433{
434 struct file *fp;
435 int ret = 0;
436 mm_segment_t oldfs;
437 char __user buf;
438
439 fp = filp_open(path, O_RDONLY, 0);
440 if (IS_ERR(fp)) {
441 ret = PTR_ERR(fp);
442 } else {
443 oldfs = get_fs(); set_fs(get_ds());
444
445 if (1 != readfile(fp, &buf, 1))
446 ret = PTR_ERR(fp);
447
448 set_fs(oldfs);
449 filp_close(fp, NULL);
450 }
451 return ret;
452}
453
454/* Open the file with @param path and retrive the file content into
455 * memory starting from @param buf for @param sz at most
456 * @param path the path of the file to open and read
457 * @param buf the starting address of the buffer to store file content
458 * @param sz how many bytes to read at most
459 * @return the byte we've read, or Linux specific error code
460 */
461static int retrievefromfile(char *path, u8 __user *buf, u32 sz)
462{
463 int ret = -1;
464 mm_segment_t oldfs;
465 struct file *fp;
466
467 if (path && buf) {
468 ret = openfile(&fp, path, O_RDONLY, 0);
469 if (0 == ret) {
470 DBG_88E("%s openfile path:%s fp =%p\n", __func__,
471 path, fp);
472
473 oldfs = get_fs(); set_fs(get_ds());
474 ret = readfile(fp, buf, sz);
475 set_fs(oldfs);
476 closefile(fp);
477
478 DBG_88E("%s readfile, ret:%d\n", __func__, ret);
479
480 } else {
481 DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__,
482 path, ret);
483 }
484 } else {
485 DBG_88E("%s NULL pointer\n", __func__);
486 ret = -EINVAL;
487 }
488 return ret;
489}
490
491/*
492* Open the file with @param path and wirte @param sz byte of data starting from @param buf into the file
493* @param path the path of the file to open and write
494* @param buf the starting address of the data to write into file
495* @param sz how many bytes to write at most
496* @return the byte we've written, or Linux specific error code
497*/
498static int storetofile(char *path, u8 __user *buf, u32 sz)
499{
500 int ret = 0;
501 mm_segment_t oldfs;
502 struct file *fp;
503
504 if (path && buf) {
505 ret = openfile(&fp, path, O_CREAT|O_WRONLY, 0666);
506 if (0 == ret) {
507 DBG_88E("%s openfile path:%s fp =%p\n", __func__, path, fp);
508
509 oldfs = get_fs(); set_fs(get_ds());
510 ret = writefile(fp, buf, sz);
511 set_fs(oldfs);
512 closefile(fp);
513
514 DBG_88E("%s writefile, ret:%d\n", __func__, ret);
515
516 } else {
517 DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__, path, ret);
518 }
519 } else {
520 DBG_88E("%s NULL pointer\n", __func__);
521 ret = -EINVAL;
522 }
523 return ret;
524}
525
526/*
527* Test if the specifi @param path is a file and readable
528* @param path the path of the file to test
529* @return true or false
530*/
531int rtw_is_file_readable(char *path)
532{
533 if (isfilereadable(path) == 0)
534 return true;
535 else
536 return false;
537}
538
539/*
540* Open the file with @param path and retrive the file content into memory starting from @param buf for @param sz at most
541* @param path the path of the file to open and read
542* @param buf the starting address of the buffer to store file content
543* @param sz how many bytes to read at most
544* @return the byte we've read
545*/
546int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz)
547{
548 int ret = retrievefromfile(path, buf, sz);
549
550 return ret >= 0 ? ret : 0;
551}
552
553/*
554 * Open the file with @param path and wirte @param sz byte of data
555 * starting from @param buf into the file
556 * @param path the path of the file to open and write
557 * @param buf the starting address of the data to write into file
558 * @param sz how many bytes to write at most
559 * @return the byte we've written
560 */
561int rtw_store_to_file(char *path, u8 __user *buf, u32 sz)
562{
563 int ret = storetofile(path, buf, sz);
564 return ret >= 0 ? ret : 0;
565}
566
567struct net_device *rtw_alloc_etherdev_with_old_priv(int sizeof_priv, 359struct net_device *rtw_alloc_etherdev_with_old_priv(int sizeof_priv,
568 void *old_priv) 360 void *old_priv)
569{ 361{
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 3bdfbf88a0ae..ea337a718cc1 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -29,27 +29,19 @@ static DECLARE_RWSEM(minor_rwsem);
29 29
30static int usb_open(struct inode *inode, struct file *file) 30static int usb_open(struct inode *inode, struct file *file)
31{ 31{
32 int minor = iminor(inode);
33 const struct file_operations *c;
34 int err = -ENODEV; 32 int err = -ENODEV;
35 const struct file_operations *old_fops, *new_fops = NULL; 33 const struct file_operations *new_fops;
36 34
37 down_read(&minor_rwsem); 35 down_read(&minor_rwsem);
38 c = usb_minors[minor]; 36 new_fops = fops_get(usb_minors[iminor(inode)]);
39 37
40 if (!c || !(new_fops = fops_get(c))) 38 if (!new_fops)
41 goto done; 39 goto done;
42 40
43 old_fops = file->f_op; 41 replace_fops(file, new_fops);
44 file->f_op = new_fops;
45 /* Curiouser and curiouser... NULL ->open() as "no device" ? */ 42 /* Curiouser and curiouser... NULL ->open() as "no device" ? */
46 if (file->f_op->open) 43 if (file->f_op->open)
47 err = file->f_op->open(inode, file); 44 err = file->f_op->open(inode, file);
48 if (err) {
49 fops_put(file->f_op);
50 file->f_op = fops_get(old_fops);
51 }
52 fops_put(old_fops);
53 done: 45 done:
54 up_read(&minor_rwsem); 46 up_read(&minor_rwsem);
55 return err; 47 return err;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 40cc54ced5d9..2f9675491095 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
101 101
102#else /* CONFIG_9P_FSCACHE */ 102#else /* CONFIG_9P_FSCACHE */
103 103
104static inline void v9fs_cache_inode_get_cookie(struct inode *inode)
105{
106}
107
108static inline void v9fs_cache_inode_put_cookie(struct inode *inode)
109{
110}
111
112static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file)
113{
114}
115
104static inline int v9fs_fscache_release_page(struct page *page, 116static inline int v9fs_fscache_release_page(struct page *page,
105 gfp_t gfp) { 117 gfp_t gfp) {
106 return 1; 118 return 1;
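The new empty stubs are what allow the #ifdef CONFIG_9P_FSCACHE blocks to disappear from the call sites in the following hunks: when the option is off, the calls compile to nothing. The general pattern, with foo_* as placeholder names:

#ifdef CONFIG_FOO_CACHE
void foo_cache_get_cookie(struct inode *inode);
#else
static inline void foo_cache_get_cookie(struct inode *inode)
{
}
#endif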
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index aa5ecf479a57..a0df3e73c2b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
105 v9inode->writeback_fid = (void *) fid; 105 v9inode->writeback_fid = (void *) fid;
106 } 106 }
107 mutex_unlock(&v9inode->v_mutex); 107 mutex_unlock(&v9inode->v_mutex);
108#ifdef CONFIG_9P_FSCACHE
109 if (v9ses->cache) 108 if (v9ses->cache)
110 v9fs_cache_inode_set_cookie(inode, file); 109 v9fs_cache_inode_set_cookie(inode, file);
111#endif
112 return 0; 110 return 0;
113out_error: 111out_error:
114 p9_client_clunk(file->private_data); 112 p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 94de6d1482e2..4e65aa903345 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
448 clear_inode(inode); 448 clear_inode(inode);
449 filemap_fdatawrite(inode->i_mapping); 449 filemap_fdatawrite(inode->i_mapping);
450 450
451#ifdef CONFIG_9P_FSCACHE
452 v9fs_cache_inode_put_cookie(inode); 451 v9fs_cache_inode_put_cookie(inode);
453#endif
454 /* clunk the fid stashed in writeback_fid */ 452 /* clunk the fid stashed in writeback_fid */
455 if (v9inode->writeback_fid) { 453 if (v9inode->writeback_fid) {
456 p9_client_clunk(v9inode->writeback_fid); 454 p9_client_clunk(v9inode->writeback_fid);
@@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
531 goto error; 529 goto error;
532 530
533 v9fs_stat2inode(st, inode, sb); 531 v9fs_stat2inode(st, inode, sb);
534#ifdef CONFIG_9P_FSCACHE
535 v9fs_cache_inode_get_cookie(inode); 532 v9fs_cache_inode_get_cookie(inode);
536#endif
537 unlock_new_inode(inode); 533 unlock_new_inode(inode);
538 return inode; 534 return inode;
539error: 535error:
@@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
905 goto error; 901 goto error;
906 902
907 file->private_data = fid; 903 file->private_data = fid;
908#ifdef CONFIG_9P_FSCACHE
909 if (v9ses->cache) 904 if (v9ses->cache)
910 v9fs_cache_inode_set_cookie(dentry->d_inode, file); 905 v9fs_cache_inode_set_cookie(dentry->d_inode, file);
911#endif
912 906
913 *opened |= FILE_CREATED; 907 *opened |= FILE_CREATED;
914out: 908out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a7c481402c46..4c10edec26a0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
141 goto error; 141 goto error;
142 142
143 v9fs_stat2inode_dotl(st, inode); 143 v9fs_stat2inode_dotl(st, inode);
144#ifdef CONFIG_9P_FSCACHE
145 v9fs_cache_inode_get_cookie(inode); 144 v9fs_cache_inode_get_cookie(inode);
146#endif
147 retval = v9fs_get_acl(inode, fid); 145 retval = v9fs_get_acl(inode, fid);
148 if (retval) 146 if (retval)
149 goto error; 147 goto error;
@@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
355 if (err) 353 if (err)
356 goto err_clunk_old_fid; 354 goto err_clunk_old_fid;
357 file->private_data = ofid; 355 file->private_data = ofid;
358#ifdef CONFIG_9P_FSCACHE
359 if (v9ses->cache) 356 if (v9ses->cache)
360 v9fs_cache_inode_set_cookie(inode, file); 357 v9fs_cache_inode_set_cookie(inode, file);
361#endif
362 *opened |= FILE_CREATED; 358 *opened |= FILE_CREATED;
363out: 359out:
364 v9fs_put_acl(dacl, pacl); 360 v9fs_put_acl(dacl, pacl);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 585adafb0cc2..c770337c4b45 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -43,9 +43,12 @@ struct adfs_dir_ops;
43 * ADFS file system superblock data in memory 43 * ADFS file system superblock data in memory
44 */ 44 */
45struct adfs_sb_info { 45struct adfs_sb_info {
46 struct adfs_discmap *s_map; /* bh list containing map */ 46 union { struct {
47 struct adfs_dir_ops *s_dir; /* directory operations */ 47 struct adfs_discmap *s_map; /* bh list containing map */
48 48 struct adfs_dir_ops *s_dir; /* directory operations */
49 };
50 struct rcu_head rcu; /* used only at shutdown time */
51 };
49 kuid_t s_uid; /* owner uid */ 52 kuid_t s_uid; /* owner uid */
50 kgid_t s_gid; /* owner gid */ 53 kgid_t s_gid; /* owner gid */
51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ 54 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ff4bae2c2a2..7b3003cb6f1b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb)
123 for (i = 0; i < asb->s_map_size; i++) 123 for (i = 0; i < asb->s_map_size; i++)
124 brelse(asb->s_map[i].dm_bh); 124 brelse(asb->s_map[i].dm_bh);
125 kfree(asb->s_map); 125 kfree(asb->s_map);
126 kfree(asb); 126 kfree_rcu(asb, rcu);
127 sb->s_fs_info = NULL;
128} 127}
129 128
130static int adfs_show_options(struct seq_file *seq, struct dentry *root) 129static int adfs_show_options(struct seq_file *seq, struct dentry *root)
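Overlaying the rcu_head on fields that matter only while the filesystem is mounted lets adfs defer the kfree() past an RCU grace period without growing the structure, presumably so lockless lookups that still see the old superblock info remain safe. The general shape:

struct my_sb_info {
	union {
		struct {
			void *live_state;	/* valid only while mounted */
		};
		struct rcu_head rcu;		/* used only at teardown */
	};
};

static void my_put_super(struct super_block *sb)
{
	struct my_sb_info *sbi = sb->s_fs_info;

	kfree_rcu(sbi, rcu);	/* freed after a grace period */
}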
diff --git a/fs/aio.c b/fs/aio.c
index 067e3d340c35..1f602d9be4c5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
36#include <linux/eventfd.h> 36#include <linux/eventfd.h>
37#include <linux/blkdev.h> 37#include <linux/blkdev.h>
38#include <linux/compat.h> 38#include <linux/compat.h>
39#include <linux/anon_inodes.h>
40#include <linux/migrate.h> 39#include <linux/migrate.h>
41#include <linux/ramfs.h> 40#include <linux/ramfs.h>
42#include <linux/percpu-refcount.h> 41#include <linux/percpu-refcount.h>
42#include <linux/mount.h>
43 43
44#include <asm/kmap_types.h> 44#include <asm/kmap_types.h>
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
@@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
152static struct kmem_cache *kiocb_cachep; 152static struct kmem_cache *kiocb_cachep;
153static struct kmem_cache *kioctx_cachep; 153static struct kmem_cache *kioctx_cachep;
154 154
155static struct vfsmount *aio_mnt;
156
157static const struct file_operations aio_ring_fops;
158static const struct address_space_operations aio_ctx_aops;
159
160static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
161{
162 struct qstr this = QSTR_INIT("[aio]", 5);
163 struct file *file;
164 struct path path;
165 struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
166 if (!inode)
167 return ERR_PTR(-ENOMEM);
168
169 inode->i_mapping->a_ops = &aio_ctx_aops;
170 inode->i_mapping->private_data = ctx;
171 inode->i_size = PAGE_SIZE * nr_pages;
172
173 path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
174 if (!path.dentry) {
175 iput(inode);
176 return ERR_PTR(-ENOMEM);
177 }
178 path.mnt = mntget(aio_mnt);
179
180 d_instantiate(path.dentry, inode);
181 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
182 if (IS_ERR(file)) {
183 path_put(&path);
184 return file;
185 }
186
187 file->f_flags = O_RDWR;
188 file->private_data = ctx;
189 return file;
190}
191
192static struct dentry *aio_mount(struct file_system_type *fs_type,
193 int flags, const char *dev_name, void *data)
194{
195 static const struct dentry_operations ops = {
196 .d_dname = simple_dname,
197 };
198 return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
199}
200
155/* aio_setup 201/* aio_setup
156 * Creates the slab caches used by the aio routines, panic on 202 * Creates the slab caches used by the aio routines, panic on
157 * failure as this is done early during the boot sequence. 203 * failure as this is done early during the boot sequence.
158 */ 204 */
159static int __init aio_setup(void) 205static int __init aio_setup(void)
160{ 206{
207 static struct file_system_type aio_fs = {
208 .name = "aio",
209 .mount = aio_mount,
210 .kill_sb = kill_anon_super,
211 };
212 aio_mnt = kern_mount(&aio_fs);
213 if (IS_ERR(aio_mnt))
214 panic("Failed to create aio fs mount.");
215
161 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 216 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
162 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 217 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
163 218
@@ -283,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx)
283 if (nr_pages < 0) 338 if (nr_pages < 0)
284 return -EINVAL; 339 return -EINVAL;
285 340
286 file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); 341 file = aio_private_file(ctx, nr_pages);
287 if (IS_ERR(file)) { 342 if (IS_ERR(file)) {
288 ctx->aio_ring_file = NULL; 343 ctx->aio_ring_file = NULL;
289 return -EAGAIN; 344 return -EAGAIN;
290 } 345 }
291 346
292 file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
293 file->f_inode->i_mapping->private_data = ctx;
294 file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
295
296 for (i = 0; i < nr_pages; i++) { 347 for (i = 0; i < nr_pages; i++) {
297 struct page *page; 348 struct page *page;
298 page = find_or_create_page(file->f_inode->i_mapping, 349 page = find_or_create_page(file->f_inode->i_mapping,
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 85c961849953..24084732b1d0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
24 24
25static struct vfsmount *anon_inode_mnt __read_mostly; 25static struct vfsmount *anon_inode_mnt __read_mostly;
26static struct inode *anon_inode_inode; 26static struct inode *anon_inode_inode;
27static const struct file_operations anon_inode_fops;
28 27
29/* 28/*
30 * anon_inodefs_dname() is called from d_path(). 29 * anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
39 .d_dname = anon_inodefs_dname, 38 .d_dname = anon_inodefs_dname,
40}; 39};
41 40
42/*
43 * nop .set_page_dirty method so that people can use .page_mkwrite on
44 * anon inodes.
45 */
46static int anon_set_page_dirty(struct page *page)
47{
48 return 0;
49};
50
51static const struct address_space_operations anon_aops = {
52 .set_page_dirty = anon_set_page_dirty,
53};
54
55/*
56 * A single inode exists for all anon_inode files. Contrary to pipes,
57 * anon_inode inodes have no associated per-instance data, so we need
58 * only allocate one of them.
59 */
60static struct inode *anon_inode_mkinode(struct super_block *s)
61{
62 struct inode *inode = new_inode_pseudo(s);
63
64 if (!inode)
65 return ERR_PTR(-ENOMEM);
66
67 inode->i_ino = get_next_ino();
68 inode->i_fop = &anon_inode_fops;
69
70 inode->i_mapping->a_ops = &anon_aops;
71
72 /*
73 * Mark the inode dirty from the very beginning,
74 * that way it will never be moved to the dirty
75 * list because mark_inode_dirty() will think
76 * that it already _is_ on the dirty list.
77 */
78 inode->i_state = I_DIRTY;
79 inode->i_mode = S_IRUSR | S_IWUSR;
80 inode->i_uid = current_fsuid();
81 inode->i_gid = current_fsgid();
82 inode->i_flags |= S_PRIVATE;
83 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
84 return inode;
85}
86
87static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, 41static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
88 int flags, const char *dev_name, void *data) 42 int flags, const char *dev_name, void *data)
89{ 43{
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
92 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); 46 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
93 if (!IS_ERR(root)) { 47 if (!IS_ERR(root)) {
94 struct super_block *s = root->d_sb; 48 struct super_block *s = root->d_sb;
95 anon_inode_inode = anon_inode_mkinode(s); 49 anon_inode_inode = alloc_anon_inode(s);
96 if (IS_ERR(anon_inode_inode)) { 50 if (IS_ERR(anon_inode_inode)) {
97 dput(root); 51 dput(root);
98 deactivate_locked_super(s); 52 deactivate_locked_super(s);
@@ -109,72 +63,6 @@ static struct file_system_type anon_inode_fs_type = {
109}; 63};
110 64
111/** 65/**
112 * anon_inode_getfile_private - creates a new file instance by hooking it up to an
113 * anonymous inode, and a dentry that describe the "class"
114 * of the file
115 *
116 * @name: [in] name of the "class" of the new file
117 * @fops: [in] file operations for the new file
118 * @priv: [in] private data for the new file (will be file's private_data)
119 * @flags: [in] flags
120 *
121 *
122 * Similar to anon_inode_getfile, but each file holds a single inode.
123 *
124 */
125struct file *anon_inode_getfile_private(const char *name,
126 const struct file_operations *fops,
127 void *priv, int flags)
128{
129 struct qstr this;
130 struct path path;
131 struct file *file;
132 struct inode *inode;
133
134 if (fops->owner && !try_module_get(fops->owner))
135 return ERR_PTR(-ENOENT);
136
137 inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
138 if (IS_ERR(inode)) {
139 file = ERR_PTR(-ENOMEM);
140 goto err_module;
141 }
142
143 /*
144 * Link the inode to a directory entry by creating a unique name
145 * using the inode sequence number.
146 */
147 file = ERR_PTR(-ENOMEM);
148 this.name = name;
149 this.len = strlen(name);
150 this.hash = 0;
151 path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
152 if (!path.dentry)
153 goto err_module;
154
155 path.mnt = mntget(anon_inode_mnt);
156
157 d_instantiate(path.dentry, inode);
158
159 file = alloc_file(&path, OPEN_FMODE(flags), fops);
160 if (IS_ERR(file))
161 goto err_dput;
162
163 file->f_mapping = inode->i_mapping;
164 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
165 file->private_data = priv;
166
167 return file;
168
169err_dput:
170 path_put(&path);
171err_module:
172 module_put(fops->owner);
173 return file;
174}
175EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
176
177/**
178 * anon_inode_getfile - creates a new file instance by hooking it up to an 66 * anon_inode_getfile - creates a new file instance by hooking it up to an
179 * anonymous inode, and a dentry that describe the "class" 67 * anonymous inode, and a dentry that describe the "class"
180 * of the file 68 * of the file
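With anon_inode_getfile_private() gone (aio now builds its ring file on a private mount, as shown above), anon_inode_getfile() remains the interface for the common case of many files sharing the single anonymous inode; typical usage, with my_fops and priv as placeholders:

	struct file *file;

	file = anon_inode_getfile("[myclass]", &my_fops, priv, O_RDWR);
	if (IS_ERR(file))
		return PTR_ERR(file);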
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..267968d94673 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
167} 167}
168EXPORT_SYMBOL(setattr_copy); 168EXPORT_SYMBOL(setattr_copy);
169 169
170int notify_change(struct dentry * dentry, struct iattr * attr) 170/**
 171 * notify_change - modify attributes of a filesystem object
172 * @dentry: object affected
173 * @iattr: new attributes
174 * @delegated_inode: returns inode, if the inode is delegated
175 *
176 * The caller must hold the i_mutex on the affected object.
177 *
178 * If notify_change discovers a delegation in need of breaking,
179 * it will return -EWOULDBLOCK and return a reference to the inode in
180 * delegated_inode. The caller should then break the delegation and
181 * retry. Because breaking a delegation may take a long time, the
182 * caller should drop the i_mutex before doing so.
183 *
184 * Alternatively, a caller may pass NULL for delegated_inode. This may
185 * be appropriate for callers that expect the underlying filesystem not
186 * to be NFS exported. Also, passing NULL is fine for callers holding
187 * the file open for write, as there can be no conflicting delegation in
188 * that case.
189 */
190int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
171{ 191{
172 struct inode *inode = dentry->d_inode; 192 struct inode *inode = dentry->d_inode;
173 umode_t mode = inode->i_mode; 193 umode_t mode = inode->i_mode;
@@ -243,6 +263,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
243 error = security_inode_setattr(dentry, attr); 263 error = security_inode_setattr(dentry, attr);
244 if (error) 264 if (error)
245 return error; 265 return error;
266 error = try_break_deleg(inode, delegated_inode);
267 if (error)
268 return error;
246 269
247 if (inode->i_op->setattr) 270 if (inode->i_op->setattr)
248 error = inode->i_op->setattr(dentry, attr); 271 error = inode->i_op->setattr(dentry, attr);
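The kerneldoc above describes the retry protocol rather than showing one. A caller that passes a non-NULL delegated_inode would follow roughly this shape; this is a sketch modeled on the in-tree chown/utimes callers, with break_deleg_wait() assumed to be the helper that blocks until the delegation is gone:

	#include <linux/fs.h>
	#include <linux/mutex.h>

	static int demo_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = dentry->d_inode;
		struct inode *delegated_inode = NULL;
		int error;

	retry_deleg:
		mutex_lock(&inode->i_mutex);
		/* may return -EWOULDBLOCK and hand back the delegated inode */
		error = notify_change(dentry, attr, &delegated_inode);
		mutex_unlock(&inode->i_mutex);
		if (delegated_inode) {
			/* i_mutex is dropped: breaking a delegation can take a while */
			error = break_deleg_wait(&delegated_inode);
			if (!error)
				goto retry_deleg;
		}
		return error;
	}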
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3f1128b37e46..4218e26df916 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -122,6 +122,7 @@ struct autofs_sb_info {
122 spinlock_t lookup_lock; 122 spinlock_t lookup_lock;
123 struct list_head active_list; 123 struct list_head active_list;
124 struct list_head expiring_list; 124 struct list_head expiring_list;
125 struct rcu_head rcu;
125}; 126};
126 127
127static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 128static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -271,7 +272,7 @@ void autofs4_clean_ino(struct autofs_info *);
271 272
272static inline int autofs_prepare_pipe(struct file *pipe) 273static inline int autofs_prepare_pipe(struct file *pipe)
273{ 274{
274 if (!pipe->f_op || !pipe->f_op->write) 275 if (!pipe->f_op->write)
275 return -EINVAL; 276 return -EINVAL;
276 if (!S_ISFIFO(file_inode(pipe)->i_mode)) 277 if (!S_ISFIFO(file_inode(pipe)->i_mode))
277 return -EINVAL; 278 return -EINVAL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 0f00da329e71..1818ce7f5a06 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
658 goto out; 658 goto out;
659 } 659 }
660 660
661 if (!fp->f_op) {
662 err = -ENOTTY;
663 fput(fp);
664 goto out;
665 }
666
667 sbi = autofs_dev_ioctl_sbi(fp); 661 sbi = autofs_dev_ioctl_sbi(fp);
668 if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { 662 if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
669 err = -EINVAL; 663 err = -EINVAL;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index b104726e2d0a..3b9cc9b973c2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb)
56 * just call kill_anon_super when we are called from 56 * just call kill_anon_super when we are called from
57 * deactivate_super. 57 * deactivate_super.
58 */ 58 */
59 if (!sbi) 59 if (sbi) /* Free wait queues, close pipe */
60 goto out_kill_sb; 60 autofs4_catatonic_mode(sbi);
61
62 /* Free wait queues, close pipe */
63 autofs4_catatonic_mode(sbi);
64
65 sb->s_fs_info = NULL;
66 kfree(sbi);
67 61
68out_kill_sb:
69 DPRINTK("shutting down"); 62 DPRINTK("shutting down");
70 kill_litter_super(sb); 63 kill_litter_super(sb);
64 if (sbi)
65 kfree_rcu(sbi, rcu);
71} 66}
72 67
73static int autofs4_show_options(struct seq_file *m, struct dentry *root) 68static int autofs4_show_options(struct seq_file *m, struct dentry *root)
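Deferring the free with kfree_rcu() only requires the struct rcu_head embedded in the object, which is what the new field in autofs_sb_info above provides. The general pattern, with illustrative names, looks like this:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct demo_obj {
		int value;
		struct rcu_head rcu;	/* storage for the deferred callback */
	};

	static void demo_release(struct demo_obj *obj)
	{
		/*
		 * Readers that found obj under rcu_read_lock() may still be
		 * dereferencing it; the kfree() runs only after a grace period.
		 */
		kfree_rcu(obj, rcu);
	}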
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e9c75e20db32..daa15d6ba450 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode);
42static int befs_init_inodecache(void); 42static int befs_init_inodecache(void);
43static void befs_destroy_inodecache(void); 43static void befs_destroy_inodecache(void);
44static void *befs_follow_link(struct dentry *, struct nameidata *); 44static void *befs_follow_link(struct dentry *, struct nameidata *);
45static void befs_put_link(struct dentry *, struct nameidata *, void *); 45static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
46static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, 46static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
47 char **out, int *out_len); 47 char **out, int *out_len);
48static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, 48static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = {
79 .bmap = befs_bmap, 79 .bmap = befs_bmap,
80}; 80};
81 81
82static const struct inode_operations befs_fast_symlink_inode_operations = {
83 .readlink = generic_readlink,
84 .follow_link = befs_fast_follow_link,
85};
86
82static const struct inode_operations befs_symlink_inode_operations = { 87static const struct inode_operations befs_symlink_inode_operations = {
83 .readlink = generic_readlink, 88 .readlink = generic_readlink,
84 .follow_link = befs_follow_link, 89 .follow_link = befs_follow_link,
85 .put_link = befs_put_link, 90 .put_link = kfree_put_link,
86}; 91};
87 92
88/* 93/*
@@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
411 inode->i_op = &befs_dir_inode_operations; 416 inode->i_op = &befs_dir_inode_operations;
412 inode->i_fop = &befs_dir_operations; 417 inode->i_fop = &befs_dir_operations;
413 } else if (S_ISLNK(inode->i_mode)) { 418 } else if (S_ISLNK(inode->i_mode)) {
414 inode->i_op = &befs_symlink_inode_operations; 419 if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
420 inode->i_op = &befs_symlink_inode_operations;
421 else
422 inode->i_op = &befs_fast_symlink_inode_operations;
415 } else { 423 } else {
416 befs_error(sb, "Inode %lu is not a regular file, " 424 befs_error(sb, "Inode %lu is not a regular file, "
417 "directory or symlink. THAT IS WRONG! BeFS has no " 425 "directory or symlink. THAT IS WRONG! BeFS has no "
@@ -477,47 +485,40 @@ befs_destroy_inodecache(void)
477static void * 485static void *
478befs_follow_link(struct dentry *dentry, struct nameidata *nd) 486befs_follow_link(struct dentry *dentry, struct nameidata *nd)
479{ 487{
488 struct super_block *sb = dentry->d_sb;
480 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 489 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
490 befs_data_stream *data = &befs_ino->i_data.ds;
491 befs_off_t len = data->size;
481 char *link; 492 char *link;
482 493
483 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 494 if (len == 0) {
484 struct super_block *sb = dentry->d_sb; 495 befs_error(sb, "Long symlink with illegal length");
485 befs_data_stream *data = &befs_ino->i_data.ds; 496 link = ERR_PTR(-EIO);
486 befs_off_t len = data->size; 497 } else {
498 befs_debug(sb, "Follow long symlink");
487 499
488 if (len == 0) { 500 link = kmalloc(len, GFP_NOFS);
489 befs_error(sb, "Long symlink with illegal length"); 501 if (!link) {
502 link = ERR_PTR(-ENOMEM);
503 } else if (befs_read_lsymlink(sb, data, link, len) != len) {
504 kfree(link);
505 befs_error(sb, "Failed to read entire long symlink");
490 link = ERR_PTR(-EIO); 506 link = ERR_PTR(-EIO);
491 } else { 507 } else {
492 befs_debug(sb, "Follow long symlink"); 508 link[len - 1] = '\0';
493
494 link = kmalloc(len, GFP_NOFS);
495 if (!link) {
496 link = ERR_PTR(-ENOMEM);
497 } else if (befs_read_lsymlink(sb, data, link, len) != len) {
498 kfree(link);
499 befs_error(sb, "Failed to read entire long symlink");
500 link = ERR_PTR(-EIO);
501 } else {
502 link[len - 1] = '\0';
503 }
504 } 509 }
505 } else {
506 link = befs_ino->i_data.symlink;
507 } 510 }
508
509 nd_set_link(nd, link); 511 nd_set_link(nd, link);
510 return NULL; 512 return NULL;
511} 513}
512 514
513static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 515
516static void *
517befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
514{ 518{
515 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 519 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
516 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 520 nd_set_link(nd, befs_ino->i_data.symlink);
517 char *link = nd_get_link(nd); 521 return NULL;
518 if (!IS_ERR(link))
519 kfree(link);
520 }
521} 522}
522 523
523/* 524/*
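Both symlink flavours now lean on shared code: the fast case points the nameidata straight at the in-inode buffer and needs no put_link at all, while the long case kmalloc()s the target and lets the generic kfree_put_link() release it. That helper is presumably no more than the following sketch (modeled on what fs/libfs.c is assumed to provide):

	static void demo_kfree_put_link(struct dentry *dentry, struct nameidata *nd,
					void *cookie)
	{
		char *s = nd_get_link(nd);

		if (!IS_ERR(s))
			kfree(s);	/* matches the kmalloc() in ->follow_link */
	}

which is exactly the logic the per-filesystem befs_put_link() and cifs_put_link() used to duplicate.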
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 89dec7f789a4..ca0ba15a7306 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -45,7 +45,6 @@ static int load_aout_library(struct file*);
45 */ 45 */
46static int aout_core_dump(struct coredump_params *cprm) 46static int aout_core_dump(struct coredump_params *cprm)
47{ 47{
48 struct file *file = cprm->file;
49 mm_segment_t fs; 48 mm_segment_t fs;
50 int has_dumped = 0; 49 int has_dumped = 0;
51 void __user *dump_start; 50 void __user *dump_start;
@@ -85,10 +84,10 @@ static int aout_core_dump(struct coredump_params *cprm)
85 84
86 set_fs(KERNEL_DS); 85 set_fs(KERNEL_DS);
87/* struct user */ 86/* struct user */
88 if (!dump_write(file, &dump, sizeof(dump))) 87 if (!dump_emit(cprm, &dump, sizeof(dump)))
89 goto end_coredump; 88 goto end_coredump;
90/* Now dump all of the user data. Include malloced stuff as well */ 89/* Now dump all of the user data. Include malloced stuff as well */
91 if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) 90 if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
92 goto end_coredump; 91 goto end_coredump;
93/* now we start writing out the user space info */ 92/* now we start writing out the user space info */
94 set_fs(USER_DS); 93 set_fs(USER_DS);
@@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm)
96 if (dump.u_dsize != 0) { 95 if (dump.u_dsize != 0) {
97 dump_start = START_DATA(dump); 96 dump_start = START_DATA(dump);
98 dump_size = dump.u_dsize << PAGE_SHIFT; 97 dump_size = dump.u_dsize << PAGE_SHIFT;
99 if (!dump_write(file, dump_start, dump_size)) 98 if (!dump_emit(cprm, dump_start, dump_size))
100 goto end_coredump; 99 goto end_coredump;
101 } 100 }
102/* Now prepare to dump the stack area */ 101/* Now prepare to dump the stack area */
103 if (dump.u_ssize != 0) { 102 if (dump.u_ssize != 0) {
104 dump_start = START_STACK(dump); 103 dump_start = START_STACK(dump);
105 dump_size = dump.u_ssize << PAGE_SHIFT; 104 dump_size = dump.u_ssize << PAGE_SHIFT;
106 if (!dump_write(file, dump_start, dump_size)) 105 if (!dump_emit(cprm, dump_start, dump_size))
107 goto end_coredump; 106 goto end_coredump;
108 } 107 }
109end_coredump: 108end_coredump:
@@ -221,7 +220,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
221 * Requires a mmap handler. This prevents people from using a.out 220 * Requires a mmap handler. This prevents people from using a.out
222 * as part of an exploit attack against /proc-related vulnerabilities. 221 * as part of an exploit attack against /proc-related vulnerabilities.
223 */ 222 */
224 if (!bprm->file->f_op || !bprm->file->f_op->mmap) 223 if (!bprm->file->f_op->mmap)
225 return -ENOEXEC; 224 return -ENOEXEC;
226 225
227 fd_offset = N_TXTOFF(ex); 226 fd_offset = N_TXTOFF(ex);
@@ -374,7 +373,7 @@ static int load_aout_library(struct file *file)
374 * Requires a mmap handler. This prevents people from using a.out 373 * Requires a mmap handler. This prevents people from using a.out
375 * as part of an exploit attack against /proc-related vulnerabilities. 374 * as part of an exploit attack against /proc-related vulnerabilities.
376 */ 375 */
377 if (!file->f_op || !file->f_op->mmap) 376 if (!file->f_op->mmap)
378 goto out; 377 goto out;
379 378
380 if (N_FLAGS(ex)) 379 if (N_FLAGS(ex))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c94a79991bb..571a42326908 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
406 goto out; 406 goto out;
407 if (!elf_check_arch(interp_elf_ex)) 407 if (!elf_check_arch(interp_elf_ex))
408 goto out; 408 goto out;
409 if (!interpreter->f_op || !interpreter->f_op->mmap) 409 if (!interpreter->f_op->mmap)
410 goto out; 410 goto out;
411 411
412 /* 412 /*
@@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
607 goto out; 607 goto out;
608 if (!elf_check_arch(&loc->elf_ex)) 608 if (!elf_check_arch(&loc->elf_ex))
609 goto out; 609 goto out;
610 if (!bprm->file->f_op || !bprm->file->f_op->mmap) 610 if (!bprm->file->f_op->mmap)
611 goto out; 611 goto out;
612 612
613 /* Now read in all of the header information */ 613 /* Now read in all of the header information */
@@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file)
1028 1028
1029 /* First of all, some simple consistency checks */ 1029 /* First of all, some simple consistency checks */
1030 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || 1030 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1031 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) 1031 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1032 goto out; 1032 goto out;
1033 1033
1034 /* Now read in all of the header information */ 1034 /* Now read in all of the header information */
@@ -1225,35 +1225,17 @@ static int notesize(struct memelfnote *en)
1225 return sz; 1225 return sz;
1226} 1226}
1227 1227
1228#define DUMP_WRITE(addr, nr, foffset) \ 1228static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1229 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1230
1231static int alignfile(struct file *file, loff_t *foffset)
1232{
1233 static const char buf[4] = { 0, };
1234 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1235 return 1;
1236}
1237
1238static int writenote(struct memelfnote *men, struct file *file,
1239 loff_t *foffset)
1240{ 1229{
1241 struct elf_note en; 1230 struct elf_note en;
1242 en.n_namesz = strlen(men->name) + 1; 1231 en.n_namesz = strlen(men->name) + 1;
1243 en.n_descsz = men->datasz; 1232 en.n_descsz = men->datasz;
1244 en.n_type = men->type; 1233 en.n_type = men->type;
1245 1234
1246 DUMP_WRITE(&en, sizeof(en), foffset); 1235 return dump_emit(cprm, &en, sizeof(en)) &&
1247 DUMP_WRITE(men->name, en.n_namesz, foffset); 1236 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1248 if (!alignfile(file, foffset)) 1237 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1249 return 0;
1250 DUMP_WRITE(men->data, men->datasz, foffset);
1251 if (!alignfile(file, foffset))
1252 return 0;
1253
1254 return 1;
1255} 1238}
1256#undef DUMP_WRITE
1257 1239
1258static void fill_elf_header(struct elfhdr *elf, int segs, 1240static void fill_elf_header(struct elfhdr *elf, int segs,
1259 u16 machine, u32 flags) 1241 u16 machine, u32 flags)
@@ -1392,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1392} 1374}
1393 1375
1394static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, 1376static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1395 siginfo_t *siginfo) 1377 const siginfo_t *siginfo)
1396{ 1378{
1397 mm_segment_t old_fs = get_fs(); 1379 mm_segment_t old_fs = get_fs();
1398 set_fs(KERNEL_DS); 1380 set_fs(KERNEL_DS);
@@ -1599,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1599 1581
1600static int fill_note_info(struct elfhdr *elf, int phdrs, 1582static int fill_note_info(struct elfhdr *elf, int phdrs,
1601 struct elf_note_info *info, 1583 struct elf_note_info *info,
1602 siginfo_t *siginfo, struct pt_regs *regs) 1584 const siginfo_t *siginfo, struct pt_regs *regs)
1603{ 1585{
1604 struct task_struct *dump_task = current; 1586 struct task_struct *dump_task = current;
1605 const struct user_regset_view *view = task_user_regset_view(dump_task); 1587 const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1702,7 +1684,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
1702 * process-wide notes are interleaved after the first thread-specific note. 1684 * process-wide notes are interleaved after the first thread-specific note.
1703 */ 1685 */
1704static int write_note_info(struct elf_note_info *info, 1686static int write_note_info(struct elf_note_info *info,
1705 struct file *file, loff_t *foffset) 1687 struct coredump_params *cprm)
1706{ 1688{
1707 bool first = 1; 1689 bool first = 1;
1708 struct elf_thread_core_info *t = info->thread; 1690 struct elf_thread_core_info *t = info->thread;
@@ -1710,22 +1692,22 @@ static int write_note_info(struct elf_note_info *info,
1710 do { 1692 do {
1711 int i; 1693 int i;
1712 1694
1713 if (!writenote(&t->notes[0], file, foffset)) 1695 if (!writenote(&t->notes[0], cprm))
1714 return 0; 1696 return 0;
1715 1697
1716 if (first && !writenote(&info->psinfo, file, foffset)) 1698 if (first && !writenote(&info->psinfo, cprm))
1717 return 0; 1699 return 0;
1718 if (first && !writenote(&info->signote, file, foffset)) 1700 if (first && !writenote(&info->signote, cprm))
1719 return 0; 1701 return 0;
1720 if (first && !writenote(&info->auxv, file, foffset)) 1702 if (first && !writenote(&info->auxv, cprm))
1721 return 0; 1703 return 0;
1722 if (first && info->files.data && 1704 if (first && info->files.data &&
1723 !writenote(&info->files, file, foffset)) 1705 !writenote(&info->files, cprm))
1724 return 0; 1706 return 0;
1725 1707
1726 for (i = 1; i < info->thread_notes; ++i) 1708 for (i = 1; i < info->thread_notes; ++i)
1727 if (t->notes[i].data && 1709 if (t->notes[i].data &&
1728 !writenote(&t->notes[i], file, foffset)) 1710 !writenote(&t->notes[i], cprm))
1729 return 0; 1711 return 0;
1730 1712
1731 first = 0; 1713 first = 0;
@@ -1848,34 +1830,31 @@ static int elf_note_info_init(struct elf_note_info *info)
1848 1830
1849static int fill_note_info(struct elfhdr *elf, int phdrs, 1831static int fill_note_info(struct elfhdr *elf, int phdrs,
1850 struct elf_note_info *info, 1832 struct elf_note_info *info,
1851 siginfo_t *siginfo, struct pt_regs *regs) 1833 const siginfo_t *siginfo, struct pt_regs *regs)
1852{ 1834{
1853 struct list_head *t; 1835 struct list_head *t;
1836 struct core_thread *ct;
1837 struct elf_thread_status *ets;
1854 1838
1855 if (!elf_note_info_init(info)) 1839 if (!elf_note_info_init(info))
1856 return 0; 1840 return 0;
1857 1841
1858 if (siginfo->si_signo) { 1842 for (ct = current->mm->core_state->dumper.next;
1859 struct core_thread *ct; 1843 ct; ct = ct->next) {
1860 struct elf_thread_status *ets; 1844 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1861 1845 if (!ets)
1862 for (ct = current->mm->core_state->dumper.next; 1846 return 0;
1863 ct; ct = ct->next) {
1864 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1865 if (!ets)
1866 return 0;
1867 1847
1868 ets->thread = ct->task; 1848 ets->thread = ct->task;
1869 list_add(&ets->list, &info->thread_list); 1849 list_add(&ets->list, &info->thread_list);
1870 } 1850 }
1871 1851
1872 list_for_each(t, &info->thread_list) { 1852 list_for_each(t, &info->thread_list) {
1873 int sz; 1853 int sz;
1874 1854
1875 ets = list_entry(t, struct elf_thread_status, list); 1855 ets = list_entry(t, struct elf_thread_status, list);
1876 sz = elf_dump_thread_status(siginfo->si_signo, ets); 1856 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1877 info->thread_status_size += sz; 1857 info->thread_status_size += sz;
1878 }
1879 } 1858 }
1880 /* now collect the dump for the current */ 1859 /* now collect the dump for the current */
1881 memset(info->prstatus, 0, sizeof(*info->prstatus)); 1860 memset(info->prstatus, 0, sizeof(*info->prstatus));
@@ -1935,13 +1914,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
1935} 1914}
1936 1915
1937static int write_note_info(struct elf_note_info *info, 1916static int write_note_info(struct elf_note_info *info,
1938 struct file *file, loff_t *foffset) 1917 struct coredump_params *cprm)
1939{ 1918{
1940 int i; 1919 int i;
1941 struct list_head *t; 1920 struct list_head *t;
1942 1921
1943 for (i = 0; i < info->numnote; i++) 1922 for (i = 0; i < info->numnote; i++)
1944 if (!writenote(info->notes + i, file, foffset)) 1923 if (!writenote(info->notes + i, cprm))
1945 return 0; 1924 return 0;
1946 1925
1947 /* write out the thread status notes section */ 1926 /* write out the thread status notes section */
@@ -1950,7 +1929,7 @@ static int write_note_info(struct elf_note_info *info,
1950 list_entry(t, struct elf_thread_status, list); 1929 list_entry(t, struct elf_thread_status, list);
1951 1930
1952 for (i = 0; i < tmp->num_notes; i++) 1931 for (i = 0; i < tmp->num_notes; i++)
1953 if (!writenote(&tmp->notes[i], file, foffset)) 1932 if (!writenote(&tmp->notes[i], cprm))
1954 return 0; 1933 return 0;
1955 } 1934 }
1956 1935
@@ -2046,10 +2025,9 @@ static int elf_core_dump(struct coredump_params *cprm)
2046 int has_dumped = 0; 2025 int has_dumped = 0;
2047 mm_segment_t fs; 2026 mm_segment_t fs;
2048 int segs; 2027 int segs;
2049 size_t size = 0;
2050 struct vm_area_struct *vma, *gate_vma; 2028 struct vm_area_struct *vma, *gate_vma;
2051 struct elfhdr *elf = NULL; 2029 struct elfhdr *elf = NULL;
2052 loff_t offset = 0, dataoff, foffset; 2030 loff_t offset = 0, dataoff;
2053 struct elf_note_info info = { }; 2031 struct elf_note_info info = { };
2054 struct elf_phdr *phdr4note = NULL; 2032 struct elf_phdr *phdr4note = NULL;
2055 struct elf_shdr *shdr4extnum = NULL; 2033 struct elf_shdr *shdr4extnum = NULL;
@@ -2105,7 +2083,6 @@ static int elf_core_dump(struct coredump_params *cprm)
2105 2083
2106 offset += sizeof(*elf); /* Elf header */ 2084 offset += sizeof(*elf); /* Elf header */
2107 offset += segs * sizeof(struct elf_phdr); /* Program headers */ 2085 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2108 foffset = offset;
2109 2086
2110 /* Write notes phdr entry */ 2087 /* Write notes phdr entry */
2111 { 2088 {
@@ -2136,13 +2113,10 @@ static int elf_core_dump(struct coredump_params *cprm)
2136 2113
2137 offset = dataoff; 2114 offset = dataoff;
2138 2115
2139 size += sizeof(*elf); 2116 if (!dump_emit(cprm, elf, sizeof(*elf)))
2140 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2141 goto end_coredump; 2117 goto end_coredump;
2142 2118
2143 size += sizeof(*phdr4note); 2119 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2144 if (size > cprm->limit
2145 || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2146 goto end_coredump; 2120 goto end_coredump;
2147 2121
2148 /* Write program headers for segments dump */ 2122 /* Write program headers for segments dump */
@@ -2164,24 +2138,22 @@ static int elf_core_dump(struct coredump_params *cprm)
2164 phdr.p_flags |= PF_X; 2138 phdr.p_flags |= PF_X;
2165 phdr.p_align = ELF_EXEC_PAGESIZE; 2139 phdr.p_align = ELF_EXEC_PAGESIZE;
2166 2140
2167 size += sizeof(phdr); 2141 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2168 if (size > cprm->limit
2169 || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2170 goto end_coredump; 2142 goto end_coredump;
2171 } 2143 }
2172 2144
2173 if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) 2145 if (!elf_core_write_extra_phdrs(cprm, offset))
2174 goto end_coredump; 2146 goto end_coredump;
2175 2147
2176 /* write out the notes section */ 2148 /* write out the notes section */
2177 if (!write_note_info(&info, cprm->file, &foffset)) 2149 if (!write_note_info(&info, cprm))
2178 goto end_coredump; 2150 goto end_coredump;
2179 2151
2180 if (elf_coredump_extra_notes_write(cprm->file, &foffset)) 2152 if (elf_coredump_extra_notes_write(cprm))
2181 goto end_coredump; 2153 goto end_coredump;
2182 2154
2183 /* Align to page */ 2155 /* Align to page */
2184 if (!dump_seek(cprm->file, dataoff - foffset)) 2156 if (!dump_skip(cprm, dataoff - cprm->written))
2185 goto end_coredump; 2157 goto end_coredump;
2186 2158
2187 for (vma = first_vma(current, gate_vma); vma != NULL; 2159 for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2198,26 +2170,21 @@ static int elf_core_dump(struct coredump_params *cprm)
2198 page = get_dump_page(addr); 2170 page = get_dump_page(addr);
2199 if (page) { 2171 if (page) {
2200 void *kaddr = kmap(page); 2172 void *kaddr = kmap(page);
2201 stop = ((size += PAGE_SIZE) > cprm->limit) || 2173 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2202 !dump_write(cprm->file, kaddr,
2203 PAGE_SIZE);
2204 kunmap(page); 2174 kunmap(page);
2205 page_cache_release(page); 2175 page_cache_release(page);
2206 } else 2176 } else
2207 stop = !dump_seek(cprm->file, PAGE_SIZE); 2177 stop = !dump_skip(cprm, PAGE_SIZE);
2208 if (stop) 2178 if (stop)
2209 goto end_coredump; 2179 goto end_coredump;
2210 } 2180 }
2211 } 2181 }
2212 2182
2213 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) 2183 if (!elf_core_write_extra_data(cprm))
2214 goto end_coredump; 2184 goto end_coredump;
2215 2185
2216 if (e_phnum == PN_XNUM) { 2186 if (e_phnum == PN_XNUM) {
2217 size += sizeof(*shdr4extnum); 2187 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2218 if (size > cprm->limit
2219 || !dump_write(cprm->file, shdr4extnum,
2220 sizeof(*shdr4extnum)))
2221 goto end_coredump; 2188 goto end_coredump;
2222 } 2189 }
2223 2190
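With dump_emit() and dump_align() doing the offset bookkeeping through cprm->written, emitting one ELF note reduces to a chain of boolean calls. A standalone sketch of the same pattern (names abbreviated, not the full binfmt code):

	#include <linux/coredump.h>
	#include <linux/elf.h>
	#include <linux/string.h>

	/* sketch: emit an ELF note header, name and payload, 4-byte aligned */
	static int demo_emit_note(struct coredump_params *cprm,
				  const char *name, int type,
				  const void *data, unsigned int datasz)
	{
		struct elf_note en = {
			.n_namesz = strlen(name) + 1,
			.n_descsz = datasz,
			.n_type   = type,
		};

		return dump_emit(cprm, &en, sizeof(en)) &&
		       dump_emit(cprm, name, en.n_namesz) &&
		       dump_align(cprm, 4) &&
		       dump_emit(cprm, data, datasz) &&
		       dump_align(cprm, 4);
	}

The helpers return zero when the limit is exceeded or the write fails, so the chain short-circuits exactly where the old DUMP_WRITE macro used to return 0.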
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c166f325a183..fe2a643ee005 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
111 return 0; 111 return 0;
112 if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) 112 if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
113 return 0; 113 return 0;
114 if (!file->f_op || !file->f_op->mmap) 114 if (!file->f_op->mmap)
115 return 0; 115 return 0;
116 return 1; 116 return 1;
117} 117}
@@ -1267,35 +1267,17 @@ static int notesize(struct memelfnote *en)
1267 1267
1268/* #define DEBUG */ 1268/* #define DEBUG */
1269 1269
1270#define DUMP_WRITE(addr, nr, foffset) \ 1270static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1271 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1272
1273static int alignfile(struct file *file, loff_t *foffset)
1274{
1275 static const char buf[4] = { 0, };
1276 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1277 return 1;
1278}
1279
1280static int writenote(struct memelfnote *men, struct file *file,
1281 loff_t *foffset)
1282{ 1271{
1283 struct elf_note en; 1272 struct elf_note en;
1284 en.n_namesz = strlen(men->name) + 1; 1273 en.n_namesz = strlen(men->name) + 1;
1285 en.n_descsz = men->datasz; 1274 en.n_descsz = men->datasz;
1286 en.n_type = men->type; 1275 en.n_type = men->type;
1287 1276
1288 DUMP_WRITE(&en, sizeof(en), foffset); 1277 return dump_emit(cprm, &en, sizeof(en)) &&
1289 DUMP_WRITE(men->name, en.n_namesz, foffset); 1278 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1290 if (!alignfile(file, foffset)) 1279 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1291 return 0;
1292 DUMP_WRITE(men->data, men->datasz, foffset);
1293 if (!alignfile(file, foffset))
1294 return 0;
1295
1296 return 1;
1297} 1280}
1298#undef DUMP_WRITE
1299 1281
1300static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1282static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
1301{ 1283{
@@ -1500,66 +1482,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1500/* 1482/*
1501 * dump the segments for an MMU process 1483 * dump the segments for an MMU process
1502 */ 1484 */
1503#ifdef CONFIG_MMU 1485static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
1504static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1505 unsigned long *limit, unsigned long mm_flags)
1506{ 1486{
1507 struct vm_area_struct *vma; 1487 struct vm_area_struct *vma;
1508 int err = 0;
1509 1488
1510 for (vma = current->mm->mmap; vma; vma = vma->vm_next) { 1489 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1511 unsigned long addr; 1490 unsigned long addr;
1512 1491
1513 if (!maydump(vma, mm_flags)) 1492 if (!maydump(vma, cprm->mm_flags))
1514 continue; 1493 continue;
1515 1494
1495#ifdef CONFIG_MMU
1516 for (addr = vma->vm_start; addr < vma->vm_end; 1496 for (addr = vma->vm_start; addr < vma->vm_end;
1517 addr += PAGE_SIZE) { 1497 addr += PAGE_SIZE) {
1498 bool res;
1518 struct page *page = get_dump_page(addr); 1499 struct page *page = get_dump_page(addr);
1519 if (page) { 1500 if (page) {
1520 void *kaddr = kmap(page); 1501 void *kaddr = kmap(page);
1521 *size += PAGE_SIZE; 1502 res = dump_emit(cprm, kaddr, PAGE_SIZE);
1522 if (*size > *limit)
1523 err = -EFBIG;
1524 else if (!dump_write(file, kaddr, PAGE_SIZE))
1525 err = -EIO;
1526 kunmap(page); 1503 kunmap(page);
1527 page_cache_release(page); 1504 page_cache_release(page);
1528 } else if (!dump_seek(file, PAGE_SIZE)) 1505 } else {
1529 err = -EFBIG; 1506 res = dump_skip(cprm, PAGE_SIZE);
1530 if (err) 1507 }
1531 goto out; 1508 if (!res)
1509 return false;
1532 } 1510 }
1533 } 1511#else
1534out: 1512 if (!dump_emit(cprm, (void *) vma->vm_start,
1535 return err;
1536}
1537#endif
1538
1539/*
1540 * dump the segments for a NOMMU process
1541 */
1542#ifndef CONFIG_MMU
1543static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1544 unsigned long *limit, unsigned long mm_flags)
1545{
1546 struct vm_area_struct *vma;
1547
1548 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1549 if (!maydump(vma, mm_flags))
1550 continue;
1551
1552 if ((*size += PAGE_SIZE) > *limit)
1553 return -EFBIG;
1554
1555 if (!dump_write(file, (void *) vma->vm_start,
1556 vma->vm_end - vma->vm_start)) 1513 vma->vm_end - vma->vm_start))
1557 return -EIO; 1514 return false;
1515#endif
1558 } 1516 }
1559 1517 return true;
1560 return 0;
1561} 1518}
1562#endif
1563 1519
1564static size_t elf_core_vma_data_size(unsigned long mm_flags) 1520static size_t elf_core_vma_data_size(unsigned long mm_flags)
1565{ 1521{
@@ -1585,11 +1541,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1585 int has_dumped = 0; 1541 int has_dumped = 0;
1586 mm_segment_t fs; 1542 mm_segment_t fs;
1587 int segs; 1543 int segs;
1588 size_t size = 0;
1589 int i; 1544 int i;
1590 struct vm_area_struct *vma; 1545 struct vm_area_struct *vma;
1591 struct elfhdr *elf = NULL; 1546 struct elfhdr *elf = NULL;
1592 loff_t offset = 0, dataoff, foffset; 1547 loff_t offset = 0, dataoff;
1593 int numnote; 1548 int numnote;
1594 struct memelfnote *notes = NULL; 1549 struct memelfnote *notes = NULL;
1595 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ 1550 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
@@ -1606,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1606 struct elf_shdr *shdr4extnum = NULL; 1561 struct elf_shdr *shdr4extnum = NULL;
1607 Elf_Half e_phnum; 1562 Elf_Half e_phnum;
1608 elf_addr_t e_shoff; 1563 elf_addr_t e_shoff;
1564 struct core_thread *ct;
1565 struct elf_thread_status *tmp;
1609 1566
1610 /* 1567 /*
1611 * We no longer stop all VM operations. 1568 * We no longer stop all VM operations.
@@ -1641,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1641 goto cleanup; 1598 goto cleanup;
1642#endif 1599#endif
1643 1600
1644 if (cprm->siginfo->si_signo) { 1601 for (ct = current->mm->core_state->dumper.next;
1645 struct core_thread *ct; 1602 ct; ct = ct->next) {
1646 struct elf_thread_status *tmp; 1603 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1647 1604 if (!tmp)
1648 for (ct = current->mm->core_state->dumper.next; 1605 goto cleanup;
1649 ct; ct = ct->next) {
1650 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1651 if (!tmp)
1652 goto cleanup;
1653 1606
1654 tmp->thread = ct->task; 1607 tmp->thread = ct->task;
1655 list_add(&tmp->list, &thread_list); 1608 list_add(&tmp->list, &thread_list);
1656 } 1609 }
1657 1610
1658 list_for_each(t, &thread_list) { 1611 list_for_each(t, &thread_list) {
1659 struct elf_thread_status *tmp; 1612 struct elf_thread_status *tmp;
1660 int sz; 1613 int sz;
1661 1614
1662 tmp = list_entry(t, struct elf_thread_status, list); 1615 tmp = list_entry(t, struct elf_thread_status, list);
1663 sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); 1616 sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
1664 thread_status_size += sz; 1617 thread_status_size += sz;
1665 }
1666 } 1618 }
1667 1619
1668 /* now collect the dump for the current */ 1620 /* now collect the dump for the current */
@@ -1720,7 +1672,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1720 1672
1721 offset += sizeof(*elf); /* Elf header */ 1673 offset += sizeof(*elf); /* Elf header */
1722 offset += segs * sizeof(struct elf_phdr); /* Program headers */ 1674 offset += segs * sizeof(struct elf_phdr); /* Program headers */
1723 foffset = offset;
1724 1675
1725 /* Write notes phdr entry */ 1676 /* Write notes phdr entry */
1726 { 1677 {
@@ -1755,13 +1706,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1755 1706
1756 offset = dataoff; 1707 offset = dataoff;
1757 1708
1758 size += sizeof(*elf); 1709 if (!dump_emit(cprm, elf, sizeof(*elf)))
1759 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1760 goto end_coredump; 1710 goto end_coredump;
1761 1711
1762 size += sizeof(*phdr4note); 1712 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
1763 if (size > cprm->limit
1764 || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
1765 goto end_coredump; 1713 goto end_coredump;
1766 1714
1767 /* write program headers for segments dump */ 1715 /* write program headers for segments dump */
@@ -1785,18 +1733,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1785 phdr.p_flags |= PF_X; 1733 phdr.p_flags |= PF_X;
1786 phdr.p_align = ELF_EXEC_PAGESIZE; 1734 phdr.p_align = ELF_EXEC_PAGESIZE;
1787 1735
1788 size += sizeof(phdr); 1736 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
1789 if (size > cprm->limit
1790 || !dump_write(cprm->file, &phdr, sizeof(phdr)))
1791 goto end_coredump; 1737 goto end_coredump;
1792 } 1738 }
1793 1739
1794 if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) 1740 if (!elf_core_write_extra_phdrs(cprm, offset))
1795 goto end_coredump; 1741 goto end_coredump;
1796 1742
1797 /* write out the notes section */ 1743 /* write out the notes section */
1798 for (i = 0; i < numnote; i++) 1744 for (i = 0; i < numnote; i++)
1799 if (!writenote(notes + i, cprm->file, &foffset)) 1745 if (!writenote(notes + i, cprm))
1800 goto end_coredump; 1746 goto end_coredump;
1801 1747
1802 /* write out the thread status notes section */ 1748 /* write out the thread status notes section */
@@ -1805,25 +1751,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1805 list_entry(t, struct elf_thread_status, list); 1751 list_entry(t, struct elf_thread_status, list);
1806 1752
1807 for (i = 0; i < tmp->num_notes; i++) 1753 for (i = 0; i < tmp->num_notes; i++)
1808 if (!writenote(&tmp->notes[i], cprm->file, &foffset)) 1754 if (!writenote(&tmp->notes[i], cprm))
1809 goto end_coredump; 1755 goto end_coredump;
1810 } 1756 }
1811 1757
1812 if (!dump_seek(cprm->file, dataoff - foffset)) 1758 if (!dump_skip(cprm, dataoff - cprm->written))
1813 goto end_coredump; 1759 goto end_coredump;
1814 1760
1815 if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, 1761 if (!elf_fdpic_dump_segments(cprm))
1816 cprm->mm_flags) < 0)
1817 goto end_coredump; 1762 goto end_coredump;
1818 1763
1819 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) 1764 if (!elf_core_write_extra_data(cprm))
1820 goto end_coredump; 1765 goto end_coredump;
1821 1766
1822 if (e_phnum == PN_XNUM) { 1767 if (e_phnum == PN_XNUM) {
1823 size += sizeof(*shdr4extnum); 1768 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
1824 if (size > cprm->limit
1825 || !dump_write(cprm->file, shdr4extnum,
1826 sizeof(*shdr4extnum)))
1827 goto end_coredump; 1769 goto end_coredump;
1828 } 1770 }
1829 1771
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 037a3e2b045b..f37b08cea1f7 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm)
38 /* First of all, some simple consistency checks */ 38 /* First of all, some simple consistency checks */
39 if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || 39 if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
40 (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || 40 (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
41 (!bprm->file->f_op || !bprm->file->f_op->mmap)) { 41 !bprm->file->f_op->mmap) {
42 return -ENOEXEC; 42 return -ENOEXEC;
43 } 43 }
44 44
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 00baf1419989..57e17fe6121a 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
449 _debug("discard tail %llx", oi_size); 449 _debug("discard tail %llx", oi_size);
450 newattrs.ia_valid = ATTR_SIZE; 450 newattrs.ia_valid = ATTR_SIZE;
451 newattrs.ia_size = oi_size & PAGE_MASK; 451 newattrs.ia_size = oi_size & PAGE_MASK;
452 ret = notify_change(object->backer, &newattrs); 452 ret = notify_change(object->backer, &newattrs, NULL);
453 if (ret < 0) 453 if (ret < 0)
454 goto truncate_failed; 454 goto truncate_failed;
455 } 455 }
456 456
457 newattrs.ia_valid = ATTR_SIZE; 457 newattrs.ia_valid = ATTR_SIZE;
458 newattrs.ia_size = ni_size; 458 newattrs.ia_size = ni_size;
459 ret = notify_change(object->backer, &newattrs); 459 ret = notify_change(object->backer, &newattrs, NULL);
460 460
461truncate_failed: 461truncate_failed:
462 mutex_unlock(&object->backer->d_inode->i_mutex); 462 mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a08d7fa2f7..ca65f39dc8dc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
294 if (ret < 0) { 294 if (ret < 0) {
295 cachefiles_io_error(cache, "Unlink security error"); 295 cachefiles_io_error(cache, "Unlink security error");
296 } else { 296 } else {
297 ret = vfs_unlink(dir->d_inode, rep); 297 ret = vfs_unlink(dir->d_inode, rep, NULL);
298 298
299 if (preemptive) 299 if (preemptive)
300 cachefiles_mark_object_buried(cache, rep); 300 cachefiles_mark_object_buried(cache, rep);
@@ -396,7 +396,7 @@ try_again:
396 cachefiles_io_error(cache, "Rename security error %d", ret); 396 cachefiles_io_error(cache, "Rename security error %d", ret);
397 } else { 397 } else {
398 ret = vfs_rename(dir->d_inode, rep, 398 ret = vfs_rename(dir->d_inode, rep,
399 cache->graveyard->d_inode, grave); 399 cache->graveyard->d_inode, grave, NULL);
400 if (ret != 0 && ret != -ENOMEM) 400 if (ret != 0 && ret != -ENOMEM)
401 cachefiles_io_error(cache, 401 cachefiles_io_error(cache,
402 "Rename failed with error %d", ret); 402 "Rename failed with error %d", ret);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index afc2bb691780..94b5f60076da 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,6 +368,7 @@ void cdev_put(struct cdev *p)
368 */ 368 */
369static int chrdev_open(struct inode *inode, struct file *filp) 369static int chrdev_open(struct inode *inode, struct file *filp)
370{ 370{
371 const struct file_operations *fops;
371 struct cdev *p; 372 struct cdev *p;
372 struct cdev *new = NULL; 373 struct cdev *new = NULL;
373 int ret = 0; 374 int ret = 0;
@@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp)
400 return ret; 401 return ret;
401 402
402 ret = -ENXIO; 403 ret = -ENXIO;
403 filp->f_op = fops_get(p->ops); 404 fops = fops_get(p->ops);
404 if (!filp->f_op) 405 if (!fops)
405 goto out_cdev_put; 406 goto out_cdev_put;
406 407
408 replace_fops(filp, fops);
407 if (filp->f_op->open) { 409 if (filp->f_op->open) {
408 ret = filp->f_op->open(inode, filp); 410 ret = filp->f_op->open(inode, filp);
409 if (ret) 411 if (ret)
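The open path above is the usual pattern for multiplexing character devices: look up the real fops, swap them in with replace_fops(), then forward ->open() to the new owner. A compressed sketch of that flow (error handling trimmed; demo_lookup_ops() is hypothetical):

	static int demo_mux_open(struct inode *inode, struct file *filp)
	{
		const struct file_operations *fops;

		fops = fops_get(demo_lookup_ops(inode));	/* hypothetical lookup */
		if (!fops)
			return -ENXIO;

		replace_fops(filp, fops);	/* releases the reference to the old fops */
		if (filp->f_op->open)
			return filp->f_op->open(inode, filp);
		return 0;
	}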
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 37e4a72a7d1c..9409fa10bd5c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -65,5 +65,6 @@ struct cifs_sb_info {
65 char *mountdata; /* options received at mount time or via DFS refs */ 65 char *mountdata; /* options received at mount time or via DFS refs */
66 struct backing_dev_info bdi; 66 struct backing_dev_info bdi;
67 struct delayed_work prune_tlinks; 67 struct delayed_work prune_tlinks;
68 struct rcu_head rcu;
68}; 69};
69#endif /* _CIFS_FS_SB_H */ 70#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 77fc5e181077..849f6132b327 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = {
862const struct inode_operations cifs_symlink_inode_ops = { 862const struct inode_operations cifs_symlink_inode_ops = {
863 .readlink = generic_readlink, 863 .readlink = generic_readlink,
864 .follow_link = cifs_follow_link, 864 .follow_link = cifs_follow_link,
865 .put_link = cifs_put_link, 865 .put_link = kfree_put_link,
866 .permission = cifs_permission, 866 .permission = cifs_permission,
867 /* BB add the following two eventually */ 867 /* BB add the following two eventually */
868 /* revalidate: cifs_revalidate, 868 /* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6d0b07217ac9..26a754f49ba1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
115 115
116/* Functions related to symlinks */ 116/* Functions related to symlinks */
117extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); 117extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
118extern void cifs_put_link(struct dentry *direntry,
119 struct nameidata *nd, void *);
120extern int cifs_readlink(struct dentry *direntry, char __user *buffer, 118extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
121 int buflen); 119 int buflen);
122extern int cifs_symlink(struct inode *inode, struct dentry *direntry, 120extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 62a55147400a..8813ff776ba3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3770,6 +3770,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
3770 return rc; 3770 return rc;
3771} 3771}
3772 3772
3773static void delayed_free(struct rcu_head *p)
3774{
3775 struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
3776 unload_nls(sbi->local_nls);
3777 kfree(sbi);
3778}
3779
3773void 3780void
3774cifs_umount(struct cifs_sb_info *cifs_sb) 3781cifs_umount(struct cifs_sb_info *cifs_sb)
3775{ 3782{
@@ -3794,8 +3801,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
3794 3801
3795 bdi_destroy(&cifs_sb->bdi); 3802 bdi_destroy(&cifs_sb->bdi);
3796 kfree(cifs_sb->mountdata); 3803 kfree(cifs_sb->mountdata);
3797 unload_nls(cifs_sb->local_nls); 3804 call_rcu(&cifs_sb->rcu, delayed_free);
3798 kfree(cifs_sb);
3799} 3805}
3800 3806
3801int 3807int
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7e36ceba0c7a..cc0234710ddb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,10 +621,3 @@ symlink_exit:
621 free_xid(xid); 621 free_xid(xid);
622 return rc; 622 return rc;
623} 623}
624
625void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
626{
627 char *p = nd_get_link(nd);
628 if (!IS_ERR(p))
629 kfree(p);
630}
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index cc0ea9fe5ecf..e7550cb9fb74 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations;
40int coda_open(struct inode *i, struct file *f); 40int coda_open(struct inode *i, struct file *f);
41int coda_release(struct inode *i, struct file *f); 41int coda_release(struct inode *i, struct file *f);
42int coda_permission(struct inode *inode, int mask); 42int coda_permission(struct inode *inode, int mask);
43int coda_revalidate_inode(struct dentry *); 43int coda_revalidate_inode(struct inode *);
44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); 44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
45int coda_setattr(struct dentry *, struct iattr *); 45int coda_setattr(struct dentry *, struct iattr *);
46 46
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 190effc6a6fa..5efbb5ee0adc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
387 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 387 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
388 host_file = cfi->cfi_container; 388 host_file = cfi->cfi_container;
389 389
390 if (!host_file->f_op)
391 return -ENOTDIR;
392
393 if (host_file->f_op->iterate) { 390 if (host_file->f_op->iterate) {
394 struct inode *host_inode = file_inode(host_file); 391 struct inode *host_inode = file_inode(host_file);
395 mutex_lock(&host_inode->i_mutex); 392 mutex_lock(&host_inode->i_mutex);
@@ -566,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry)
566 * cache manager Venus issues a downcall to the kernel when this 563 * cache manager Venus issues a downcall to the kernel when this
567 * happens 564 * happens
568 */ 565 */
569int coda_revalidate_inode(struct dentry *dentry) 566int coda_revalidate_inode(struct inode *inode)
570{ 567{
571 struct coda_vattr attr; 568 struct coda_vattr attr;
572 int error; 569 int error;
573 int old_mode; 570 int old_mode;
574 ino_t old_ino; 571 ino_t old_ino;
575 struct inode *inode = dentry->d_inode;
576 struct coda_inode_info *cii = ITOC(inode); 572 struct coda_inode_info *cii = ITOC(inode);
577 573
578 if (!cii->c_flags) 574 if (!cii->c_flags)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 380b798f8443..9e83b7790212 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
36 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 36 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
37 host_file = cfi->cfi_container; 37 host_file = cfi->cfi_container;
38 38
39 if (!host_file->f_op || !host_file->f_op->read) 39 if (!host_file->f_op->read)
40 return -EINVAL; 40 return -EINVAL;
41 41
42 return host_file->f_op->read(host_file, buf, count, ppos); 42 return host_file->f_op->read(host_file, buf, count, ppos);
@@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
75 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 75 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
76 host_file = cfi->cfi_container; 76 host_file = cfi->cfi_container;
77 77
78 if (!host_file->f_op || !host_file->f_op->write) 78 if (!host_file->f_op->write)
79 return -EINVAL; 79 return -EINVAL;
80 80
81 host_inode = file_inode(host_file); 81 host_inode = file_inode(host_file);
@@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
105 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 105 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
106 host_file = cfi->cfi_container; 106 host_file = cfi->cfi_container;
107 107
108 if (!host_file->f_op || !host_file->f_op->mmap) 108 if (!host_file->f_op->mmap)
109 return -ENODEV; 109 return -ENODEV;
110 110
111 coda_inode = file_inode(coda_file); 111 coda_inode = file_inode(coda_file);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 4dcc0d81a7aa..506de34a4ef3 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode)
257 257
258int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 258int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
259{ 259{
260 int err = coda_revalidate_inode(dentry); 260 int err = coda_revalidate_inode(dentry->d_inode);
261 if (!err) 261 if (!err)
262 generic_fillattr(dentry->d_inode, stat); 262 generic_fillattr(dentry->d_inode, stat);
263 return err; 263 return err;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5d19acfa7c6c..dc52e13d58e0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1583 /*FALL THROUGH*/ 1583 /*FALL THROUGH*/
1584 1584
1585 default: 1585 default:
1586 if (f.file->f_op && f.file->f_op->compat_ioctl) { 1586 if (f.file->f_op->compat_ioctl) {
1587 error = f.file->f_op->compat_ioctl(f.file, cmd, arg); 1587 error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
1588 if (error != -ENOIOCTLCMD) 1588 if (error != -ENOIOCTLCMD)
1589 goto out_fput; 1589 goto out_fput;
1590 } 1590 }
1591 1591
1592 if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) 1592 if (!f.file->f_op->unlocked_ioctl)
1593 goto do_ioctl; 1593 goto do_ioctl;
1594 break; 1594 break;
1595 } 1595 }
diff --git a/fs/coredump.c b/fs/coredump.c
index 9bdeca12ae0e..62406b6959b6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
485 return err; 485 return err;
486} 486}
487 487
488void do_coredump(siginfo_t *siginfo) 488void do_coredump(const siginfo_t *siginfo)
489{ 489{
490 struct core_state core_state; 490 struct core_state core_state;
491 struct core_name cn; 491 struct core_name cn;
@@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo)
645 */ 645 */
646 if (!uid_eq(inode->i_uid, current_fsuid())) 646 if (!uid_eq(inode->i_uid, current_fsuid()))
647 goto close_fail; 647 goto close_fail;
648 if (!cprm.file->f_op || !cprm.file->f_op->write) 648 if (!cprm.file->f_op->write)
649 goto close_fail; 649 goto close_fail;
650 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 650 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
651 goto close_fail; 651 goto close_fail;
@@ -685,40 +685,55 @@ fail:
685 * do on a core-file: use only these functions to write out all the 685 * do on a core-file: use only these functions to write out all the
686 * necessary info. 686 * necessary info.
687 */ 687 */
688int dump_write(struct file *file, const void *addr, int nr) 688int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
689{ 689{
690 return !dump_interrupted() && 690 struct file *file = cprm->file;
691 access_ok(VERIFY_READ, addr, nr) && 691 loff_t pos = file->f_pos;
692 file->f_op->write(file, addr, nr, &file->f_pos) == nr; 692 ssize_t n;
693 if (cprm->written + nr > cprm->limit)
694 return 0;
695 while (nr) {
696 if (dump_interrupted())
697 return 0;
698 n = vfs_write(file, addr, nr, &pos);
699 if (n <= 0)
700 return 0;
701 file->f_pos = pos;
702 cprm->written += n;
703 nr -= n;
704 }
705 return 1;
693} 706}
694EXPORT_SYMBOL(dump_write); 707EXPORT_SYMBOL(dump_emit);
695 708
696int dump_seek(struct file *file, loff_t off) 709int dump_skip(struct coredump_params *cprm, size_t nr)
697{ 710{
698 int ret = 1; 711 static char zeroes[PAGE_SIZE];
699 712 struct file *file = cprm->file;
700 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 713 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
714 if (cprm->written + nr > cprm->limit)
715 return 0;
701 if (dump_interrupted() || 716 if (dump_interrupted() ||
702 file->f_op->llseek(file, off, SEEK_CUR) < 0) 717 file->f_op->llseek(file, nr, SEEK_CUR) < 0)
703 return 0; 718 return 0;
719 cprm->written += nr;
720 return 1;
704 } else { 721 } else {
705 char *buf = (char *)get_zeroed_page(GFP_KERNEL); 722 while (nr > PAGE_SIZE) {
706 723 if (!dump_emit(cprm, zeroes, PAGE_SIZE))
707 if (!buf) 724 return 0;
708 return 0; 725 nr -= PAGE_SIZE;
709 while (off > 0) {
710 unsigned long n = off;
711
712 if (n > PAGE_SIZE)
713 n = PAGE_SIZE;
714 if (!dump_write(file, buf, n)) {
715 ret = 0;
716 break;
717 }
718 off -= n;
719 } 726 }
720 free_page((unsigned long)buf); 727 return dump_emit(cprm, zeroes, nr);
721 } 728 }
722 return ret;
723} 729}
724EXPORT_SYMBOL(dump_seek); 730EXPORT_SYMBOL(dump_skip);
731
732int dump_align(struct coredump_params *cprm, int align)
733{
734 unsigned mod = cprm->written & (align - 1);
735 if (align & (align - 1))
736 return -EINVAL;
737 return mod ? dump_skip(cprm, align - mod) : 0;
738}
739EXPORT_SYMBOL(dump_align);
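The net effect for every core-dump writer is that the (size, limit, foffset) triple each binfmt used to thread around collapses into cprm->written, which dump_emit() and dump_skip() maintain themselves. A before/after sketch of one write (illustrative, not a literal hunk from this patch):

	/* old style: caller tracks size against cprm->limit by hand */
	size += sizeof(hdr);
	if (size > cprm->limit || !dump_write(cprm->file, &hdr, sizeof(hdr)))
		goto end_coredump;

	/* new style: dump_emit() checks the limit and advances cprm->written */
	if (!dump_emit(cprm, &hdr, sizeof(hdr)))
		goto end_coredump;

	/* padding to a known offset no longer needs a separate foffset either */
	if (!dump_skip(cprm, dataoff - cprm->written))
		goto end_coredump;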
diff --git a/fs/dcache.c b/fs/dcache.c
index ae6ebb88ceff..1f24cd684c51 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
343 __releases(dentry->d_inode->i_lock) 343 __releases(dentry->d_inode->i_lock)
344{ 344{
345 struct inode *inode = dentry->d_inode; 345 struct inode *inode = dentry->d_inode;
346 __d_clear_type(dentry);
346 dentry->d_inode = NULL; 347 dentry->d_inode = NULL;
347 hlist_del_init(&dentry->d_alias); 348 hlist_del_init(&dentry->d_alias);
348 dentry_rcuwalk_barrier(dentry); 349 dentry_rcuwalk_barrier(dentry);
@@ -483,27 +484,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
483 return parent; 484 return parent;
484} 485}
485 486
486/*
487 * Unhash a dentry without inserting an RCU walk barrier or checking that
488 * dentry->d_lock is locked. The caller must take care of that, if
489 * appropriate.
490 */
491static void __d_shrink(struct dentry *dentry)
492{
493 if (!d_unhashed(dentry)) {
494 struct hlist_bl_head *b;
495 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
496 b = &dentry->d_sb->s_anon;
497 else
498 b = d_hash(dentry->d_parent, dentry->d_name.hash);
499
500 hlist_bl_lock(b);
501 __hlist_bl_del(&dentry->d_hash);
502 dentry->d_hash.pprev = NULL;
503 hlist_bl_unlock(b);
504 }
505}
506
507/** 487/**
508 * d_drop - drop a dentry 488 * d_drop - drop a dentry
509 * @dentry: dentry to drop 489 * @dentry: dentry to drop
@@ -522,7 +502,21 @@ static void __d_shrink(struct dentry *dentry)
522void __d_drop(struct dentry *dentry) 502void __d_drop(struct dentry *dentry)
523{ 503{
524 if (!d_unhashed(dentry)) { 504 if (!d_unhashed(dentry)) {
525 __d_shrink(dentry); 505 struct hlist_bl_head *b;
506 /*
507 * Hashed dentries are normally on the dentry hashtable,
508 * with the exception of those newly allocated by
509 * d_obtain_alias, which are always IS_ROOT:
510 */
511 if (unlikely(IS_ROOT(dentry)))
512 b = &dentry->d_sb->s_anon;
513 else
514 b = d_hash(dentry->d_parent, dentry->d_name.hash);
515
516 hlist_bl_lock(b);
517 __hlist_bl_del(&dentry->d_hash);
518 dentry->d_hash.pprev = NULL;
519 hlist_bl_unlock(b);
526 dentry_rcuwalk_barrier(dentry); 520 dentry_rcuwalk_barrier(dentry);
527 } 521 }
528} 522}
@@ -1076,116 +1070,6 @@ void shrink_dcache_sb(struct super_block *sb)
1076EXPORT_SYMBOL(shrink_dcache_sb); 1070EXPORT_SYMBOL(shrink_dcache_sb);
1077 1071
1078/* 1072/*
1079 * destroy a single subtree of dentries for unmount
1080 * - see the comments on shrink_dcache_for_umount() for a description of the
1081 * locking
1082 */
1083static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
1084{
1085 struct dentry *parent;
1086
1087 BUG_ON(!IS_ROOT(dentry));
1088
1089 for (;;) {
1090 /* descend to the first leaf in the current subtree */
1091 while (!list_empty(&dentry->d_subdirs))
1092 dentry = list_entry(dentry->d_subdirs.next,
1093 struct dentry, d_u.d_child);
1094
1095 /* consume the dentries from this leaf up through its parents
1096 * until we find one with children or run out altogether */
1097 do {
1098 struct inode *inode;
1099
1100 /*
1101 * inform the fs that this dentry is about to be
1102 * unhashed and destroyed.
1103 */
1104 if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
1105 !d_unhashed(dentry))
1106 dentry->d_op->d_prune(dentry);
1107
1108 dentry_lru_del(dentry);
1109 __d_shrink(dentry);
1110
1111 if (dentry->d_lockref.count != 0) {
1112 printk(KERN_ERR
1113 "BUG: Dentry %p{i=%lx,n=%s}"
1114 " still in use (%d)"
1115 " [unmount of %s %s]\n",
1116 dentry,
1117 dentry->d_inode ?
1118 dentry->d_inode->i_ino : 0UL,
1119 dentry->d_name.name,
1120 dentry->d_lockref.count,
1121 dentry->d_sb->s_type->name,
1122 dentry->d_sb->s_id);
1123 BUG();
1124 }
1125
1126 if (IS_ROOT(dentry)) {
1127 parent = NULL;
1128 list_del(&dentry->d_u.d_child);
1129 } else {
1130 parent = dentry->d_parent;
1131 parent->d_lockref.count--;
1132 list_del(&dentry->d_u.d_child);
1133 }
1134
1135 inode = dentry->d_inode;
1136 if (inode) {
1137 dentry->d_inode = NULL;
1138 hlist_del_init(&dentry->d_alias);
1139 if (dentry->d_op && dentry->d_op->d_iput)
1140 dentry->d_op->d_iput(dentry, inode);
1141 else
1142 iput(inode);
1143 }
1144
1145 d_free(dentry);
1146
1147 /* finished when we fall off the top of the tree,
1148 * otherwise we ascend to the parent and move to the
1149 * next sibling if there is one */
1150 if (!parent)
1151 return;
1152 dentry = parent;
1153 } while (list_empty(&dentry->d_subdirs));
1154
1155 dentry = list_entry(dentry->d_subdirs.next,
1156 struct dentry, d_u.d_child);
1157 }
1158}
1159
1160/*
1161 * destroy the dentries attached to a superblock on unmounting
1162 * - we don't need to use dentry->d_lock because:
1163 * - the superblock is detached from all mountings and open files, so the
1164 * dentry trees will not be rearranged by the VFS
1165 * - s_umount is write-locked, so the memory pressure shrinker will ignore
1166 * any dentries belonging to this superblock that it comes across
1167 * - the filesystem itself is no longer permitted to rearrange the dentries
1168 * in this superblock
1169 */
1170void shrink_dcache_for_umount(struct super_block *sb)
1171{
1172 struct dentry *dentry;
1173
1174 if (down_read_trylock(&sb->s_umount))
1175 BUG();
1176
1177 dentry = sb->s_root;
1178 sb->s_root = NULL;
1179 dentry->d_lockref.count--;
1180 shrink_dcache_for_umount_subtree(dentry);
1181
1182 while (!hlist_bl_empty(&sb->s_anon)) {
1183 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1184 shrink_dcache_for_umount_subtree(dentry);
1185 }
1186}
1187
1188/*
1189 * This tries to ascend one level of parenthood, but 1073 * This tries to ascend one level of parenthood, but
1190 * we can race with renaming, so we need to re-check 1074 * we can race with renaming, so we need to re-check
1191 * the parenthood after dropping the lock and check 1075 * the parenthood after dropping the lock and check
@@ -1478,6 +1362,91 @@ void shrink_dcache_parent(struct dentry *parent)
1478} 1362}
1479EXPORT_SYMBOL(shrink_dcache_parent); 1363EXPORT_SYMBOL(shrink_dcache_parent);
1480 1364
1365static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
1366{
1367 struct select_data *data = _data;
1368 enum d_walk_ret ret = D_WALK_CONTINUE;
1369
1370 if (dentry->d_lockref.count) {
1371 dentry_lru_del(dentry);
1372 if (likely(!list_empty(&dentry->d_subdirs)))
1373 goto out;
1374 if (dentry == data->start && dentry->d_lockref.count == 1)
1375 goto out;
1376 printk(KERN_ERR
1377 "BUG: Dentry %p{i=%lx,n=%s}"
1378 " still in use (%d)"
1379 " [unmount of %s %s]\n",
1380 dentry,
1381 dentry->d_inode ?
1382 dentry->d_inode->i_ino : 0UL,
1383 dentry->d_name.name,
1384 dentry->d_lockref.count,
1385 dentry->d_sb->s_type->name,
1386 dentry->d_sb->s_id);
1387 BUG();
1388 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1389 /*
1390 * We can't use d_lru_shrink_move() because we
1391 * need to get the global LRU lock and do the
1392 * LRU accounting.
1393 */
1394 if (dentry->d_flags & DCACHE_LRU_LIST)
1395 d_lru_del(dentry);
1396 d_shrink_add(dentry, &data->dispose);
1397 data->found++;
1398 ret = D_WALK_NORETRY;
1399 }
1400out:
1401 if (data->found && need_resched())
1402 ret = D_WALK_QUIT;
1403 return ret;
1404}
1405
1406/*
1407 * destroy the dentries attached to a superblock on unmounting
1408 */
1409void shrink_dcache_for_umount(struct super_block *sb)
1410{
1411 struct dentry *dentry;
1412
1413 if (down_read_trylock(&sb->s_umount))
1414 BUG();
1415
1416 dentry = sb->s_root;
1417 sb->s_root = NULL;
1418 for (;;) {
1419 struct select_data data;
1420
1421 INIT_LIST_HEAD(&data.dispose);
1422 data.start = dentry;
1423 data.found = 0;
1424
1425 d_walk(dentry, &data, umount_collect, NULL);
1426 if (!data.found)
1427 break;
1428
1429 shrink_dentry_list(&data.dispose);
1430 cond_resched();
1431 }
1432 d_drop(dentry);
1433 dput(dentry);
1434
1435 while (!hlist_bl_empty(&sb->s_anon)) {
1436 struct select_data data;
1437 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1438
1439 INIT_LIST_HEAD(&data.dispose);
1440 data.start = NULL;
1441 data.found = 0;
1442
1443 d_walk(dentry, &data, umount_collect, NULL);
1444 if (data.found)
1445 shrink_dentry_list(&data.dispose);
1446 cond_resched();
1447 }
1448}
1449
1481static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) 1450static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
1482{ 1451{
1483 struct select_data *data = _data; 1452 struct select_data *data = _data;
@@ -1638,12 +1607,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1638} 1607}
1639EXPORT_SYMBOL(d_alloc); 1608EXPORT_SYMBOL(d_alloc);
1640 1609
1610/**
1611 * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1612 * @sb: the superblock
1613 * @name: qstr of the name
1614 *
1615 * For a filesystem that just pins its dentries in memory and never
1616 * performs lookups at all, return an unhashed IS_ROOT dentry.
1617 */
1641struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) 1618struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1642{ 1619{
1643 struct dentry *dentry = __d_alloc(sb, name); 1620 return __d_alloc(sb, name);
1644 if (dentry)
1645 dentry->d_flags |= DCACHE_DISCONNECTED;
1646 return dentry;
1647} 1621}
1648EXPORT_SYMBOL(d_alloc_pseudo); 1622EXPORT_SYMBOL(d_alloc_pseudo);
1649 1623
@@ -1685,14 +1659,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1685} 1659}
1686EXPORT_SYMBOL(d_set_d_op); 1660EXPORT_SYMBOL(d_set_d_op);
1687 1661
1662static unsigned d_flags_for_inode(struct inode *inode)
1663{
1664 unsigned add_flags = DCACHE_FILE_TYPE;
1665
1666 if (!inode)
1667 return DCACHE_MISS_TYPE;
1668
1669 if (S_ISDIR(inode->i_mode)) {
1670 add_flags = DCACHE_DIRECTORY_TYPE;
1671 if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1672 if (unlikely(!inode->i_op->lookup))
1673 add_flags = DCACHE_AUTODIR_TYPE;
1674 else
1675 inode->i_opflags |= IOP_LOOKUP;
1676 }
1677 } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1678 if (unlikely(inode->i_op->follow_link))
1679 add_flags = DCACHE_SYMLINK_TYPE;
1680 else
1681 inode->i_opflags |= IOP_NOFOLLOW;
1682 }
1683
1684 if (unlikely(IS_AUTOMOUNT(inode)))
1685 add_flags |= DCACHE_NEED_AUTOMOUNT;
1686 return add_flags;
1687}
1688
1688static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1689static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1689{ 1690{
1691 unsigned add_flags = d_flags_for_inode(inode);
1692
1690 spin_lock(&dentry->d_lock); 1693 spin_lock(&dentry->d_lock);
1691 if (inode) { 1694 dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
1692 if (unlikely(IS_AUTOMOUNT(inode))) 1695 dentry->d_flags |= add_flags;
1693 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; 1696 if (inode)
1694 hlist_add_head(&dentry->d_alias, &inode->i_dentry); 1697 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1695 }
1696 dentry->d_inode = inode; 1698 dentry->d_inode = inode;
1697 dentry_rcuwalk_barrier(dentry); 1699 dentry_rcuwalk_barrier(dentry);
1698 spin_unlock(&dentry->d_lock); 1700 spin_unlock(&dentry->d_lock);
@@ -1801,6 +1803,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1801 1803
1802EXPORT_SYMBOL(d_instantiate_unique); 1804EXPORT_SYMBOL(d_instantiate_unique);
1803 1805
1806/**
1807 * d_instantiate_no_diralias - instantiate a non-aliased dentry
1808 * @entry: dentry to complete
1809 * @inode: inode to attach to this dentry
1810 *
1811 * Fill in inode information in the entry. If a directory alias is found, then
1812 * return an error (and drop inode). Together with d_materialise_unique() this
1813 * guarantees that a directory inode may never have more than one alias.
1814 */
1815int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
1816{
1817 BUG_ON(!hlist_unhashed(&entry->d_alias));
1818
1819 spin_lock(&inode->i_lock);
1820 if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
1821 spin_unlock(&inode->i_lock);
1822 iput(inode);
1823 return -EBUSY;
1824 }
1825 __d_instantiate(entry, inode);
1826 spin_unlock(&inode->i_lock);
1827 security_d_instantiate(entry, inode);
1828
1829 return 0;
1830}
1831EXPORT_SYMBOL(d_instantiate_no_diralias);
1832
1804struct dentry *d_make_root(struct inode *root_inode) 1833struct dentry *d_make_root(struct inode *root_inode)
1805{ 1834{
1806 struct dentry *res = NULL; 1835 struct dentry *res = NULL;
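A hedged sketch of how a caller might use the new d_instantiate_no_diralias() added above -- example_mknod and its arguments are illustrative, not part of this commit. The point worth calling out from the kerneldoc and the code is that the helper consumes the inode reference on the -EBUSY path (it calls iput() itself), so a caller must not release the inode again on that error:

        /*
         * Hypothetical create path in a filesystem that must never allow a
         * second alias for a directory inode: on -EBUSY the helper has
         * already dropped the inode reference.
         */
        static int example_mknod(struct inode *dir, struct dentry *dentry,
                                 struct inode *inode)
        {
                int err = d_instantiate_no_diralias(dentry, inode);

                if (err)
                        return err;     /* inode already released by the helper */
                /* ... continue with fs-specific setup ... */
                return 0;
        }
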
@@ -1870,6 +1899,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1870 static const struct qstr anonstring = QSTR_INIT("/", 1); 1899 static const struct qstr anonstring = QSTR_INIT("/", 1);
1871 struct dentry *tmp; 1900 struct dentry *tmp;
1872 struct dentry *res; 1901 struct dentry *res;
1902 unsigned add_flags;
1873 1903
1874 if (!inode) 1904 if (!inode)
1875 return ERR_PTR(-ESTALE); 1905 return ERR_PTR(-ESTALE);
@@ -1895,9 +1925,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1895 } 1925 }
1896 1926
1897 /* attach a disconnected dentry */ 1927 /* attach a disconnected dentry */
1928 add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
1929
1898 spin_lock(&tmp->d_lock); 1930 spin_lock(&tmp->d_lock);
1899 tmp->d_inode = inode; 1931 tmp->d_inode = inode;
1900 tmp->d_flags |= DCACHE_DISCONNECTED; 1932 tmp->d_flags |= add_flags;
1901 hlist_add_head(&tmp->d_alias, &inode->i_dentry); 1933 hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1902 hlist_bl_lock(&tmp->d_sb->s_anon); 1934 hlist_bl_lock(&tmp->d_sb->s_anon);
1903 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1935 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2725,7 +2757,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2725 spin_unlock(&dentry->d_lock); 2757 spin_unlock(&dentry->d_lock);
2726 2758
2727 /* anon->d_lock still locked, returns locked */ 2759 /* anon->d_lock still locked, returns locked */
2728 anon->d_flags &= ~DCACHE_DISCONNECTED;
2729} 2760}
2730 2761
2731/** 2762/**
@@ -2885,23 +2916,28 @@ static int prepend_path(const struct path *path,
2885 struct vfsmount *vfsmnt = path->mnt; 2916 struct vfsmount *vfsmnt = path->mnt;
2886 struct mount *mnt = real_mount(vfsmnt); 2917 struct mount *mnt = real_mount(vfsmnt);
2887 int error = 0; 2918 int error = 0;
2888 unsigned seq = 0; 2919 unsigned seq, m_seq = 0;
2889 char *bptr; 2920 char *bptr;
2890 int blen; 2921 int blen;
2891 2922
2892 rcu_read_lock(); 2923 rcu_read_lock();
2924restart_mnt:
2925 read_seqbegin_or_lock(&mount_lock, &m_seq);
2926 seq = 0;
2893restart: 2927restart:
2894 bptr = *buffer; 2928 bptr = *buffer;
2895 blen = *buflen; 2929 blen = *buflen;
2930 error = 0;
2896 read_seqbegin_or_lock(&rename_lock, &seq); 2931 read_seqbegin_or_lock(&rename_lock, &seq);
2897 while (dentry != root->dentry || vfsmnt != root->mnt) { 2932 while (dentry != root->dentry || vfsmnt != root->mnt) {
2898 struct dentry * parent; 2933 struct dentry * parent;
2899 2934
2900 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2935 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2936 struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
2901 /* Global root? */ 2937 /* Global root? */
2902 if (mnt_has_parent(mnt)) { 2938 if (mnt != parent) {
2903 dentry = mnt->mnt_mountpoint; 2939 dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
2904 mnt = mnt->mnt_parent; 2940 mnt = parent;
2905 vfsmnt = &mnt->mnt; 2941 vfsmnt = &mnt->mnt;
2906 continue; 2942 continue;
2907 } 2943 }
@@ -2935,6 +2971,11 @@ restart:
2935 goto restart; 2971 goto restart;
2936 } 2972 }
2937 done_seqretry(&rename_lock, seq); 2973 done_seqretry(&rename_lock, seq);
2974 if (need_seqretry(&mount_lock, m_seq)) {
2975 m_seq = 1;
2976 goto restart_mnt;
2977 }
2978 done_seqretry(&mount_lock, m_seq);
2938 2979
2939 if (error >= 0 && bptr == *buffer) { 2980 if (error >= 0 && bptr == *buffer) {
2940 if (--blen < 0) 2981 if (--blen < 0)
@@ -2971,9 +3012,7 @@ char *__d_path(const struct path *path,
2971 int error; 3012 int error;
2972 3013
2973 prepend(&res, &buflen, "\0", 1); 3014 prepend(&res, &buflen, "\0", 1);
2974 br_read_lock(&vfsmount_lock);
2975 error = prepend_path(path, root, &res, &buflen); 3015 error = prepend_path(path, root, &res, &buflen);
2976 br_read_unlock(&vfsmount_lock);
2977 3016
2978 if (error < 0) 3017 if (error < 0)
2979 return ERR_PTR(error); 3018 return ERR_PTR(error);
@@ -2990,9 +3029,7 @@ char *d_absolute_path(const struct path *path,
2990 int error; 3029 int error;
2991 3030
2992 prepend(&res, &buflen, "\0", 1); 3031 prepend(&res, &buflen, "\0", 1);
2993 br_read_lock(&vfsmount_lock);
2994 error = prepend_path(path, &root, &res, &buflen); 3032 error = prepend_path(path, &root, &res, &buflen);
2995 br_read_unlock(&vfsmount_lock);
2996 3033
2997 if (error > 1) 3034 if (error > 1)
2998 error = -EINVAL; 3035 error = -EINVAL;
@@ -3067,9 +3104,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
3067 3104
3068 rcu_read_lock(); 3105 rcu_read_lock();
3069 get_fs_root_rcu(current->fs, &root); 3106 get_fs_root_rcu(current->fs, &root);
3070 br_read_lock(&vfsmount_lock);
3071 error = path_with_deleted(path, &root, &res, &buflen); 3107 error = path_with_deleted(path, &root, &res, &buflen);
3072 br_read_unlock(&vfsmount_lock);
3073 rcu_read_unlock(); 3108 rcu_read_unlock();
3074 3109
3075 if (error < 0) 3110 if (error < 0)
@@ -3224,7 +3259,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3224 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); 3259 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
3225 3260
3226 error = -ENOENT; 3261 error = -ENOENT;
3227 br_read_lock(&vfsmount_lock);
3228 if (!d_unlinked(pwd.dentry)) { 3262 if (!d_unlinked(pwd.dentry)) {
3229 unsigned long len; 3263 unsigned long len;
3230 char *cwd = page + PATH_MAX; 3264 char *cwd = page + PATH_MAX;
@@ -3232,7 +3266,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3232 3266
3233 prepend(&cwd, &buflen, "\0", 1); 3267 prepend(&cwd, &buflen, "\0", 1);
3234 error = prepend_path(&pwd, &root, &cwd, &buflen); 3268 error = prepend_path(&pwd, &root, &cwd, &buflen);
3235 br_read_unlock(&vfsmount_lock);
3236 rcu_read_unlock(); 3269 rcu_read_unlock();
3237 3270
3238 if (error < 0) 3271 if (error < 0)
@@ -3253,7 +3286,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3253 error = -EFAULT; 3286 error = -EFAULT;
3254 } 3287 }
3255 } else { 3288 } else {
3256 br_read_unlock(&vfsmount_lock);
3257 rcu_read_unlock(); 3289 rcu_read_unlock();
3258 } 3290 }
3259 3291
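The dcache.c hunks above drop the global vfsmount_lock read side (br_read_lock/br_read_unlock) from the path-building code and instead make prepend_path() retry under mount_lock: the first walk is lockless under a sequence count, and only if a mount-table writer raced with it does a second pass take the lock. A minimal kernel-style sketch of that read_seqbegin_or_lock()/need_seqretry()/done_seqretry() pattern; example_lock and example_value are illustrative stand-ins, not names from this commit:

        #include <linux/seqlock.h>

        static DEFINE_SEQLOCK(example_lock);
        static int example_value;

        static int read_example(void)
        {
                int seq = 0;    /* even: start with a lockless pass */
                int val;

        retry:
                read_seqbegin_or_lock(&example_lock, &seq);
                val = example_value;
                if (need_seqretry(&example_lock, seq)) {
                        seq = 1;        /* odd: second pass takes the lock */
                        goto retry;
                }
                done_seqretry(&example_lock, seq);
                return val;
        }

This mirrors the restart_mnt loop added to prepend_path(), where the retry also resets the inner rename_lock sequence and the output buffer before walking the mounts again.
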
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 88556dc0458e..d5abafd56a6d 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -706,9 +706,7 @@ static int lkb_idr_is_local(int id, void *p, void *data)
706{ 706{
707 struct dlm_lkb *lkb = p; 707 struct dlm_lkb *lkb = p;
708 708
709 if (!lkb->lkb_nodeid) 709 return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
710 return 1;
711 return 0;
712} 710}
713 711
714static int lkb_idr_is_any(int id, void *p, void *data) 712static int lkb_idr_is_any(int id, void *p, void *data)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index bf12ba5dd223..4000f6b3a750 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,15 +44,15 @@
44 */ 44 */
45static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) 45static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
48 int rc = 1; 48 int rc;
49
50 if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
51 return 1;
49 52
50 if (flags & LOOKUP_RCU) 53 if (flags & LOOKUP_RCU)
51 return -ECHILD; 54 return -ECHILD;
52 55
53 lower_dentry = ecryptfs_dentry_to_lower(dentry);
54 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
55 goto out;
56 rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); 56 rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
57 if (dentry->d_inode) { 57 if (dentry->d_inode) {
58 struct inode *lower_inode = 58 struct inode *lower_inode =
@@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
60 60
61 fsstack_copy_attr_all(dentry->d_inode, lower_inode); 61 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
62 } 62 }
63out:
64 return rc; 63 return rc;
65} 64}
66 65
67struct kmem_cache *ecryptfs_dentry_info_cache; 66struct kmem_cache *ecryptfs_dentry_info_cache;
68 67
68static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
69{
70 kmem_cache_free(ecryptfs_dentry_info_cache,
71 container_of(head, struct ecryptfs_dentry_info, rcu));
72}
73
69/** 74/**
70 * ecryptfs_d_release 75 * ecryptfs_d_release
71 * @dentry: The ecryptfs dentry 76 * @dentry: The ecryptfs dentry
@@ -74,15 +79,11 @@ struct kmem_cache *ecryptfs_dentry_info_cache;
74 */ 79 */
75static void ecryptfs_d_release(struct dentry *dentry) 80static void ecryptfs_d_release(struct dentry *dentry)
76{ 81{
77 if (ecryptfs_dentry_to_private(dentry)) { 82 struct ecryptfs_dentry_info *p = dentry->d_fsdata;
78 if (ecryptfs_dentry_to_lower(dentry)) { 83 if (p) {
79 dput(ecryptfs_dentry_to_lower(dentry)); 84 path_put(&p->lower_path);
80 mntput(ecryptfs_dentry_to_lower_mnt(dentry)); 85 call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
81 }
82 kmem_cache_free(ecryptfs_dentry_info_cache,
83 ecryptfs_dentry_to_private(dentry));
84 } 86 }
85 return;
86} 87}
87 88
88const struct dentry_operations ecryptfs_dops = { 89const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index df19d34a033b..90d1882b306f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -261,7 +261,10 @@ struct ecryptfs_inode_info {
261 * vfsmount too. */ 261 * vfsmount too. */
262struct ecryptfs_dentry_info { 262struct ecryptfs_dentry_info {
263 struct path lower_path; 263 struct path lower_path;
264 struct ecryptfs_crypt_stat *crypt_stat; 264 union {
265 struct ecryptfs_crypt_stat *crypt_stat;
266 struct rcu_head rcu;
267 };
265}; 268};
266 269
267/** 270/**
@@ -512,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
512 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry; 515 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
513} 516}
514 517
515static inline void
516ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
517{
518 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
519 lower_dentry;
520}
521
522static inline struct vfsmount * 518static inline struct vfsmount *
523ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) 519ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
524{ 520{
@@ -531,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
531 return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; 527 return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
532} 528}
533 529
534static inline void
535ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
536{
537 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
538 lower_mnt;
539}
540
541#define ecryptfs_printk(type, fmt, arg...) \ 530#define ecryptfs_printk(type, fmt, arg...) \
542 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); 531 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
543__printf(1, 2) 532__printf(1, 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 992cf95830b5..2229a74aeeed 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
271{ 271{
272 struct file *lower_file = ecryptfs_file_to_lower(file); 272 struct file *lower_file = ecryptfs_file_to_lower(file);
273 273
274 if (lower_file->f_op && lower_file->f_op->flush) { 274 if (lower_file->f_op->flush) {
275 filemap_write_and_wait(file->f_mapping); 275 filemap_write_and_wait(file->f_mapping);
276 return lower_file->f_op->flush(lower_file, td); 276 return lower_file->f_op->flush(lower_file, td);
277 } 277 }
@@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
305 struct file *lower_file = NULL; 305 struct file *lower_file = NULL;
306 306
307 lower_file = ecryptfs_file_to_lower(file); 307 lower_file = ecryptfs_file_to_lower(file);
308 if (lower_file->f_op && lower_file->f_op->fasync) 308 if (lower_file->f_op->fasync)
309 rc = lower_file->f_op->fasync(fd, lower_file, flag); 309 rc = lower_file->f_op->fasync(fd, lower_file, flag);
310 return rc; 310 return rc;
311} 311}
@@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
318 318
319 if (ecryptfs_file_to_private(file)) 319 if (ecryptfs_file_to_private(file))
320 lower_file = ecryptfs_file_to_lower(file); 320 lower_file = ecryptfs_file_to_lower(file);
321 if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) 321 if (lower_file->f_op->unlocked_ioctl)
322 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); 322 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
323 return rc; 323 return rc;
324} 324}
@@ -332,7 +332,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
332 332
333 if (ecryptfs_file_to_private(file)) 333 if (ecryptfs_file_to_private(file))
334 lower_file = ecryptfs_file_to_lower(file); 334 lower_file = ecryptfs_file_to_lower(file);
335 if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) 335 if (lower_file->f_op && lower_file->f_op->compat_ioctl)
336 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); 336 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
337 return rc; 337 return rc;
338} 338}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 67e9b6339691..c36c44824471 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
153 153
154 dget(lower_dentry); 154 dget(lower_dentry);
155 lower_dir_dentry = lock_parent(lower_dentry); 155 lower_dir_dentry = lock_parent(lower_dentry);
156 rc = vfs_unlink(lower_dir_inode, lower_dentry); 156 rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
157 if (rc) { 157 if (rc) {
158 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); 158 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
159 goto out_unlock; 159 goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
208 inode = __ecryptfs_get_inode(lower_dentry->d_inode, 208 inode = __ecryptfs_get_inode(lower_dentry->d_inode,
209 directory_inode->i_sb); 209 directory_inode->i_sb);
210 if (IS_ERR(inode)) { 210 if (IS_ERR(inode)) {
211 vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); 211 vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
212 goto out_lock; 212 goto out_lock;
213 } 213 }
214 fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); 214 fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
@@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
361 BUG_ON(!d_count(lower_dentry)); 361 BUG_ON(!d_count(lower_dentry));
362 362
363 ecryptfs_set_dentry_private(dentry, dentry_info); 363 ecryptfs_set_dentry_private(dentry, dentry_info);
364 ecryptfs_set_dentry_lower(dentry, lower_dentry); 364 dentry_info->lower_path.mnt = lower_mnt;
365 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); 365 dentry_info->lower_path.dentry = lower_dentry;
366 366
367 if (!lower_dentry->d_inode) { 367 if (!lower_dentry->d_inode) {
368 /* We want to add because we couldn't find in lower */ 368 /* We want to add because we couldn't find in lower */
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
475 dget(lower_new_dentry); 475 dget(lower_new_dentry);
476 lower_dir_dentry = lock_parent(lower_new_dentry); 476 lower_dir_dentry = lock_parent(lower_new_dentry);
477 rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, 477 rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
478 lower_new_dentry); 478 lower_new_dentry, NULL);
479 if (rc || !lower_new_dentry->d_inode) 479 if (rc || !lower_new_dentry->d_inode)
480 goto out_lock; 480 goto out_lock;
481 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); 481 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
640 goto out_lock; 640 goto out_lock;
641 } 641 }
642 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, 642 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
643 lower_new_dir_dentry->d_inode, lower_new_dentry); 643 lower_new_dir_dentry->d_inode, lower_new_dentry,
644 NULL);
644 if (rc) 645 if (rc)
645 goto out_lock; 646 goto out_lock;
646 if (target_inode) 647 if (target_inode)
@@ -703,16 +704,6 @@ out:
703 return NULL; 704 return NULL;
704} 705}
705 706
706static void
707ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
708{
709 char *buf = nd_get_link(nd);
710 if (!IS_ERR(buf)) {
711 /* Free the char* */
712 kfree(buf);
713 }
714}
715
716/** 707/**
717 * upper_size_to_lower_size 708 * upper_size_to_lower_size
718 * @crypt_stat: Crypt_stat associated with file 709 * @crypt_stat: Crypt_stat associated with file
@@ -891,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
891 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 882 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
892 883
893 mutex_lock(&lower_dentry->d_inode->i_mutex); 884 mutex_lock(&lower_dentry->d_inode->i_mutex);
894 rc = notify_change(lower_dentry, &lower_ia); 885 rc = notify_change(lower_dentry, &lower_ia, NULL);
895 mutex_unlock(&lower_dentry->d_inode->i_mutex); 886 mutex_unlock(&lower_dentry->d_inode->i_mutex);
896 } 887 }
897 return rc; 888 return rc;
@@ -992,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
992 lower_ia.ia_valid &= ~ATTR_MODE; 983 lower_ia.ia_valid &= ~ATTR_MODE;
993 984
994 mutex_lock(&lower_dentry->d_inode->i_mutex); 985 mutex_lock(&lower_dentry->d_inode->i_mutex);
995 rc = notify_change(lower_dentry, &lower_ia); 986 rc = notify_change(lower_dentry, &lower_ia, NULL);
996 mutex_unlock(&lower_dentry->d_inode->i_mutex); 987 mutex_unlock(&lower_dentry->d_inode->i_mutex);
997out: 988out:
998 fsstack_copy_attr_all(inode, lower_inode); 989 fsstack_copy_attr_all(inode, lower_inode);
@@ -1121,7 +1112,7 @@ out:
1121const struct inode_operations ecryptfs_symlink_iops = { 1112const struct inode_operations ecryptfs_symlink_iops = {
1122 .readlink = generic_readlink, 1113 .readlink = generic_readlink,
1123 .follow_link = ecryptfs_follow_link, 1114 .follow_link = ecryptfs_follow_link,
1124 .put_link = ecryptfs_put_link, 1115 .put_link = kfree_put_link,
1125 .permission = ecryptfs_permission, 1116 .permission = ecryptfs_permission,
1126 .setattr = ecryptfs_setattr, 1117 .setattr = ecryptfs_setattr,
1127 .getattr = ecryptfs_getattr_link, 1118 .getattr = ecryptfs_getattr_link,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index eb1c5979ecaf..1b119d3bf924 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
585 585
586 /* ->kill_sb() will take care of root_info */ 586 /* ->kill_sb() will take care of root_info */
587 ecryptfs_set_dentry_private(s->s_root, root_info); 587 ecryptfs_set_dentry_private(s->s_root, root_info);
588 ecryptfs_set_dentry_lower(s->s_root, path.dentry); 588 root_info->lower_path = path;
589 ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt);
590 589
591 s->s_flags |= MS_ACTIVE; 590 s->s_flags |= MS_ACTIVE;
592 return dget(s->s_root); 591 return dget(s->s_root);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f7fe7e3ce664..79b65c3b9e87 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1848,7 +1848,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1848 1848
1849 /* The target file descriptor must support poll */ 1849 /* The target file descriptor must support poll */
1850 error = -EPERM; 1850 error = -EPERM;
1851 if (!tf.file->f_op || !tf.file->f_op->poll) 1851 if (!tf.file->f_op->poll)
1852 goto error_tgt_fput; 1852 goto error_tgt_fput;
1853 1853
1854 /* Check if EPOLLWAKEUP is allowed */ 1854 /* Check if EPOLLWAKEUP is allowed */
diff --git a/fs/exec.c b/fs/exec.c
index 12120620f040..977319fd77f3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
106 */ 106 */
107SYSCALL_DEFINE1(uselib, const char __user *, library) 107SYSCALL_DEFINE1(uselib, const char __user *, library)
108{ 108{
109 struct linux_binfmt *fmt;
109 struct file *file; 110 struct file *file;
110 struct filename *tmp = getname(library); 111 struct filename *tmp = getname(library);
111 int error = PTR_ERR(tmp); 112 int error = PTR_ERR(tmp);
@@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
136 fsnotify_open(file); 137 fsnotify_open(file);
137 138
138 error = -ENOEXEC; 139 error = -ENOEXEC;
139 if(file->f_op) {
140 struct linux_binfmt * fmt;
141 140
142 read_lock(&binfmt_lock); 141 read_lock(&binfmt_lock);
143 list_for_each_entry(fmt, &formats, lh) { 142 list_for_each_entry(fmt, &formats, lh) {
144 if (!fmt->load_shlib) 143 if (!fmt->load_shlib)
145 continue; 144 continue;
146 if (!try_module_get(fmt->module)) 145 if (!try_module_get(fmt->module))
147 continue; 146 continue;
148 read_unlock(&binfmt_lock);
149 error = fmt->load_shlib(file);
150 read_lock(&binfmt_lock);
151 put_binfmt(fmt);
152 if (error != -ENOEXEC)
153 break;
154 }
155 read_unlock(&binfmt_lock); 147 read_unlock(&binfmt_lock);
148 error = fmt->load_shlib(file);
149 read_lock(&binfmt_lock);
150 put_binfmt(fmt);
151 if (error != -ENOEXEC)
152 break;
156 } 153 }
154 read_unlock(&binfmt_lock);
157exit: 155exit:
158 fput(file); 156 fput(file);
159out: 157out:
@@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
1277 */ 1275 */
1278int prepare_binprm(struct linux_binprm *bprm) 1276int prepare_binprm(struct linux_binprm *bprm)
1279{ 1277{
1280 umode_t mode; 1278 struct inode *inode = file_inode(bprm->file);
1281 struct inode * inode = file_inode(bprm->file); 1279 umode_t mode = inode->i_mode;
1282 int retval; 1280 int retval;
1283 1281
1284 mode = inode->i_mode;
1285 if (bprm->file->f_op == NULL)
1286 return -EACCES;
1287 1282
1288 /* clear any previous set[ug]id data from a previous binary */ 1283 /* clear any previous set[ug]id data from a previous binary */
1289 bprm->cred->euid = current_euid(); 1284 bprm->cred->euid = current_euid();
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index a235f0016889..48a359dd286e 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result,
69 return NULL; 69 return NULL;
70} 70}
71 71
72/* 72static bool dentry_connected(struct dentry *dentry)
73 * Find root of a disconnected subtree and return a reference to it.
74 */
75static struct dentry *
76find_disconnected_root(struct dentry *dentry)
77{ 73{
78 dget(dentry); 74 dget(dentry);
79 while (!IS_ROOT(dentry)) { 75 while (dentry->d_flags & DCACHE_DISCONNECTED) {
80 struct dentry *parent = dget_parent(dentry); 76 struct dentry *parent = dget_parent(dentry);
81 77
82 if (!(parent->d_flags & DCACHE_DISCONNECTED)) { 78 dput(dentry);
79 if (IS_ROOT(dentry)) {
83 dput(parent); 80 dput(parent);
84 break; 81 return false;
85 } 82 }
83 dentry = parent;
84 }
85 dput(dentry);
86 return true;
87}
88
89static void clear_disconnected(struct dentry *dentry)
90{
91 dget(dentry);
92 while (dentry->d_flags & DCACHE_DISCONNECTED) {
93 struct dentry *parent = dget_parent(dentry);
94
95 WARN_ON_ONCE(IS_ROOT(dentry));
96
97 spin_lock(&dentry->d_lock);
98 dentry->d_flags &= ~DCACHE_DISCONNECTED;
99 spin_unlock(&dentry->d_lock);
86 100
87 dput(dentry); 101 dput(dentry);
88 dentry = parent; 102 dentry = parent;
89 } 103 }
90 return dentry; 104 dput(dentry);
105}
106
107/*
108 * Reconnect a directory dentry with its parent.
109 *
110 * This can return a dentry, or NULL, or an error.
111 *
112 * In the first case the returned dentry is the parent of the given
113 * dentry, and may itself need to be reconnected to its parent.
114 *
115 * In the NULL case, a concurrent VFS operation has either renamed or
116 * removed this directory. The concurrent operation has reconnected our
117 * dentry, so we no longer need to.
118 */
119static struct dentry *reconnect_one(struct vfsmount *mnt,
120 struct dentry *dentry, char *nbuf)
121{
122 struct dentry *parent;
123 struct dentry *tmp;
124 int err;
125
126 parent = ERR_PTR(-EACCES);
127 mutex_lock(&dentry->d_inode->i_mutex);
128 if (mnt->mnt_sb->s_export_op->get_parent)
129 parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
130 mutex_unlock(&dentry->d_inode->i_mutex);
131
132 if (IS_ERR(parent)) {
133 dprintk("%s: get_parent of %ld failed, err %d\n",
134 __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
135 return parent;
136 }
137
138 dprintk("%s: find name of %lu in %lu\n", __func__,
139 dentry->d_inode->i_ino, parent->d_inode->i_ino);
140 err = exportfs_get_name(mnt, parent, nbuf, dentry);
141 if (err == -ENOENT)
142 goto out_reconnected;
143 if (err)
144 goto out_err;
145 dprintk("%s: found name: %s\n", __func__, nbuf);
146 mutex_lock(&parent->d_inode->i_mutex);
147 tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
148 mutex_unlock(&parent->d_inode->i_mutex);
149 if (IS_ERR(tmp)) {
150 dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
151 goto out_err;
152 }
153 if (tmp != dentry) {
154 dput(tmp);
155 goto out_reconnected;
156 }
157 dput(tmp);
158 if (IS_ROOT(dentry)) {
159 err = -ESTALE;
160 goto out_err;
161 }
162 return parent;
163
164out_err:
165 dput(parent);
166 return ERR_PTR(err);
167out_reconnected:
168 dput(parent);
169 /*
170 * Someone must have renamed our entry into another parent, in
171 * which case it has been reconnected by the rename.
172 *
173 * Or someone removed it entirely, in which case filehandle
174 * lookup will succeed but the directory is now IS_DEAD and
175 * subsequent operations on it will fail.
176 *
177 * Alternatively, maybe there was no race at all, and the
178 * filesystem is just corrupt and gave us a parent that doesn't
179 * actually contain any entry pointing to this inode. So,
180 * double check that this worked and return -ESTALE if not:
181 */
182 if (!dentry_connected(dentry))
183 return ERR_PTR(-ESTALE);
184 return NULL;
91} 185}
92 186
93/* 187/*
94 * Make sure target_dir is fully connected to the dentry tree. 188 * Make sure target_dir is fully connected to the dentry tree.
95 * 189 *
96 * It may already be, as the flag isn't always updated when connection happens. 190 * On successful return, DCACHE_DISCONNECTED will be cleared on
191 * target_dir, and target_dir->d_parent->...->d_parent will reach the
192 * root of the filesystem.
193 *
194 * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
195 * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
196 * set but already be connected. In that case we'll verify the
197 * connection to root and then clear the flag.
198 *
199 * Note that target_dir could be removed by a concurrent operation. In
200 * that case reconnect_path may still succeed with target_dir fully
201 * connected, but further operations using the filehandle will fail when
202 * necessary (due to S_DEAD being set on the directory).
97 */ 203 */
98static int 204static int
99reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) 205reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
100{ 206{
101 int noprogress = 0; 207 struct dentry *dentry, *parent;
102 int err = -ESTALE;
103 208
104 /* 209 dentry = dget(target_dir);
105 * It is possible that a confused file system might not let us complete
106 * the path to the root. For example, if get_parent returns a directory
107 * in which we cannot find a name for the child. While this implies a
108 * very sick filesystem we don't want it to cause knfsd to spin. Hence
109 * the noprogress counter. If we go through the loop 10 times (2 is
110 * probably enough) without getting anywhere, we just give up
111 */
112 while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
113 struct dentry *pd = find_disconnected_root(target_dir);
114
115 if (!IS_ROOT(pd)) {
116 /* must have found a connected parent - great */
117 spin_lock(&pd->d_lock);
118 pd->d_flags &= ~DCACHE_DISCONNECTED;
119 spin_unlock(&pd->d_lock);
120 noprogress = 0;
121 } else if (pd == mnt->mnt_sb->s_root) {
122 printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
123 spin_lock(&pd->d_lock);
124 pd->d_flags &= ~DCACHE_DISCONNECTED;
125 spin_unlock(&pd->d_lock);
126 noprogress = 0;
127 } else {
128 /*
129 * We have hit the top of a disconnected path, try to
130 * find parent and connect.
131 *
132 * Racing with some other process renaming a directory
133 * isn't much of a problem here. If someone renames
134 * the directory, it will end up properly connected,
135 * which is what we want
136 *
137 * Getting the parent can't be supported generically,
138 * the locking is too icky.
139 *
140 * Instead we just return EACCES. If server reboots
141 * or inodes get flushed, you lose
142 */
143 struct dentry *ppd = ERR_PTR(-EACCES);
144 struct dentry *npd;
145
146 mutex_lock(&pd->d_inode->i_mutex);
147 if (mnt->mnt_sb->s_export_op->get_parent)
148 ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
149 mutex_unlock(&pd->d_inode->i_mutex);
150
151 if (IS_ERR(ppd)) {
152 err = PTR_ERR(ppd);
153 dprintk("%s: get_parent of %ld failed, err %d\n",
154 __func__, pd->d_inode->i_ino, err);
155 dput(pd);
156 break;
157 }
158 210
159 dprintk("%s: find name of %lu in %lu\n", __func__, 211 while (dentry->d_flags & DCACHE_DISCONNECTED) {
160 pd->d_inode->i_ino, ppd->d_inode->i_ino); 212 BUG_ON(dentry == mnt->mnt_sb->s_root);
161 err = exportfs_get_name(mnt, ppd, nbuf, pd);
162 if (err) {
163 dput(ppd);
164 dput(pd);
165 if (err == -ENOENT)
166 /* some race between get_parent and
167 * get_name? just try again
168 */
169 continue;
170 break;
171 }
172 dprintk("%s: found name: %s\n", __func__, nbuf);
173 mutex_lock(&ppd->d_inode->i_mutex);
174 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
175 mutex_unlock(&ppd->d_inode->i_mutex);
176 if (IS_ERR(npd)) {
177 err = PTR_ERR(npd);
178 dprintk("%s: lookup failed: %d\n",
179 __func__, err);
180 dput(ppd);
181 dput(pd);
182 break;
183 }
184 /* we didn't really want npd, we really wanted
185 * a side-effect of the lookup.
186 * hopefully, npd == pd, though it isn't really
187 * a problem if it isn't
188 */
189 if (npd == pd)
190 noprogress = 0;
191 else
192 printk("%s: npd != pd\n", __func__);
193 dput(npd);
194 dput(ppd);
195 if (IS_ROOT(pd)) {
196 /* something went wrong, we have to give up */
197 dput(pd);
198 break;
199 }
200 }
201 dput(pd);
202 }
203 213
204 if (target_dir->d_flags & DCACHE_DISCONNECTED) { 214 if (IS_ROOT(dentry))
205 /* something went wrong - oh-well */ 215 parent = reconnect_one(mnt, dentry, nbuf);
206 if (!err) 216 else
207 err = -ESTALE; 217 parent = dget_parent(dentry);
208 return err;
209 }
210 218
219 if (!parent)
220 break;
221 dput(dentry);
222 if (IS_ERR(parent))
223 return PTR_ERR(parent);
224 dentry = parent;
225 }
226 dput(dentry);
227 clear_disconnected(target_dir);
211 return 0; 228 return 0;
212} 229}
213 230
@@ -215,7 +232,7 @@ struct getdents_callback {
215 struct dir_context ctx; 232 struct dir_context ctx;
216 char *name; /* name that was found. It already points to a 233 char *name; /* name that was found. It already points to a
217 buffer NAME_MAX+1 is size */ 234 buffer NAME_MAX+1 is size */
218 unsigned long ino; /* the inum we are looking for */ 235 u64 ino; /* the inum we are looking for */
219 int found; /* inode matched? */ 236 int found; /* inode matched? */
220 int sequence; /* sequence counter */ 237 int sequence; /* sequence counter */
221}; 238};
@@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
255 struct inode *dir = path->dentry->d_inode; 272 struct inode *dir = path->dentry->d_inode;
256 int error; 273 int error;
257 struct file *file; 274 struct file *file;
275 struct kstat stat;
276 struct path child_path = {
277 .mnt = path->mnt,
278 .dentry = child,
279 };
258 struct getdents_callback buffer = { 280 struct getdents_callback buffer = {
259 .ctx.actor = filldir_one, 281 .ctx.actor = filldir_one,
260 .name = name, 282 .name = name,
261 .ino = child->d_inode->i_ino
262 }; 283 };
263 284
264 error = -ENOTDIR; 285 error = -ENOTDIR;
@@ -268,6 +289,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
268 if (!dir->i_fop) 289 if (!dir->i_fop)
269 goto out; 290 goto out;
270 /* 291 /*
292 * inode->i_ino is unsigned long, kstat->ino is u64, so the
293 * former would be insufficient on 32-bit hosts when the
294 * filesystem supports 64-bit inode numbers. So we need to
295 * actually call ->getattr, not just read i_ino:
296 */
297 error = vfs_getattr_nosec(&child_path, &stat);
298 if (error)
299 return error;
300 buffer.ino = stat.ino;
301 /*
271 * Open the directory ... 302 * Open the directory ...
272 */ 303 */
273 file = dentry_open(path, O_RDONLY, cred); 304 file = dentry_open(path, O_RDONLY, cred);
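The comment added to get_name() above is the heart of that hunk: inode->i_ino is an unsigned long, so on a 32-bit host it can silently truncate a 64-bit inode number, which is why the code now obtains a u64 via vfs_getattr_nosec() instead. A small user-space illustration of the truncation (plain C, not kernel code):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                uint64_t ino = 0x100000001ULL;          /* a 64-bit inode number */
                unsigned long narrow = (unsigned long)ino;

                /*
                 * On a 32-bit host, unsigned long is 32 bits wide and this
                 * prints 1, not 4294967297 -- two distinct inodes would become
                 * indistinguishable to the directory search.
                 */
                printf("%lu\n", narrow);
                return 0;
        }
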
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c260de6d7b6d..8a337640a46a 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
632 int count = 0; 632 int count = 0;
633 ext2_fsblk_t first_block = 0; 633 ext2_fsblk_t first_block = 0;
634 634
635 BUG_ON(maxblocks == 0);
636
635 depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 637 depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
636 638
637 if (depth == 0) 639 if (depth == 0)
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 1c3312858fcf..e98171a11cfe 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
35 int rc; 35 int rc;
36 36
37 memset(&tmp, 0, sizeof(struct buffer_head)); 37 memset(&tmp, 0, sizeof(struct buffer_head));
38 tmp.b_size = 1 << inode->i_blkbits;
38 rc = ext2_get_block(inode, pgoff, &tmp, create); 39 rc = ext2_get_block(inode, pgoff, &tmp, create);
39 *result = tmp.b_blocknr; 40 *result = tmp.b_blocknr;
40 41
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c50c76190373..37fd31ed16e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2825,6 +2825,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2825 * bitmap, and an inode table. 2825 * bitmap, and an inode table.
2826 */ 2826 */
2827 overhead += ngroups * (2 + sbi->s_itb_per_group); 2827 overhead += ngroups * (2 + sbi->s_itb_per_group);
2828
2829 /* Add the journal blocks as well */
2830 overhead += sbi->s_journal->j_maxlen;
2831
2828 sbi->s_overhead_last = overhead; 2832 sbi->s_overhead_last = overhead;
2829 smp_wmb(); 2833 smp_wmb();
2830 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); 2834 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea9d7cc..d01d62315f7e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
2734 struct inode *second); 2734 struct inode *second);
2735extern void ext4_double_up_write_data_sem(struct inode *orig_inode, 2735extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2736 struct inode *donor_inode); 2736 struct inode *donor_inode);
2737void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
2738void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
2739extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2737extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2740 __u64 start_orig, __u64 start_donor, 2738 __u64 start_orig, __u64 start_donor,
2741 __u64 len, __u64 *moved_len); 2739 __u64 len, __u64 *moved_len);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a569d335f804..60589b60e9b0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
130 130
131 /* Protect orig inodes against a truncate and make sure, 131 /* Protect orig inodes against a truncate and make sure,
132 * that only 1 swap_inode_boot_loader is running. */ 132 * that only 1 swap_inode_boot_loader is running. */
133 ext4_inode_double_lock(inode, inode_bl); 133 lock_two_nondirectories(inode, inode_bl);
134 134
135 truncate_inode_pages(&inode->i_data, 0); 135 truncate_inode_pages(&inode->i_data, 0);
136 truncate_inode_pages(&inode_bl->i_data, 0); 136 truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
205 ext4_inode_resume_unlocked_dio(inode); 205 ext4_inode_resume_unlocked_dio(inode);
206 ext4_inode_resume_unlocked_dio(inode_bl); 206 ext4_inode_resume_unlocked_dio(inode_bl);
207 207
208 ext4_inode_double_unlock(inode, inode_bl); 208 unlock_two_nondirectories(inode, inode_bl);
209 209
210 iput(inode_bl); 210 iput(inode_bl);
211 211
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7fa4d855dbd5..773b503bd18c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1203,42 +1203,6 @@ mext_check_arguments(struct inode *orig_inode,
1203} 1203}
1204 1204
1205/** 1205/**
1206 * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1207 *
1208 * @inode1: the inode structure
1209 * @inode2: the inode structure
1210 *
1211 * Lock two inodes' i_mutex
1212 */
1213void
1214ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
1215{
1216 BUG_ON(inode1 == inode2);
1217 if (inode1 < inode2) {
1218 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1219 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1220 } else {
1221 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1222 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1223 }
1224}
1225
1226/**
1227 * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1228 *
1229 * @inode1: the inode that is released first
1230 * @inode2: the inode that is released second
1231 *
1232 */
1233
1234void
1235ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1236{
1237 mutex_unlock(&inode1->i_mutex);
1238 mutex_unlock(&inode2->i_mutex);
1239}
1240
1241/**
1242 * ext4_move_extents - Exchange the specified range of a file 1206 * ext4_move_extents - Exchange the specified range of a file
1243 * 1207 *
1244 * @o_filp: file structure of the original file 1208 * @o_filp: file structure of the original file
@@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1327 return -EINVAL; 1291 return -EINVAL;
1328 } 1292 }
1329 /* Protect orig and donor inodes against a truncate */ 1293 /* Protect orig and donor inodes against a truncate */
1330 ext4_inode_double_lock(orig_inode, donor_inode); 1294 lock_two_nondirectories(orig_inode, donor_inode);
1331 1295
1332 /* Wait for all existing dio workers */ 1296 /* Wait for all existing dio workers */
1333 ext4_inode_block_unlocked_dio(orig_inode); 1297 ext4_inode_block_unlocked_dio(orig_inode);
@@ -1535,7 +1499,7 @@ out:
1535 ext4_double_up_write_data_sem(orig_inode, donor_inode); 1499 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1536 ext4_inode_resume_unlocked_dio(orig_inode); 1500 ext4_inode_resume_unlocked_dio(orig_inode);
1537 ext4_inode_resume_unlocked_dio(donor_inode); 1501 ext4_inode_resume_unlocked_dio(donor_inode);
1538 ext4_inode_double_unlock(orig_inode, donor_inode); 1502 unlock_two_nondirectories(orig_inode, donor_inode);
1539 1503
1540 return ret; 1504 return ret;
1541} 1505}
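The ext4 hunks replace the filesystem-private ext4_inode_double_lock()/_unlock() pair (removed above) with the generic lock_two_nondirectories() helper. The underlying discipline is unchanged: when two locks can be taken by different tasks in either order, always acquire them in one fixed order -- here by comparing the object addresses -- so an ABBA deadlock cannot form. A hedged user-space sketch of that ordering rule with POSIX mutexes; struct obj, lock_pair() and unlock_pair() are illustrative names, not kernel APIs:

        #include <pthread.h>

        struct obj {
                pthread_mutex_t lock;
        };

        /*
         * Always lock the lower-addressed object first, the same way the
         * removed ext4_inode_double_lock() compared inode pointers.
         */
        static void lock_pair(struct obj *a, struct obj *b)
        {
                if (a < b) {
                        pthread_mutex_lock(&a->lock);
                        pthread_mutex_lock(&b->lock);
                } else {
                        pthread_mutex_lock(&b->lock);
                        pthread_mutex_lock(&a->lock);
                }
        }

        static void unlock_pair(struct obj *a, struct obj *b)
        {
                pthread_mutex_unlock(&a->lock);
                pthread_mutex_unlock(&b->lock);
        }
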
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index e06e0995e00f..214fe1054fce 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -63,3 +63,11 @@ config F2FS_FS_SECURITY
63 the extended attribute support in advance. 63 the extended attribute support in advance.
64 64
65 If you are not using a security module, say N. 65 If you are not using a security module, say N.
66
67config F2FS_CHECK_FS
68 bool "F2FS consistency checking feature"
69 depends on F2FS_FS
70 help
71 Enables BUG_ONs which check the file system consistency in runtime.
72
73 If you want to improve the performance, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b7826ec1b470..d0fc287efeff 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -205,7 +205,8 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
205 return acl; 205 return acl;
206} 206}
207 207
208static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 208static int f2fs_set_acl(struct inode *inode, int type,
209 struct posix_acl *acl, struct page *ipage)
209{ 210{
210 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 211 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
211 struct f2fs_inode_info *fi = F2FS_I(inode); 212 struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -250,7 +251,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
250 } 251 }
251 } 252 }
252 253
253 error = f2fs_setxattr(inode, name_index, "", value, size, NULL); 254 error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
254 255
255 kfree(value); 256 kfree(value);
256 if (!error) 257 if (!error)
@@ -260,10 +261,10 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
260 return error; 261 return error;
261} 262}
262 263
263int f2fs_init_acl(struct inode *inode, struct inode *dir) 264int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
264{ 265{
265 struct posix_acl *acl = NULL;
266 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 266 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
267 struct posix_acl *acl = NULL;
267 int error = 0; 268 int error = 0;
268 269
269 if (!S_ISLNK(inode->i_mode)) { 270 if (!S_ISLNK(inode->i_mode)) {
@@ -276,19 +277,19 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir)
276 inode->i_mode &= ~current_umask(); 277 inode->i_mode &= ~current_umask();
277 } 278 }
278 279
279 if (test_opt(sbi, POSIX_ACL) && acl) { 280 if (!test_opt(sbi, POSIX_ACL) || !acl)
281 goto cleanup;
280 282
281 if (S_ISDIR(inode->i_mode)) { 283 if (S_ISDIR(inode->i_mode)) {
282 error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); 284 error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage);
283 if (error) 285 if (error)
284 goto cleanup; 286 goto cleanup;
285 }
286 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
287 if (error < 0)
288 return error;
289 if (error > 0)
290 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
291 } 287 }
288 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
289 if (error < 0)
290 return error;
291 if (error > 0)
292 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage);
292cleanup: 293cleanup:
293 posix_acl_release(acl); 294 posix_acl_release(acl);
294 return error; 295 return error;
@@ -313,7 +314,8 @@ int f2fs_acl_chmod(struct inode *inode)
313 error = posix_acl_chmod(&acl, GFP_KERNEL, mode); 314 error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
314 if (error) 315 if (error)
315 return error; 316 return error;
316 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); 317
318 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL);
317 posix_acl_release(acl); 319 posix_acl_release(acl);
318 return error; 320 return error;
319} 321}
@@ -388,7 +390,7 @@ static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
388 acl = NULL; 390 acl = NULL;
389 } 391 }
390 392
391 error = f2fs_set_acl(inode, type, acl); 393 error = f2fs_set_acl(inode, type, acl, NULL);
392 394
393release_and_out: 395release_and_out:
394 posix_acl_release(acl); 396 posix_acl_release(acl);
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 80f430674417..49633131e038 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -36,9 +36,9 @@ struct f2fs_acl_header {
36 36
37#ifdef CONFIG_F2FS_FS_POSIX_ACL 37#ifdef CONFIG_F2FS_FS_POSIX_ACL
38 38
39extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); 39extern struct posix_acl *f2fs_get_acl(struct inode *, int);
40extern int f2fs_acl_chmod(struct inode *inode); 40extern int f2fs_acl_chmod(struct inode *);
41extern int f2fs_init_acl(struct inode *inode, struct inode *dir); 41extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
42#else 42#else
43#define f2fs_check_acl NULL 43#define f2fs_check_acl NULL
44#define f2fs_get_acl NULL 44#define f2fs_get_acl NULL
@@ -49,7 +49,8 @@ static inline int f2fs_acl_chmod(struct inode *inode)
49 return 0; 49 return 0;
50} 50}
51 51
52static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) 52static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
53 struct page *page)
53{ 54{
54 return 0; 55 return 0;
55} 56}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bb312201ca95..5716e5eb4e8e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -81,7 +81,7 @@ static int f2fs_write_meta_page(struct page *page,
81 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 81 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
82 82
83 /* Should not write any meta pages, if any IO error was occurred */ 83 /* Should not write any meta pages, if any IO error was occurred */
84 if (wbc->for_reclaim || 84 if (wbc->for_reclaim || sbi->por_doing ||
85 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { 85 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
86 dec_page_count(sbi, F2FS_DIRTY_META); 86 dec_page_count(sbi, F2FS_DIRTY_META);
87 wbc->pages_skipped++; 87 wbc->pages_skipped++;
@@ -142,8 +142,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
142 for (i = 0; i < nr_pages; i++) { 142 for (i = 0; i < nr_pages; i++) {
143 struct page *page = pvec.pages[i]; 143 struct page *page = pvec.pages[i];
144 lock_page(page); 144 lock_page(page);
145 BUG_ON(page->mapping != mapping); 145 f2fs_bug_on(page->mapping != mapping);
146 BUG_ON(!PageDirty(page)); 146 f2fs_bug_on(!PageDirty(page));
147 clear_page_dirty_for_io(page); 147 clear_page_dirty_for_io(page);
148 if (f2fs_write_meta_page(page, &wbc)) { 148 if (f2fs_write_meta_page(page, &wbc)) {
149 unlock_page(page); 149 unlock_page(page);
@@ -167,6 +167,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
167 struct address_space *mapping = page->mapping; 167 struct address_space *mapping = page->mapping;
168 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 168 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
169 169
170 trace_f2fs_set_page_dirty(page, META);
171
170 SetPageUptodate(page); 172 SetPageUptodate(page);
171 if (!PageDirty(page)) { 173 if (!PageDirty(page)) {
172 __set_page_dirty_nobuffers(page); 174 __set_page_dirty_nobuffers(page);
@@ -206,6 +208,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
206void release_orphan_inode(struct f2fs_sb_info *sbi) 208void release_orphan_inode(struct f2fs_sb_info *sbi)
207{ 209{
208 mutex_lock(&sbi->orphan_inode_mutex); 210 mutex_lock(&sbi->orphan_inode_mutex);
211 f2fs_bug_on(sbi->n_orphans == 0);
209 sbi->n_orphans--; 212 sbi->n_orphans--;
210 mutex_unlock(&sbi->orphan_inode_mutex); 213 mutex_unlock(&sbi->orphan_inode_mutex);
211} 214}
@@ -225,12 +228,8 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
225 break; 228 break;
226 orphan = NULL; 229 orphan = NULL;
227 } 230 }
228retry: 231
229 new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 232 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
230 if (!new) {
231 cond_resched();
232 goto retry;
233 }
234 new->ino = ino; 233 new->ino = ino;
235 234
236 /* add new_oentry into list which is sorted by inode number */ 235 /* add new_oentry into list which is sorted by inode number */
@@ -253,6 +252,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
253 if (orphan->ino == ino) { 252 if (orphan->ino == ino) {
254 list_del(&orphan->list); 253 list_del(&orphan->list);
255 kmem_cache_free(orphan_entry_slab, orphan); 254 kmem_cache_free(orphan_entry_slab, orphan);
255 f2fs_bug_on(sbi->n_orphans == 0);
256 sbi->n_orphans--; 256 sbi->n_orphans--;
257 break; 257 break;
258 } 258 }
@@ -263,7 +263,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
263static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 263static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
264{ 264{
265 struct inode *inode = f2fs_iget(sbi->sb, ino); 265 struct inode *inode = f2fs_iget(sbi->sb, ino);
266 BUG_ON(IS_ERR(inode)); 266 f2fs_bug_on(IS_ERR(inode));
267 clear_nlink(inode); 267 clear_nlink(inode);
268 268
269 /* truncate all the data during iput */ 269 /* truncate all the data during iput */
@@ -277,7 +277,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
277 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 277 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
278 return 0; 278 return 0;
279 279
280 sbi->por_doing = 1; 280 sbi->por_doing = true;
281 start_blk = __start_cp_addr(sbi) + 1; 281 start_blk = __start_cp_addr(sbi) + 1;
282 orphan_blkaddr = __start_sum_addr(sbi) - 1; 282 orphan_blkaddr = __start_sum_addr(sbi) - 1;
283 283
@@ -294,7 +294,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
294 } 294 }
295 /* clear Orphan Flag */ 295 /* clear Orphan Flag */
296 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); 296 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
297 sbi->por_doing = 0; 297 sbi->por_doing = false;
298 return 0; 298 return 0;
299} 299}
300 300
@@ -469,9 +469,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
469 return -EEXIST; 469 return -EEXIST;
470 } 470 }
471 list_add_tail(&new->list, head); 471 list_add_tail(&new->list, head);
472#ifdef CONFIG_F2FS_STAT_FS 472 stat_inc_dirty_dir(sbi);
473 sbi->n_dirty_dirs++;
474#endif
475 return 0; 473 return 0;
476} 474}
477 475
@@ -482,12 +480,8 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
482 480
483 if (!S_ISDIR(inode->i_mode)) 481 if (!S_ISDIR(inode->i_mode))
484 return; 482 return;
485retry: 483
486 new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 484 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
487 if (!new) {
488 cond_resched();
489 goto retry;
490 }
491 new->inode = inode; 485 new->inode = inode;
492 INIT_LIST_HEAD(&new->list); 486 INIT_LIST_HEAD(&new->list);
493 487
@@ -504,13 +498,9 @@ retry:
504void add_dirty_dir_inode(struct inode *inode) 498void add_dirty_dir_inode(struct inode *inode)
505{ 499{
506 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 500 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 struct dir_inode_entry *new; 501 struct dir_inode_entry *new =
508retry: 502 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
509 new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 503
510 if (!new) {
511 cond_resched();
512 goto retry;
513 }
514 new->inode = inode; 504 new->inode = inode;
515 INIT_LIST_HEAD(&new->list); 505 INIT_LIST_HEAD(&new->list);
516 506
@@ -541,9 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode)
541 if (entry->inode == inode) { 531 if (entry->inode == inode) {
542 list_del(&entry->list); 532 list_del(&entry->list);
543 kmem_cache_free(inode_entry_slab, entry); 533 kmem_cache_free(inode_entry_slab, entry);
544#ifdef CONFIG_F2FS_STAT_FS 534 stat_dec_dirty_dir(sbi);
545 sbi->n_dirty_dirs--;
546#endif
547 break; 535 break;
548 } 536 }
549 } 537 }
@@ -617,11 +605,10 @@ static void block_operations(struct f2fs_sb_info *sbi)
617 blk_start_plug(&plug); 605 blk_start_plug(&plug);
618 606
619retry_flush_dents: 607retry_flush_dents:
620 mutex_lock_all(sbi); 608 f2fs_lock_all(sbi);
621
622 /* write all the dirty dentry pages */ 609 /* write all the dirty dentry pages */
623 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 610 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
624 mutex_unlock_all(sbi); 611 f2fs_unlock_all(sbi);
625 sync_dirty_dir_inodes(sbi); 612 sync_dirty_dir_inodes(sbi);
626 goto retry_flush_dents; 613 goto retry_flush_dents;
627 } 614 }
@@ -644,7 +631,22 @@ retry_flush_nodes:
644static void unblock_operations(struct f2fs_sb_info *sbi) 631static void unblock_operations(struct f2fs_sb_info *sbi)
645{ 632{
646 mutex_unlock(&sbi->node_write); 633 mutex_unlock(&sbi->node_write);
647 mutex_unlock_all(sbi); 634 f2fs_unlock_all(sbi);
635}
636
637static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
638{
639 DEFINE_WAIT(wait);
640
641 for (;;) {
642 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
643
644 if (!get_pages(sbi, F2FS_WRITEBACK))
645 break;
646
647 io_schedule();
648 }
649 finish_wait(&sbi->cp_wait, &wait);
648} 650}
649 651
650static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 652static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -756,8 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
756 f2fs_put_page(cp_page, 1); 758 f2fs_put_page(cp_page, 1);
757 759
758 /* wait for previous submitted node/meta pages writeback */ 760 /* wait for previous submitted node/meta pages writeback */
759 while (get_pages(sbi, F2FS_WRITEBACK)) 761 wait_on_all_pages_writeback(sbi);
760 congestion_wait(BLK_RW_ASYNC, HZ / 50);
761 762
762 filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); 763 filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
763 filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); 764 filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 941f9b9ca3a5..aa3438c571fa 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -68,9 +68,6 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
68 struct buffer_head *bh_result) 68 struct buffer_head *bh_result)
69{ 69{
70 struct f2fs_inode_info *fi = F2FS_I(inode); 70 struct f2fs_inode_info *fi = F2FS_I(inode);
71#ifdef CONFIG_F2FS_STAT_FS
72 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
73#endif
74 pgoff_t start_fofs, end_fofs; 71 pgoff_t start_fofs, end_fofs;
75 block_t start_blkaddr; 72 block_t start_blkaddr;
76 73
@@ -80,9 +77,8 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
80 return 0; 77 return 0;
81 } 78 }
82 79
83#ifdef CONFIG_F2FS_STAT_FS 80 stat_inc_total_hit(inode->i_sb);
84 sbi->total_hit_ext++; 81
85#endif
86 start_fofs = fi->ext.fofs; 82 start_fofs = fi->ext.fofs;
87 end_fofs = fi->ext.fofs + fi->ext.len - 1; 83 end_fofs = fi->ext.fofs + fi->ext.len - 1;
88 start_blkaddr = fi->ext.blk_addr; 84 start_blkaddr = fi->ext.blk_addr;
@@ -100,9 +96,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
100 else 96 else
101 bh_result->b_size = UINT_MAX; 97 bh_result->b_size = UINT_MAX;
102 98
103#ifdef CONFIG_F2FS_STAT_FS 99 stat_inc_read_hit(inode->i_sb);
104 sbi->read_hit_ext++;
105#endif
106 read_unlock(&fi->ext.ext_lock); 100 read_unlock(&fi->ext.ext_lock);
107 return 1; 101 return 1;
108 } 102 }
@@ -116,7 +110,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
116 pgoff_t fofs, start_fofs, end_fofs; 110 pgoff_t fofs, start_fofs, end_fofs;
117 block_t start_blkaddr, end_blkaddr; 111 block_t start_blkaddr, end_blkaddr;
118 112
119 BUG_ON(blk_addr == NEW_ADDR); 113 f2fs_bug_on(blk_addr == NEW_ADDR);
120 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 114 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
121 dn->ofs_in_node; 115 dn->ofs_in_node;
122 116
@@ -442,7 +436,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
442 } 436 }
443 437
444 /* It does not support data allocation */ 438 /* It does not support data allocation */
445 BUG_ON(create); 439 f2fs_bug_on(create);
446 440
447 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { 441 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
448 int i; 442 int i;
@@ -560,9 +554,9 @@ write:
560 inode_dec_dirty_dents(inode); 554 inode_dec_dirty_dents(inode);
561 err = do_write_data_page(page); 555 err = do_write_data_page(page);
562 } else { 556 } else {
563 int ilock = mutex_lock_op(sbi); 557 f2fs_lock_op(sbi);
564 err = do_write_data_page(page); 558 err = do_write_data_page(page);
565 mutex_unlock_op(sbi, ilock); 559 f2fs_unlock_op(sbi);
566 need_balance_fs = true; 560 need_balance_fs = true;
567 } 561 }
568 if (err == -ENOENT) 562 if (err == -ENOENT)
@@ -641,7 +635,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
641 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 635 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
642 struct dnode_of_data dn; 636 struct dnode_of_data dn;
643 int err = 0; 637 int err = 0;
644 int ilock;
645 638
646 f2fs_balance_fs(sbi); 639 f2fs_balance_fs(sbi);
647repeat: 640repeat:
@@ -650,7 +643,7 @@ repeat:
650 return -ENOMEM; 643 return -ENOMEM;
651 *pagep = page; 644 *pagep = page;
652 645
653 ilock = mutex_lock_op(sbi); 646 f2fs_lock_op(sbi);
654 647
655 set_new_dnode(&dn, inode, NULL, NULL, 0); 648 set_new_dnode(&dn, inode, NULL, NULL, 0);
656 err = get_dnode_of_data(&dn, index, ALLOC_NODE); 649 err = get_dnode_of_data(&dn, index, ALLOC_NODE);
@@ -664,7 +657,7 @@ repeat:
664 if (err) 657 if (err)
665 goto err; 658 goto err;
666 659
667 mutex_unlock_op(sbi, ilock); 660 f2fs_unlock_op(sbi);
668 661
669 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 662 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
670 return 0; 663 return 0;
@@ -700,7 +693,7 @@ out:
700 return 0; 693 return 0;
701 694
702err: 695err:
703 mutex_unlock_op(sbi, ilock); 696 f2fs_unlock_op(sbi);
704 f2fs_put_page(page, 1); 697 f2fs_put_page(page, 1);
705 return err; 698 return err;
706} 699}
@@ -763,6 +756,8 @@ static int f2fs_set_data_page_dirty(struct page *page)
763 struct address_space *mapping = page->mapping; 756 struct address_space *mapping = page->mapping;
764 struct inode *inode = mapping->host; 757 struct inode *inode = mapping->host;
765 758
759 trace_f2fs_set_page_dirty(page, DATA);
760
766 SetPageUptodate(page); 761 SetPageUptodate(page);
767 if (!PageDirty(page)) { 762 if (!PageDirty(page)) {
768 __set_page_dirty_nobuffers(page); 763 __set_page_dirty_nobuffers(page);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 384c6daf9a89..594fc1bb64ef 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -139,7 +139,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
139 bool room = false; 139 bool room = false;
140 int max_slots = 0; 140 int max_slots = 0;
141 141
142 BUG_ON(level > MAX_DIR_HASH_DEPTH); 142 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
143 143
144 nbucket = dir_buckets(level); 144 nbucket = dir_buckets(level);
145 nblock = bucket_blocks(level); 145 nblock = bucket_blocks(level);
@@ -346,7 +346,7 @@ static struct page *init_inode_metadata(struct inode *inode,
346 goto error; 346 goto error;
347 } 347 }
348 348
349 err = f2fs_init_acl(inode, dir); 349 err = f2fs_init_acl(inode, dir, page);
350 if (err) 350 if (err)
351 goto error; 351 goto error;
352 352
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 608f0df5b919..89dc7508faf2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,6 +18,13 @@
18#include <linux/crc32.h> 18#include <linux/crc32.h>
19#include <linux/magic.h> 19#include <linux/magic.h>
20#include <linux/kobject.h> 20#include <linux/kobject.h>
21#include <linux/sched.h>
22
23#ifdef CONFIG_F2FS_CHECK_FS
24#define f2fs_bug_on(condition) BUG_ON(condition)
25#else
26#define f2fs_bug_on(condition)
27#endif
21 28
22/* 29/*
23 * For mount options 30 * For mount options
@@ -298,6 +305,9 @@ struct f2fs_sm_info {
298 unsigned int main_segments; /* # of segments in main area */ 305 unsigned int main_segments; /* # of segments in main area */
299 unsigned int reserved_segments; /* # of reserved segments */ 306 unsigned int reserved_segments; /* # of reserved segments */
300 unsigned int ovp_segments; /* # of overprovision segments */ 307 unsigned int ovp_segments; /* # of overprovision segments */
308
309 /* a threshold to reclaim prefree segments */
310 unsigned int rec_prefree_segments;
301}; 311};
302 312
303/* 313/*
@@ -318,14 +328,6 @@ enum count_type {
318}; 328};
319 329
320/* 330/*
321 * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
322 * The checkpoint procedure blocks all the locks in this fs_lock array.
323 * Some FS operations grab free locks, and if there is no free lock,
324 * then wait to grab a lock in a round-robin manner.
325 */
326#define NR_GLOBAL_LOCKS 8
327
328/*
329 * The below are the page types of bios used in submti_bio(). 331 * The below are the page types of bios used in submti_bio().
330 * The available types are: 332 * The available types are:
331 * DATA User data pages. It operates as async mode. 333 * DATA User data pages. It operates as async mode.
@@ -365,12 +367,12 @@ struct f2fs_sb_info {
365 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 367 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
366 struct inode *meta_inode; /* cache meta blocks */ 368 struct inode *meta_inode; /* cache meta blocks */
367 struct mutex cp_mutex; /* checkpoint procedure lock */ 369 struct mutex cp_mutex; /* checkpoint procedure lock */
368 struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ 370 struct rw_semaphore cp_rwsem; /* blocking FS operations */
369 struct mutex node_write; /* locking node writes */ 371 struct mutex node_write; /* locking node writes */
370 struct mutex writepages; /* mutex for writepages() */ 372 struct mutex writepages; /* mutex for writepages() */
371 unsigned char next_lock_num; /* round-robin global locks */ 373 bool por_doing; /* recovery is doing or not */
372 int por_doing; /* recovery is doing or not */ 374 bool on_build_free_nids; /* build_free_nids is doing */
373 int on_build_free_nids; /* build_free_nids is doing */ 375 wait_queue_head_t cp_wait;
374 376
375 /* for orphan inode management */ 377 /* for orphan inode management */
376 struct list_head orphan_inode_list; /* orphan inode list */ 378 struct list_head orphan_inode_list; /* orphan inode list */
@@ -520,48 +522,24 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
520 cp->ckpt_flags = cpu_to_le32(ckpt_flags); 522 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
521} 523}
522 524
523static inline void mutex_lock_all(struct f2fs_sb_info *sbi) 525static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
524{ 526{
525 int i; 527 down_read(&sbi->cp_rwsem);
526
527 for (i = 0; i < NR_GLOBAL_LOCKS; i++) {
528 /*
529 * This is the only time we take multiple fs_lock[]
530 * instances; the order is immaterial since we
531 * always hold cp_mutex, which serializes multiple
532 * such operations.
533 */
534 mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex);
535 }
536} 528}
537 529
538static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) 530static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
539{ 531{
540 int i = 0; 532 up_read(&sbi->cp_rwsem);
541 for (; i < NR_GLOBAL_LOCKS; i++)
542 mutex_unlock(&sbi->fs_lock[i]);
543} 533}
544 534
545static inline int mutex_lock_op(struct f2fs_sb_info *sbi) 535static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
546{ 536{
547 unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; 537 down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
548 int i = 0;
549
550 for (; i < NR_GLOBAL_LOCKS; i++)
551 if (mutex_trylock(&sbi->fs_lock[i]))
552 return i;
553
554 mutex_lock(&sbi->fs_lock[next_lock]);
555 sbi->next_lock_num++;
556 return next_lock;
557} 538}
558 539
559static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) 540static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
560{ 541{
561 if (ilock < 0) 542 up_write(&sbi->cp_rwsem);
562 return;
563 BUG_ON(ilock >= NR_GLOBAL_LOCKS);
564 mutex_unlock(&sbi->fs_lock[ilock]);
565} 543}
566 544
567/* 545/*
@@ -612,8 +590,8 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
612 blkcnt_t count) 590 blkcnt_t count)
613{ 591{
614 spin_lock(&sbi->stat_lock); 592 spin_lock(&sbi->stat_lock);
615 BUG_ON(sbi->total_valid_block_count < (block_t) count); 593 f2fs_bug_on(sbi->total_valid_block_count < (block_t) count);
616 BUG_ON(inode->i_blocks < count); 594 f2fs_bug_on(inode->i_blocks < count);
617 inode->i_blocks -= count; 595 inode->i_blocks -= count;
618 sbi->total_valid_block_count -= (block_t)count; 596 sbi->total_valid_block_count -= (block_t)count;
619 spin_unlock(&sbi->stat_lock); 597 spin_unlock(&sbi->stat_lock);
@@ -745,9 +723,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
745{ 723{
746 spin_lock(&sbi->stat_lock); 724 spin_lock(&sbi->stat_lock);
747 725
748 BUG_ON(sbi->total_valid_block_count < count); 726 f2fs_bug_on(sbi->total_valid_block_count < count);
749 BUG_ON(sbi->total_valid_node_count < count); 727 f2fs_bug_on(sbi->total_valid_node_count < count);
750 BUG_ON(inode->i_blocks < count); 728 f2fs_bug_on(inode->i_blocks < count);
751 729
752 inode->i_blocks -= count; 730 inode->i_blocks -= count;
753 sbi->total_valid_node_count -= count; 731 sbi->total_valid_node_count -= count;
@@ -768,7 +746,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
768static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 746static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
769{ 747{
770 spin_lock(&sbi->stat_lock); 748 spin_lock(&sbi->stat_lock);
771 BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); 749 f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);
772 sbi->total_valid_inode_count++; 750 sbi->total_valid_inode_count++;
773 spin_unlock(&sbi->stat_lock); 751 spin_unlock(&sbi->stat_lock);
774} 752}
@@ -776,7 +754,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
776static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) 754static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
777{ 755{
778 spin_lock(&sbi->stat_lock); 756 spin_lock(&sbi->stat_lock);
779 BUG_ON(!sbi->total_valid_inode_count); 757 f2fs_bug_on(!sbi->total_valid_inode_count);
780 sbi->total_valid_inode_count--; 758 sbi->total_valid_inode_count--;
781 spin_unlock(&sbi->stat_lock); 759 spin_unlock(&sbi->stat_lock);
782 return 0; 760 return 0;
@@ -797,7 +775,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
797 return; 775 return;
798 776
799 if (unlock) { 777 if (unlock) {
800 BUG_ON(!PageLocked(page)); 778 f2fs_bug_on(!PageLocked(page));
801 unlock_page(page); 779 unlock_page(page);
802 } 780 }
803 page_cache_release(page); 781 page_cache_release(page);
@@ -819,6 +797,20 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
819 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); 797 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
820} 798}
821 799
800static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
801 gfp_t flags)
802{
803 void *entry;
804retry:
805 entry = kmem_cache_alloc(cachep, flags);
806 if (!entry) {
807 cond_resched();
808 goto retry;
809 }
810
811 return entry;
812}
813
822#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) 814#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
823 815
824static inline bool IS_INODE(struct page *page) 816static inline bool IS_INODE(struct page *page)
@@ -979,6 +971,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
979 */ 971 */
980void f2fs_set_inode_flags(struct inode *); 972void f2fs_set_inode_flags(struct inode *);
981struct inode *f2fs_iget(struct super_block *, unsigned long); 973struct inode *f2fs_iget(struct super_block *, unsigned long);
974int try_to_free_nats(struct f2fs_sb_info *, int);
982void update_inode(struct inode *, struct page *); 975void update_inode(struct inode *, struct page *);
983int update_inode_page(struct inode *); 976int update_inode_page(struct inode *);
984int f2fs_write_inode(struct inode *, struct writeback_control *); 977int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -1033,6 +1026,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1033int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1026int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1034int truncate_inode_blocks(struct inode *, pgoff_t); 1027int truncate_inode_blocks(struct inode *, pgoff_t);
1035int truncate_xattr_node(struct inode *, struct page *); 1028int truncate_xattr_node(struct inode *, struct page *);
1029int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1036int remove_inode_page(struct inode *); 1030int remove_inode_page(struct inode *);
1037struct page *new_inode_page(struct inode *, const struct qstr *); 1031struct page *new_inode_page(struct inode *, const struct qstr *);
1038struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1032struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
@@ -1059,6 +1053,7 @@ void destroy_node_manager_caches(void);
1059 * segment.c 1053 * segment.c
1060 */ 1054 */
1061void f2fs_balance_fs(struct f2fs_sb_info *); 1055void f2fs_balance_fs(struct f2fs_sb_info *);
1056void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1062void invalidate_blocks(struct f2fs_sb_info *, block_t); 1057void invalidate_blocks(struct f2fs_sb_info *, block_t);
1063void clear_prefree_segments(struct f2fs_sb_info *); 1058void clear_prefree_segments(struct f2fs_sb_info *);
1064int npages_for_summary_flush(struct f2fs_sb_info *); 1059int npages_for_summary_flush(struct f2fs_sb_info *);
@@ -1172,7 +1167,16 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1172 return (struct f2fs_stat_info*)sbi->stat_info; 1167 return (struct f2fs_stat_info*)sbi->stat_info;
1173} 1168}
1174 1169
1175#define stat_inc_call_count(si) ((si)->call_count++) 1170#define stat_inc_call_count(si) ((si)->call_count++)
1171#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1172#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
1173#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
1174#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
1175#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
1176#define stat_inc_seg_type(sbi, curseg) \
1177 ((sbi)->segment_count[(curseg)->alloc_type]++)
1178#define stat_inc_block_count(sbi, curseg) \
1179 ((sbi)->block_count[(curseg)->alloc_type]++)
1176 1180
1177#define stat_inc_seg_count(sbi, type) \ 1181#define stat_inc_seg_count(sbi, type) \
1178 do { \ 1182 do { \
@@ -1207,6 +1211,13 @@ void __init f2fs_create_root_stats(void);
1207void f2fs_destroy_root_stats(void); 1211void f2fs_destroy_root_stats(void);
1208#else 1212#else
1209#define stat_inc_call_count(si) 1213#define stat_inc_call_count(si)
1214#define stat_inc_bggc_count(si)
1215#define stat_inc_dirty_dir(sbi)
1216#define stat_dec_dirty_dir(sbi)
1217#define stat_inc_total_hit(sb)
1218#define stat_inc_read_hit(sb)
1219#define stat_inc_seg_type(sbi, curseg)
1220#define stat_inc_block_count(sbi, curseg)
1210#define stat_inc_seg_count(si, type) 1221#define stat_inc_seg_count(si, type)
1211#define stat_inc_tot_blk_count(si, blks) 1222#define stat_inc_tot_blk_count(si, blks)
1212#define stat_inc_data_blk_count(si, blks) 1223#define stat_inc_data_blk_count(si, blks)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 02c906971cc6..7d714f4972d5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,18 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
36 block_t old_blk_addr; 36 block_t old_blk_addr;
37 struct dnode_of_data dn; 37 struct dnode_of_data dn;
38 int err, ilock; 38 int err;
39 39
40 f2fs_balance_fs(sbi); 40 f2fs_balance_fs(sbi);
41 41
42 sb_start_pagefault(inode->i_sb); 42 sb_start_pagefault(inode->i_sb);
43 43
44 /* block allocation */ 44 /* block allocation */
45 ilock = mutex_lock_op(sbi); 45 f2fs_lock_op(sbi);
46 set_new_dnode(&dn, inode, NULL, NULL, 0); 46 set_new_dnode(&dn, inode, NULL, NULL, 0);
47 err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); 47 err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
48 if (err) { 48 if (err) {
49 mutex_unlock_op(sbi, ilock); 49 f2fs_unlock_op(sbi);
50 goto out; 50 goto out;
51 } 51 }
52 52
@@ -56,12 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
56 err = reserve_new_block(&dn); 56 err = reserve_new_block(&dn);
57 if (err) { 57 if (err) {
58 f2fs_put_dnode(&dn); 58 f2fs_put_dnode(&dn);
59 mutex_unlock_op(sbi, ilock); 59 f2fs_unlock_op(sbi);
60 goto out; 60 goto out;
61 } 61 }
62 } 62 }
63 f2fs_put_dnode(&dn); 63 f2fs_put_dnode(&dn);
64 mutex_unlock_op(sbi, ilock); 64 f2fs_unlock_op(sbi);
65 65
66 file_update_time(vma->vm_file); 66 file_update_time(vma->vm_file);
67 lock_page(page); 67 lock_page(page);
@@ -88,6 +88,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
88 set_page_dirty(page); 88 set_page_dirty(page);
89 SetPageUptodate(page); 89 SetPageUptodate(page);
90 90
91 trace_f2fs_vm_page_mkwrite(page, DATA);
91mapped: 92mapped:
92 /* fill the page */ 93 /* fill the page */
93 wait_on_page_writeback(page); 94 wait_on_page_writeback(page);
@@ -188,8 +189,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
188 if (ret) 189 if (ret)
189 goto out; 190 goto out;
190 } 191 }
191 filemap_fdatawait_range(sbi->node_inode->i_mapping, 192 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
192 0, LONG_MAX); 193 if (ret)
194 goto out;
193 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 195 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
194 } 196 }
195out: 197out:
@@ -270,7 +272,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
270 unsigned int blocksize = inode->i_sb->s_blocksize; 272 unsigned int blocksize = inode->i_sb->s_blocksize;
271 struct dnode_of_data dn; 273 struct dnode_of_data dn;
272 pgoff_t free_from; 274 pgoff_t free_from;
273 int count = 0, ilock = -1; 275 int count = 0;
274 int err; 276 int err;
275 277
276 trace_f2fs_truncate_blocks_enter(inode, from); 278 trace_f2fs_truncate_blocks_enter(inode, from);
@@ -278,13 +280,13 @@ static int truncate_blocks(struct inode *inode, u64 from)
278 free_from = (pgoff_t) 280 free_from = (pgoff_t)
279 ((from + blocksize - 1) >> (sbi->log_blocksize)); 281 ((from + blocksize - 1) >> (sbi->log_blocksize));
280 282
281 ilock = mutex_lock_op(sbi); 283 f2fs_lock_op(sbi);
282 set_new_dnode(&dn, inode, NULL, NULL, 0); 284 set_new_dnode(&dn, inode, NULL, NULL, 0);
283 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 285 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
284 if (err) { 286 if (err) {
285 if (err == -ENOENT) 287 if (err == -ENOENT)
286 goto free_next; 288 goto free_next;
287 mutex_unlock_op(sbi, ilock); 289 f2fs_unlock_op(sbi);
288 trace_f2fs_truncate_blocks_exit(inode, err); 290 trace_f2fs_truncate_blocks_exit(inode, err);
289 return err; 291 return err;
290 } 292 }
@@ -295,7 +297,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
295 count = ADDRS_PER_BLOCK; 297 count = ADDRS_PER_BLOCK;
296 298
297 count -= dn.ofs_in_node; 299 count -= dn.ofs_in_node;
298 BUG_ON(count < 0); 300 f2fs_bug_on(count < 0);
299 301
300 if (dn.ofs_in_node || IS_INODE(dn.node_page)) { 302 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
301 truncate_data_blocks_range(&dn, count); 303 truncate_data_blocks_range(&dn, count);
@@ -305,7 +307,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
305 f2fs_put_dnode(&dn); 307 f2fs_put_dnode(&dn);
306free_next: 308free_next:
307 err = truncate_inode_blocks(inode, free_from); 309 err = truncate_inode_blocks(inode, free_from);
308 mutex_unlock_op(sbi, ilock); 310 f2fs_unlock_op(sbi);
309 311
310 /* lastly zero out the first data page */ 312 /* lastly zero out the first data page */
311 truncate_partial_data_page(inode, from); 313 truncate_partial_data_page(inode, from);
@@ -416,16 +418,15 @@ static void fill_zero(struct inode *inode, pgoff_t index,
416{ 418{
417 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 419 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
418 struct page *page; 420 struct page *page;
419 int ilock;
420 421
421 if (!len) 422 if (!len)
422 return; 423 return;
423 424
424 f2fs_balance_fs(sbi); 425 f2fs_balance_fs(sbi);
425 426
426 ilock = mutex_lock_op(sbi); 427 f2fs_lock_op(sbi);
427 page = get_new_data_page(inode, NULL, index, false); 428 page = get_new_data_page(inode, NULL, index, false);
428 mutex_unlock_op(sbi, ilock); 429 f2fs_unlock_op(sbi);
429 430
430 if (!IS_ERR(page)) { 431 if (!IS_ERR(page)) {
431 wait_on_page_writeback(page); 432 wait_on_page_writeback(page);
@@ -484,7 +485,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
484 struct address_space *mapping = inode->i_mapping; 485 struct address_space *mapping = inode->i_mapping;
485 loff_t blk_start, blk_end; 486 loff_t blk_start, blk_end;
486 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 487 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
487 int ilock;
488 488
489 f2fs_balance_fs(sbi); 489 f2fs_balance_fs(sbi);
490 490
@@ -493,9 +493,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
493 truncate_inode_pages_range(mapping, blk_start, 493 truncate_inode_pages_range(mapping, blk_start,
494 blk_end - 1); 494 blk_end - 1);
495 495
496 ilock = mutex_lock_op(sbi); 496 f2fs_lock_op(sbi);
497 ret = truncate_hole(inode, pg_start, pg_end); 497 ret = truncate_hole(inode, pg_start, pg_end);
498 mutex_unlock_op(sbi, ilock); 498 f2fs_unlock_op(sbi);
499 } 499 }
500 } 500 }
501 501
@@ -529,13 +529,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
529 529
530 for (index = pg_start; index <= pg_end; index++) { 530 for (index = pg_start; index <= pg_end; index++) {
531 struct dnode_of_data dn; 531 struct dnode_of_data dn;
532 int ilock;
533 532
534 ilock = mutex_lock_op(sbi); 533 f2fs_lock_op(sbi);
535 set_new_dnode(&dn, inode, NULL, NULL, 0); 534 set_new_dnode(&dn, inode, NULL, NULL, 0);
536 ret = get_dnode_of_data(&dn, index, ALLOC_NODE); 535 ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
537 if (ret) { 536 if (ret) {
538 mutex_unlock_op(sbi, ilock); 537 f2fs_unlock_op(sbi);
539 break; 538 break;
540 } 539 }
541 540
@@ -543,12 +542,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
543 ret = reserve_new_block(&dn); 542 ret = reserve_new_block(&dn);
544 if (ret) { 543 if (ret) {
545 f2fs_put_dnode(&dn); 544 f2fs_put_dnode(&dn);
546 mutex_unlock_op(sbi, ilock); 545 f2fs_unlock_op(sbi);
547 break; 546 break;
548 } 547 }
549 } 548 }
550 f2fs_put_dnode(&dn); 549 f2fs_put_dnode(&dn);
551 mutex_unlock_op(sbi, ilock); 550 f2fs_unlock_op(sbi);
552 551
553 if (pg_start == pg_end) 552 if (pg_start == pg_end)
554 new_size = offset + len; 553 new_size = offset + len;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2f157e883687..b7ad1ec7e4cc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,13 +77,15 @@ static int gc_thread_func(void *data)
77 else 77 else
78 wait_ms = increase_sleep_time(gc_th, wait_ms); 78 wait_ms = increase_sleep_time(gc_th, wait_ms);
79 79
80#ifdef CONFIG_F2FS_STAT_FS 80 stat_inc_bggc_count(sbi);
81 sbi->bg_gc++;
82#endif
83 81
84 /* if return value is not zero, no victim was selected */ 82 /* if return value is not zero, no victim was selected */
85 if (f2fs_gc(sbi)) 83 if (f2fs_gc(sbi))
86 wait_ms = gc_th->no_gc_sleep_time; 84 wait_ms = gc_th->no_gc_sleep_time;
85
86 /* balancing f2fs's metadata periodically */
87 f2fs_balance_fs_bg(sbi);
88
87 } while (!kthread_should_stop()); 89 } while (!kthread_should_stop());
88 return 0; 90 return 0;
89} 91}
@@ -236,8 +238,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
236 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); 238 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
237} 239}
238 240
239static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, 241static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
240 struct victim_sel_policy *p) 242 unsigned int segno, struct victim_sel_policy *p)
241{ 243{
242 if (p->alloc_mode == SSR) 244 if (p->alloc_mode == SSR)
243 return get_seg_entry(sbi, segno)->ckpt_valid_blocks; 245 return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
@@ -293,7 +295,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
293 } 295 }
294 break; 296 break;
295 } 297 }
296 p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; 298
299 p.offset = segno + p.ofs_unit;
300 if (p.ofs_unit > 1)
301 p.offset -= segno % p.ofs_unit;
302
297 secno = GET_SECNO(sbi, segno); 303 secno = GET_SECNO(sbi, segno);
298 304
299 if (sec_usage_check(sbi, secno)) 305 if (sec_usage_check(sbi, secno))
@@ -306,10 +312,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
306 if (p.min_cost > cost) { 312 if (p.min_cost > cost) {
307 p.min_segno = segno; 313 p.min_segno = segno;
308 p.min_cost = cost; 314 p.min_cost = cost;
309 } 315 } else if (unlikely(cost == max_cost)) {
310
311 if (cost == max_cost)
312 continue; 316 continue;
317 }
313 318
314 if (nsearched++ >= p.max_search) { 319 if (nsearched++ >= p.max_search) {
315 sbi->last_victim[p.gc_mode] = segno; 320 sbi->last_victim[p.gc_mode] = segno;
@@ -358,12 +363,8 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist)
358 iput(inode); 363 iput(inode);
359 return; 364 return;
360 } 365 }
361repeat: 366
362 new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); 367 new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
363 if (!new_ie) {
364 cond_resched();
365 goto repeat;
366 }
367 new_ie->inode = inode; 368 new_ie->inode = inode;
368 list_add_tail(&new_ie->list, ilist); 369 list_add_tail(&new_ie->list, ilist);
369} 370}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9339cd292047..d0eaa9faeca0 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -37,6 +37,31 @@ void f2fs_set_inode_flags(struct inode *inode)
37 inode->i_flags |= S_DIRSYNC; 37 inode->i_flags |= S_DIRSYNC;
38} 38}
39 39
40static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
41{
42 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
43 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
44 if (ri->i_addr[0])
45 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
46 else
47 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
48 }
49}
50
51static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
52{
53 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
54 if (old_valid_dev(inode->i_rdev)) {
55 ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev));
56 ri->i_addr[1] = 0;
57 } else {
58 ri->i_addr[0] = 0;
59 ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev));
60 ri->i_addr[2] = 0;
61 }
62 }
63}
64
40static int do_read_inode(struct inode *inode) 65static int do_read_inode(struct inode *inode)
41{ 66{
42 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 67 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -73,10 +98,6 @@ static int do_read_inode(struct inode *inode)
73 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); 98 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
74 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); 99 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
75 inode->i_generation = le32_to_cpu(ri->i_generation); 100 inode->i_generation = le32_to_cpu(ri->i_generation);
76 if (ri->i_addr[0])
77 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
78 else
79 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
80 101
81 fi->i_current_depth = le32_to_cpu(ri->i_current_depth); 102 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
82 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); 103 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -84,8 +105,13 @@ static int do_read_inode(struct inode *inode)
84 fi->flags = 0; 105 fi->flags = 0;
85 fi->i_advise = ri->i_advise; 106 fi->i_advise = ri->i_advise;
86 fi->i_pino = le32_to_cpu(ri->i_pino); 107 fi->i_pino = le32_to_cpu(ri->i_pino);
108
87 get_extent_info(&fi->ext, ri->i_ext); 109 get_extent_info(&fi->ext, ri->i_ext);
88 get_inline_info(fi, ri); 110 get_inline_info(fi, ri);
111
112 /* get rdev by using inline_info */
113 __get_inode_rdev(inode, ri);
114
89 f2fs_put_page(node_page, 1); 115 f2fs_put_page(node_page, 1);
90 return 0; 116 return 0;
91} 117}
@@ -179,21 +205,10 @@ void update_inode(struct inode *inode, struct page *node_page)
179 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 205 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
180 ri->i_generation = cpu_to_le32(inode->i_generation); 206 ri->i_generation = cpu_to_le32(inode->i_generation);
181 207
182 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 208 __set_inode_rdev(inode, ri);
183 if (old_valid_dev(inode->i_rdev)) {
184 ri->i_addr[0] =
185 cpu_to_le32(old_encode_dev(inode->i_rdev));
186 ri->i_addr[1] = 0;
187 } else {
188 ri->i_addr[0] = 0;
189 ri->i_addr[1] =
190 cpu_to_le32(new_encode_dev(inode->i_rdev));
191 ri->i_addr[2] = 0;
192 }
193 }
194
195 set_cold_node(inode, node_page); 209 set_cold_node(inode, node_page);
196 set_page_dirty(node_page); 210 set_page_dirty(node_page);
211
197 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 212 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
198} 213}
199 214
@@ -214,7 +229,7 @@ int update_inode_page(struct inode *inode)
214int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 229int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
215{ 230{
216 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 231 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
217 int ret, ilock; 232 int ret;
218 233
219 if (inode->i_ino == F2FS_NODE_INO(sbi) || 234 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
220 inode->i_ino == F2FS_META_INO(sbi)) 235 inode->i_ino == F2FS_META_INO(sbi))
@@ -227,9 +242,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
227 * We need to lock here to prevent from producing dirty node pages 242 * We need to lock here to prevent from producing dirty node pages
228 * during the urgent cleaning time when runing out of free sections. 243 * during the urgent cleaning time when runing out of free sections.
229 */ 244 */
230 ilock = mutex_lock_op(sbi); 245 f2fs_lock_op(sbi);
231 ret = update_inode_page(inode); 246 ret = update_inode_page(inode);
232 mutex_unlock_op(sbi, ilock); 247 f2fs_unlock_op(sbi);
233 248
234 if (wbc) 249 if (wbc)
235 f2fs_balance_fs(sbi); 250 f2fs_balance_fs(sbi);
@@ -243,7 +258,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
243void f2fs_evict_inode(struct inode *inode) 258void f2fs_evict_inode(struct inode *inode)
244{ 259{
245 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 260 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
246 int ilock;
247 261
248 trace_f2fs_evict_inode(inode); 262 trace_f2fs_evict_inode(inode);
249 truncate_inode_pages(&inode->i_data, 0); 263 truncate_inode_pages(&inode->i_data, 0);
@@ -252,7 +266,7 @@ void f2fs_evict_inode(struct inode *inode)
252 inode->i_ino == F2FS_META_INO(sbi)) 266 inode->i_ino == F2FS_META_INO(sbi))
253 goto no_delete; 267 goto no_delete;
254 268
255 BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); 269 f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents));
256 remove_dirty_dir_inode(inode); 270 remove_dirty_dir_inode(inode);
257 271
258 if (inode->i_nlink || is_bad_inode(inode)) 272 if (inode->i_nlink || is_bad_inode(inode))
@@ -265,9 +279,9 @@ void f2fs_evict_inode(struct inode *inode)
265 if (F2FS_HAS_BLOCKS(inode)) 279 if (F2FS_HAS_BLOCKS(inode))
266 f2fs_truncate(inode); 280 f2fs_truncate(inode);
267 281
268 ilock = mutex_lock_op(sbi); 282 f2fs_lock_op(sbi);
269 remove_inode_page(inode); 283 remove_inode_page(inode);
270 mutex_unlock_op(sbi, ilock); 284 f2fs_unlock_op(sbi);
271 285
272 sb_end_intwrite(inode->i_sb); 286 sb_end_intwrite(inode->i_sb);
273no_delete: 287no_delete:
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2a5359c990fc..575adac17f8b 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -27,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
30 int err, ilock; 30 int err;
31 31
32 inode = new_inode(sb); 32 inode = new_inode(sb);
33 if (!inode) 33 if (!inode)
34 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
35 35
36 ilock = mutex_lock_op(sbi); 36 f2fs_lock_op(sbi);
37 if (!alloc_nid(sbi, &ino)) { 37 if (!alloc_nid(sbi, &ino)) {
38 mutex_unlock_op(sbi, ilock); 38 f2fs_unlock_op(sbi);
39 err = -ENOSPC; 39 err = -ENOSPC;
40 goto fail; 40 goto fail;
41 } 41 }
42 mutex_unlock_op(sbi, ilock); 42 f2fs_unlock_op(sbi);
43 43
44 inode->i_uid = current_fsuid(); 44 inode->i_uid = current_fsuid();
45 45
@@ -115,7 +115,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
115 struct f2fs_sb_info *sbi = F2FS_SB(sb); 115 struct f2fs_sb_info *sbi = F2FS_SB(sb);
116 struct inode *inode; 116 struct inode *inode;
117 nid_t ino = 0; 117 nid_t ino = 0;
118 int err, ilock; 118 int err;
119 119
120 f2fs_balance_fs(sbi); 120 f2fs_balance_fs(sbi);
121 121
@@ -131,9 +131,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
131 inode->i_mapping->a_ops = &f2fs_dblock_aops; 131 inode->i_mapping->a_ops = &f2fs_dblock_aops;
132 ino = inode->i_ino; 132 ino = inode->i_ino;
133 133
134 ilock = mutex_lock_op(sbi); 134 f2fs_lock_op(sbi);
135 err = f2fs_add_link(dentry, inode); 135 err = f2fs_add_link(dentry, inode);
136 mutex_unlock_op(sbi, ilock); 136 f2fs_unlock_op(sbi);
137 if (err) 137 if (err)
138 goto out; 138 goto out;
139 139
@@ -157,7 +157,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
157 struct inode *inode = old_dentry->d_inode; 157 struct inode *inode = old_dentry->d_inode;
158 struct super_block *sb = dir->i_sb; 158 struct super_block *sb = dir->i_sb;
159 struct f2fs_sb_info *sbi = F2FS_SB(sb); 159 struct f2fs_sb_info *sbi = F2FS_SB(sb);
160 int err, ilock; 160 int err;
161 161
162 f2fs_balance_fs(sbi); 162 f2fs_balance_fs(sbi);
163 163
@@ -165,9 +165,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
165 ihold(inode); 165 ihold(inode);
166 166
167 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 167 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
168 ilock = mutex_lock_op(sbi); 168 f2fs_lock_op(sbi);
169 err = f2fs_add_link(dentry, inode); 169 err = f2fs_add_link(dentry, inode);
170 mutex_unlock_op(sbi, ilock); 170 f2fs_unlock_op(sbi);
171 if (err) 171 if (err)
172 goto out; 172 goto out;
173 173
@@ -220,7 +220,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
220 struct f2fs_dir_entry *de; 220 struct f2fs_dir_entry *de;
221 struct page *page; 221 struct page *page;
222 int err = -ENOENT; 222 int err = -ENOENT;
223 int ilock;
224 223
225 trace_f2fs_unlink_enter(dir, dentry); 224 trace_f2fs_unlink_enter(dir, dentry);
226 f2fs_balance_fs(sbi); 225 f2fs_balance_fs(sbi);
@@ -229,16 +228,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
229 if (!de) 228 if (!de)
230 goto fail; 229 goto fail;
231 230
231 f2fs_lock_op(sbi);
232 err = acquire_orphan_inode(sbi); 232 err = acquire_orphan_inode(sbi);
233 if (err) { 233 if (err) {
234 f2fs_unlock_op(sbi);
234 kunmap(page); 235 kunmap(page);
235 f2fs_put_page(page, 0); 236 f2fs_put_page(page, 0);
236 goto fail; 237 goto fail;
237 } 238 }
238
239 ilock = mutex_lock_op(sbi);
240 f2fs_delete_entry(de, page, inode); 239 f2fs_delete_entry(de, page, inode);
241 mutex_unlock_op(sbi, ilock); 240 f2fs_unlock_op(sbi);
242 241
243 /* In order to evict this inode, we set it dirty */ 242 /* In order to evict this inode, we set it dirty */
244 mark_inode_dirty(inode); 243 mark_inode_dirty(inode);
@@ -254,7 +253,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
254 struct f2fs_sb_info *sbi = F2FS_SB(sb); 253 struct f2fs_sb_info *sbi = F2FS_SB(sb);
255 struct inode *inode; 254 struct inode *inode;
256 size_t symlen = strlen(symname) + 1; 255 size_t symlen = strlen(symname) + 1;
257 int err, ilock; 256 int err;
258 257
259 f2fs_balance_fs(sbi); 258 f2fs_balance_fs(sbi);
260 259
@@ -265,9 +264,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
265 inode->i_op = &f2fs_symlink_inode_operations; 264 inode->i_op = &f2fs_symlink_inode_operations;
266 inode->i_mapping->a_ops = &f2fs_dblock_aops; 265 inode->i_mapping->a_ops = &f2fs_dblock_aops;
267 266
268 ilock = mutex_lock_op(sbi); 267 f2fs_lock_op(sbi);
269 err = f2fs_add_link(dentry, inode); 268 err = f2fs_add_link(dentry, inode);
270 mutex_unlock_op(sbi, ilock); 269 f2fs_unlock_op(sbi);
271 if (err) 270 if (err)
272 goto out; 271 goto out;
273 272
@@ -290,7 +289,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
290{ 289{
291 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 290 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
292 struct inode *inode; 291 struct inode *inode;
293 int err, ilock; 292 int err;
294 293
295 f2fs_balance_fs(sbi); 294 f2fs_balance_fs(sbi);
296 295
@@ -304,9 +303,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
304 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); 303 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
305 304
306 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 305 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
307 ilock = mutex_lock_op(sbi); 306 f2fs_lock_op(sbi);
308 err = f2fs_add_link(dentry, inode); 307 err = f2fs_add_link(dentry, inode);
309 mutex_unlock_op(sbi, ilock); 308 f2fs_unlock_op(sbi);
310 if (err) 309 if (err)
311 goto out_fail; 310 goto out_fail;
312 311
@@ -342,7 +341,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
342 struct f2fs_sb_info *sbi = F2FS_SB(sb); 341 struct f2fs_sb_info *sbi = F2FS_SB(sb);
343 struct inode *inode; 342 struct inode *inode;
344 int err = 0; 343 int err = 0;
345 int ilock;
346 344
347 if (!new_valid_dev(rdev)) 345 if (!new_valid_dev(rdev))
348 return -EINVAL; 346 return -EINVAL;
@@ -356,9 +354,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
356 init_special_inode(inode, inode->i_mode, rdev); 354 init_special_inode(inode, inode->i_mode, rdev);
357 inode->i_op = &f2fs_special_inode_operations; 355 inode->i_op = &f2fs_special_inode_operations;
358 356
359 ilock = mutex_lock_op(sbi); 357 f2fs_lock_op(sbi);
360 err = f2fs_add_link(dentry, inode); 358 err = f2fs_add_link(dentry, inode);
361 mutex_unlock_op(sbi, ilock); 359 f2fs_unlock_op(sbi);
362 if (err) 360 if (err)
363 goto out; 361 goto out;
364 362
@@ -387,7 +385,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
387 struct f2fs_dir_entry *old_dir_entry = NULL; 385 struct f2fs_dir_entry *old_dir_entry = NULL;
388 struct f2fs_dir_entry *old_entry; 386 struct f2fs_dir_entry *old_entry;
389 struct f2fs_dir_entry *new_entry; 387 struct f2fs_dir_entry *new_entry;
390 int err = -ENOENT, ilock = -1; 388 int err = -ENOENT;
391 389
392 f2fs_balance_fs(sbi); 390 f2fs_balance_fs(sbi);
393 391
@@ -402,7 +400,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
402 goto out_old; 400 goto out_old;
403 } 401 }
404 402
405 ilock = mutex_lock_op(sbi); 403 f2fs_lock_op(sbi);
406 404
407 if (new_inode) { 405 if (new_inode) {
408 406
@@ -467,7 +465,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
467 update_inode_page(old_dir); 465 update_inode_page(old_dir);
468 } 466 }
469 467
470 mutex_unlock_op(sbi, ilock); 468 f2fs_unlock_op(sbi);
471 return 0; 469 return 0;
472 470
473put_out_dir: 471put_out_dir:
@@ -477,7 +475,7 @@ out_dir:
477 kunmap(old_dir_page); 475 kunmap(old_dir_page);
478 f2fs_put_page(old_dir_page, 0); 476 f2fs_put_page(old_dir_page, 0);
479 } 477 }
480 mutex_unlock_op(sbi, ilock); 478 f2fs_unlock_op(sbi);
481out_old: 479out_old:
482 kunmap(old_page); 480 kunmap(old_page);
483 f2fs_put_page(old_page, 0); 481 f2fs_put_page(old_page, 0);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 51ef27894433..4ac4150d421d 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -204,7 +204,7 @@ retry:
204 } 204 }
205 e->ni = *ni; 205 e->ni = *ni;
206 e->checkpointed = true; 206 e->checkpointed = true;
207 BUG_ON(ni->blk_addr == NEW_ADDR); 207 f2fs_bug_on(ni->blk_addr == NEW_ADDR);
208 } else if (new_blkaddr == NEW_ADDR) { 208 } else if (new_blkaddr == NEW_ADDR) {
209 /* 209 /*
210 * when nid is reallocated, 210 * when nid is reallocated,
@@ -212,19 +212,19 @@ retry:
212 * So, reinitialize it with new information. 212 * So, reinitialize it with new information.
213 */ 213 */
214 e->ni = *ni; 214 e->ni = *ni;
215 BUG_ON(ni->blk_addr != NULL_ADDR); 215 f2fs_bug_on(ni->blk_addr != NULL_ADDR);
216 } 216 }
217 217
218 if (new_blkaddr == NEW_ADDR) 218 if (new_blkaddr == NEW_ADDR)
219 e->checkpointed = false; 219 e->checkpointed = false;
220 220
221 /* sanity check */ 221 /* sanity check */
222 BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); 222 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
223 BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && 223 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
224 new_blkaddr == NULL_ADDR); 224 new_blkaddr == NULL_ADDR);
225 BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && 225 f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
226 new_blkaddr == NEW_ADDR); 226 new_blkaddr == NEW_ADDR);
227 BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && 227 f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
228 nat_get_blkaddr(e) != NULL_ADDR && 228 nat_get_blkaddr(e) != NULL_ADDR &&
229 new_blkaddr == NEW_ADDR); 229 new_blkaddr == NEW_ADDR);
230 230
@@ -240,7 +240,7 @@ retry:
240 write_unlock(&nm_i->nat_tree_lock); 240 write_unlock(&nm_i->nat_tree_lock);
241} 241}
242 242
243static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 243int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
244{ 244{
245 struct f2fs_nm_info *nm_i = NM_I(sbi); 245 struct f2fs_nm_info *nm_i = NM_I(sbi);
246 246
@@ -495,10 +495,10 @@ static void truncate_node(struct dnode_of_data *dn)
495 495
496 get_node_info(sbi, dn->nid, &ni); 496 get_node_info(sbi, dn->nid, &ni);
497 if (dn->inode->i_blocks == 0) { 497 if (dn->inode->i_blocks == 0) {
498 BUG_ON(ni.blk_addr != NULL_ADDR); 498 f2fs_bug_on(ni.blk_addr != NULL_ADDR);
499 goto invalidate; 499 goto invalidate;
500 } 500 }
501 BUG_ON(ni.blk_addr == NULL_ADDR); 501 f2fs_bug_on(ni.blk_addr == NULL_ADDR);
502 502
503 /* Deallocate node address */ 503 /* Deallocate node address */
504 invalidate_blocks(sbi, ni.blk_addr); 504 invalidate_blocks(sbi, ni.blk_addr);
@@ -822,7 +822,7 @@ int remove_inode_page(struct inode *inode)
822 } 822 }
823 823
824 /* 0 is possible, after f2fs_new_inode() is failed */ 824 /* 0 is possible, after f2fs_new_inode() is failed */
825 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); 825 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
826 set_new_dnode(&dn, inode, page, page, ino); 826 set_new_dnode(&dn, inode, page, page, ino);
827 truncate_node(&dn); 827 truncate_node(&dn);
828 return 0; 828 return 0;
@@ -863,7 +863,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
863 get_node_info(sbi, dn->nid, &old_ni); 863 get_node_info(sbi, dn->nid, &old_ni);
864 864
865 /* Reinitialize old_ni with new node page */ 865 /* Reinitialize old_ni with new node page */
866 BUG_ON(old_ni.blk_addr != NULL_ADDR); 866 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
867 new_ni = old_ni; 867 new_ni = old_ni;
868 new_ni.ino = dn->inode->i_ino; 868 new_ni.ino = dn->inode->i_ino;
869 set_node_addr(sbi, &new_ni, NEW_ADDR); 869 set_node_addr(sbi, &new_ni, NEW_ADDR);
@@ -969,7 +969,7 @@ repeat:
969 goto repeat; 969 goto repeat;
970 } 970 }
971got_it: 971got_it:
972 BUG_ON(nid != nid_of_node(page)); 972 f2fs_bug_on(nid != nid_of_node(page));
973 mark_page_accessed(page); 973 mark_page_accessed(page);
974 return page; 974 return page;
975} 975}
@@ -1148,6 +1148,47 @@ continue_unlock:
1148 return nwritten; 1148 return nwritten;
1149} 1149}
1150 1150
1151int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1152{
1153 struct address_space *mapping = sbi->node_inode->i_mapping;
1154 pgoff_t index = 0, end = LONG_MAX;
1155 struct pagevec pvec;
1156 int nr_pages;
1157 int ret2 = 0, ret = 0;
1158
1159 pagevec_init(&pvec, 0);
1160 while ((index <= end) &&
1161 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1162 PAGECACHE_TAG_WRITEBACK,
1163 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
1164 unsigned i;
1165
1166 for (i = 0; i < nr_pages; i++) {
1167 struct page *page = pvec.pages[i];
1168
1169 /* until radix tree lookup accepts end_index */
1170 if (page->index > end)
1171 continue;
1172
1173 if (ino && ino_of_node(page) == ino) {
1174 wait_on_page_writeback(page);
1175 if (TestClearPageError(page))
1176 ret = -EIO;
1177 }
1178 }
1179 pagevec_release(&pvec);
1180 cond_resched();
1181 }
1182
1183 if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
1184 ret2 = -ENOSPC;
1185 if (test_and_clear_bit(AS_EIO, &mapping->flags))
1186 ret2 = -EIO;
1187 if (!ret)
1188 ret = ret2;
1189 return ret;
1190}
1191
1151static int f2fs_write_node_page(struct page *page, 1192static int f2fs_write_node_page(struct page *page,
1152 struct writeback_control *wbc) 1193 struct writeback_control *wbc)
1153{ 1194{
@@ -1156,11 +1197,14 @@ static int f2fs_write_node_page(struct page *page,
1156 block_t new_addr; 1197 block_t new_addr;
1157 struct node_info ni; 1198 struct node_info ni;
1158 1199
1200 if (sbi->por_doing)
1201 goto redirty_out;
1202
1159 wait_on_page_writeback(page); 1203 wait_on_page_writeback(page);
1160 1204
1161 /* get old block addr of this node page */ 1205 /* get old block addr of this node page */
1162 nid = nid_of_node(page); 1206 nid = nid_of_node(page);
1163 BUG_ON(page->index != nid); 1207 f2fs_bug_on(page->index != nid);
1164 1208
1165 get_node_info(sbi, nid, &ni); 1209 get_node_info(sbi, nid, &ni);
1166 1210
@@ -1171,12 +1215,8 @@ static int f2fs_write_node_page(struct page *page,
1171 return 0; 1215 return 0;
1172 } 1216 }
1173 1217
1174 if (wbc->for_reclaim) { 1218 if (wbc->for_reclaim)
1175 dec_page_count(sbi, F2FS_DIRTY_NODES); 1219 goto redirty_out;
1176 wbc->pages_skipped++;
1177 set_page_dirty(page);
1178 return AOP_WRITEPAGE_ACTIVATE;
1179 }
1180 1220
1181 mutex_lock(&sbi->node_write); 1221 mutex_lock(&sbi->node_write);
1182 set_page_writeback(page); 1222 set_page_writeback(page);
@@ -1186,6 +1226,12 @@ static int f2fs_write_node_page(struct page *page,
1186 mutex_unlock(&sbi->node_write); 1226 mutex_unlock(&sbi->node_write);
1187 unlock_page(page); 1227 unlock_page(page);
1188 return 0; 1228 return 0;
1229
1230redirty_out:
1231 dec_page_count(sbi, F2FS_DIRTY_NODES);
1232 wbc->pages_skipped++;
1233 set_page_dirty(page);
1234 return AOP_WRITEPAGE_ACTIVATE;
1189} 1235}
1190 1236
1191/* 1237/*
@@ -1200,11 +1246,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1200 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1246 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1201 long nr_to_write = wbc->nr_to_write; 1247 long nr_to_write = wbc->nr_to_write;
1202 1248
1203 /* First check balancing cached NAT entries */ 1249 /* balancing f2fs's metadata in background */
1204 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1250 f2fs_balance_fs_bg(sbi);
1205 f2fs_sync_fs(sbi->sb, true);
1206 return 0;
1207 }
1208 1251
1209 /* collect a number of dirty node pages and write together */ 1252 /* collect a number of dirty node pages and write together */
1210 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) 1253 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
@@ -1223,6 +1266,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
1223 struct address_space *mapping = page->mapping; 1266 struct address_space *mapping = page->mapping;
1224 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1267 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1225 1268
1269 trace_f2fs_set_page_dirty(page, NODE);
1270
1226 SetPageUptodate(page); 1271 SetPageUptodate(page);
1227 if (!PageDirty(page)) { 1272 if (!PageDirty(page)) {
1228 __set_page_dirty_nobuffers(page); 1273 __set_page_dirty_nobuffers(page);
@@ -1291,23 +1336,18 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1291 if (nid == 0) 1336 if (nid == 0)
1292 return 0; 1337 return 0;
1293 1338
1294 if (!build) 1339 if (build) {
1295 goto retry; 1340 /* do not add allocated nids */
1296 1341 read_lock(&nm_i->nat_tree_lock);
1297 /* do not add allocated nids */ 1342 ne = __lookup_nat_cache(nm_i, nid);
1298 read_lock(&nm_i->nat_tree_lock); 1343 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1299 ne = __lookup_nat_cache(nm_i, nid); 1344 allocated = true;
1300 if (ne && nat_get_blkaddr(ne) != NULL_ADDR) 1345 read_unlock(&nm_i->nat_tree_lock);
1301 allocated = true; 1346 if (allocated)
1302 read_unlock(&nm_i->nat_tree_lock); 1347 return 0;
1303 if (allocated)
1304 return 0;
1305retry:
1306 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1307 if (!i) {
1308 cond_resched();
1309 goto retry;
1310 } 1348 }
1349
1350 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1311 i->nid = nid; 1351 i->nid = nid;
1312 i->state = NID_NEW; 1352 i->state = NID_NEW;
1313 1353
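The hunk above drops add_free_nid()'s open-coded retry loop in favor of f2fs_kmem_cache_alloc(), which presumably keeps retrying the slab allocation internally. A minimal userspace sketch of such a never-fail allocation wrapper, with malloc() standing in for kmem_cache_alloc() and sched_yield() for cond_resched(); the helper body is an illustration, not the kernel implementation:

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* Keep retrying the allocation, yielding the CPU between attempts. */
static void *retrying_alloc(size_t size)
{
        void *obj;

        while ((obj = malloc(size)) == NULL)
                sched_yield();          /* plays the role of cond_resched() */
        return obj;
}

int main(void)
{
        int *nid_slot = retrying_alloc(sizeof(*nid_slot));

        *nid_slot = 42;
        printf("allocated free-nid slot holding %d\n", *nid_slot);
        free(nid_slot);
        return 0;
}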
@@ -1350,7 +1390,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
1350 break; 1390 break;
1351 1391
1352 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1392 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1353 BUG_ON(blk_addr == NEW_ADDR); 1393 f2fs_bug_on(blk_addr == NEW_ADDR);
1354 if (blk_addr == NULL_ADDR) { 1394 if (blk_addr == NULL_ADDR) {
1355 if (add_free_nid(nm_i, start_nid, true) < 0) 1395 if (add_free_nid(nm_i, start_nid, true) < 0)
1356 break; 1396 break;
@@ -1421,14 +1461,14 @@ retry:
1421 1461
1422 /* We should not use stale free nids created by build_free_nids */ 1462 /* We should not use stale free nids created by build_free_nids */
1423 if (nm_i->fcnt && !sbi->on_build_free_nids) { 1463 if (nm_i->fcnt && !sbi->on_build_free_nids) {
1424 BUG_ON(list_empty(&nm_i->free_nid_list)); 1464 f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1425 list_for_each(this, &nm_i->free_nid_list) { 1465 list_for_each(this, &nm_i->free_nid_list) {
1426 i = list_entry(this, struct free_nid, list); 1466 i = list_entry(this, struct free_nid, list);
1427 if (i->state == NID_NEW) 1467 if (i->state == NID_NEW)
1428 break; 1468 break;
1429 } 1469 }
1430 1470
1431 BUG_ON(i->state != NID_NEW); 1471 f2fs_bug_on(i->state != NID_NEW);
1432 *nid = i->nid; 1472 *nid = i->nid;
1433 i->state = NID_ALLOC; 1473 i->state = NID_ALLOC;
1434 nm_i->fcnt--; 1474 nm_i->fcnt--;
@@ -1439,9 +1479,9 @@ retry:
1439 1479
1440 /* Let's scan nat pages and its caches to get free nids */ 1480 /* Let's scan nat pages and its caches to get free nids */
1441 mutex_lock(&nm_i->build_lock); 1481 mutex_lock(&nm_i->build_lock);
1442 sbi->on_build_free_nids = 1; 1482 sbi->on_build_free_nids = true;
1443 build_free_nids(sbi); 1483 build_free_nids(sbi);
1444 sbi->on_build_free_nids = 0; 1484 sbi->on_build_free_nids = false;
1445 mutex_unlock(&nm_i->build_lock); 1485 mutex_unlock(&nm_i->build_lock);
1446 goto retry; 1486 goto retry;
1447} 1487}
@@ -1456,7 +1496,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1456 1496
1457 spin_lock(&nm_i->free_nid_list_lock); 1497 spin_lock(&nm_i->free_nid_list_lock);
1458 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1498 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1459 BUG_ON(!i || i->state != NID_ALLOC); 1499 f2fs_bug_on(!i || i->state != NID_ALLOC);
1460 __del_from_free_nid_list(i); 1500 __del_from_free_nid_list(i);
1461 spin_unlock(&nm_i->free_nid_list_lock); 1501 spin_unlock(&nm_i->free_nid_list_lock);
1462} 1502}
@@ -1474,7 +1514,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1474 1514
1475 spin_lock(&nm_i->free_nid_list_lock); 1515 spin_lock(&nm_i->free_nid_list_lock);
1476 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1516 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1477 BUG_ON(!i || i->state != NID_ALLOC); 1517 f2fs_bug_on(!i || i->state != NID_ALLOC);
1478 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { 1518 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
1479 __del_from_free_nid_list(i); 1519 __del_from_free_nid_list(i);
1480 } else { 1520 } else {
@@ -1677,7 +1717,7 @@ to_nat_page:
1677 nat_blk = page_address(page); 1717 nat_blk = page_address(page);
1678 } 1718 }
1679 1719
1680 BUG_ON(!nat_blk); 1720 f2fs_bug_on(!nat_blk);
1681 raw_ne = nat_blk->entries[nid - start_nid]; 1721 raw_ne = nat_blk->entries[nid - start_nid];
1682flush_now: 1722flush_now:
1683 new_blkaddr = nat_get_blkaddr(ne); 1723 new_blkaddr = nat_get_blkaddr(ne);
@@ -1781,11 +1821,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1781 /* destroy free nid list */ 1821 /* destroy free nid list */
1782 spin_lock(&nm_i->free_nid_list_lock); 1822 spin_lock(&nm_i->free_nid_list_lock);
1783 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 1823 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1784 BUG_ON(i->state == NID_ALLOC); 1824 f2fs_bug_on(i->state == NID_ALLOC);
1785 __del_from_free_nid_list(i); 1825 __del_from_free_nid_list(i);
1786 nm_i->fcnt--; 1826 nm_i->fcnt--;
1787 } 1827 }
1788 BUG_ON(nm_i->fcnt); 1828 f2fs_bug_on(nm_i->fcnt);
1789 spin_unlock(&nm_i->free_nid_list_lock); 1829 spin_unlock(&nm_i->free_nid_list_lock);
1790 1830
1791 /* destroy nat cache */ 1831 /* destroy nat cache */
@@ -1799,7 +1839,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1799 __del_from_nat_cache(nm_i, e); 1839 __del_from_nat_cache(nm_i, e);
1800 } 1840 }
1801 } 1841 }
1802 BUG_ON(nm_i->nat_cnt); 1842 f2fs_bug_on(nm_i->nat_cnt);
1803 write_unlock(&nm_i->nat_tree_lock); 1843 write_unlock(&nm_i->nat_tree_lock);
1804 1844
1805 kfree(nm_i->nat_bitmap); 1845 kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 51ef5eec33d7..fdc81161f254 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -64,24 +64,31 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
64 name.name = raw_inode->i_name; 64 name.name = raw_inode->i_name;
65retry: 65retry:
66 de = f2fs_find_entry(dir, &name, &page); 66 de = f2fs_find_entry(dir, &name, &page);
67 if (de && inode->i_ino == le32_to_cpu(de->ino)) { 67 if (de && inode->i_ino == le32_to_cpu(de->ino))
68 kunmap(page); 68 goto out_unmap_put;
69 f2fs_put_page(page, 0);
70 goto out;
71 }
72 if (de) { 69 if (de) {
73 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 70 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
74 if (IS_ERR(einode)) { 71 if (IS_ERR(einode)) {
75 WARN_ON(1); 72 WARN_ON(1);
76 if (PTR_ERR(einode) == -ENOENT) 73 if (PTR_ERR(einode) == -ENOENT)
77 err = -EEXIST; 74 err = -EEXIST;
78 goto out; 75 goto out_unmap_put;
76 }
77 err = acquire_orphan_inode(F2FS_SB(inode->i_sb));
78 if (err) {
79 iput(einode);
80 goto out_unmap_put;
79 } 81 }
80 f2fs_delete_entry(de, page, einode); 82 f2fs_delete_entry(de, page, einode);
81 iput(einode); 83 iput(einode);
82 goto retry; 84 goto retry;
83 } 85 }
84 err = __f2fs_add_link(dir, &name, inode); 86 err = __f2fs_add_link(dir, &name, inode);
87 goto out;
88
89out_unmap_put:
90 kunmap(page);
91 f2fs_put_page(page, 0);
85out: 92out:
86 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " 93 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
87 "ino = %x, name = %s, dir = %lx, err = %d", 94 "ino = %x, name = %s, dir = %lx, err = %d",
@@ -285,7 +292,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
285 struct f2fs_summary sum; 292 struct f2fs_summary sum;
286 struct node_info ni; 293 struct node_info ni;
287 int err = 0, recovered = 0; 294 int err = 0, recovered = 0;
288 int ilock;
289 295
290 start = start_bidx_of_node(ofs_of_node(page), fi); 296 start = start_bidx_of_node(ofs_of_node(page), fi);
291 if (IS_INODE(page)) 297 if (IS_INODE(page))
@@ -293,20 +299,20 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
293 else 299 else
294 end = start + ADDRS_PER_BLOCK; 300 end = start + ADDRS_PER_BLOCK;
295 301
296 ilock = mutex_lock_op(sbi); 302 f2fs_lock_op(sbi);
297 set_new_dnode(&dn, inode, NULL, NULL, 0); 303 set_new_dnode(&dn, inode, NULL, NULL, 0);
298 304
299 err = get_dnode_of_data(&dn, start, ALLOC_NODE); 305 err = get_dnode_of_data(&dn, start, ALLOC_NODE);
300 if (err) { 306 if (err) {
301 mutex_unlock_op(sbi, ilock); 307 f2fs_unlock_op(sbi);
302 return err; 308 return err;
303 } 309 }
304 310
305 wait_on_page_writeback(dn.node_page); 311 wait_on_page_writeback(dn.node_page);
306 312
307 get_node_info(sbi, dn.nid, &ni); 313 get_node_info(sbi, dn.nid, &ni);
308 BUG_ON(ni.ino != ino_of_node(page)); 314 f2fs_bug_on(ni.ino != ino_of_node(page));
309 BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); 315 f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));
310 316
311 for (; start < end; start++) { 317 for (; start < end; start++) {
312 block_t src, dest; 318 block_t src, dest;
@@ -316,9 +322,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
316 322
317 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { 323 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
318 if (src == NULL_ADDR) { 324 if (src == NULL_ADDR) {
319 int err = reserve_new_block(&dn); 325 err = reserve_new_block(&dn);
320 /* We should not get -ENOSPC */ 326 /* We should not get -ENOSPC */
321 BUG_ON(err); 327 f2fs_bug_on(err);
322 } 328 }
323 329
324 /* Check the previous node page having this index */ 330 /* Check the previous node page having this index */
@@ -349,7 +355,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
349 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); 355 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
350err: 356err:
351 f2fs_put_dnode(&dn); 357 f2fs_put_dnode(&dn);
352 mutex_unlock_op(sbi, ilock); 358 f2fs_unlock_op(sbi);
353 359
354 f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " 360 f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
355 "recovered_data = %d blocks, err = %d", 361 "recovered_data = %d blocks, err = %d",
@@ -419,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
419{ 425{
420 struct list_head inode_list; 426 struct list_head inode_list;
421 int err; 427 int err;
428 bool need_writecp = false;
422 429
423 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 430 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
424 sizeof(struct fsync_inode_entry), NULL); 431 sizeof(struct fsync_inode_entry), NULL);
@@ -428,7 +435,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
428 INIT_LIST_HEAD(&inode_list); 435 INIT_LIST_HEAD(&inode_list);
429 436
430 /* step #1: find fsynced inode numbers */ 437 /* step #1: find fsynced inode numbers */
431 sbi->por_doing = 1; 438 sbi->por_doing = true;
432 err = find_fsync_dnodes(sbi, &inode_list); 439 err = find_fsync_dnodes(sbi, &inode_list);
433 if (err) 440 if (err)
434 goto out; 441 goto out;
@@ -436,14 +443,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
436 if (list_empty(&inode_list)) 443 if (list_empty(&inode_list))
437 goto out; 444 goto out;
438 445
446 need_writecp = true;
447
439 /* step #2: recover data */ 448 /* step #2: recover data */
440 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 449 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
441 BUG_ON(!list_empty(&inode_list)); 450 f2fs_bug_on(!list_empty(&inode_list));
442out: 451out:
443 destroy_fsync_dnodes(&inode_list); 452 destroy_fsync_dnodes(&inode_list);
444 kmem_cache_destroy(fsync_entry_slab); 453 kmem_cache_destroy(fsync_entry_slab);
445 sbi->por_doing = 0; 454 sbi->por_doing = false;
446 if (!err) 455 if (!err && need_writecp)
447 write_checkpoint(sbi, false); 456 write_checkpoint(sbi, false);
448 return err; 457 return err;
449} 458}
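recover_fsync_data() now records, in the new need_writecp flag, whether any fsynced inodes were actually found, and only writes a checkpoint in that case. A small userspace model of the same guard; function and message names are illustrative:

#include <stdbool.h>
#include <stdio.h>

static int recover(int n_fsynced_inodes)
{
        bool need_writecp = false;
        int err = 0;

        if (n_fsynced_inodes == 0)
                goto out;               /* nothing to roll forward */

        need_writecp = true;
        /* ... replay the recovered data blocks for each inode here ... */

out:
        if (!err && need_writecp)
                printf("write_checkpoint()\n");
        else
                printf("checkpoint skipped\n");
        return err;
}

int main(void)
{
        recover(0);
        recover(3);
        return 0;
}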
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 09af9c7b0f52..fa284d397199 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -36,6 +36,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
36 } 36 }
37} 37}
38 38
39void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
40{
41 /* check the # of cached NAT entries and prefree segments */
42 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
43 excess_prefree_segs(sbi))
44 f2fs_sync_fs(sbi->sb, true);
45}
46
39static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 47static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
40 enum dirty_type dirty_type) 48 enum dirty_type dirty_type)
41{ 49{
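The new f2fs_balance_fs_bg() gathers the background triggers, too many cached NAT entries or an excess of prefree segments, into one helper that the writeback paths can call. A userspace model of that threshold-driven trigger; the thresholds and the structure layout are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NAT_ENTRY_THRESHOLD      455    /* illustrative limits only */
#define PREFREE_SEG_THRESHOLD    100

struct fs_state { int cached_nats; int prefree_segs; };

static bool try_to_free_nats(struct fs_state *fs, int limit)
{
        if (fs->cached_nats <= limit)
                return false;
        fs->cached_nats = limit;        /* pretend the excess entries were flushed */
        return true;
}

static void balance_fs_bg(struct fs_state *fs)
{
        /* Checkpoint only when either metadata pool is over its threshold. */
        if (try_to_free_nats(fs, NAT_ENTRY_THRESHOLD) ||
            fs->prefree_segs > PREFREE_SEG_THRESHOLD)
                printf("sync: checkpoint to shrink cached metadata\n");
        else
                printf("metadata within limits, no checkpoint\n");
}

int main(void)
{
        struct fs_state busy = { .cached_nats = 10000, .prefree_segs = 20 };
        struct fs_state idle = { .cached_nats = 10,    .prefree_segs = 5  };

        balance_fs_bg(&busy);
        balance_fs_bg(&idle);
        return 0;
}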
@@ -50,20 +58,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
50 58
51 if (dirty_type == DIRTY) { 59 if (dirty_type == DIRTY) {
52 struct seg_entry *sentry = get_seg_entry(sbi, segno); 60 struct seg_entry *sentry = get_seg_entry(sbi, segno);
53 enum dirty_type t = DIRTY_HOT_DATA; 61 enum dirty_type t = sentry->type;
54
55 dirty_type = sentry->type;
56
57 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
58 dirty_i->nr_dirty[dirty_type]++;
59 62
60 /* Only one bitmap should be set */ 63 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
61 for (; t <= DIRTY_COLD_NODE; t++) { 64 dirty_i->nr_dirty[t]++;
62 if (t == dirty_type)
63 continue;
64 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
65 dirty_i->nr_dirty[t]--;
66 }
67 } 65 }
68} 66}
69 67
@@ -76,12 +74,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
76 dirty_i->nr_dirty[dirty_type]--; 74 dirty_i->nr_dirty[dirty_type]--;
77 75
78 if (dirty_type == DIRTY) { 76 if (dirty_type == DIRTY) {
79 enum dirty_type t = DIRTY_HOT_DATA; 77 struct seg_entry *sentry = get_seg_entry(sbi, segno);
78 enum dirty_type t = sentry->type;
80 79
81 /* clear all the bitmaps */ 80 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
82 for (; t <= DIRTY_COLD_NODE; t++) 81 dirty_i->nr_dirty[t]--;
83 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
84 dirty_i->nr_dirty[t]--;
85 82
86 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) 83 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
87 clear_bit(GET_SECNO(sbi, segno), 84 clear_bit(GET_SECNO(sbi, segno),
@@ -142,27 +139,33 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
142void clear_prefree_segments(struct f2fs_sb_info *sbi) 139void clear_prefree_segments(struct f2fs_sb_info *sbi)
143{ 140{
144 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 141 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
145 unsigned int segno = -1; 142 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
146 unsigned int total_segs = TOTAL_SEGS(sbi); 143 unsigned int total_segs = TOTAL_SEGS(sbi);
144 unsigned int start = 0, end = -1;
147 145
148 mutex_lock(&dirty_i->seglist_lock); 146 mutex_lock(&dirty_i->seglist_lock);
147
149 while (1) { 148 while (1) {
150 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, 149 int i;
151 segno + 1); 150 start = find_next_bit(prefree_map, total_segs, end + 1);
152 if (segno >= total_segs) 151 if (start >= total_segs)
153 break; 152 break;
153 end = find_next_zero_bit(prefree_map, total_segs, start + 1);
154
155 for (i = start; i < end; i++)
156 clear_bit(i, prefree_map);
154 157
155 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) 158 dirty_i->nr_dirty[PRE] -= end - start;
156 dirty_i->nr_dirty[PRE]--; 159
157 160 if (!test_opt(sbi, DISCARD))
158 /* Let's use trim */ 161 continue;
159 if (test_opt(sbi, DISCARD)) 162
160 blkdev_issue_discard(sbi->sb->s_bdev, 163 blkdev_issue_discard(sbi->sb->s_bdev,
161 START_BLOCK(sbi, segno) << 164 START_BLOCK(sbi, start) <<
162 sbi->log_sectors_per_block, 165 sbi->log_sectors_per_block,
163 1 << (sbi->log_sectors_per_block + 166 (1 << (sbi->log_sectors_per_block +
164 sbi->log_blocks_per_seg), 167 sbi->log_blocks_per_seg)) * (end - start),
165 GFP_NOFS, 0); 168 GFP_NOFS, 0);
166 } 169 }
167 mutex_unlock(&dirty_i->seglist_lock); 170 mutex_unlock(&dirty_i->seglist_lock);
168} 171}
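clear_prefree_segments() now walks the PRE bitmap as maximal runs of set bits (find_next_bit followed by find_next_zero_bit) and issues a single discard per run instead of one per segment. A userspace sketch of the same run walk over a plain bool array; the bitmap helpers are simplified and the block-address arithmetic is omitted:

#include <stdbool.h>
#include <stdio.h>

#define NSEGS 16

static int next_set(const bool *map, int n, int from)
{
        while (from < n && !map[from])
                from++;
        return from;
}

static int next_clear(const bool *map, int n, int from)
{
        while (from < n && map[from])
                from++;
        return from;
}

/* Emit one "discard" covering each maximal run [start, end) of prefree segments. */
static void discard_prefree_runs(bool *prefree, int n)
{
        int start = 0, end = -1;

        while (1) {
                start = next_set(prefree, n, end + 1);
                if (start >= n)
                        break;
                end = next_clear(prefree, n, start + 1);

                for (int i = start; i < end; i++)
                        prefree[i] = false;     /* clear_bit() per segment */

                printf("discard segments [%d, %d) in one request\n", start, end);
        }
}

int main(void)
{
        bool prefree[NSEGS] = { 0, 1, 1, 1, 0, 0, 1, 0, 1, 1 };

        discard_prefree_runs(prefree, NSEGS);
        return 0;
}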
@@ -195,7 +198,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
195 new_vblocks = se->valid_blocks + del; 198 new_vblocks = se->valid_blocks + del;
196 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); 199 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
197 200
198 BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || 201 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
199 (new_vblocks > sbi->blocks_per_seg))); 202 (new_vblocks > sbi->blocks_per_seg)));
200 203
201 se->valid_blocks = new_vblocks; 204 se->valid_blocks = new_vblocks;
@@ -235,7 +238,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
235 unsigned int segno = GET_SEGNO(sbi, addr); 238 unsigned int segno = GET_SEGNO(sbi, addr);
236 struct sit_info *sit_i = SIT_I(sbi); 239 struct sit_info *sit_i = SIT_I(sbi);
237 240
238 BUG_ON(addr == NULL_ADDR); 241 f2fs_bug_on(addr == NULL_ADDR);
239 if (addr == NEW_ADDR) 242 if (addr == NEW_ADDR)
240 return; 243 return;
241 244
@@ -267,9 +270,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
267 */ 270 */
268int npages_for_summary_flush(struct f2fs_sb_info *sbi) 271int npages_for_summary_flush(struct f2fs_sb_info *sbi)
269{ 272{
270 int total_size_bytes = 0;
271 int valid_sum_count = 0; 273 int valid_sum_count = 0;
272 int i, sum_space; 274 int i, sum_in_page;
273 275
274 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 276 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
275 if (sbi->ckpt->alloc_type[i] == SSR) 277 if (sbi->ckpt->alloc_type[i] == SSR)
@@ -278,13 +280,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
278 valid_sum_count += curseg_blkoff(sbi, i); 280 valid_sum_count += curseg_blkoff(sbi, i);
279 } 281 }
280 282
281 total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) 283 sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
282 + sizeof(struct nat_journal) + 2 284 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
283 + sizeof(struct sit_journal) + 2; 285 if (valid_sum_count <= sum_in_page)
284 sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
285 if (total_size_bytes < sum_space)
286 return 1; 286 return 1;
287 else if (total_size_bytes < 2 * sum_space) 287 else if ((valid_sum_count - sum_in_page) <=
288 (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
288 return 2; 289 return 2;
289 return 3; 290 return 3;
290} 291}
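The rewritten npages_for_summary_flush() reasons directly in summary entries per page: the first compacted page loses room to the two journals plus the footer, later pages only to the footer. A hedged arithmetic model of that calculation; the byte sizes below are placeholders, not the real on-disk constants from f2fs.h:

#include <stdio.h>

/* Illustrative sizes only. */
#define PAGE_SIZE_B     4096
#define SUMMARY_SIZE_B  7
#define SUM_JOURNAL_B   505
#define SUM_FOOTER_B    5

static int pages_for_summaries(int valid_sum_count)
{
        int first_page = (PAGE_SIZE_B - 2 * SUM_JOURNAL_B - SUM_FOOTER_B) / SUMMARY_SIZE_B;
        int later_page = (PAGE_SIZE_B - SUM_FOOTER_B) / SUMMARY_SIZE_B;

        if (valid_sum_count <= first_page)
                return 1;
        if (valid_sum_count - first_page <= later_page)
                return 2;
        return 3;
}

int main(void)
{
        for (int n = 100; n <= 1300; n += 400)
                printf("%4d summaries -> %d page(s)\n", n, pages_for_summaries(n));
        return 0;
}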
@@ -350,7 +351,7 @@ find_other_zone:
350 if (dir == ALLOC_RIGHT) { 351 if (dir == ALLOC_RIGHT) {
351 secno = find_next_zero_bit(free_i->free_secmap, 352 secno = find_next_zero_bit(free_i->free_secmap,
352 TOTAL_SECS(sbi), 0); 353 TOTAL_SECS(sbi), 0);
353 BUG_ON(secno >= TOTAL_SECS(sbi)); 354 f2fs_bug_on(secno >= TOTAL_SECS(sbi));
354 } else { 355 } else {
355 go_left = 1; 356 go_left = 1;
356 left_start = hint - 1; 357 left_start = hint - 1;
@@ -366,7 +367,7 @@ find_other_zone:
366 } 367 }
367 left_start = find_next_zero_bit(free_i->free_secmap, 368 left_start = find_next_zero_bit(free_i->free_secmap,
368 TOTAL_SECS(sbi), 0); 369 TOTAL_SECS(sbi), 0);
369 BUG_ON(left_start >= TOTAL_SECS(sbi)); 370 f2fs_bug_on(left_start >= TOTAL_SECS(sbi));
370 break; 371 break;
371 } 372 }
372 secno = left_start; 373 secno = left_start;
@@ -405,7 +406,7 @@ skip_left:
405 } 406 }
406got_it: 407got_it:
407 /* set it as dirty segment in free segmap */ 408 /* set it as dirty segment in free segmap */
408 BUG_ON(test_bit(segno, free_i->free_segmap)); 409 f2fs_bug_on(test_bit(segno, free_i->free_segmap));
409 __set_inuse(sbi, segno); 410 __set_inuse(sbi, segno);
410 *newseg = segno; 411 *newseg = segno;
411 write_unlock(&free_i->segmap_lock); 412 write_unlock(&free_i->segmap_lock);
@@ -550,9 +551,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
550 change_curseg(sbi, type, true); 551 change_curseg(sbi, type, true);
551 else 552 else
552 new_curseg(sbi, type, false); 553 new_curseg(sbi, type, false);
553#ifdef CONFIG_F2FS_STAT_FS 554
554 sbi->segment_count[curseg->alloc_type]++; 555 stat_inc_seg_type(sbi, curseg);
555#endif
556} 556}
557 557
558void allocate_new_segments(struct f2fs_sb_info *sbi) 558void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -597,6 +597,11 @@ static void f2fs_end_io_write(struct bio *bio, int err)
597 597
598 if (p->is_sync) 598 if (p->is_sync)
599 complete(p->wait); 599 complete(p->wait);
600
601 if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
602 !list_empty(&p->sbi->cp_wait.task_list))
603 wake_up(&p->sbi->cp_wait);
604
600 kfree(p); 605 kfree(p);
601 bio_put(bio); 606 bio_put(bio);
602} 607}
@@ -657,6 +662,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
657 block_t blk_addr, enum page_type type) 662 block_t blk_addr, enum page_type type)
658{ 663{
659 struct block_device *bdev = sbi->sb->s_bdev; 664 struct block_device *bdev = sbi->sb->s_bdev;
665 int bio_blocks;
660 666
661 verify_block_addr(sbi, blk_addr); 667 verify_block_addr(sbi, blk_addr);
662 668
@@ -676,7 +682,8 @@ retry:
676 goto retry; 682 goto retry;
677 } 683 }
678 684
679 sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); 685 bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
686 sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
680 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 687 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
681 sbi->bio[type]->bi_private = priv; 688 sbi->bio[type]->bi_private = priv;
682 /* 689 /*
@@ -771,7 +778,7 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
771 return __get_segment_type_4(page, p_type); 778 return __get_segment_type_4(page, p_type);
772 } 779 }
773 /* NR_CURSEG_TYPE(6) logs by default */ 780 /* NR_CURSEG_TYPE(6) logs by default */
774 BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); 781 f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);
775 return __get_segment_type_6(page, p_type); 782 return __get_segment_type_6(page, p_type);
776} 783}
777 784
@@ -801,9 +808,8 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
801 808
802 mutex_lock(&sit_i->sentry_lock); 809 mutex_lock(&sit_i->sentry_lock);
803 __refresh_next_blkoff(sbi, curseg); 810 __refresh_next_blkoff(sbi, curseg);
804#ifdef CONFIG_F2FS_STAT_FS 811
805 sbi->block_count[curseg->alloc_type]++; 812 stat_inc_block_count(sbi, curseg);
806#endif
807 813
808 /* 814 /*
809 * SIT information should be updated before segment allocation, 815 * SIT information should be updated before segment allocation,
@@ -849,7 +855,7 @@ void write_data_page(struct inode *inode, struct page *page,
849 struct f2fs_summary sum; 855 struct f2fs_summary sum;
850 struct node_info ni; 856 struct node_info ni;
851 857
852 BUG_ON(old_blkaddr == NULL_ADDR); 858 f2fs_bug_on(old_blkaddr == NULL_ADDR);
853 get_node_info(sbi, dn->nid, &ni); 859 get_node_info(sbi, dn->nid, &ni);
854 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 860 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
855 861
@@ -1122,8 +1128,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1122 SUM_JOURNAL_SIZE); 1128 SUM_JOURNAL_SIZE);
1123 written_size += SUM_JOURNAL_SIZE; 1129 written_size += SUM_JOURNAL_SIZE;
1124 1130
1125 set_page_dirty(page);
1126
1127 /* Step 3: write summary entries */ 1131 /* Step 3: write summary entries */
1128 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 1132 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1129 unsigned short blkoff; 1133 unsigned short blkoff;
@@ -1142,18 +1146,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1142 summary = (struct f2fs_summary *)(kaddr + written_size); 1146 summary = (struct f2fs_summary *)(kaddr + written_size);
1143 *summary = seg_i->sum_blk->entries[j]; 1147 *summary = seg_i->sum_blk->entries[j];
1144 written_size += SUMMARY_SIZE; 1148 written_size += SUMMARY_SIZE;
1145 set_page_dirty(page);
1146 1149
1147 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - 1150 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1148 SUM_FOOTER_SIZE) 1151 SUM_FOOTER_SIZE)
1149 continue; 1152 continue;
1150 1153
1154 set_page_dirty(page);
1151 f2fs_put_page(page, 1); 1155 f2fs_put_page(page, 1);
1152 page = NULL; 1156 page = NULL;
1153 } 1157 }
1154 } 1158 }
1155 if (page) 1159 if (page) {
1160 set_page_dirty(page);
1156 f2fs_put_page(page, 1); 1161 f2fs_put_page(page, 1);
1162 }
1157} 1163}
1158 1164
1159static void write_normal_summaries(struct f2fs_sb_info *sbi, 1165static void write_normal_summaries(struct f2fs_sb_info *sbi,
@@ -1239,7 +1245,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1239 /* get current sit block page without lock */ 1245 /* get current sit block page without lock */
1240 src_page = get_meta_page(sbi, src_off); 1246 src_page = get_meta_page(sbi, src_off);
1241 dst_page = grab_meta_page(sbi, dst_off); 1247 dst_page = grab_meta_page(sbi, dst_off);
1242 BUG_ON(PageDirty(src_page)); 1248 f2fs_bug_on(PageDirty(src_page));
1243 1249
1244 src_addr = page_address(src_page); 1250 src_addr = page_address(src_page);
1245 dst_addr = page_address(dst_page); 1251 dst_addr = page_address(dst_page);
@@ -1271,9 +1277,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1271 __mark_sit_entry_dirty(sbi, segno); 1277 __mark_sit_entry_dirty(sbi, segno);
1272 } 1278 }
1273 update_sits_in_cursum(sum, -sits_in_cursum(sum)); 1279 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1274 return 1; 1280 return true;
1275 } 1281 }
1276 return 0; 1282 return false;
1277} 1283}
1278 1284
1279/* 1285/*
@@ -1637,6 +1643,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1637 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 1643 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1638 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1644 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1639 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1645 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1646 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
1640 1647
1641 err = build_sit_info(sbi); 1648 err = build_sit_info(sbi);
1642 if (err) 1649 if (err)
@@ -1744,6 +1751,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
1744void destroy_segment_manager(struct f2fs_sb_info *sbi) 1751void destroy_segment_manager(struct f2fs_sb_info *sbi)
1745{ 1752{
1746 struct f2fs_sm_info *sm_info = SM_I(sbi); 1753 struct f2fs_sm_info *sm_info = SM_I(sbi);
1754 if (!sm_info)
1755 return;
1747 destroy_dirty_segmap(sbi); 1756 destroy_dirty_segmap(sbi);
1748 destroy_curseg(sbi); 1757 destroy_curseg(sbi);
1749 destroy_free_segmap(sbi); 1758 destroy_free_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index bdd10eab8c40..269f690b4e24 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,6 +14,8 @@
14#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0)) 15#define NULL_SECNO ((unsigned int)(~0))
16 16
17#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */
18
17/* L: Logical segment # in volume, R: Relative segment # in main area */ 19/* L: Logical segment # in volume, R: Relative segment # in main area */
18#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
19#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) 21#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
@@ -90,6 +92,8 @@
90 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 92 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
91#define SECTOR_TO_BLOCK(sbi, sectors) \ 93#define SECTOR_TO_BLOCK(sbi, sectors) \
92 (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 94 (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
95#define MAX_BIO_BLOCKS(max_hw_blocks) \
96 (min((int)max_hw_blocks, BIO_MAX_PAGES))
93 97
94/* during checkpoint, bio_private is used to synchronize the last bio */ 98/* during checkpoint, bio_private is used to synchronize the last bio */
95struct bio_private { 99struct bio_private {
@@ -470,6 +474,11 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
470 reserved_sections(sbi))); 474 reserved_sections(sbi)));
471} 475}
472 476
477static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
478{
479 return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments);
480}
481
473static inline int utilization(struct f2fs_sb_info *sbi) 482static inline int utilization(struct f2fs_sb_info *sbi)
474{ 483{
475 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); 484 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
@@ -513,16 +522,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
513 return curseg->next_blkoff; 522 return curseg->next_blkoff;
514} 523}
515 524
525#ifdef CONFIG_F2FS_CHECK_FS
516static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 526static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
517{ 527{
518 unsigned int end_segno = SM_I(sbi)->segment_count - 1; 528 unsigned int end_segno = SM_I(sbi)->segment_count - 1;
519 BUG_ON(segno > end_segno); 529 BUG_ON(segno > end_segno);
520} 530}
521 531
522/*
523 * This function is used for only debugging.
524 * NOTE: In future, we have to remove this function.
525 */
526static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 532static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
527{ 533{
528 struct f2fs_sm_info *sm_info = SM_I(sbi); 534 struct f2fs_sm_info *sm_info = SM_I(sbi);
@@ -541,8 +547,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
541{ 547{
542 struct f2fs_sm_info *sm_info = SM_I(sbi); 548 struct f2fs_sm_info *sm_info = SM_I(sbi);
543 unsigned int end_segno = sm_info->segment_count - 1; 549 unsigned int end_segno = sm_info->segment_count - 1;
550 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
544 int valid_blocks = 0; 551 int valid_blocks = 0;
545 int i; 552 int cur_pos = 0, next_pos;
546 553
547 /* check segment usage */ 554 /* check segment usage */
548 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); 555 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
@@ -551,11 +558,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
551 BUG_ON(segno > end_segno); 558 BUG_ON(segno > end_segno);
552 559
553 /* check bitmap with valid block count */ 560 /* check bitmap with valid block count */
554 for (i = 0; i < sbi->blocks_per_seg; i++) 561 do {
555 if (f2fs_test_bit(i, raw_sit->valid_map)) 562 if (is_valid) {
556 valid_blocks++; 563 next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
564 sbi->blocks_per_seg,
565 cur_pos);
566 valid_blocks += next_pos - cur_pos;
567 } else
568 next_pos = find_next_bit_le(&raw_sit->valid_map,
569 sbi->blocks_per_seg,
570 cur_pos);
571 cur_pos = next_pos;
572 is_valid = !is_valid;
573 } while (cur_pos < sbi->blocks_per_seg);
557 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 574 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
558} 575}
576#else
577#define check_seg_range(sbi, segno)
578#define verify_block_addr(sbi, blk_addr)
579#define check_block_count(sbi, segno, raw_sit)
580#endif
559 581
560static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 582static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
561 unsigned int start) 583 unsigned int start)
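check_block_count() now counts valid blocks by alternating between find_next_zero_bit_le() and find_next_bit_le(), adding up the lengths of the set runs rather than testing every bit. A userspace model of that alternating-run count over a byte-per-bit array; the find helpers are simplified to linear scans:

#include <stdbool.h>
#include <stdio.h>

/* Count set bits by measuring alternating runs instead of testing each bit. */
static int count_valid(const bool *map, int nbits)
{
        bool in_valid_run = map[0];
        int cur = 0, next, valid = 0;

        do {
                next = cur;
                while (next < nbits && map[next] == in_valid_run)
                        next++;                 /* find_next_{zero_,}bit_le() analogue */
                if (in_valid_run)
                        valid += next - cur;    /* only set runs contribute */
                cur = next;
                in_valid_run = !in_valid_run;   /* runs strictly alternate */
        } while (cur < nbits);

        return valid;
}

int main(void)
{
        bool map[12] = { 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1 };

        printf("valid blocks = %d (expect 7)\n", count_valid(map, 12));
        return 0;
}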
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13d0a0fe49dd..bafff72de8e8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -43,7 +43,9 @@ enum {
43 Opt_disable_roll_forward, 43 Opt_disable_roll_forward,
44 Opt_discard, 44 Opt_discard,
45 Opt_noheap, 45 Opt_noheap,
46 Opt_user_xattr,
46 Opt_nouser_xattr, 47 Opt_nouser_xattr,
48 Opt_acl,
47 Opt_noacl, 49 Opt_noacl,
48 Opt_active_logs, 50 Opt_active_logs,
49 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
@@ -56,7 +58,9 @@ static match_table_t f2fs_tokens = {
56 {Opt_disable_roll_forward, "disable_roll_forward"}, 58 {Opt_disable_roll_forward, "disable_roll_forward"},
57 {Opt_discard, "discard"}, 59 {Opt_discard, "discard"},
58 {Opt_noheap, "no_heap"}, 60 {Opt_noheap, "no_heap"},
61 {Opt_user_xattr, "user_xattr"},
59 {Opt_nouser_xattr, "nouser_xattr"}, 62 {Opt_nouser_xattr, "nouser_xattr"},
63 {Opt_acl, "acl"},
60 {Opt_noacl, "noacl"}, 64 {Opt_noacl, "noacl"},
61 {Opt_active_logs, "active_logs=%u"}, 65 {Opt_active_logs, "active_logs=%u"},
62 {Opt_disable_ext_identify, "disable_ext_identify"}, 66 {Opt_disable_ext_identify, "disable_ext_identify"},
@@ -65,24 +69,40 @@ static match_table_t f2fs_tokens = {
65}; 69};
66 70
67/* Sysfs support for f2fs */ 71/* Sysfs support for f2fs */
72enum {
73 GC_THREAD, /* struct f2fs_gc_thread */
74 SM_INFO, /* struct f2fs_sm_info */
75};
76
68struct f2fs_attr { 77struct f2fs_attr {
69 struct attribute attr; 78 struct attribute attr;
70 ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); 79 ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
71 ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, 80 ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
72 const char *, size_t); 81 const char *, size_t);
82 int struct_type;
73 int offset; 83 int offset;
74}; 84};
75 85
86static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
87{
88 if (struct_type == GC_THREAD)
89 return (unsigned char *)sbi->gc_thread;
90 else if (struct_type == SM_INFO)
91 return (unsigned char *)SM_I(sbi);
92 return NULL;
93}
94
76static ssize_t f2fs_sbi_show(struct f2fs_attr *a, 95static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
77 struct f2fs_sb_info *sbi, char *buf) 96 struct f2fs_sb_info *sbi, char *buf)
78{ 97{
79 struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; 98 unsigned char *ptr = NULL;
80 unsigned int *ui; 99 unsigned int *ui;
81 100
82 if (!gc_kth) 101 ptr = __struct_ptr(sbi, a->struct_type);
102 if (!ptr)
83 return -EINVAL; 103 return -EINVAL;
84 104
85 ui = (unsigned int *)(((char *)gc_kth) + a->offset); 105 ui = (unsigned int *)(ptr + a->offset);
86 106
87 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 107 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
88} 108}
@@ -91,15 +111,16 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
91 struct f2fs_sb_info *sbi, 111 struct f2fs_sb_info *sbi,
92 const char *buf, size_t count) 112 const char *buf, size_t count)
93{ 113{
94 struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; 114 unsigned char *ptr;
95 unsigned long t; 115 unsigned long t;
96 unsigned int *ui; 116 unsigned int *ui;
97 ssize_t ret; 117 ssize_t ret;
98 118
99 if (!gc_kth) 119 ptr = __struct_ptr(sbi, a->struct_type);
120 if (!ptr)
100 return -EINVAL; 121 return -EINVAL;
101 122
102 ui = (unsigned int *)(((char *)gc_kth) + a->offset); 123 ui = (unsigned int *)(ptr + a->offset);
103 124
104 ret = kstrtoul(skip_spaces(buf), 0, &t); 125 ret = kstrtoul(skip_spaces(buf), 0, &t);
105 if (ret < 0) 126 if (ret < 0)
@@ -135,21 +156,25 @@ static void f2fs_sb_release(struct kobject *kobj)
135 complete(&sbi->s_kobj_unregister); 156 complete(&sbi->s_kobj_unregister);
136} 157}
137 158
138#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ 159#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
139static struct f2fs_attr f2fs_attr_##_name = { \ 160static struct f2fs_attr f2fs_attr_##_name = { \
140 .attr = {.name = __stringify(_name), .mode = _mode }, \ 161 .attr = {.name = __stringify(_name), .mode = _mode }, \
141 .show = _show, \ 162 .show = _show, \
142 .store = _store, \ 163 .store = _store, \
143 .offset = offsetof(struct f2fs_gc_kthread, _elname), \ 164 .struct_type = _struct_type, \
165 .offset = _offset \
144} 166}
145 167
146#define F2FS_RW_ATTR(name, elname) \ 168#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
147 F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) 169 F2FS_ATTR_OFFSET(struct_type, name, 0644, \
170 f2fs_sbi_show, f2fs_sbi_store, \
171 offsetof(struct struct_name, elname))
148 172
149F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); 173F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
150F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); 174F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
151F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); 175F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
152F2FS_RW_ATTR(gc_idle, gc_idle); 176F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
177F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
153 178
154#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 179#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
155static struct attribute *f2fs_attrs[] = { 180static struct attribute *f2fs_attrs[] = {
@@ -157,6 +182,7 @@ static struct attribute *f2fs_attrs[] = {
157 ATTR_LIST(gc_max_sleep_time), 182 ATTR_LIST(gc_max_sleep_time),
158 ATTR_LIST(gc_no_gc_sleep_time), 183 ATTR_LIST(gc_no_gc_sleep_time),
159 ATTR_LIST(gc_idle), 184 ATTR_LIST(gc_idle),
185 ATTR_LIST(reclaim_segments),
160 NULL, 186 NULL,
161}; 187};
162 188
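Each sysfs attribute now carries a struct_type selector plus a byte offset, so a single show/store pair can reach fields in either the GC thread or the segment manager. A hedged userspace sketch of that pick-the-base-pointer-then-add-offsetof technique; the structure layouts and attribute names are invented:

#include <stddef.h>
#include <stdio.h>

enum target { GC_THREAD, SM_INFO };

struct gc_thread { unsigned int min_sleep_ms, max_sleep_ms; };
struct sm_info   { unsigned int reclaim_segments; };

struct attr    { const char *name; enum target type; size_t offset; };
struct sb_info { struct gc_thread *gc; struct sm_info *sm; };

static unsigned char *struct_ptr(struct sb_info *sbi, enum target t)
{
        return t == GC_THREAD ? (unsigned char *)sbi->gc
                              : (unsigned char *)sbi->sm;
}

/* One generic accessor serves every attribute: pick the base, add the offset. */
static unsigned int show(struct sb_info *sbi, const struct attr *a)
{
        return *(unsigned int *)(struct_ptr(sbi, a->type) + a->offset);
}

#define RW_ATTR(_name, _type, _struct, _field) \
        { .name = #_name, .type = _type, .offset = offsetof(struct _struct, _field) }

int main(void)
{
        struct gc_thread gc = { .min_sleep_ms = 30000, .max_sleep_ms = 60000 };
        struct sm_info sm = { .reclaim_segments = 100 };
        struct sb_info sbi = { .gc = &gc, .sm = &sm };
        struct attr attrs[] = {
                RW_ATTR(gc_min_sleep_time, GC_THREAD, gc_thread, min_sleep_ms),
                RW_ATTR(reclaim_segments,  SM_INFO,   sm_info,   reclaim_segments),
        };

        for (size_t i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
                printf("%s = %u\n", attrs[i].name, show(&sbi, &attrs[i]));
        return 0;
}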
@@ -237,6 +263,9 @@ static int parse_options(struct super_block *sb, char *options)
237 set_opt(sbi, NOHEAP); 263 set_opt(sbi, NOHEAP);
238 break; 264 break;
239#ifdef CONFIG_F2FS_FS_XATTR 265#ifdef CONFIG_F2FS_FS_XATTR
266 case Opt_user_xattr:
267 set_opt(sbi, XATTR_USER);
268 break;
240 case Opt_nouser_xattr: 269 case Opt_nouser_xattr:
241 clear_opt(sbi, XATTR_USER); 270 clear_opt(sbi, XATTR_USER);
242 break; 271 break;
@@ -244,6 +273,10 @@ static int parse_options(struct super_block *sb, char *options)
244 set_opt(sbi, INLINE_XATTR); 273 set_opt(sbi, INLINE_XATTR);
245 break; 274 break;
246#else 275#else
276 case Opt_user_xattr:
277 f2fs_msg(sb, KERN_INFO,
278 "user_xattr options not supported");
279 break;
247 case Opt_nouser_xattr: 280 case Opt_nouser_xattr:
248 f2fs_msg(sb, KERN_INFO, 281 f2fs_msg(sb, KERN_INFO,
249 "nouser_xattr options not supported"); 282 "nouser_xattr options not supported");
@@ -254,10 +287,16 @@ static int parse_options(struct super_block *sb, char *options)
254 break; 287 break;
255#endif 288#endif
256#ifdef CONFIG_F2FS_FS_POSIX_ACL 289#ifdef CONFIG_F2FS_FS_POSIX_ACL
290 case Opt_acl:
291 set_opt(sbi, POSIX_ACL);
292 break;
257 case Opt_noacl: 293 case Opt_noacl:
258 clear_opt(sbi, POSIX_ACL); 294 clear_opt(sbi, POSIX_ACL);
259 break; 295 break;
260#else 296#else
297 case Opt_acl:
298 f2fs_msg(sb, KERN_INFO, "acl options not supported");
299 break;
261 case Opt_noacl: 300 case Opt_noacl:
262 f2fs_msg(sb, KERN_INFO, "noacl options not supported"); 301 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
263 break; 302 break;
@@ -355,7 +394,9 @@ static void f2fs_put_super(struct super_block *sb)
355 f2fs_destroy_stats(sbi); 394 f2fs_destroy_stats(sbi);
356 stop_gc_thread(sbi); 395 stop_gc_thread(sbi);
357 396
358 write_checkpoint(sbi, true); 397 /* We don't need to do checkpoint when it's clean */
398 if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
399 write_checkpoint(sbi, true);
359 400
360 iput(sbi->node_inode); 401 iput(sbi->node_inode);
361 iput(sbi->meta_inode); 402 iput(sbi->meta_inode);
@@ -727,30 +768,47 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
727 atomic_set(&sbi->nr_pages[i], 0); 768 atomic_set(&sbi->nr_pages[i], 0);
728} 769}
729 770
730static int validate_superblock(struct super_block *sb, 771/*
731 struct f2fs_super_block **raw_super, 772 * Read f2fs raw super block.
732 struct buffer_head **raw_super_buf, sector_t block) 773 * Because we keep two copies of the super block, read the first one first;
 774 * if it is invalid, fall back to the second copy.
775 */
776static int read_raw_super_block(struct super_block *sb,
777 struct f2fs_super_block **raw_super,
778 struct buffer_head **raw_super_buf)
733{ 779{
734 const char *super = (block == 0 ? "first" : "second"); 780 int block = 0;
735 781
736 /* read f2fs raw super block */ 782retry:
737 *raw_super_buf = sb_bread(sb, block); 783 *raw_super_buf = sb_bread(sb, block);
738 if (!*raw_super_buf) { 784 if (!*raw_super_buf) {
739 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", 785 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
740 super); 786 block + 1);
741 return -EIO; 787 if (block == 0) {
788 block++;
789 goto retry;
790 } else {
791 return -EIO;
792 }
742 } 793 }
743 794
744 *raw_super = (struct f2fs_super_block *) 795 *raw_super = (struct f2fs_super_block *)
745 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); 796 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
746 797
747 /* sanity checking of raw super */ 798 /* sanity checking of raw super */
748 if (!sanity_check_raw_super(sb, *raw_super)) 799 if (sanity_check_raw_super(sb, *raw_super)) {
749 return 0; 800 brelse(*raw_super_buf);
801 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
802 "in %dth superblock", block + 1);
803 if (block == 0) {
804 block++;
805 goto retry;
806 } else {
807 return -EINVAL;
808 }
809 }
750 810
751 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " 811 return 0;
752 "in %s superblock", super);
753 return -EINVAL;
754} 812}
755 813
756static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 814static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
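read_raw_super_block() retries with the backup copy whenever the first superblock either cannot be read or fails validation, and only gives up when both copies are bad. A userspace sketch of that two-copy fallback; the read and sanity-check stand-ins are invented:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct raw_super { int magic; };

/* Pretend copy 0 is corrupted and copy 1 is intact. */
static bool read_copy(int block, struct raw_super *out)
{
        out->magic = (block == 0) ? 0 : 0xF2F5;
        return true;                    /* the read itself succeeded */
}

static bool sane(const struct raw_super *rs)
{
        return rs->magic == 0xF2F5;
}

static int read_raw_super(struct raw_super *rs)
{
        for (int block = 0; block < 2; block++) {       /* primary, then backup */
                if (!read_copy(block, rs)) {
                        fprintf(stderr, "unable to read superblock %d\n", block + 1);
                        continue;
                }
                if (!sane(rs)) {
                        fprintf(stderr, "superblock %d failed validation\n", block + 1);
                        continue;
                }
                printf("using superblock copy %d\n", block + 1);
                return 0;
        }
        return -EINVAL;                                 /* both copies are bad */
}

int main(void)
{
        struct raw_super rs;

        return read_raw_super(&rs) ? 1 : 0;
}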
@@ -760,7 +818,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
760 struct buffer_head *raw_super_buf; 818 struct buffer_head *raw_super_buf;
761 struct inode *root; 819 struct inode *root;
762 long err = -EINVAL; 820 long err = -EINVAL;
763 int i;
764 821
765 /* allocate memory for f2fs-specific super block info */ 822 /* allocate memory for f2fs-specific super block info */
766 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); 823 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
@@ -773,14 +830,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
773 goto free_sbi; 830 goto free_sbi;
774 } 831 }
775 832
776 err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); 833 err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
777 if (err) { 834 if (err)
778 brelse(raw_super_buf); 835 goto free_sbi;
779 /* check secondary superblock when primary failed */ 836
780 err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
781 if (err)
782 goto free_sb_buf;
783 }
784 sb->s_fs_info = sbi; 837 sb->s_fs_info = sbi;
785 /* init some FS parameters */ 838 /* init some FS parameters */
786 sbi->active_logs = NR_CURSEG_TYPE; 839 sbi->active_logs = NR_CURSEG_TYPE;
@@ -818,12 +871,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
818 mutex_init(&sbi->gc_mutex); 871 mutex_init(&sbi->gc_mutex);
819 mutex_init(&sbi->writepages); 872 mutex_init(&sbi->writepages);
820 mutex_init(&sbi->cp_mutex); 873 mutex_init(&sbi->cp_mutex);
821 for (i = 0; i < NR_GLOBAL_LOCKS; i++)
822 mutex_init(&sbi->fs_lock[i]);
823 mutex_init(&sbi->node_write); 874 mutex_init(&sbi->node_write);
824 sbi->por_doing = 0; 875 sbi->por_doing = false;
825 spin_lock_init(&sbi->stat_lock); 876 spin_lock_init(&sbi->stat_lock);
826 init_rwsem(&sbi->bio_sem); 877 init_rwsem(&sbi->bio_sem);
878 init_rwsem(&sbi->cp_rwsem);
879 init_waitqueue_head(&sbi->cp_wait);
827 init_sb_info(sbi); 880 init_sb_info(sbi);
828 881
829 /* get an inode for meta space */ 882 /* get an inode for meta space */
@@ -922,12 +975,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
922 /* After POR, we can run background GC thread.*/ 975 /* After POR, we can run background GC thread.*/
923 err = start_gc_thread(sbi); 976 err = start_gc_thread(sbi);
924 if (err) 977 if (err)
925 goto fail; 978 goto free_gc;
926 } 979 }
927 980
928 err = f2fs_build_stats(sbi); 981 err = f2fs_build_stats(sbi);
929 if (err) 982 if (err)
930 goto fail; 983 goto free_gc;
931 984
932 if (f2fs_proc_root) 985 if (f2fs_proc_root)
933 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 986 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -953,6 +1006,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
953 1006
954 return 0; 1007 return 0;
955fail: 1008fail:
1009 if (sbi->s_proc) {
1010 remove_proc_entry("segment_info", sbi->s_proc);
1011 remove_proc_entry(sb->s_id, f2fs_proc_root);
1012 }
1013 f2fs_destroy_stats(sbi);
1014free_gc:
956 stop_gc_thread(sbi); 1015 stop_gc_thread(sbi);
957free_root_inode: 1016free_root_inode:
958 dput(sb->s_root); 1017 dput(sb->s_root);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 1ac8a5f6e380..aa7a3f139fe5 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -154,6 +154,9 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
154} 154}
155 155
156#ifdef CONFIG_F2FS_FS_SECURITY 156#ifdef CONFIG_F2FS_FS_SECURITY
157static int __f2fs_setxattr(struct inode *inode, int name_index,
158 const char *name, const void *value, size_t value_len,
159 struct page *ipage);
157static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, 160static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
158 void *page) 161 void *page)
159{ 162{
@@ -161,7 +164,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
161 int err = 0; 164 int err = 0;
162 165
163 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 166 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
164 err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, 167 err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
165 xattr->name, xattr->value, 168 xattr->name, xattr->value,
166 xattr->value_len, (struct page *)page); 169 xattr->value_len, (struct page *)page);
167 if (err < 0) 170 if (err < 0)
@@ -369,7 +372,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
369 alloc_nid_failed(sbi, new_nid); 372 alloc_nid_failed(sbi, new_nid);
370 return PTR_ERR(xpage); 373 return PTR_ERR(xpage);
371 } 374 }
372 BUG_ON(new_nid); 375 f2fs_bug_on(new_nid);
373 } else { 376 } else {
374 struct dnode_of_data dn; 377 struct dnode_of_data dn;
375 set_new_dnode(&dn, inode, NULL, NULL, new_nid); 378 set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -469,16 +472,15 @@ cleanup:
469 return error; 472 return error;
470} 473}
471 474
472int f2fs_setxattr(struct inode *inode, int name_index, const char *name, 475static int __f2fs_setxattr(struct inode *inode, int name_index,
473 const void *value, size_t value_len, struct page *ipage) 476 const char *name, const void *value, size_t value_len,
477 struct page *ipage)
474{ 478{
475 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
476 struct f2fs_inode_info *fi = F2FS_I(inode); 479 struct f2fs_inode_info *fi = F2FS_I(inode);
477 struct f2fs_xattr_entry *here, *last; 480 struct f2fs_xattr_entry *here, *last;
478 void *base_addr; 481 void *base_addr;
479 int found, newsize; 482 int found, newsize;
480 size_t name_len; 483 size_t name_len;
481 int ilock;
482 __u32 new_hsize; 484 __u32 new_hsize;
483 int error = -ENOMEM; 485 int error = -ENOMEM;
484 486
@@ -493,10 +495,6 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
493 if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) 495 if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
494 return -ERANGE; 496 return -ERANGE;
495 497
496 f2fs_balance_fs(sbi);
497
498 ilock = mutex_lock_op(sbi);
499
500 base_addr = read_all_xattrs(inode, ipage); 498 base_addr = read_all_xattrs(inode, ipage);
501 if (!base_addr) 499 if (!base_addr)
502 goto exit; 500 goto exit;
@@ -522,7 +520,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
522 */ 520 */
523 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); 521 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
524 if (found) 522 if (found)
525 free = free - ENTRY_SIZE(here); 523 free = free + ENTRY_SIZE(here);
526 524
527 if (free < newsize) { 525 if (free < newsize) {
528 error = -ENOSPC; 526 error = -ENOSPC;
@@ -578,7 +576,21 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
578 else 576 else
579 update_inode_page(inode); 577 update_inode_page(inode);
580exit: 578exit:
581 mutex_unlock_op(sbi, ilock);
582 kzfree(base_addr); 579 kzfree(base_addr);
583 return error; 580 return error;
584} 581}
582
583int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
584 const void *value, size_t value_len, struct page *ipage)
585{
586 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
587 int err;
588
589 f2fs_balance_fs(sbi);
590
591 f2fs_lock_op(sbi);
592 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
593 f2fs_unlock_op(sbi);
594
595 return err;
596}
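f2fs_setxattr() is split so that the exported entry point balances the filesystem and takes f2fs_lock_op(), while __f2fs_setxattr() does the actual work; the security-xattr init path, which already runs under the operation lock, can then call the helper directly without self-deadlocking. A userspace sketch of that wrapper/worker split using a pthread mutex; the locking model is deliberately simplified:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER;

/* Worker: assumes the caller already holds op_lock. */
static int __set_xattr(const char *name, const char *value)
{
        printf("set %s=%s (lock held by caller)\n", name, value);
        return 0;
}

/* Public entry point: takes the lock, then delegates to the worker. */
static int set_xattr(const char *name, const char *value)
{
        int err;

        pthread_mutex_lock(&op_lock);
        err = __set_xattr(name, value);
        pthread_mutex_unlock(&op_lock);
        return err;
}

/* A path that is already serialized (e.g. inode creation) skips the wrapper. */
static int init_security_xattrs(void)
{
        int err;

        pthread_mutex_lock(&op_lock);   /* the outer operation holds the lock */
        err = __set_xattr("security.selinux", "ctx");
        pthread_mutex_unlock(&op_lock);
        return err;
}

int main(void)
{
        set_xattr("user.tag", "red");
        init_security_xattrs();
        return 0;
}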
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 4241e6f39e86..7c31f4bc74a9 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -102,6 +102,7 @@ struct msdos_sb_info {
102 struct hlist_head dir_hashtable[FAT_HASH_SIZE]; 102 struct hlist_head dir_hashtable[FAT_HASH_SIZE];
103 103
104 unsigned int dirty; /* fs state before mount */ 104 unsigned int dirty; /* fs state before mount */
105 struct rcu_head rcu;
105}; 106};
106 107
107#define FAT_CACHE_VALID 0 /* special case for valid cache */ 108#define FAT_CACHE_VALID 0 /* special case for valid cache */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0062da21dd8b..854b578f6695 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -548,6 +548,16 @@ static void fat_set_state(struct super_block *sb,
548 brelse(bh); 548 brelse(bh);
549} 549}
550 550
551static void delayed_free(struct rcu_head *p)
552{
553 struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
554 unload_nls(sbi->nls_disk);
555 unload_nls(sbi->nls_io);
556 if (sbi->options.iocharset != fat_default_iocharset)
557 kfree(sbi->options.iocharset);
558 kfree(sbi);
559}
560
551static void fat_put_super(struct super_block *sb) 561static void fat_put_super(struct super_block *sb)
552{ 562{
553 struct msdos_sb_info *sbi = MSDOS_SB(sb); 563 struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -557,14 +567,7 @@ static void fat_put_super(struct super_block *sb)
557 iput(sbi->fsinfo_inode); 567 iput(sbi->fsinfo_inode);
558 iput(sbi->fat_inode); 568 iput(sbi->fat_inode);
559 569
560 unload_nls(sbi->nls_disk); 570 call_rcu(&sbi->rcu, delayed_free);
561 unload_nls(sbi->nls_io);
562
563 if (sbi->options.iocharset != fat_default_iocharset)
564 kfree(sbi->options.iocharset);
565
566 sb->s_fs_info = NULL;
567 kfree(sbi);
568} 571}
569 572
570static struct kmem_cache *fat_inode_cachep; 573static struct kmem_cache *fat_inode_cachep;
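fat_put_super() now hands the msdos_sb_info to call_rcu() instead of freeing it inline, and the delayed_free() callback recovers the surrounding structure from the embedded rcu_head with container_of() before releasing it. A userspace model of that embedded-head/callback shape; the deferral itself is only simulated here (the callback runs immediately, with no real grace period):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct cb_head { void (*func)(struct cb_head *); };

struct sb_info {
        char volume_label[12];
        struct cb_head rcu;     /* embedded head handed to the deferral machinery */
};

/* Callback: recover the outer structure from the embedded head, then free it. */
static void delayed_free(struct cb_head *head)
{
        struct sb_info *sbi = container_of(head, struct sb_info, rcu);

        printf("freeing sb_info for '%s' after the grace period\n", sbi->volume_label);
        free(sbi);
}

/* Stand-in for call_rcu(): runs immediately; the kernel would queue it instead. */
static void fake_call_rcu(struct cb_head *head, void (*func)(struct cb_head *))
{
        head->func = func;
        head->func(head);
}

int main(void)
{
        struct sb_info *sbi = calloc(1, sizeof(*sbi));

        if (!sbi)
                return 1;
        snprintf(sbi->volume_label, sizeof(sbi->volume_label), "FATVOL");
        /* put_super path: stop using sbi, then schedule the deferred free. */
        fake_call_rcu(&sbi->rcu, delayed_free);
        return 0;
}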
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 65343c3741ff..ef6866592a0f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
56 return -EINVAL; 56 return -EINVAL;
57 } 57 }
58 58
59 if (filp->f_op && filp->f_op->check_flags) 59 if (filp->f_op->check_flags)
60 error = filp->f_op->check_flags(arg); 60 error = filp->f_op->check_flags(arg);
61 if (error) 61 if (error)
62 return error; 62 return error;
@@ -64,8 +64,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
64 /* 64 /*
65 * ->fasync() is responsible for setting the FASYNC bit. 65 * ->fasync() is responsible for setting the FASYNC bit.
66 */ 66 */
67 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && 67 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
68 filp->f_op->fasync) {
69 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 68 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
70 if (error < 0) 69 if (error < 0)
71 goto out; 70 goto out;
diff --git a/fs/file_table.c b/fs/file_table.c
index e900ca518635..5fff9030be34 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,8 +36,6 @@ struct files_stat_struct files_stat = {
36 .max_files = NR_FILE 36 .max_files = NR_FILE
37}; 37};
38 38
39DEFINE_STATIC_LGLOCK(files_lglock);
40
41/* SLAB cache for file structures */ 39/* SLAB cache for file structures */
42static struct kmem_cache *filp_cachep __read_mostly; 40static struct kmem_cache *filp_cachep __read_mostly;
43 41
@@ -134,7 +132,6 @@ struct file *get_empty_filp(void)
134 return ERR_PTR(error); 132 return ERR_PTR(error);
135 } 133 }
136 134
137 INIT_LIST_HEAD(&f->f_u.fu_list);
138 atomic_long_set(&f->f_count, 1); 135 atomic_long_set(&f->f_count, 1);
139 rwlock_init(&f->f_owner.lock); 136 rwlock_init(&f->f_owner.lock);
140 spin_lock_init(&f->f_lock); 137 spin_lock_init(&f->f_lock);
@@ -240,11 +237,11 @@ static void __fput(struct file *file)
240 locks_remove_flock(file); 237 locks_remove_flock(file);
241 238
242 if (unlikely(file->f_flags & FASYNC)) { 239 if (unlikely(file->f_flags & FASYNC)) {
243 if (file->f_op && file->f_op->fasync) 240 if (file->f_op->fasync)
244 file->f_op->fasync(-1, file, 0); 241 file->f_op->fasync(-1, file, 0);
245 } 242 }
246 ima_file_free(file); 243 ima_file_free(file);
247 if (file->f_op && file->f_op->release) 244 if (file->f_op->release)
248 file->f_op->release(inode, file); 245 file->f_op->release(inode, file);
249 security_file_free(file); 246 security_file_free(file);
250 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && 247 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
@@ -304,7 +301,6 @@ void fput(struct file *file)
304 if (atomic_long_dec_and_test(&file->f_count)) { 301 if (atomic_long_dec_and_test(&file->f_count)) {
305 struct task_struct *task = current; 302 struct task_struct *task = current;
306 303
307 file_sb_list_del(file);
308 if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { 304 if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
309 init_task_work(&file->f_u.fu_rcuhead, ____fput); 305 init_task_work(&file->f_u.fu_rcuhead, ____fput);
310 if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) 306 if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
@@ -333,7 +329,6 @@ void __fput_sync(struct file *file)
333{ 329{
334 if (atomic_long_dec_and_test(&file->f_count)) { 330 if (atomic_long_dec_and_test(&file->f_count)) {
335 struct task_struct *task = current; 331 struct task_struct *task = current;
336 file_sb_list_del(file);
337 BUG_ON(!(task->flags & PF_KTHREAD)); 332 BUG_ON(!(task->flags & PF_KTHREAD));
338 __fput(file); 333 __fput(file);
339 } 334 }
@@ -345,129 +340,10 @@ void put_filp(struct file *file)
345{ 340{
346 if (atomic_long_dec_and_test(&file->f_count)) { 341 if (atomic_long_dec_and_test(&file->f_count)) {
347 security_file_free(file); 342 security_file_free(file);
348 file_sb_list_del(file);
349 file_free(file); 343 file_free(file);
350 } 344 }
351} 345}
352 346
353static inline int file_list_cpu(struct file *file)
354{
355#ifdef CONFIG_SMP
356 return file->f_sb_list_cpu;
357#else
358 return smp_processor_id();
359#endif
360}
361
362/* helper for file_sb_list_add to reduce ifdefs */
363static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
364{
365 struct list_head *list;
366#ifdef CONFIG_SMP
367 int cpu;
368 cpu = smp_processor_id();
369 file->f_sb_list_cpu = cpu;
370 list = per_cpu_ptr(sb->s_files, cpu);
371#else
372 list = &sb->s_files;
373#endif
374 list_add(&file->f_u.fu_list, list);
375}
376
377/**
378 * file_sb_list_add - add a file to the sb's file list
379 * @file: file to add
380 * @sb: sb to add it to
381 *
382 * Use this function to associate a file with the superblock of the inode it
383 * refers to.
384 */
385void file_sb_list_add(struct file *file, struct super_block *sb)
386{
387 if (likely(!(file->f_mode & FMODE_WRITE)))
388 return;
389 if (!S_ISREG(file_inode(file)->i_mode))
390 return;
391 lg_local_lock(&files_lglock);
392 __file_sb_list_add(file, sb);
393 lg_local_unlock(&files_lglock);
394}
395
396/**
397 * file_sb_list_del - remove a file from the sb's file list
398 * @file: file to remove
399 * @sb: sb to remove it from
400 *
401 * Use this function to remove a file from its superblock.
402 */
403void file_sb_list_del(struct file *file)
404{
405 if (!list_empty(&file->f_u.fu_list)) {
406 lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
407 list_del_init(&file->f_u.fu_list);
408 lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
409 }
410}
411
412#ifdef CONFIG_SMP
413
414/*
415 * These macros iterate all files on all CPUs for a given superblock.
416 * files_lglock must be held globally.
417 */
418#define do_file_list_for_each_entry(__sb, __file) \
419{ \
420 int i; \
421 for_each_possible_cpu(i) { \
422 struct list_head *list; \
423 list = per_cpu_ptr((__sb)->s_files, i); \
424 list_for_each_entry((__file), list, f_u.fu_list)
425
426#define while_file_list_for_each_entry \
427 } \
428}
429
430#else
431
432#define do_file_list_for_each_entry(__sb, __file) \
433{ \
434 struct list_head *list; \
435 list = &(sb)->s_files; \
436 list_for_each_entry((__file), list, f_u.fu_list)
437
438#define while_file_list_for_each_entry \
439}
440
441#endif
442
443/**
444 * mark_files_ro - mark all files read-only
445 * @sb: superblock in question
446 *
447 * All files are marked read-only. We don't care about pending
448 * delete files so this should be used in 'force' mode only.
449 */
450void mark_files_ro(struct super_block *sb)
451{
452 struct file *f;
453
454 lg_global_lock(&files_lglock);
455 do_file_list_for_each_entry(sb, f) {
456 if (!file_count(f))
457 continue;
458 if (!(f->f_mode & FMODE_WRITE))
459 continue;
460 spin_lock(&f->f_lock);
461 f->f_mode &= ~FMODE_WRITE;
462 spin_unlock(&f->f_lock);
463 if (file_check_writeable(f) != 0)
464 continue;
465 __mnt_drop_write(f->f_path.mnt);
466 file_release_write(f);
467 } while_file_list_for_each_entry;
468 lg_global_unlock(&files_lglock);
469}
470
471void __init files_init(unsigned long mempages) 347void __init files_init(unsigned long mempages)
472{ 348{
473 unsigned long n; 349 unsigned long n;
@@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages)
483 n = (mempages * (PAGE_SIZE / 1024)) / 10; 359 n = (mempages * (PAGE_SIZE / 1024)) / 10;
484 files_stat.max_files = max_t(unsigned long, n, NR_FILE); 360 files_stat.max_files = max_t(unsigned long, n, NR_FILE);
485 files_defer_init(); 361 files_defer_init();
486 lg_lock_init(&files_lglock, "files_lglock");
487 percpu_counter_init(&nr_files, 0); 362 percpu_counter_init(&nr_files, 0);
488} 363}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4afdbd6d9678..1f4a10ece2f1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,6 +26,7 @@
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/tracepoint.h> 28#include <linux/tracepoint.h>
29#include <linux/device.h>
29#include "internal.h" 30#include "internal.h"
30 31
31/* 32/*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index adbfd66b380f..b96a49b37d66 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -473,7 +473,7 @@ err:
473static void cuse_fc_release(struct fuse_conn *fc) 473static void cuse_fc_release(struct fuse_conn *fc)
474{ 474{
475 struct cuse_conn *cc = fc_to_cc(fc); 475 struct cuse_conn *cc = fc_to_cc(fc);
476 kfree(cc); 476 kfree_rcu(cc, fc.rcu);
477} 477}
478 478
479/** 479/**
@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
589ATTRIBUTE_GROUPS(cuse_class_dev); 589ATTRIBUTE_GROUPS(cuse_class_dev);
590 590
591static struct miscdevice cuse_miscdev = { 591static struct miscdevice cuse_miscdev = {
592 .minor = MISC_DYNAMIC_MINOR, 592 .minor = CUSE_MINOR,
593 .name = "cuse", 593 .name = "cuse",
594 .fops = &cuse_channel_fops, 594 .fops = &cuse_channel_fops,
595}; 595};
596 596
597MODULE_ALIAS_MISCDEV(CUSE_MINOR);
598MODULE_ALIAS("devname:cuse");
599
597static int __init cuse_init(void) 600static int __init cuse_init(void)
598{ 601{
599 int i, rc; 602 int i, rc;
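
The cuse hunk above moves from a dynamic to a fixed minor and adds the module aliases that let the device node exist before the module does: kmod's modules.devname data pre-creates /dev/cuse, and the first open autoloads the driver through the misc-device alias. A minimal sketch of that registration pattern for a hypothetical "demo" driver (the minor value is made up; a real driver would reserve one in miscdevice.h):

#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>

#define DEMO_MINOR 250				/* hypothetical reserved minor */

static const struct file_operations demo_fops = {
	.owner	= THIS_MODULE,
};

static struct miscdevice demo_miscdev = {
	.minor	= DEMO_MINOR,			/* fixed, not MISC_DYNAMIC_MINOR */
	.name	= "demo",
	.fops	= &demo_fops,
};

static int __init demo_init(void)
{
	return misc_register(&demo_miscdev);
}

static void __exit demo_exit(void)
{
	misc_deregister(&demo_miscdev);
}

module_init(demo_init);
module_exit(demo_exit);

/* the node can be created before load; the first open autoloads the module */
MODULE_ALIAS_MISCDEV(DEMO_MINOR);
MODULE_ALIAS("devname:demo");
MODULE_LICENSE("GPL");
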
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7989f2ab4c4..c3eb2c46c8f1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
342 return err; 342 return err;
343} 343}
344 344
345static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
346 struct inode *inode)
347{
348 struct dentry *newent;
349
350 if (inode && S_ISDIR(inode->i_mode)) {
351 struct fuse_conn *fc = get_fuse_conn(inode);
352
353 mutex_lock(&fc->inst_mutex);
354 newent = d_materialise_unique(dentry, inode);
355 mutex_unlock(&fc->inst_mutex);
356 } else {
357 newent = d_materialise_unique(dentry, inode);
358 }
359
360 return newent;
361}
362
363static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 345static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
364 unsigned int flags) 346 unsigned int flags)
365{ 347{
@@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
382 if (inode && get_node_id(inode) == FUSE_ROOT_ID) 364 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
383 goto out_iput; 365 goto out_iput;
384 366
385 newent = fuse_materialise_dentry(entry, inode); 367 newent = d_materialise_unique(entry, inode);
386 err = PTR_ERR(newent); 368 err = PTR_ERR(newent);
387 if (IS_ERR(newent)) 369 if (IS_ERR(newent))
388 goto out_err; 370 goto out_err;
@@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
601 } 583 }
602 kfree(forget); 584 kfree(forget);
603 585
604 if (S_ISDIR(inode->i_mode)) { 586 err = d_instantiate_no_diralias(entry, inode);
605 struct dentry *alias; 587 if (err)
606 mutex_lock(&fc->inst_mutex); 588 return err;
607 alias = d_find_alias(inode);
608 if (alias) {
609 /* New directory must have moved since mkdir */
610 mutex_unlock(&fc->inst_mutex);
611 dput(alias);
612 iput(inode);
613 return -EBUSY;
614 }
615 d_instantiate(entry, inode);
616 mutex_unlock(&fc->inst_mutex);
617 } else
618 d_instantiate(entry, inode);
619 589
620 fuse_change_entry_timeout(entry, &outarg); 590 fuse_change_entry_timeout(entry, &outarg);
621 fuse_invalidate_attr(dir); 591 fuse_invalidate_attr(dir);
@@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file,
1284 if (!inode) 1254 if (!inode)
1285 goto out; 1255 goto out;
1286 1256
1287 alias = fuse_materialise_dentry(dentry, inode); 1257 alias = d_materialise_unique(dentry, inode);
1288 err = PTR_ERR(alias); 1258 err = PTR_ERR(alias);
1289 if (IS_ERR(alias)) 1259 if (IS_ERR(alias))
1290 goto out; 1260 goto out;
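
The fuse/dir.c hunks drop the private inst_mutex serialisation: lookup and readdirplus call d_materialise_unique() directly, and create_new_entry() relies on the new d_instantiate_no_diralias() helper, which refuses to give a directory inode a second alias. A sketch of how a network-filesystem create path can use it (demo_new_inode() is hypothetical; the helper is assumed to drop the inode reference itself on failure, which is what the fuse hunk relies on):

static int demo_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode;
	int err;

	inode = demo_new_inode(dir, S_IFDIR | mode);	/* hypothetical */
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	/*
	 * -EBUSY here means the directory already has an alias, i.e. it
	 * was moved on the server behind our back; the inode reference
	 * has already been dropped for us.
	 */
	err = d_instantiate_no_diralias(dentry, inode);
	if (err)
		return err;

	inc_nlink(dir);
	return 0;
}
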
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4598345ab87d..7e70506297bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
334 334
335 BUG_ON(req->inode != inode); 335 BUG_ON(req->inode != inode);
336 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; 336 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
337 if (curr_index == index) { 337 if (curr_index <= index &&
338 index < curr_index + req->num_pages) {
338 found = true; 339 found = true;
339 break; 340 break;
340 } 341 }
@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1409 1410
1410static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1411static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1411{ 1412{
1412 __free_page(req->pages[0]); 1413 int i;
1413 fuse_file_put(req->ff, false); 1414
1415 for (i = 0; i < req->num_pages; i++)
1416 __free_page(req->pages[i]);
1417
1418 if (req->ff)
1419 fuse_file_put(req->ff, false);
1414} 1420}
1415 1421
1416static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1422static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1418 struct inode *inode = req->inode; 1424 struct inode *inode = req->inode;
1419 struct fuse_inode *fi = get_fuse_inode(inode); 1425 struct fuse_inode *fi = get_fuse_inode(inode);
1420 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; 1426 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1427 int i;
1421 1428
1422 list_del(&req->writepages_entry); 1429 list_del(&req->writepages_entry);
1423 dec_bdi_stat(bdi, BDI_WRITEBACK); 1430 for (i = 0; i < req->num_pages; i++) {
1424 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); 1431 dec_bdi_stat(bdi, BDI_WRITEBACK);
1425 bdi_writeout_inc(bdi); 1432 dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
1433 bdi_writeout_inc(bdi);
1434 }
1426 wake_up(&fi->page_waitq); 1435 wake_up(&fi->page_waitq);
1427} 1436}
1428 1437
1429/* Called under fc->lock, may release and reacquire it */ 1438/* Called under fc->lock, may release and reacquire it */
1430static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) 1439static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
1440 loff_t size)
1431__releases(fc->lock) 1441__releases(fc->lock)
1432__acquires(fc->lock) 1442__acquires(fc->lock)
1433{ 1443{
1434 struct fuse_inode *fi = get_fuse_inode(req->inode); 1444 struct fuse_inode *fi = get_fuse_inode(req->inode);
1435 loff_t size = i_size_read(req->inode);
1436 struct fuse_write_in *inarg = &req->misc.write.in; 1445 struct fuse_write_in *inarg = &req->misc.write.in;
1446 __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
1437 1447
1438 if (!fc->connected) 1448 if (!fc->connected)
1439 goto out_free; 1449 goto out_free;
1440 1450
1441 if (inarg->offset + PAGE_CACHE_SIZE <= size) { 1451 if (inarg->offset + data_size <= size) {
1442 inarg->size = PAGE_CACHE_SIZE; 1452 inarg->size = data_size;
1443 } else if (inarg->offset < size) { 1453 } else if (inarg->offset < size) {
1444 inarg->size = size & (PAGE_CACHE_SIZE - 1); 1454 inarg->size = size - inarg->offset;
1445 } else { 1455 } else {
1446 /* Got truncated off completely */ 1456 /* Got truncated off completely */
1447 goto out_free; 1457 goto out_free;
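
fuse_send_writepage() now crops a whole multi-page request against the i_size snapshot passed in by its caller instead of assuming a single page. The rule, restated as a stand-alone helper (sketch, not part of the patch):

static u64 crop_write(u64 offset, u64 data_size, u64 isize)
{
	if (offset + data_size <= isize)
		return data_size;	/* request fully below i_size */
	if (offset < isize)
		return isize - offset;	/* partial data at end of file */
	return 0;			/* truncated away completely */
}
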
@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
1472{ 1482{
1473 struct fuse_conn *fc = get_fuse_conn(inode); 1483 struct fuse_conn *fc = get_fuse_conn(inode);
1474 struct fuse_inode *fi = get_fuse_inode(inode); 1484 struct fuse_inode *fi = get_fuse_inode(inode);
1485 size_t crop = i_size_read(inode);
1475 struct fuse_req *req; 1486 struct fuse_req *req;
1476 1487
1477 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { 1488 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1478 req = list_entry(fi->queued_writes.next, struct fuse_req, list); 1489 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1479 list_del_init(&req->list); 1490 list_del_init(&req->list);
1480 fuse_send_writepage(fc, req); 1491 fuse_send_writepage(fc, req, crop);
1481 } 1492 }
1482} 1493}
1483 1494
@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1488 1499
1489 mapping_set_error(inode->i_mapping, req->out.h.error); 1500 mapping_set_error(inode->i_mapping, req->out.h.error);
1490 spin_lock(&fc->lock); 1501 spin_lock(&fc->lock);
1502 while (req->misc.write.next) {
1503 struct fuse_conn *fc = get_fuse_conn(inode);
1504 struct fuse_write_in *inarg = &req->misc.write.in;
1505 struct fuse_req *next = req->misc.write.next;
1506 req->misc.write.next = next->misc.write.next;
1507 next->misc.write.next = NULL;
1508 next->ff = fuse_file_get(req->ff);
1509 list_add(&next->writepages_entry, &fi->writepages);
1510
1511 /*
1512 * Skip fuse_flush_writepages() to make it easy to crop requests
1513 * based on primary request size.
1514 *
1515 * 1st case (trivial): there are no concurrent activities using
1516 * fuse_set/release_nowrite. Then we're on safe side because
1517 * fuse_flush_writepages() would call fuse_send_writepage()
1518 * anyway.
1519 *
1520 * 2nd case: someone called fuse_set_nowrite and it is waiting
1521 * now for completion of all in-flight requests. This happens
1522 * rarely and no more than once per page, so this should be
1523 * okay.
1524 *
1525 * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
1526 * of fuse_set_nowrite..fuse_release_nowrite section. The fact
1527 * that fuse_set_nowrite returned implies that all in-flight
1528 * requests were completed along with all of their secondary
1529 * requests. Further primary requests are blocked by negative
1530 * writectr. Hence there cannot be any in-flight requests and
1531 * no invocations of fuse_writepage_end() while we're in
1532 * fuse_set_nowrite..fuse_release_nowrite section.
1533 */
1534 fuse_send_writepage(fc, next, inarg->offset + inarg->size);
1535 }
1491 fi->writectr--; 1536 fi->writectr--;
1492 fuse_writepage_finish(fc, req); 1537 fuse_writepage_finish(fc, req);
1493 spin_unlock(&fc->lock); 1538 spin_unlock(&fc->lock);
1494 fuse_writepage_free(fc, req); 1539 fuse_writepage_free(fc, req);
1495} 1540}
1496 1541
1542static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
1543 struct fuse_inode *fi)
1544{
1545 struct fuse_file *ff = NULL;
1546
1547 spin_lock(&fc->lock);
1548 if (!WARN_ON(list_empty(&fi->write_files))) {
1549 ff = list_entry(fi->write_files.next, struct fuse_file,
1550 write_entry);
1551 fuse_file_get(ff);
1552 }
1553 spin_unlock(&fc->lock);
1554
1555 return ff;
1556}
1557
1497static int fuse_writepage_locked(struct page *page) 1558static int fuse_writepage_locked(struct page *page)
1498{ 1559{
1499 struct address_space *mapping = page->mapping; 1560 struct address_space *mapping = page->mapping;
@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1562 struct fuse_conn *fc = get_fuse_conn(inode);
1502 struct fuse_inode *fi = get_fuse_inode(inode); 1563 struct fuse_inode *fi = get_fuse_inode(inode);
1503 struct fuse_req *req; 1564 struct fuse_req *req;
1504 struct fuse_file *ff;
1505 struct page *tmp_page; 1565 struct page *tmp_page;
1566 int error = -ENOMEM;
1506 1567
1507 set_page_writeback(page); 1568 set_page_writeback(page);
1508 1569
@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
1515 if (!tmp_page) 1576 if (!tmp_page)
1516 goto err_free; 1577 goto err_free;
1517 1578
1518 spin_lock(&fc->lock); 1579 error = -EIO;
1519 BUG_ON(list_empty(&fi->write_files)); 1580 req->ff = fuse_write_file_get(fc, fi);
1520 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); 1581 if (!req->ff)
1521 req->ff = fuse_file_get(ff); 1582 goto err_free;
1522 spin_unlock(&fc->lock);
1523 1583
1524 fuse_write_fill(req, ff, page_offset(page), 0); 1584 fuse_write_fill(req, req->ff, page_offset(page), 0);
1525 1585
1526 copy_highpage(tmp_page, page); 1586 copy_highpage(tmp_page, page);
1527 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; 1587 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1588 req->misc.write.next = NULL;
1528 req->in.argpages = 1; 1589 req->in.argpages = 1;
1529 req->num_pages = 1; 1590 req->num_pages = 1;
1530 req->pages[0] = tmp_page; 1591 req->pages[0] = tmp_page;
@@ -1550,19 +1611,263 @@ err_free:
1550 fuse_request_free(req); 1611 fuse_request_free(req);
1551err: 1612err:
1552 end_page_writeback(page); 1613 end_page_writeback(page);
1553 return -ENOMEM; 1614 return error;
1554} 1615}
1555 1616
1556static int fuse_writepage(struct page *page, struct writeback_control *wbc) 1617static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1557{ 1618{
1558 int err; 1619 int err;
1559 1620
1621 if (fuse_page_is_writeback(page->mapping->host, page->index)) {
1622 /*
1623 * ->writepages() should be called for sync() and friends. We
1624 * should only get here on direct reclaim and then we are
1625 * allowed to skip a page which is already in flight
1626 */
1627 WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
1628
1629 redirty_page_for_writepage(wbc, page);
1630 return 0;
1631 }
1632
1560 err = fuse_writepage_locked(page); 1633 err = fuse_writepage_locked(page);
1561 unlock_page(page); 1634 unlock_page(page);
1562 1635
1563 return err; 1636 return err;
1564} 1637}
1565 1638
1639struct fuse_fill_wb_data {
1640 struct fuse_req *req;
1641 struct fuse_file *ff;
1642 struct inode *inode;
1643 struct page **orig_pages;
1644};
1645
1646static void fuse_writepages_send(struct fuse_fill_wb_data *data)
1647{
1648 struct fuse_req *req = data->req;
1649 struct inode *inode = data->inode;
1650 struct fuse_conn *fc = get_fuse_conn(inode);
1651 struct fuse_inode *fi = get_fuse_inode(inode);
1652 int num_pages = req->num_pages;
1653 int i;
1654
1655 req->ff = fuse_file_get(data->ff);
1656 spin_lock(&fc->lock);
1657 list_add_tail(&req->list, &fi->queued_writes);
1658 fuse_flush_writepages(inode);
1659 spin_unlock(&fc->lock);
1660
1661 for (i = 0; i < num_pages; i++)
1662 end_page_writeback(data->orig_pages[i]);
1663}
1664
1665static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1666 struct page *page)
1667{
1668 struct fuse_conn *fc = get_fuse_conn(new_req->inode);
1669 struct fuse_inode *fi = get_fuse_inode(new_req->inode);
1670 struct fuse_req *tmp;
1671 struct fuse_req *old_req;
1672 bool found = false;
1673 pgoff_t curr_index;
1674
1675 BUG_ON(new_req->num_pages != 0);
1676
1677 spin_lock(&fc->lock);
1678 list_del(&new_req->writepages_entry);
1679 list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
1680 BUG_ON(old_req->inode != new_req->inode);
1681 curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
1682 if (curr_index <= page->index &&
1683 page->index < curr_index + old_req->num_pages) {
1684 found = true;
1685 break;
1686 }
1687 }
1688 if (!found) {
1689 list_add(&new_req->writepages_entry, &fi->writepages);
1690 goto out_unlock;
1691 }
1692
1693 new_req->num_pages = 1;
1694 for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
1695 BUG_ON(tmp->inode != new_req->inode);
1696 curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
1697 if (tmp->num_pages == 1 &&
1698 curr_index == page->index) {
1699 old_req = tmp;
1700 }
1701 }
1702
1703 if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
1704 old_req->state == FUSE_REQ_PENDING)) {
1705 struct backing_dev_info *bdi = page->mapping->backing_dev_info;
1706
1707 copy_highpage(old_req->pages[0], page);
1708 spin_unlock(&fc->lock);
1709
1710 dec_bdi_stat(bdi, BDI_WRITEBACK);
1711 dec_zone_page_state(page, NR_WRITEBACK_TEMP);
1712 bdi_writeout_inc(bdi);
1713 fuse_writepage_free(fc, new_req);
1714 fuse_request_free(new_req);
1715 goto out;
1716 } else {
1717 new_req->misc.write.next = old_req->misc.write.next;
1718 old_req->misc.write.next = new_req;
1719 }
1720out_unlock:
1721 spin_unlock(&fc->lock);
1722out:
1723 return found;
1724}
1725
1726static int fuse_writepages_fill(struct page *page,
1727 struct writeback_control *wbc, void *_data)
1728{
1729 struct fuse_fill_wb_data *data = _data;
1730 struct fuse_req *req = data->req;
1731 struct inode *inode = data->inode;
1732 struct fuse_conn *fc = get_fuse_conn(inode);
1733 struct page *tmp_page;
1734 bool is_writeback;
1735 int err;
1736
1737 if (!data->ff) {
1738 err = -EIO;
1739 data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
1740 if (!data->ff)
1741 goto out_unlock;
1742 }
1743
1744 /*
1745 * Being under writeback is unlikely but possible. For example direct
1746 * read to an mmaped fuse file will set the page dirty twice; once when
1747 * the pages are faulted with get_user_pages(), and then after the read
1748 * completed.
1749 */
1750 is_writeback = fuse_page_is_writeback(inode, page->index);
1751
1752 if (req && req->num_pages &&
1753 (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
1754 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
1755 data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
1756 fuse_writepages_send(data);
1757 data->req = NULL;
1758 }
1759 err = -ENOMEM;
1760 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1761 if (!tmp_page)
1762 goto out_unlock;
1763
1764 /*
1765 * The page must not be redirtied until the writeout is completed
1766 * (i.e. userspace has sent a reply to the write request). Otherwise
1767 * there could be more than one temporary page instance for each real
1768 * page.
1769 *
1770 * This is ensured by holding the page lock in page_mkwrite() while
1771 * checking fuse_page_is_writeback(). We already hold the page lock
1772 * since clear_page_dirty_for_io() and keep it held until we add the
1773 * request to the fi->writepages list and increment req->num_pages.
1774 * After this fuse_page_is_writeback() will indicate that the page is
1775 * under writeback, so we can release the page lock.
1776 */
1777 if (data->req == NULL) {
1778 struct fuse_inode *fi = get_fuse_inode(inode);
1779
1780 err = -ENOMEM;
1781 req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
1782 if (!req) {
1783 __free_page(tmp_page);
1784 goto out_unlock;
1785 }
1786
1787 fuse_write_fill(req, data->ff, page_offset(page), 0);
1788 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1789 req->misc.write.next = NULL;
1790 req->in.argpages = 1;
1791 req->background = 1;
1792 req->num_pages = 0;
1793 req->end = fuse_writepage_end;
1794 req->inode = inode;
1795
1796 spin_lock(&fc->lock);
1797 list_add(&req->writepages_entry, &fi->writepages);
1798 spin_unlock(&fc->lock);
1799
1800 data->req = req;
1801 }
1802 set_page_writeback(page);
1803
1804 copy_highpage(tmp_page, page);
1805 req->pages[req->num_pages] = tmp_page;
1806 req->page_descs[req->num_pages].offset = 0;
1807 req->page_descs[req->num_pages].length = PAGE_SIZE;
1808
1809 inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
1810 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1811
1812 err = 0;
1813 if (is_writeback && fuse_writepage_in_flight(req, page)) {
1814 end_page_writeback(page);
1815 data->req = NULL;
1816 goto out_unlock;
1817 }
1818 data->orig_pages[req->num_pages] = page;
1819
1820 /*
1821 * Protected by fc->lock against concurrent access by
1822 * fuse_page_is_writeback().
1823 */
1824 spin_lock(&fc->lock);
1825 req->num_pages++;
1826 spin_unlock(&fc->lock);
1827
1828out_unlock:
1829 unlock_page(page);
1830
1831 return err;
1832}
1833
1834static int fuse_writepages(struct address_space *mapping,
1835 struct writeback_control *wbc)
1836{
1837 struct inode *inode = mapping->host;
1838 struct fuse_fill_wb_data data;
1839 int err;
1840
1841 err = -EIO;
1842 if (is_bad_inode(inode))
1843 goto out;
1844
1845 data.inode = inode;
1846 data.req = NULL;
1847 data.ff = NULL;
1848
1849 err = -ENOMEM;
1850 data.orig_pages = kzalloc(sizeof(struct page *) *
1851 FUSE_MAX_PAGES_PER_REQ,
1852 GFP_NOFS);
1853 if (!data.orig_pages)
1854 goto out;
1855
1856 err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
1857 if (data.req) {
1858 /* Ignore errors if we can write at least one page */
1859 BUG_ON(!data.req->num_pages);
1860 fuse_writepages_send(&data);
1861 err = 0;
1862 }
1863 if (data.ff)
1864 fuse_file_put(data.ff, false);
1865
1866 kfree(data.orig_pages);
1867out:
1868 return err;
1869}
1870
1566static int fuse_launder_page(struct page *page) 1871static int fuse_launder_page(struct page *page)
1567{ 1872{
1568 int err = 0; 1873 int err = 0;
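
fuse_writepages() above is built on write_cache_pages(), which walks the dirty pages, locks each one, runs clear_page_dirty_for_io() and then hands it to the callback; the callback owns the page lock and must release it. A bare skeleton of that contract for a hypothetical filesystem (demo_send_page() is made up):

static int demo_writepages_fill(struct page *page,
				struct writeback_control *wbc, void *data)
{
	int err = demo_send_page(page, data);	/* hypothetical I/O submit */

	unlock_page(page);			/* the callback must unlock */
	return err;
}

static int demo_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	return write_cache_pages(mapping, wbc, demo_writepages_fill, NULL);
}
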
@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
1602static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1907static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1603{ 1908{
1604 struct page *page = vmf->page; 1909 struct page *page = vmf->page;
1605 /* 1910 struct inode *inode = file_inode(vma->vm_file);
1606 * Don't use page->mapping as it may become NULL from a 1911
1607 * concurrent truncate. 1912 file_update_time(vma->vm_file);
1608 */ 1913 lock_page(page);
1609 struct inode *inode = vma->vm_file->f_mapping->host; 1914 if (page->mapping != inode->i_mapping) {
1915 unlock_page(page);
1916 return VM_FAULT_NOPAGE;
1917 }
1610 1918
1611 fuse_wait_on_page_writeback(inode, page->index); 1919 fuse_wait_on_page_writeback(inode, page->index);
1612 return 0; 1920 return VM_FAULT_LOCKED;
1613} 1921}
1614 1922
1615static const struct vm_operations_struct fuse_file_vm_ops = { 1923static const struct vm_operations_struct fuse_file_vm_ops = {
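
The ->page_mkwrite() change follows the usual pattern for filesystems that must stabilise a page before it becomes writable through a mapping: take the page lock, recheck page->mapping in case truncate raced, and return VM_FAULT_LOCKED so the fault code knows the lock is still held. The generic shape, as a sketch:

static int demo_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);

	file_update_time(vma->vm_file);		/* times before first write */
	lock_page(page);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		return VM_FAULT_NOPAGE;		/* lost a race with truncate */
	}
	/* per-fs serialisation against writeback would go here */
	return VM_FAULT_LOCKED;			/* page is returned locked */
}
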
@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
2581static const struct address_space_operations fuse_file_aops = { 2889static const struct address_space_operations fuse_file_aops = {
2582 .readpage = fuse_readpage, 2890 .readpage = fuse_readpage,
2583 .writepage = fuse_writepage, 2891 .writepage = fuse_writepage,
2892 .writepages = fuse_writepages,
2584 .launder_page = fuse_launder_page, 2893 .launder_page = fuse_launder_page,
2585 .readpages = fuse_readpages, 2894 .readpages = fuse_readpages,
2586 .set_page_dirty = __set_page_dirty_nobuffers, 2895 .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5b9e6f3b6aef..7d2730912667 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -321,6 +321,7 @@ struct fuse_req {
321 struct { 321 struct {
322 struct fuse_write_in in; 322 struct fuse_write_in in;
323 struct fuse_write_out out; 323 struct fuse_write_out out;
324 struct fuse_req *next;
324 } write; 325 } write;
325 struct fuse_notify_retrieve_in retrieve_in; 326 struct fuse_notify_retrieve_in retrieve_in;
326 struct fuse_lk_in lk_in; 327 struct fuse_lk_in lk_in;
@@ -374,12 +375,11 @@ struct fuse_conn {
 374 /** Lock protecting accesses to members of this structure */ 375 /** Lock protecting accesses to members of this structure */
375 spinlock_t lock; 376 spinlock_t lock;
376 377
377 /** Mutex protecting against directory alias creation */
378 struct mutex inst_mutex;
379
380 /** Refcount */ 378 /** Refcount */
381 atomic_t count; 379 atomic_t count;
382 380
381 struct rcu_head rcu;
382
383 /** The user id for this mount */ 383 /** The user id for this mount */
384 kuid_t user_id; 384 kuid_t user_id;
385 385
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a8ce6dab60a0..d468643a68b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc)
565{ 565{
566 memset(fc, 0, sizeof(*fc)); 566 memset(fc, 0, sizeof(*fc));
567 spin_lock_init(&fc->lock); 567 spin_lock_init(&fc->lock);
568 mutex_init(&fc->inst_mutex);
569 init_rwsem(&fc->killsb); 568 init_rwsem(&fc->killsb);
570 atomic_set(&fc->count, 1); 569 atomic_set(&fc->count, 1);
571 init_waitqueue_head(&fc->waitq); 570 init_waitqueue_head(&fc->waitq);
@@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc)
596 if (atomic_dec_and_test(&fc->count)) { 595 if (atomic_dec_and_test(&fc->count)) {
597 if (fc->destroy_req) 596 if (fc->destroy_req)
598 fuse_request_free(fc->destroy_req); 597 fuse_request_free(fc->destroy_req);
599 mutex_destroy(&fc->inst_mutex);
600 fc->release(fc); 598 fc->release(fc);
601 } 599 }
602} 600}
@@ -920,7 +918,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
920 918
921static void fuse_free_conn(struct fuse_conn *fc) 919static void fuse_free_conn(struct fuse_conn *fc)
922{ 920{
923 kfree(fc); 921 kfree_rcu(fc, rcu);
924} 922}
925 923
926static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 924static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
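
fuse_conn gains an rcu_head and both fuse_free_conn() and cuse_fc_release() switch from kfree() to kfree_rcu(), so code that dereferences the connection under rcu_read_lock() cannot see the memory recycled mid-use. The pattern in isolation (sketch):

struct demo_conn {
	int		id;
	struct rcu_head	rcu;		/* named in the kfree_rcu() call */
};

static void demo_conn_release(struct demo_conn *dc)
{
	kfree_rcu(dc, rcu);		/* freed only after a grace period */
}
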
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 109ce9325b76..1615df16cf4e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1514,13 +1514,6 @@ out:
1514 return NULL; 1514 return NULL;
1515} 1515}
1516 1516
1517static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1518{
1519 char *s = nd_get_link(nd);
1520 if (!IS_ERR(s))
1521 kfree(s);
1522}
1523
1524/** 1517/**
1525 * gfs2_permission - 1518 * gfs2_permission -
1526 * @inode: The inode 1519 * @inode: The inode
@@ -1872,7 +1865,7 @@ const struct inode_operations gfs2_dir_iops = {
1872const struct inode_operations gfs2_symlink_iops = { 1865const struct inode_operations gfs2_symlink_iops = {
1873 .readlink = generic_readlink, 1866 .readlink = generic_readlink,
1874 .follow_link = gfs2_follow_link, 1867 .follow_link = gfs2_follow_link,
1875 .put_link = gfs2_put_link, 1868 .put_link = kfree_put_link,
1876 .permission = gfs2_permission, 1869 .permission = gfs2_permission,
1877 .setattr = gfs2_setattr, 1870 .setattr = gfs2_setattr,
1878 .getattr = gfs2_getattr, 1871 .getattr = gfs2_getattr,
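
gfs2's private put_link helper is replaced by the generic kfree_put_link() added to fs/libfs.c later in this diff; it suits any symlink whose ->follow_link() leaves a kmalloc()ed string in the nameidata. A sketch of the pairing with the 3.13-era prototypes (demo_read_target() is hypothetical and returns a kmalloc()ed, NUL-terminated string or an ERR_PTR()):

static void *demo_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char *target = demo_read_target(dentry->d_inode);	/* hypothetical */

	nd_set_link(nd, target);	/* ERR_PTR() is handled by put_link */
	return NULL;
}

static const struct inode_operations demo_symlink_iops = {
	.readlink	= generic_readlink,
	.follow_link	= demo_follow_link,
	.put_link	= kfree_put_link,	/* kfree()s what follow_link set */
};
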
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1b398636e990..6797bf80f6e2 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -80,6 +80,7 @@ struct hpfs_sb_info {
80 unsigned sb_c_bitmap; /* current bitmap */ 80 unsigned sb_c_bitmap; /* current bitmap */
 81 unsigned sb_max_fwd_alloc; /* max forward allocation */ 81 unsigned sb_max_fwd_alloc; /* max forward allocation */
82 int sb_timeshift; 82 int sb_timeshift;
83 struct rcu_head rcu;
83}; 84};
84 85
85/* Four 512-byte buffers and the 2k block obtained by concatenating them */ 86/* Four 512-byte buffers and the 2k block obtained by concatenating them */
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 345713d2f8f3..1b39afdd86fd 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -407,7 +407,7 @@ again:
407 /*printk("HPFS: truncating file before delete.\n");*/ 407 /*printk("HPFS: truncating file before delete.\n");*/
408 newattrs.ia_size = 0; 408 newattrs.ia_size = 0;
409 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 409 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
410 err = notify_change(dentry, &newattrs); 410 err = notify_change(dentry, &newattrs, NULL);
411 put_write_access(inode); 411 put_write_access(inode);
412 if (!err) 412 if (!err)
413 goto again; 413 goto again;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 4334cda8dba1..b8d01ef6f531 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -101,18 +101,24 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
101 return 0; 101 return 0;
102} 102}
103 103
104static void hpfs_put_super(struct super_block *s) 104static void free_sbi(struct hpfs_sb_info *sbi)
105{ 105{
106 struct hpfs_sb_info *sbi = hpfs_sb(s); 106 kfree(sbi->sb_cp_table);
107 kfree(sbi->sb_bmp_dir);
108 kfree(sbi);
109}
107 110
111static void lazy_free_sbi(struct rcu_head *rcu)
112{
113 free_sbi(container_of(rcu, struct hpfs_sb_info, rcu));
114}
115
116static void hpfs_put_super(struct super_block *s)
117{
108 hpfs_lock(s); 118 hpfs_lock(s);
109 unmark_dirty(s); 119 unmark_dirty(s);
110 hpfs_unlock(s); 120 hpfs_unlock(s);
111 121 call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi);
112 kfree(sbi->sb_cp_table);
113 kfree(sbi->sb_bmp_dir);
114 s->s_fs_info = NULL;
115 kfree(sbi);
116} 122}
117 123
118unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 124unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -485,9 +491,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
485 } 491 }
486 s->s_fs_info = sbi; 492 s->s_fs_info = sbi;
487 493
488 sbi->sb_bmp_dir = NULL;
489 sbi->sb_cp_table = NULL;
490
491 mutex_init(&sbi->hpfs_mutex); 494 mutex_init(&sbi->hpfs_mutex);
492 hpfs_lock(s); 495 hpfs_lock(s);
493 496
@@ -679,10 +682,7 @@ bail2: brelse(bh0);
679bail1: 682bail1:
680bail0: 683bail0:
681 hpfs_unlock(s); 684 hpfs_unlock(s);
682 kfree(sbi->sb_bmp_dir); 685 free_sbi(sbi);
683 kfree(sbi->sb_cp_table);
684 s->s_fs_info = NULL;
685 kfree(sbi);
686 return -EINVAL; 686 return -EINVAL;
687} 687}
688 688
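
hpfs_put_super() now defers freeing the superblock info with call_rcu() instead of kfree()ing it on the spot. Because several allocations have to go away together, kfree_rcu() is not enough here; the explicit callback form with container_of() is the general pattern (sketch, hypothetical names):

struct demo_sbi {
	void		*table;		/* secondary allocation */
	struct rcu_head	rcu;
};

static void demo_lazy_free_sbi(struct rcu_head *rcu)
{
	struct demo_sbi *sbi = container_of(rcu, struct demo_sbi, rcu);

	kfree(sbi->table);
	kfree(sbi);
}

/* caller side: call_rcu(&sbi->rcu, demo_lazy_free_sbi); returns at once */
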
diff --git a/fs/inode.c b/fs/inode.c
index b33ba8e021cc..4bcdad3c9361 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -773,15 +773,11 @@ static struct inode *find_inode(struct super_block *sb,
773 773
774repeat: 774repeat:
775 hlist_for_each_entry(inode, head, i_hash) { 775 hlist_for_each_entry(inode, head, i_hash) {
776 spin_lock(&inode->i_lock); 776 if (inode->i_sb != sb)
777 if (inode->i_sb != sb) {
778 spin_unlock(&inode->i_lock);
779 continue; 777 continue;
780 } 778 if (!test(inode, data))
781 if (!test(inode, data)) {
782 spin_unlock(&inode->i_lock);
783 continue; 779 continue;
784 } 780 spin_lock(&inode->i_lock);
785 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 781 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
786 __wait_on_freeing_inode(inode); 782 __wait_on_freeing_inode(inode);
787 goto repeat; 783 goto repeat;
@@ -804,15 +800,11 @@ static struct inode *find_inode_fast(struct super_block *sb,
804 800
805repeat: 801repeat:
806 hlist_for_each_entry(inode, head, i_hash) { 802 hlist_for_each_entry(inode, head, i_hash) {
807 spin_lock(&inode->i_lock); 803 if (inode->i_ino != ino)
808 if (inode->i_ino != ino) {
809 spin_unlock(&inode->i_lock);
810 continue; 804 continue;
811 } 805 if (inode->i_sb != sb)
812 if (inode->i_sb != sb) {
813 spin_unlock(&inode->i_lock);
814 continue; 806 continue;
815 } 807 spin_lock(&inode->i_lock);
816 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 808 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
817 __wait_on_freeing_inode(inode); 809 __wait_on_freeing_inode(inode);
818 goto repeat; 810 goto repeat;
@@ -951,6 +943,42 @@ void unlock_new_inode(struct inode *inode)
951EXPORT_SYMBOL(unlock_new_inode); 943EXPORT_SYMBOL(unlock_new_inode);
952 944
953/** 945/**
946 * lock_two_nondirectories - take two i_mutexes on non-directory objects
947 * @inode1: first inode to lock
948 * @inode2: second inode to lock
949 */
950void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
951{
952 WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
953 if (inode1 == inode2 || !inode2) {
954 mutex_lock(&inode1->i_mutex);
955 return;
956 }
957 WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
958 if (inode1 < inode2) {
959 mutex_lock(&inode1->i_mutex);
960 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
961 } else {
962 mutex_lock(&inode2->i_mutex);
963 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
964 }
965}
966EXPORT_SYMBOL(lock_two_nondirectories);
967
968/**
969 * unlock_two_nondirectories - release locks from lock_two_nondirectories()
970 * @inode1: first inode to unlock
971 * @inode2: second inode to unlock
972 */
973void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
974{
975 mutex_unlock(&inode1->i_mutex);
976 if (inode2 && inode2 != inode1)
977 mutex_unlock(&inode2->i_mutex);
978}
979EXPORT_SYMBOL(unlock_two_nondirectories);
980
981/**
954 * iget5_locked - obtain an inode from a mounted file system 982 * iget5_locked - obtain an inode from a mounted file system
955 * @sb: super block of file system 983 * @sb: super block of file system
956 * @hashval: hash value (usually inode number) to get 984 * @hashval: hash value (usually inode number) to get
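
lock_two_nondirectories()/unlock_two_nondirectories() give callers a single, consistently ordered way to hold i_mutex on two regular files at once (ordering by inode address, with the second lock taken in the I_MUTEX_NONDIR2 class). Usage sketch with a hypothetical helper:

static int demo_exchange(struct inode *a, struct inode *b)
{
	int err;

	lock_two_nondirectories(a, b);	/* also copes with a == b or b == NULL */
	err = demo_swap_blocks(a, b);	/* hypothetical, both i_mutexes held */
	unlock_two_nondirectories(a, b);
	return err;
}
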
@@ -1575,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill)
1575 struct iattr newattrs; 1603 struct iattr newattrs;
1576 1604
1577 newattrs.ia_valid = ATTR_FORCE | kill; 1605 newattrs.ia_valid = ATTR_FORCE | kill;
1578 return notify_change(dentry, &newattrs); 1606 /*
1607 * Note we call this on write, so notify_change will not
1608 * encounter any conflicting delegations:
1609 */
1610 return notify_change(dentry, &newattrs, NULL);
1579} 1611}
1580 1612
1581int file_remove_suid(struct file *file) 1613int file_remove_suid(struct file *file)
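
The extra notify_change() argument threaded through this series is an out-parameter for a conflicting NFS delegation: sleepable callers pass a pointer, break whatever delegation comes back, and retry, while write-path callers such as __remove_suid() pass NULL because no delegation can be outstanding there. A sketch of the retry loop (assuming the break_deleg_wait() helper introduced by the same series):

static int demo_chmod(struct dentry *dentry, umode_t mode)
{
	struct inode *delegated_inode = NULL;
	struct iattr newattrs;
	int err;

retry:
	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
	newattrs.ia_mode = mode;
	mutex_lock(&dentry->d_inode->i_mutex);
	err = notify_change(dentry, &newattrs, &delegated_inode);
	mutex_unlock(&dentry->d_inode->i_mutex);
	if (delegated_inode) {
		err = break_deleg_wait(&delegated_inode);
		if (!err)
			goto retry;
	}
	return err;
}
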
diff --git a/fs/internal.h b/fs/internal.h
index 513e0d859a6c..465742407466 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,8 +9,6 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/lglock.h>
13
14struct super_block; 12struct super_block;
15struct file_system_type; 13struct file_system_type;
16struct linux_binprm; 14struct linux_binprm;
@@ -62,8 +60,6 @@ extern int sb_prepare_remount_readonly(struct super_block *);
62 60
63extern void __init mnt_init(void); 61extern void __init mnt_init(void);
64 62
65extern struct lglock vfsmount_lock;
66
67extern int __mnt_want_write(struct vfsmount *); 63extern int __mnt_want_write(struct vfsmount *);
68extern int __mnt_want_write_file(struct file *); 64extern int __mnt_want_write_file(struct file *);
69extern void __mnt_drop_write(struct vfsmount *); 65extern void __mnt_drop_write(struct vfsmount *);
@@ -77,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
77/* 73/*
78 * file_table.c 74 * file_table.c
79 */ 75 */
80extern void file_sb_list_add(struct file *f, struct super_block *sb);
81extern void file_sb_list_del(struct file *f);
82extern void mark_files_ro(struct super_block *);
83extern struct file *get_empty_filp(void); 76extern struct file *get_empty_filp(void);
84 77
85/* 78/*
diff --git a/fs/ioctl.c b/fs/ioctl.c
index fd507fb460f8..8ac3fad36192 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
37{ 37{
38 int error = -ENOTTY; 38 int error = -ENOTTY;
39 39
40 if (!filp->f_op || !filp->f_op->unlocked_ioctl) 40 if (!filp->f_op->unlocked_ioctl)
41 goto out; 41 goto out;
42 42
43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg); 43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
@@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
501 501
502 /* Did FASYNC state change ? */ 502 /* Did FASYNC state change ? */
503 if ((flag ^ filp->f_flags) & FASYNC) { 503 if ((flag ^ filp->f_flags) & FASYNC) {
504 if (filp->f_op && filp->f_op->fasync) 504 if (filp->f_op->fasync)
505 /* fasync() adjusts filp->f_flags */ 505 /* fasync() adjusts filp->f_flags */
506 error = filp->f_op->fasync(fd, filp, on); 506 error = filp->f_op->fasync(fd, filp, on);
507 else 507 else
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index e5d408a7ea4a..4a9e10ea13f2 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -181,7 +181,7 @@ struct iso9660_options{
181 * Compute the hash for the isofs name corresponding to the dentry. 181 * Compute the hash for the isofs name corresponding to the dentry.
182 */ 182 */
183static int 183static int
184isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) 184isofs_hash_common(struct qstr *qstr, int ms)
185{ 185{
186 const char *name; 186 const char *name;
187 int len; 187 int len;
@@ -202,7 +202,7 @@ isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
202 * Compute the hash for the isofs name corresponding to the dentry. 202 * Compute the hash for the isofs name corresponding to the dentry.
203 */ 203 */
204static int 204static int
205isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) 205isofs_hashi_common(struct qstr *qstr, int ms)
206{ 206{
207 const char *name; 207 const char *name;
208 int len; 208 int len;
@@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common(
259static int 259static int
260isofs_hash(const struct dentry *dentry, struct qstr *qstr) 260isofs_hash(const struct dentry *dentry, struct qstr *qstr)
261{ 261{
262 return isofs_hash_common(dentry, qstr, 0); 262 return isofs_hash_common(qstr, 0);
263} 263}
264 264
265static int 265static int
266isofs_hashi(const struct dentry *dentry, struct qstr *qstr) 266isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
267{ 267{
268 return isofs_hashi_common(dentry, qstr, 0); 268 return isofs_hashi_common(qstr, 0);
269} 269}
270 270
271static int 271static int
@@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
286static int 286static int
287isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) 287isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
288{ 288{
289 return isofs_hash_common(dentry, qstr, 1); 289 return isofs_hash_common(qstr, 1);
290} 290}
291 291
292static int 292static int
293isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) 293isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr)
294{ 294{
295 return isofs_hashi_common(dentry, qstr, 1); 295 return isofs_hashi_common(qstr, 1);
296} 296}
297 297
298static int 298static int
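
The isofs hash helpers lose their dentry argument because a ->d_hash() routine only ever needs the name being hashed. A dentry-independent, case-insensitive hash looks like this with the 3.13-era helpers (init_name_hash() takes no argument in this tree; tolower() comes from <linux/ctype.h>):

static int demo_hashi(const struct dentry *dentry, struct qstr *qstr)
{
	unsigned long hash = init_name_hash();
	const unsigned char *name = qstr->name;
	unsigned int len = qstr->len;

	while (len--)
		hash = partial_name_hash(tolower(*name++), hash);
	qstr->hash = end_name_hash(hash);
	return 0;
}
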
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index be0c39b66fe0..aa603e017d22 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,7 +26,6 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h>
30 29
31static void __journal_temp_unlink_buffer(struct journal_head *jh); 30static void __journal_temp_unlink_buffer(struct journal_head *jh);
32 31
@@ -100,10 +99,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
100 99
101alloc_transaction: 100alloc_transaction:
102 if (!journal->j_running_transaction) { 101 if (!journal->j_running_transaction) {
103 new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); 102 new_transaction = kzalloc(sizeof(*new_transaction),
103 GFP_NOFS|__GFP_NOFAIL);
104 if (!new_transaction) { 104 if (!new_transaction) {
105 congestion_wait(BLK_RW_ASYNC, HZ/50); 105 ret = -ENOMEM;
106 goto alloc_transaction; 106 goto out;
107 } 107 }
108 } 108 }
109 109
diff --git a/fs/libfs.c b/fs/libfs.c
index 3a3a9b53bf5a..5de06947ba5e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -10,6 +10,7 @@
10#include <linux/vfs.h> 10#include <linux/vfs.h>
11#include <linux/quotaops.h> 11#include <linux/quotaops.h>
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/namei.h>
13#include <linux/exportfs.h> 14#include <linux/exportfs.h>
14#include <linux/writeback.h> 15#include <linux/writeback.h>
15#include <linux/buffer_head.h> /* sync_mapping_buffers */ 16#include <linux/buffer_head.h> /* sync_mapping_buffers */
@@ -31,6 +32,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
31 stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); 32 stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
32 return 0; 33 return 0;
33} 34}
35EXPORT_SYMBOL(simple_getattr);
34 36
35int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 37int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
36{ 38{
@@ -39,6 +41,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
39 buf->f_namelen = NAME_MAX; 41 buf->f_namelen = NAME_MAX;
40 return 0; 42 return 0;
41} 43}
44EXPORT_SYMBOL(simple_statfs);
42 45
43/* 46/*
44 * Retaining negative dentries for an in-memory filesystem just wastes 47 * Retaining negative dentries for an in-memory filesystem just wastes
@@ -66,6 +69,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
66 d_add(dentry, NULL); 69 d_add(dentry, NULL);
67 return NULL; 70 return NULL;
68} 71}
72EXPORT_SYMBOL(simple_lookup);
69 73
70int dcache_dir_open(struct inode *inode, struct file *file) 74int dcache_dir_open(struct inode *inode, struct file *file)
71{ 75{
@@ -75,12 +79,14 @@ int dcache_dir_open(struct inode *inode, struct file *file)
75 79
76 return file->private_data ? 0 : -ENOMEM; 80 return file->private_data ? 0 : -ENOMEM;
77} 81}
82EXPORT_SYMBOL(dcache_dir_open);
78 83
79int dcache_dir_close(struct inode *inode, struct file *file) 84int dcache_dir_close(struct inode *inode, struct file *file)
80{ 85{
81 dput(file->private_data); 86 dput(file->private_data);
82 return 0; 87 return 0;
83} 88}
89EXPORT_SYMBOL(dcache_dir_close);
84 90
85loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) 91loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
86{ 92{
@@ -123,6 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
123 mutex_unlock(&dentry->d_inode->i_mutex); 129 mutex_unlock(&dentry->d_inode->i_mutex);
124 return offset; 130 return offset;
125} 131}
132EXPORT_SYMBOL(dcache_dir_lseek);
126 133
127/* Relationship between i_mode and the DT_xxx types */ 134/* Relationship between i_mode and the DT_xxx types */
128static inline unsigned char dt_type(struct inode *inode) 135static inline unsigned char dt_type(struct inode *inode)
@@ -172,11 +179,13 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
172 spin_unlock(&dentry->d_lock); 179 spin_unlock(&dentry->d_lock);
173 return 0; 180 return 0;
174} 181}
182EXPORT_SYMBOL(dcache_readdir);
175 183
176ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) 184ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
177{ 185{
178 return -EISDIR; 186 return -EISDIR;
179} 187}
188EXPORT_SYMBOL(generic_read_dir);
180 189
181const struct file_operations simple_dir_operations = { 190const struct file_operations simple_dir_operations = {
182 .open = dcache_dir_open, 191 .open = dcache_dir_open,
@@ -186,10 +195,12 @@ const struct file_operations simple_dir_operations = {
186 .iterate = dcache_readdir, 195 .iterate = dcache_readdir,
187 .fsync = noop_fsync, 196 .fsync = noop_fsync,
188}; 197};
198EXPORT_SYMBOL(simple_dir_operations);
189 199
190const struct inode_operations simple_dir_inode_operations = { 200const struct inode_operations simple_dir_inode_operations = {
191 .lookup = simple_lookup, 201 .lookup = simple_lookup,
192}; 202};
203EXPORT_SYMBOL(simple_dir_inode_operations);
193 204
194static const struct super_operations simple_super_operations = { 205static const struct super_operations simple_super_operations = {
195 .statfs = simple_statfs, 206 .statfs = simple_statfs,
@@ -244,6 +255,7 @@ Enomem:
244 deactivate_locked_super(s); 255 deactivate_locked_super(s);
245 return ERR_PTR(-ENOMEM); 256 return ERR_PTR(-ENOMEM);
246} 257}
258EXPORT_SYMBOL(mount_pseudo);
247 259
248int simple_open(struct inode *inode, struct file *file) 260int simple_open(struct inode *inode, struct file *file)
249{ 261{
@@ -251,6 +263,7 @@ int simple_open(struct inode *inode, struct file *file)
251 file->private_data = inode->i_private; 263 file->private_data = inode->i_private;
252 return 0; 264 return 0;
253} 265}
266EXPORT_SYMBOL(simple_open);
254 267
255int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 268int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
256{ 269{
@@ -263,6 +276,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
263 d_instantiate(dentry, inode); 276 d_instantiate(dentry, inode);
264 return 0; 277 return 0;
265} 278}
279EXPORT_SYMBOL(simple_link);
266 280
267int simple_empty(struct dentry *dentry) 281int simple_empty(struct dentry *dentry)
268{ 282{
@@ -283,6 +297,7 @@ out:
283 spin_unlock(&dentry->d_lock); 297 spin_unlock(&dentry->d_lock);
284 return ret; 298 return ret;
285} 299}
300EXPORT_SYMBOL(simple_empty);
286 301
287int simple_unlink(struct inode *dir, struct dentry *dentry) 302int simple_unlink(struct inode *dir, struct dentry *dentry)
288{ 303{
@@ -293,6 +308,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
293 dput(dentry); 308 dput(dentry);
294 return 0; 309 return 0;
295} 310}
311EXPORT_SYMBOL(simple_unlink);
296 312
297int simple_rmdir(struct inode *dir, struct dentry *dentry) 313int simple_rmdir(struct inode *dir, struct dentry *dentry)
298{ 314{
@@ -304,6 +320,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
304 drop_nlink(dir); 320 drop_nlink(dir);
305 return 0; 321 return 0;
306} 322}
323EXPORT_SYMBOL(simple_rmdir);
307 324
308int simple_rename(struct inode *old_dir, struct dentry *old_dentry, 325int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
309 struct inode *new_dir, struct dentry *new_dentry) 326 struct inode *new_dir, struct dentry *new_dentry)
@@ -330,6 +347,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
330 347
331 return 0; 348 return 0;
332} 349}
350EXPORT_SYMBOL(simple_rename);
333 351
334/** 352/**
335 * simple_setattr - setattr for simple filesystem 353 * simple_setattr - setattr for simple filesystem
@@ -370,6 +388,7 @@ int simple_readpage(struct file *file, struct page *page)
370 unlock_page(page); 388 unlock_page(page);
371 return 0; 389 return 0;
372} 390}
391EXPORT_SYMBOL(simple_readpage);
373 392
374int simple_write_begin(struct file *file, struct address_space *mapping, 393int simple_write_begin(struct file *file, struct address_space *mapping,
375 loff_t pos, unsigned len, unsigned flags, 394 loff_t pos, unsigned len, unsigned flags,
@@ -393,6 +412,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
393 } 412 }
394 return 0; 413 return 0;
395} 414}
415EXPORT_SYMBOL(simple_write_begin);
396 416
397/** 417/**
398 * simple_write_end - .write_end helper for non-block-device FSes 418 * simple_write_end - .write_end helper for non-block-device FSes
@@ -444,6 +464,7 @@ int simple_write_end(struct file *file, struct address_space *mapping,
444 464
445 return copied; 465 return copied;
446} 466}
467EXPORT_SYMBOL(simple_write_end);
447 468
448/* 469/*
449 * the inodes created here are not hashed. If you use iunique to generate 470 * the inodes created here are not hashed. If you use iunique to generate
@@ -512,6 +533,7 @@ out:
512 dput(root); 533 dput(root);
513 return -ENOMEM; 534 return -ENOMEM;
514} 535}
536EXPORT_SYMBOL(simple_fill_super);
515 537
516static DEFINE_SPINLOCK(pin_fs_lock); 538static DEFINE_SPINLOCK(pin_fs_lock);
517 539
@@ -534,6 +556,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
534 mntput(mnt); 556 mntput(mnt);
535 return 0; 557 return 0;
536} 558}
559EXPORT_SYMBOL(simple_pin_fs);
537 560
538void simple_release_fs(struct vfsmount **mount, int *count) 561void simple_release_fs(struct vfsmount **mount, int *count)
539{ 562{
@@ -545,6 +568,7 @@ void simple_release_fs(struct vfsmount **mount, int *count)
545 spin_unlock(&pin_fs_lock); 568 spin_unlock(&pin_fs_lock);
546 mntput(mnt); 569 mntput(mnt);
547} 570}
571EXPORT_SYMBOL(simple_release_fs);
548 572
549/** 573/**
550 * simple_read_from_buffer - copy data from the buffer to user space 574 * simple_read_from_buffer - copy data from the buffer to user space
@@ -579,6 +603,7 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
579 *ppos = pos + count; 603 *ppos = pos + count;
580 return count; 604 return count;
581} 605}
606EXPORT_SYMBOL(simple_read_from_buffer);
582 607
583/** 608/**
584 * simple_write_to_buffer - copy data from user space to the buffer 609 * simple_write_to_buffer - copy data from user space to the buffer
@@ -613,6 +638,7 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
613 *ppos = pos + count; 638 *ppos = pos + count;
614 return count; 639 return count;
615} 640}
641EXPORT_SYMBOL(simple_write_to_buffer);
616 642
617/** 643/**
618 * memory_read_from_buffer - copy data from the buffer 644 * memory_read_from_buffer - copy data from the buffer
@@ -644,6 +670,7 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
644 670
645 return count; 671 return count;
646} 672}
673EXPORT_SYMBOL(memory_read_from_buffer);
647 674
648/* 675/*
649 * Transaction based IO. 676 * Transaction based IO.
@@ -665,6 +692,7 @@ void simple_transaction_set(struct file *file, size_t n)
665 smp_mb(); 692 smp_mb();
666 ar->size = n; 693 ar->size = n;
667} 694}
695EXPORT_SYMBOL(simple_transaction_set);
668 696
669char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) 697char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
670{ 698{
@@ -696,6 +724,7 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s
696 724
697 return ar->data; 725 return ar->data;
698} 726}
727EXPORT_SYMBOL(simple_transaction_get);
699 728
700ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 729ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
701{ 730{
@@ -705,12 +734,14 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size
705 return 0; 734 return 0;
706 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); 735 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
707} 736}
737EXPORT_SYMBOL(simple_transaction_read);
708 738
709int simple_transaction_release(struct inode *inode, struct file *file) 739int simple_transaction_release(struct inode *inode, struct file *file)
710{ 740{
711 free_page((unsigned long)file->private_data); 741 free_page((unsigned long)file->private_data);
712 return 0; 742 return 0;
713} 743}
744EXPORT_SYMBOL(simple_transaction_release);
714 745
715/* Simple attribute files */ 746/* Simple attribute files */
716 747
@@ -746,12 +777,14 @@ int simple_attr_open(struct inode *inode, struct file *file,
746 777
747 return nonseekable_open(inode, file); 778 return nonseekable_open(inode, file);
748} 779}
780EXPORT_SYMBOL_GPL(simple_attr_open);
749 781
750int simple_attr_release(struct inode *inode, struct file *file) 782int simple_attr_release(struct inode *inode, struct file *file)
751{ 783{
752 kfree(file->private_data); 784 kfree(file->private_data);
753 return 0; 785 return 0;
754} 786}
787EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */
755 788
756/* read from the buffer that is filled with the get function */ 789/* read from the buffer that is filled with the get function */
757ssize_t simple_attr_read(struct file *file, char __user *buf, 790ssize_t simple_attr_read(struct file *file, char __user *buf,
@@ -787,6 +820,7 @@ out:
787 mutex_unlock(&attr->mutex); 820 mutex_unlock(&attr->mutex);
788 return ret; 821 return ret;
789} 822}
823EXPORT_SYMBOL_GPL(simple_attr_read);
790 824
791/* interpret the buffer as a number to call the set function with */ 825/* interpret the buffer as a number to call the set function with */
792ssize_t simple_attr_write(struct file *file, const char __user *buf, 826ssize_t simple_attr_write(struct file *file, const char __user *buf,
@@ -819,6 +853,7 @@ out:
819 mutex_unlock(&attr->mutex); 853 mutex_unlock(&attr->mutex);
820 return ret; 854 return ret;
821} 855}
856EXPORT_SYMBOL_GPL(simple_attr_write);
822 857
823/** 858/**
824 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 859 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
@@ -957,39 +992,56 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
957{ 992{
958 return 0; 993 return 0;
959} 994}
960
961EXPORT_SYMBOL(dcache_dir_close);
962EXPORT_SYMBOL(dcache_dir_lseek);
963EXPORT_SYMBOL(dcache_dir_open);
964EXPORT_SYMBOL(dcache_readdir);
965EXPORT_SYMBOL(generic_read_dir);
966EXPORT_SYMBOL(mount_pseudo);
967EXPORT_SYMBOL(simple_write_begin);
968EXPORT_SYMBOL(simple_write_end);
969EXPORT_SYMBOL(simple_dir_inode_operations);
970EXPORT_SYMBOL(simple_dir_operations);
971EXPORT_SYMBOL(simple_empty);
972EXPORT_SYMBOL(simple_fill_super);
973EXPORT_SYMBOL(simple_getattr);
974EXPORT_SYMBOL(simple_open);
975EXPORT_SYMBOL(simple_link);
976EXPORT_SYMBOL(simple_lookup);
977EXPORT_SYMBOL(simple_pin_fs);
978EXPORT_SYMBOL(simple_readpage);
979EXPORT_SYMBOL(simple_release_fs);
980EXPORT_SYMBOL(simple_rename);
981EXPORT_SYMBOL(simple_rmdir);
982EXPORT_SYMBOL(simple_statfs);
983EXPORT_SYMBOL(noop_fsync); 995EXPORT_SYMBOL(noop_fsync);
984EXPORT_SYMBOL(simple_unlink); 996
985EXPORT_SYMBOL(simple_read_from_buffer); 997void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
986EXPORT_SYMBOL(simple_write_to_buffer); 998 void *cookie)
987EXPORT_SYMBOL(memory_read_from_buffer); 999{
988EXPORT_SYMBOL(simple_transaction_set); 1000 char *s = nd_get_link(nd);
989EXPORT_SYMBOL(simple_transaction_get); 1001 if (!IS_ERR(s))
990EXPORT_SYMBOL(simple_transaction_read); 1002 kfree(s);
991EXPORT_SYMBOL(simple_transaction_release); 1003}
992EXPORT_SYMBOL_GPL(simple_attr_open); 1004EXPORT_SYMBOL(kfree_put_link);
993EXPORT_SYMBOL_GPL(simple_attr_release); 1005
994EXPORT_SYMBOL_GPL(simple_attr_read); 1006/*
995EXPORT_SYMBOL_GPL(simple_attr_write); 1007 * nop .set_page_dirty method so that people can use .page_mkwrite on
1008 * anon inodes.
1009 */
1010static int anon_set_page_dirty(struct page *page)
1011{
1012 return 0;
1013};
1014
1015/*
1016 * A single inode exists for all anon_inode files. Contrary to pipes,
1017 * anon_inode inodes have no associated per-instance data, so we need
1018 * only allocate one of them.
1019 */
1020struct inode *alloc_anon_inode(struct super_block *s)
1021{
1022 static const struct address_space_operations anon_aops = {
1023 .set_page_dirty = anon_set_page_dirty,
1024 };
1025 struct inode *inode = new_inode_pseudo(s);
1026
1027 if (!inode)
1028 return ERR_PTR(-ENOMEM);
1029
1030 inode->i_ino = get_next_ino();
1031 inode->i_mapping->a_ops = &anon_aops;
1032
1033 /*
1034 * Mark the inode dirty from the very beginning,
1035 * that way it will never be moved to the dirty
1036 * list because mark_inode_dirty() will think
1037 * that it already _is_ on the dirty list.
1038 */
1039 inode->i_state = I_DIRTY;
1040 inode->i_mode = S_IRUSR | S_IWUSR;
1041 inode->i_uid = current_fsuid();
1042 inode->i_gid = current_fsgid();
1043 inode->i_flags |= S_PRIVATE;
1044 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1045 return inode;
1046}
1047EXPORT_SYMBOL(alloc_anon_inode);
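
Besides moving every EXPORT_SYMBOL() next to its definition, libfs.c picks up kfree_put_link() (used by the gfs2 hunk above) and alloc_anon_inode(). The exported simple_* helpers are enough to assemble a whole in-memory filesystem; a sketch, with every "demo" name and the magic number invented for illustration:

#include <linux/fs.h>
#include <linux/module.h>

static ssize_t demo_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	static const char msg[] = "hello\n";

	return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct file_operations demo_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= demo_read,
	.llseek	= default_llseek,
};

static int demo_fill_super(struct super_block *sb, void *data, int silent)
{
	static struct tree_descr demo_files[] = {
		/* slots 0 and 1 stay empty; slot 1 would clash with the root inode */
		[2] = { "hello", &demo_fops, S_IRUGO },
		{ "" }				/* terminator */
	};

	return simple_fill_super(sb, 0x64656d6fUL, demo_files);
}

static struct dentry *demo_mount(struct file_system_type *fst, int flags,
				 const char *dev_name, void *data)
{
	return mount_single(fst, flags, data, demo_fill_super);
}

static struct file_system_type demo_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "demofs",
	.mount		= demo_mount,
	.kill_sb	= kill_litter_super,
};

/* register_filesystem(&demo_fs_type) in module init, unregister on exit */
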
diff --git a/fs/locks.c b/fs/locks.c
index b27a3005d78d..f99d52bdd05a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -134,7 +134,7 @@
134 134
135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
137#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) 137#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG))
138 138
139static bool lease_breaking(struct file_lock *fl) 139static bool lease_breaking(struct file_lock *fl)
140{ 140{
@@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode)
1292 } 1292 }
1293} 1293}
1294 1294
1295static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1296{
1297 if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
1298 return false;
1299 return locks_conflict(breaker, lease);
1300}
1301
1295/** 1302/**
1296 * __break_lease - revoke all outstanding leases on file 1303 * __break_lease - revoke all outstanding leases on file
1297 * @inode: the inode of the file to return 1304 * @inode: the inode of the file to return
1298 * @mode: the open mode (read or write) 1305 * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1306 * break all leases
1307 * @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1308 * only delegations
1299 * 1309 *
1300 * break_lease (inlined for speed) has checked there already is at least 1310 * break_lease (inlined for speed) has checked there already is at least
1301 * some kind of lock (maybe a lease) on this file. Leases are broken on 1311 * some kind of lock (maybe a lease) on this file. Leases are broken on
1302 * a call to open() or truncate(). This function can sleep unless you 1312 * a call to open() or truncate(). This function can sleep unless you
1303 * specified %O_NONBLOCK to your open(). 1313 * specified %O_NONBLOCK to your open().
1304 */ 1314 */
1305int __break_lease(struct inode *inode, unsigned int mode) 1315int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1306{ 1316{
1307 int error = 0; 1317 int error = 0;
1308 struct file_lock *new_fl, *flock; 1318 struct file_lock *new_fl, *flock;
1309 struct file_lock *fl; 1319 struct file_lock *fl;
1310 unsigned long break_time; 1320 unsigned long break_time;
1311 int i_have_this_lease = 0; 1321 int i_have_this_lease = 0;
1322 bool lease_conflict = false;
1312 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1323 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1313 1324
1314 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); 1325 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1315 if (IS_ERR(new_fl)) 1326 if (IS_ERR(new_fl))
1316 return PTR_ERR(new_fl); 1327 return PTR_ERR(new_fl);
1328 new_fl->fl_flags = type;
1317 1329
1318 spin_lock(&inode->i_lock); 1330 spin_lock(&inode->i_lock);
1319 1331
@@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode)
1323 if ((flock == NULL) || !IS_LEASE(flock)) 1335 if ((flock == NULL) || !IS_LEASE(flock))
1324 goto out; 1336 goto out;
1325 1337
1326 if (!locks_conflict(flock, new_fl)) 1338 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1339 if (leases_conflict(fl, new_fl)) {
1340 lease_conflict = true;
1341 if (fl->fl_owner == current->files)
1342 i_have_this_lease = 1;
1343 }
1344 }
1345 if (!lease_conflict)
1327 goto out; 1346 goto out;
1328 1347
1329 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
1330 if (fl->fl_owner == current->files)
1331 i_have_this_lease = 1;
1332
1333 break_time = 0; 1348 break_time = 0;
1334 if (lease_break_time > 0) { 1349 if (lease_break_time > 0) {
1335 break_time = jiffies + lease_break_time * HZ; 1350 break_time = jiffies + lease_break_time * HZ;
@@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
1338 } 1353 }
1339 1354
1340 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { 1355 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1356 if (!leases_conflict(fl, new_fl))
1357 continue;
1341 if (want_write) { 1358 if (want_write) {
1342 if (fl->fl_flags & FL_UNLOCK_PENDING) 1359 if (fl->fl_flags & FL_UNLOCK_PENDING)
1343 continue; 1360 continue;
@@ -1379,7 +1396,7 @@ restart:
1379 */ 1396 */
1380 for (flock = inode->i_flock; flock && IS_LEASE(flock); 1397 for (flock = inode->i_flock; flock && IS_LEASE(flock);
1381 flock = flock->fl_next) { 1398 flock = flock->fl_next) {
1382 if (locks_conflict(new_fl, flock)) 1399 if (leases_conflict(new_fl, flock))
1383 goto restart; 1400 goto restart;
1384 } 1401 }
1385 error = 0; 1402 error = 0;
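The new 'type' argument is what distinguishes "break leases and delegations" (FL_LEASE) from "break only delegations" (FL_DELEG). The include/linux/fs.h wrappers that pass it are outside this excerpt; a hedged reconstruction of their likely shape, keeping the existing i_flock fast-path check:

	static inline int break_lease(struct inode *inode, unsigned int mode)
	{
		if (inode->i_flock)
			return __break_lease(inode, mode, FL_LEASE);
		return 0;
	}

	static inline int break_deleg(struct inode *inode, unsigned int mode)
	{
		if (inode->i_flock)
			return __break_lease(inode, mode, FL_DELEG);
		return 0;
	}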
@@ -1460,9 +1477,26 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
1460 struct file_lock *fl, **before, **my_before = NULL, *lease; 1477 struct file_lock *fl, **before, **my_before = NULL, *lease;
1461 struct dentry *dentry = filp->f_path.dentry; 1478 struct dentry *dentry = filp->f_path.dentry;
1462 struct inode *inode = dentry->d_inode; 1479 struct inode *inode = dentry->d_inode;
1480 bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1463 int error; 1481 int error;
1464 1482
1465 lease = *flp; 1483 lease = *flp;
1484 /*
1485 * In the delegation case we need mutual exclusion with
1486 * a number of operations that take the i_mutex. We trylock
1487 * because delegations are an optional optimization, and if
1488 * there's some chance of a conflict--we'd rather not
1489 * bother, maybe that's a sign this just isn't a good file to
1490 * hand out a delegation on.
1491 */
1492 if (is_deleg && !mutex_trylock(&inode->i_mutex))
1493 return -EAGAIN;
1494
1495 if (is_deleg && arg == F_WRLCK) {
1496 /* Write delegations are not currently supported: */
1497 WARN_ON_ONCE(1);
1498 return -EINVAL;
1499 }
1466 1500
1467 error = -EAGAIN; 1501 error = -EAGAIN;
1468 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1502 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
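Because the i_mutex is only trylocked, a caller that treats delegations as a pure optimization should interpret -EAGAIN as "skip the delegation this time" rather than as a hard failure. A hypothetical caller sketch (example_try_delegation() is illustrative only):

	static void example_try_delegation(struct file *filp, struct file_lock *fl)
	{
		/* fl->fl_flags is assumed to carry FL_DELEG here */
		int err = vfs_setlease(filp, F_RDLCK, &fl);

		if (err == -EAGAIN)
			return;		/* contended i_mutex: just don't delegate */
		if (err)
			pr_debug("delegation setup failed: %d\n", err);
	}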
@@ -1514,9 +1548,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
1514 goto out; 1548 goto out;
1515 1549
1516 locks_insert_lock(before, lease); 1550 locks_insert_lock(before, lease);
1517 return 0; 1551 error = 0;
1518
1519out: 1552out:
1553 if (is_deleg)
1554 mutex_unlock(&inode->i_mutex);
1520 return error; 1555 return error;
1521} 1556}
1522 1557
@@ -1579,7 +1614,7 @@ EXPORT_SYMBOL(generic_setlease);
1579 1614
1580static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1615static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1581{ 1616{
1582 if (filp->f_op && filp->f_op->setlease) 1617 if (filp->f_op->setlease)
1583 return filp->f_op->setlease(filp, arg, lease); 1618 return filp->f_op->setlease(filp, arg, lease);
1584 else 1619 else
1585 return generic_setlease(filp, arg, lease); 1620 return generic_setlease(filp, arg, lease);
@@ -1771,7 +1806,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1771 if (error) 1806 if (error)
1772 goto out_free; 1807 goto out_free;
1773 1808
1774 if (f.file->f_op && f.file->f_op->flock) 1809 if (f.file->f_op->flock)
1775 error = f.file->f_op->flock(f.file, 1810 error = f.file->f_op->flock(f.file,
1776 (can_sleep) ? F_SETLKW : F_SETLK, 1811 (can_sleep) ? F_SETLKW : F_SETLK,
1777 lock); 1812 lock);
@@ -1797,7 +1832,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1797 */ 1832 */
1798int vfs_test_lock(struct file *filp, struct file_lock *fl) 1833int vfs_test_lock(struct file *filp, struct file_lock *fl)
1799{ 1834{
1800 if (filp->f_op && filp->f_op->lock) 1835 if (filp->f_op->lock)
1801 return filp->f_op->lock(filp, F_GETLK, fl); 1836 return filp->f_op->lock(filp, F_GETLK, fl);
1802 posix_test_lock(filp, fl); 1837 posix_test_lock(filp, fl);
1803 return 0; 1838 return 0;
@@ -1909,7 +1944,7 @@ out:
1909 */ 1944 */
1910int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1945int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
1911{ 1946{
1912 if (filp->f_op && filp->f_op->lock) 1947 if (filp->f_op->lock)
1913 return filp->f_op->lock(filp, cmd, fl); 1948 return filp->f_op->lock(filp, cmd, fl);
1914 else 1949 else
1915 return posix_lock_file(filp, fl, conf); 1950 return posix_lock_file(filp, fl, conf);
@@ -2182,7 +2217,7 @@ void locks_remove_flock(struct file *filp)
2182 if (!inode->i_flock) 2217 if (!inode->i_flock)
2183 return; 2218 return;
2184 2219
2185 if (filp->f_op && filp->f_op->flock) { 2220 if (filp->f_op->flock) {
2186 struct file_lock fl = { 2221 struct file_lock fl = {
2187 .fl_pid = current->tgid, 2222 .fl_pid = current->tgid,
2188 .fl_file = filp, 2223 .fl_file = filp,
@@ -2246,7 +2281,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
2246 */ 2281 */
2247int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 2282int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2248{ 2283{
2249 if (filp->f_op && filp->f_op->lock) 2284 if (filp->f_op->lock)
2250 return filp->f_op->lock(filp, F_CANCELLK, fl); 2285 return filp->f_op->lock(filp, F_CANCELLK, fl);
2251 return 0; 2286 return 0;
2252} 2287}
diff --git a/fs/mount.h b/fs/mount.h
index 64a858143ff9..d64c594be6c4 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -29,6 +29,7 @@ struct mount {
29 struct mount *mnt_parent; 29 struct mount *mnt_parent;
30 struct dentry *mnt_mountpoint; 30 struct dentry *mnt_mountpoint;
31 struct vfsmount mnt; 31 struct vfsmount mnt;
32 struct rcu_head mnt_rcu;
32#ifdef CONFIG_SMP 33#ifdef CONFIG_SMP
33 struct mnt_pcp __percpu *mnt_pcp; 34 struct mnt_pcp __percpu *mnt_pcp;
34#else 35#else
@@ -55,7 +56,7 @@ struct mount {
55 int mnt_group_id; /* peer group identifier */ 56 int mnt_group_id; /* peer group identifier */
56 int mnt_expiry_mark; /* true if marked for expiry */ 57 int mnt_expiry_mark; /* true if marked for expiry */
57 int mnt_pinned; 58 int mnt_pinned;
58 int mnt_ghosts; 59 struct path mnt_ex_mountpoint;
59}; 60};
60 61
61#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ 62#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -76,13 +77,28 @@ static inline int is_mounted(struct vfsmount *mnt)
76 return !IS_ERR_OR_NULL(real_mount(mnt)); 77 return !IS_ERR_OR_NULL(real_mount(mnt));
77} 78}
78 79
79extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 80extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
81extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
82
83extern bool legitimize_mnt(struct vfsmount *, unsigned);
80 84
81static inline void get_mnt_ns(struct mnt_namespace *ns) 85static inline void get_mnt_ns(struct mnt_namespace *ns)
82{ 86{
83 atomic_inc(&ns->count); 87 atomic_inc(&ns->count);
84} 88}
85 89
90extern seqlock_t mount_lock;
91
92static inline void lock_mount_hash(void)
93{
94 write_seqlock(&mount_lock);
95}
96
97static inline void unlock_mount_hash(void)
98{
99 write_sequnlock(&mount_lock);
100}
101
86struct proc_mounts { 102struct proc_mounts {
87 struct seq_file m; 103 struct seq_file m;
88 struct mnt_namespace *ns; 104 struct mnt_namespace *ns;
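mount_lock, declared above, is the seqlock that replaces the old vfsmount brlock: writers go through lock_mount_hash(), while lockless readers sample the sequence and retry. A minimal reader sketch, simplified from the lookup_mnt() rewrite later in this diff (the real code loops on legitimize_mnt() so the mount also gets pinned):

	static struct mount *example_lookup(struct path *path)
	{
		struct mount *m;
		unsigned seq;

		rcu_read_lock();
		do {
			seq = read_seqbegin(&mount_lock);
			m = __lookup_mnt(path->mnt, path->dentry);
		} while (read_seqretry(&mount_lock, seq));
		rcu_read_unlock();
		return m;		/* not pinned: see legitimize_mnt() */
	}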
diff --git a/fs/namei.c b/fs/namei.c
index caa28051e197..e029a4cbff7d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
482 * to restart the path walk from the beginning in ref-walk mode. 482 * to restart the path walk from the beginning in ref-walk mode.
483 */ 483 */
484 484
485static inline void lock_rcu_walk(void)
486{
487 br_read_lock(&vfsmount_lock);
488 rcu_read_lock();
489}
490
491static inline void unlock_rcu_walk(void)
492{
493 rcu_read_unlock();
494 br_read_unlock(&vfsmount_lock);
495}
496
497/** 485/**
498 * unlazy_walk - try to switch to ref-walk mode. 486 * unlazy_walk - try to switch to ref-walk mode.
499 * @nd: nameidata pathwalk data 487 * @nd: nameidata pathwalk data
@@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
512 BUG_ON(!(nd->flags & LOOKUP_RCU)); 500 BUG_ON(!(nd->flags & LOOKUP_RCU));
513 501
514 /* 502 /*
515 * Get a reference to the parent first: we're 503 * After legitimizing the bastards, terminate_walk()
516 * going to make "path_put(nd->path)" valid in 504 * will do the right thing for non-RCU mode, and all our
517 * non-RCU context for "terminate_walk()". 505 * subsequent exit cases should rcu_read_unlock()
518 * 506 * before returning. Do vfsmount first; if dentry
519 * If this doesn't work, return immediately with 507 * can't be legitimized, just set nd->path.dentry to NULL
520 * RCU walking still active (and then we will do 508 * and rely on dput(NULL) being a no-op.
521 * the RCU walk cleanup in terminate_walk()).
522 */ 509 */
523 if (!lockref_get_not_dead(&parent->d_lockref)) 510 if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
524 return -ECHILD; 511 return -ECHILD;
525
526 /*
527 * After the mntget(), we terminate_walk() will do
528 * the right thing for non-RCU mode, and all our
529 * subsequent exit cases should unlock_rcu_walk()
530 * before returning.
531 */
532 mntget(nd->path.mnt);
533 nd->flags &= ~LOOKUP_RCU; 512 nd->flags &= ~LOOKUP_RCU;
534 513
514 if (!lockref_get_not_dead(&parent->d_lockref)) {
515 nd->path.dentry = NULL;
516 rcu_read_unlock();
517 return -ECHILD;
518 }
519
535 /* 520 /*
536 * For a negative lookup, the lookup sequence point is the parents 521 * For a negative lookup, the lookup sequence point is the parents
537 * sequence point, and it only needs to revalidate the parent dentry. 522 * sequence point, and it only needs to revalidate the parent dentry.
@@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
566 spin_unlock(&fs->lock); 551 spin_unlock(&fs->lock);
567 } 552 }
568 553
569 unlock_rcu_walk(); 554 rcu_read_unlock();
570 return 0; 555 return 0;
571 556
572unlock_and_drop_dentry: 557unlock_and_drop_dentry:
573 spin_unlock(&fs->lock); 558 spin_unlock(&fs->lock);
574drop_dentry: 559drop_dentry:
575 unlock_rcu_walk(); 560 rcu_read_unlock();
576 dput(dentry); 561 dput(dentry);
577 goto drop_root_mnt; 562 goto drop_root_mnt;
578out: 563out:
579 unlock_rcu_walk(); 564 rcu_read_unlock();
580drop_root_mnt: 565drop_root_mnt:
581 if (!(nd->flags & LOOKUP_ROOT)) 566 if (!(nd->flags & LOOKUP_ROOT))
582 nd->root.mnt = NULL; 567 nd->root.mnt = NULL;
@@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd)
608 if (!(nd->flags & LOOKUP_ROOT)) 593 if (!(nd->flags & LOOKUP_ROOT))
609 nd->root.mnt = NULL; 594 nd->root.mnt = NULL;
610 595
596 if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
597 rcu_read_unlock();
598 return -ECHILD;
599 }
611 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { 600 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
612 unlock_rcu_walk(); 601 rcu_read_unlock();
602 mntput(nd->path.mnt);
613 return -ECHILD; 603 return -ECHILD;
614 } 604 }
615 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { 605 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
616 unlock_rcu_walk(); 606 rcu_read_unlock();
617 dput(dentry); 607 dput(dentry);
608 mntput(nd->path.mnt);
618 return -ECHILD; 609 return -ECHILD;
619 } 610 }
620 mntget(nd->path.mnt); 611 rcu_read_unlock();
621 unlock_rcu_walk();
622 } 612 }
623 613
624 if (likely(!(nd->flags & LOOKUP_JUMPED))) 614 if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -909,15 +899,15 @@ int follow_up(struct path *path)
909 struct mount *parent; 899 struct mount *parent;
910 struct dentry *mountpoint; 900 struct dentry *mountpoint;
911 901
912 br_read_lock(&vfsmount_lock); 902 read_seqlock_excl(&mount_lock);
913 parent = mnt->mnt_parent; 903 parent = mnt->mnt_parent;
914 if (parent == mnt) { 904 if (parent == mnt) {
915 br_read_unlock(&vfsmount_lock); 905 read_sequnlock_excl(&mount_lock);
916 return 0; 906 return 0;
917 } 907 }
918 mntget(&parent->mnt); 908 mntget(&parent->mnt);
919 mountpoint = dget(mnt->mnt_mountpoint); 909 mountpoint = dget(mnt->mnt_mountpoint);
920 br_read_unlock(&vfsmount_lock); 910 read_sequnlock_excl(&mount_lock);
921 dput(path->dentry); 911 dput(path->dentry);
922 path->dentry = mountpoint; 912 path->dentry = mountpoint;
923 mntput(path->mnt); 913 mntput(path->mnt);
@@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags)
1048 1038
1049 /* Something is mounted on this dentry in another 1039 /* Something is mounted on this dentry in another
1050 * namespace and/or whatever was mounted there in this 1040 * namespace and/or whatever was mounted there in this
1051 * namespace got unmounted before we managed to get the 1041 * namespace got unmounted before lookup_mnt() could
1052 * vfsmount_lock */ 1042 * get it */
1053 } 1043 }
1054 1044
1055 /* Handle an automount point */ 1045 /* Handle an automount point */
@@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1111 if (!d_mountpoint(path->dentry)) 1101 if (!d_mountpoint(path->dentry))
1112 break; 1102 break;
1113 1103
1114 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1104 mounted = __lookup_mnt(path->mnt, path->dentry);
1115 if (!mounted) 1105 if (!mounted)
1116 break; 1106 break;
1117 path->mnt = &mounted->mnt; 1107 path->mnt = &mounted->mnt;
@@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd)
1132{ 1122{
1133 while (d_mountpoint(nd->path.dentry)) { 1123 while (d_mountpoint(nd->path.dentry)) {
1134 struct mount *mounted; 1124 struct mount *mounted;
1135 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); 1125 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
1136 if (!mounted) 1126 if (!mounted)
1137 break; 1127 break;
1138 nd->path.mnt = &mounted->mnt; 1128 nd->path.mnt = &mounted->mnt;
@@ -1174,7 +1164,7 @@ failed:
1174 nd->flags &= ~LOOKUP_RCU; 1164 nd->flags &= ~LOOKUP_RCU;
1175 if (!(nd->flags & LOOKUP_ROOT)) 1165 if (!(nd->flags & LOOKUP_ROOT))
1176 nd->root.mnt = NULL; 1166 nd->root.mnt = NULL;
1177 unlock_rcu_walk(); 1167 rcu_read_unlock();
1178 return -ECHILD; 1168 return -ECHILD;
1179} 1169}
1180 1170
@@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1308} 1298}
1309 1299
1310/* 1300/*
1311 * Call i_op->lookup on the dentry. The dentry must be negative but may be 1301 * Call i_op->lookup on the dentry. The dentry must be negative and
1312 * hashed if it was pouplated with DCACHE_NEED_LOOKUP. 1302 * unhashed.
1313 * 1303 *
1314 * dir->d_inode->i_mutex must be held 1304 * dir->d_inode->i_mutex must be held
1315 */ 1305 */
@@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
1501 nd->flags &= ~LOOKUP_RCU; 1491 nd->flags &= ~LOOKUP_RCU;
1502 if (!(nd->flags & LOOKUP_ROOT)) 1492 if (!(nd->flags & LOOKUP_ROOT))
1503 nd->root.mnt = NULL; 1493 nd->root.mnt = NULL;
1504 unlock_rcu_walk(); 1494 rcu_read_unlock();
1505 } 1495 }
1506} 1496}
1507 1497
@@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
1511 * so we keep a cache of "no, this doesn't need follow_link" 1501 * so we keep a cache of "no, this doesn't need follow_link"
1512 * for the common case. 1502 * for the common case.
1513 */ 1503 */
1514static inline int should_follow_link(struct inode *inode, int follow) 1504static inline int should_follow_link(struct dentry *dentry, int follow)
1515{ 1505{
1516 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { 1506 return unlikely(d_is_symlink(dentry)) ? follow : 0;
1517 if (likely(inode->i_op->follow_link))
1518 return follow;
1519
1520 /* This gets set once for the inode lifetime */
1521 spin_lock(&inode->i_lock);
1522 inode->i_opflags |= IOP_NOFOLLOW;
1523 spin_unlock(&inode->i_lock);
1524 }
1525 return 0;
1526} 1507}
1527 1508
1528static inline int walk_component(struct nameidata *nd, struct path *path, 1509static inline int walk_component(struct nameidata *nd, struct path *path,
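should_follow_link() and several call sites below now key off dentry type flags, so the RCU-walk fast path no longer has to peek at ->d_inode or ->i_op. The d_is_*() helpers come from include/linux/dcache.h in the same series; a reconstruction of their rough shape (flag names as in dcache.h, exact bit values omitted):

	static inline bool d_is_symlink(const struct dentry *dentry)
	{
		return (dentry->d_flags & DCACHE_ENTRY_TYPE) == DCACHE_SYMLINK_TYPE;
	}

	static inline bool d_is_negative(const struct dentry *dentry)
	{
		/* a cached "miss": no inode behind this dentry */
		return (dentry->d_flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE;
	}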
@@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1552 if (!inode) 1533 if (!inode)
1553 goto out_path_put; 1534 goto out_path_put;
1554 1535
1555 if (should_follow_link(inode, follow)) { 1536 if (should_follow_link(path->dentry, follow)) {
1556 if (nd->flags & LOOKUP_RCU) { 1537 if (nd->flags & LOOKUP_RCU) {
1557 if (unlikely(unlazy_walk(nd, path->dentry))) { 1538 if (unlikely(unlazy_walk(nd, path->dentry))) {
1558 err = -ECHILD; 1539 err = -ECHILD;
@@ -1611,26 +1592,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1611} 1592}
1612 1593
1613/* 1594/*
1614 * We really don't want to look at inode->i_op->lookup
1615 * when we don't have to. So we keep a cache bit in
1616 * the inode ->i_opflags field that says "yes, we can
1617 * do lookup on this inode".
1618 */
1619static inline int can_lookup(struct inode *inode)
1620{
1621 if (likely(inode->i_opflags & IOP_LOOKUP))
1622 return 1;
1623 if (likely(!inode->i_op->lookup))
1624 return 0;
1625
1626 /* We do this once for the lifetime of the inode */
1627 spin_lock(&inode->i_lock);
1628 inode->i_opflags |= IOP_LOOKUP;
1629 spin_unlock(&inode->i_lock);
1630 return 1;
1631}
1632
1633/*
1634 * We can do the critical dentry name comparison and hashing 1595 * We can do the critical dentry name comparison and hashing
1635 * operations one word at a time, but we are limited to: 1596 * operations one word at a time, but we are limited to:
1636 * 1597 *
@@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1833 if (err) 1794 if (err)
1834 return err; 1795 return err;
1835 } 1796 }
1836 if (!can_lookup(nd->inode)) { 1797 if (!d_is_directory(nd->path.dentry)) {
1837 err = -ENOTDIR; 1798 err = -ENOTDIR;
1838 break; 1799 break;
1839 } 1800 }
@@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1851 nd->flags = flags | LOOKUP_JUMPED; 1812 nd->flags = flags | LOOKUP_JUMPED;
1852 nd->depth = 0; 1813 nd->depth = 0;
1853 if (flags & LOOKUP_ROOT) { 1814 if (flags & LOOKUP_ROOT) {
1854 struct inode *inode = nd->root.dentry->d_inode; 1815 struct dentry *root = nd->root.dentry;
1816 struct inode *inode = root->d_inode;
1855 if (*name) { 1817 if (*name) {
1856 if (!can_lookup(inode)) 1818 if (!d_is_directory(root))
1857 return -ENOTDIR; 1819 return -ENOTDIR;
1858 retval = inode_permission(inode, MAY_EXEC); 1820 retval = inode_permission(inode, MAY_EXEC);
1859 if (retval) 1821 if (retval)
@@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1862 nd->path = nd->root; 1824 nd->path = nd->root;
1863 nd->inode = inode; 1825 nd->inode = inode;
1864 if (flags & LOOKUP_RCU) { 1826 if (flags & LOOKUP_RCU) {
1865 lock_rcu_walk(); 1827 rcu_read_lock();
1866 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1828 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1829 nd->m_seq = read_seqbegin(&mount_lock);
1867 } else { 1830 } else {
1868 path_get(&nd->path); 1831 path_get(&nd->path);
1869 } 1832 }
@@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1872 1835
1873 nd->root.mnt = NULL; 1836 nd->root.mnt = NULL;
1874 1837
1838 nd->m_seq = read_seqbegin(&mount_lock);
1875 if (*name=='/') { 1839 if (*name=='/') {
1876 if (flags & LOOKUP_RCU) { 1840 if (flags & LOOKUP_RCU) {
1877 lock_rcu_walk(); 1841 rcu_read_lock();
1878 set_root_rcu(nd); 1842 set_root_rcu(nd);
1879 } else { 1843 } else {
1880 set_root(nd); 1844 set_root(nd);
@@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1886 struct fs_struct *fs = current->fs; 1850 struct fs_struct *fs = current->fs;
1887 unsigned seq; 1851 unsigned seq;
1888 1852
1889 lock_rcu_walk(); 1853 rcu_read_lock();
1890 1854
1891 do { 1855 do {
1892 seq = read_seqcount_begin(&fs->seq); 1856 seq = read_seqcount_begin(&fs->seq);
@@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1907 dentry = f.file->f_path.dentry; 1871 dentry = f.file->f_path.dentry;
1908 1872
1909 if (*name) { 1873 if (*name) {
1910 if (!can_lookup(dentry->d_inode)) { 1874 if (!d_is_directory(dentry)) {
1911 fdput(f); 1875 fdput(f);
1912 return -ENOTDIR; 1876 return -ENOTDIR;
1913 } 1877 }
@@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1918 if (f.need_put) 1882 if (f.need_put)
1919 *fp = f.file; 1883 *fp = f.file;
1920 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1884 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1921 lock_rcu_walk(); 1885 rcu_read_lock();
1922 } else { 1886 } else {
1923 path_get(&nd->path); 1887 path_get(&nd->path);
1924 fdput(f); 1888 fdput(f);
@@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
1989 err = complete_walk(nd); 1953 err = complete_walk(nd);
1990 1954
1991 if (!err && nd->flags & LOOKUP_DIRECTORY) { 1955 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1992 if (!can_lookup(nd->inode)) { 1956 if (!d_is_directory(nd->path.dentry)) {
1993 path_put(&nd->path); 1957 path_put(&nd->path);
1994 err = -ENOTDIR; 1958 err = -ENOTDIR;
1995 } 1959 }
@@ -2281,7 +2245,7 @@ done:
2281 } 2245 }
2282 path->dentry = dentry; 2246 path->dentry = dentry;
2283 path->mnt = mntget(nd->path.mnt); 2247 path->mnt = mntget(nd->path.mnt);
2284 if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) 2248 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
2285 return 1; 2249 return 1;
2286 follow_mount(path); 2250 follow_mount(path);
2287 error = 0; 2251 error = 0;
@@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
2426 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 2390 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
2427 * nfs_async_unlink(). 2391 * nfs_async_unlink().
2428 */ 2392 */
2429static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 2393static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2430{ 2394{
2395 struct inode *inode = victim->d_inode;
2431 int error; 2396 int error;
2432 2397
2433 if (!victim->d_inode) 2398 if (d_is_negative(victim))
2434 return -ENOENT; 2399 return -ENOENT;
2400 BUG_ON(!inode);
2435 2401
2436 BUG_ON(victim->d_parent->d_inode != dir); 2402 BUG_ON(victim->d_parent->d_inode != dir);
2437 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 2403 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
2441 return error; 2407 return error;
2442 if (IS_APPEND(dir)) 2408 if (IS_APPEND(dir))
2443 return -EPERM; 2409 return -EPERM;
2444 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 2410
2445 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 2411 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
2412 IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
2446 return -EPERM; 2413 return -EPERM;
2447 if (isdir) { 2414 if (isdir) {
2448 if (!S_ISDIR(victim->d_inode->i_mode)) 2415 if (!d_is_directory(victim) && !d_is_autodir(victim))
2449 return -ENOTDIR; 2416 return -ENOTDIR;
2450 if (IS_ROOT(victim)) 2417 if (IS_ROOT(victim))
2451 return -EBUSY; 2418 return -EBUSY;
2452 } else if (S_ISDIR(victim->d_inode->i_mode)) 2419 } else if (d_is_directory(victim) || d_is_autodir(victim))
2453 return -EISDIR; 2420 return -EISDIR;
2454 if (IS_DEADDIR(dir)) 2421 if (IS_DEADDIR(dir))
2455 return -ENOENT; 2422 return -ENOENT;
@@ -2983,7 +2950,7 @@ retry_lookup:
2983 /* 2950 /*
2984 * create/update audit record if it already exists. 2951 * create/update audit record if it already exists.
2985 */ 2952 */
2986 if (path->dentry->d_inode) 2953 if (d_is_positive(path->dentry))
2987 audit_inode(name, path->dentry, 0); 2954 audit_inode(name, path->dentry, 0);
2988 2955
2989 /* 2956 /*
@@ -3012,12 +2979,12 @@ retry_lookup:
3012finish_lookup: 2979finish_lookup:
3013 /* we _can_ be in RCU mode here */ 2980 /* we _can_ be in RCU mode here */
3014 error = -ENOENT; 2981 error = -ENOENT;
3015 if (!inode) { 2982 if (d_is_negative(path->dentry)) {
3016 path_to_nameidata(path, nd); 2983 path_to_nameidata(path, nd);
3017 goto out; 2984 goto out;
3018 } 2985 }
3019 2986
3020 if (should_follow_link(inode, !symlink_ok)) { 2987 if (should_follow_link(path->dentry, !symlink_ok)) {
3021 if (nd->flags & LOOKUP_RCU) { 2988 if (nd->flags & LOOKUP_RCU) {
3022 if (unlikely(unlazy_walk(nd, path->dentry))) { 2989 if (unlikely(unlazy_walk(nd, path->dentry))) {
3023 error = -ECHILD; 2990 error = -ECHILD;
@@ -3046,10 +3013,11 @@ finish_open:
3046 } 3013 }
3047 audit_inode(name, nd->path.dentry, 0); 3014 audit_inode(name, nd->path.dentry, 0);
3048 error = -EISDIR; 3015 error = -EISDIR;
3049 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) 3016 if ((open_flag & O_CREAT) &&
3017 (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
3050 goto out; 3018 goto out;
3051 error = -ENOTDIR; 3019 error = -ENOTDIR;
3052 if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) 3020 if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
3053 goto out; 3021 goto out;
3054 if (!S_ISREG(nd->inode->i_mode)) 3022 if (!S_ISREG(nd->inode->i_mode))
3055 will_truncate = false; 3023 will_truncate = false;
@@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
3275 nd.root.mnt = mnt; 3243 nd.root.mnt = mnt;
3276 nd.root.dentry = dentry; 3244 nd.root.dentry = dentry;
3277 3245
3278 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) 3246 if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
3279 return ERR_PTR(-ELOOP); 3247 return ERR_PTR(-ELOOP);
3280 3248
3281 file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); 3249 file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
3325 goto unlock; 3293 goto unlock;
3326 3294
3327 error = -EEXIST; 3295 error = -EEXIST;
3328 if (dentry->d_inode) 3296 if (d_is_positive(dentry))
3329 goto fail; 3297 goto fail;
3298
3330 /* 3299 /*
3331 * Special case - lookup gave negative, but... we had foo/bar/ 3300 * Special case - lookup gave negative, but... we had foo/bar/
3332 * From the vfs_mknod() POV we just have a negative dentry - 3301 * From the vfs_mknod() POV we just have a negative dentry -
@@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
3647 return do_rmdir(AT_FDCWD, pathname); 3616 return do_rmdir(AT_FDCWD, pathname);
3648} 3617}
3649 3618
3650int vfs_unlink(struct inode *dir, struct dentry *dentry) 3619/**
3620 * vfs_unlink - unlink a filesystem object
3621 * @dir: parent directory
3622 * @dentry: victim
3623 * @delegated_inode: returns victim inode, if the inode is delegated.
3624 *
3625 * The caller must hold dir->i_mutex.
3626 *
3627 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
3628 * return a reference to the inode in delegated_inode. The caller
3629 * should then break the delegation on that inode and retry. Because
3630 * breaking a delegation may take a long time, the caller should drop
3631 * dir->i_mutex before doing so.
3632 *
3633 * Alternatively, a caller may pass NULL for delegated_inode. This may
3634 * be appropriate for callers that expect the underlying filesystem not
3635 * to be NFS exported.
3636 */
3637int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
3651{ 3638{
3639 struct inode *target = dentry->d_inode;
3652 int error = may_delete(dir, dentry, 0); 3640 int error = may_delete(dir, dentry, 0);
3653 3641
3654 if (error) 3642 if (error)
@@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
3657 if (!dir->i_op->unlink) 3645 if (!dir->i_op->unlink)
3658 return -EPERM; 3646 return -EPERM;
3659 3647
3660 mutex_lock(&dentry->d_inode->i_mutex); 3648 mutex_lock(&target->i_mutex);
3661 if (d_mountpoint(dentry)) 3649 if (d_mountpoint(dentry))
3662 error = -EBUSY; 3650 error = -EBUSY;
3663 else { 3651 else {
3664 error = security_inode_unlink(dir, dentry); 3652 error = security_inode_unlink(dir, dentry);
3665 if (!error) { 3653 if (!error) {
3654 error = try_break_deleg(target, delegated_inode);
3655 if (error)
3656 goto out;
3666 error = dir->i_op->unlink(dir, dentry); 3657 error = dir->i_op->unlink(dir, dentry);
3667 if (!error) 3658 if (!error)
3668 dont_mount(dentry); 3659 dont_mount(dentry);
3669 } 3660 }
3670 } 3661 }
3671 mutex_unlock(&dentry->d_inode->i_mutex); 3662out:
3663 mutex_unlock(&target->i_mutex);
3672 3664
3673 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 3665 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
3674 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 3666 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
3675 fsnotify_link_count(dentry->d_inode); 3667 fsnotify_link_count(target);
3676 d_delete(dentry); 3668 d_delete(dentry);
3677 } 3669 }
3678 3670
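The retry contract in the vfs_unlink() kerneldoc above leans on two small helpers that live outside this excerpt. A hedged reconstruction of their likely shape (the authoritative versions are in include/linux/fs.h from the same series):

	static inline int try_break_deleg(struct inode *inode,
					  struct inode **delegated_inode)
	{
		int ret;

		/* non-blocking attempt, made while locks are still held */
		ret = break_deleg(inode, O_WRONLY | O_NONBLOCK);
		if (ret == -EWOULDBLOCK && delegated_inode) {
			*delegated_inode = inode;
			ihold(inode);
		}
		return ret;
	}

	static inline int break_deleg_wait(struct inode **delegated_inode)
	{
		int ret;

		/* blocking break, done after the caller dropped its locks */
		ret = break_deleg(*delegated_inode, O_WRONLY);
		iput(*delegated_inode);
		*delegated_inode = NULL;
		return ret;
	}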
@@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3692 struct dentry *dentry; 3684 struct dentry *dentry;
3693 struct nameidata nd; 3685 struct nameidata nd;
3694 struct inode *inode = NULL; 3686 struct inode *inode = NULL;
3687 struct inode *delegated_inode = NULL;
3695 unsigned int lookup_flags = 0; 3688 unsigned int lookup_flags = 0;
3696retry: 3689retry:
3697 name = user_path_parent(dfd, pathname, &nd, lookup_flags); 3690 name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3706,7 +3699,7 @@ retry:
3706 error = mnt_want_write(nd.path.mnt); 3699 error = mnt_want_write(nd.path.mnt);
3707 if (error) 3700 if (error)
3708 goto exit1; 3701 goto exit1;
3709 3702retry_deleg:
3710 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3703 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3711 dentry = lookup_hash(&nd); 3704 dentry = lookup_hash(&nd);
3712 error = PTR_ERR(dentry); 3705 error = PTR_ERR(dentry);
@@ -3715,19 +3708,25 @@ retry:
3715 if (nd.last.name[nd.last.len]) 3708 if (nd.last.name[nd.last.len])
3716 goto slashes; 3709 goto slashes;
3717 inode = dentry->d_inode; 3710 inode = dentry->d_inode;
3718 if (!inode) 3711 if (d_is_negative(dentry))
3719 goto slashes; 3712 goto slashes;
3720 ihold(inode); 3713 ihold(inode);
3721 error = security_path_unlink(&nd.path, dentry); 3714 error = security_path_unlink(&nd.path, dentry);
3722 if (error) 3715 if (error)
3723 goto exit2; 3716 goto exit2;
3724 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 3717 error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
3725exit2: 3718exit2:
3726 dput(dentry); 3719 dput(dentry);
3727 } 3720 }
3728 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3721 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3729 if (inode) 3722 if (inode)
3730 iput(inode); /* truncate the inode here */ 3723 iput(inode); /* truncate the inode here */
3724 inode = NULL;
3725 if (delegated_inode) {
3726 error = break_deleg_wait(&delegated_inode);
3727 if (!error)
3728 goto retry_deleg;
3729 }
3731 mnt_drop_write(nd.path.mnt); 3730 mnt_drop_write(nd.path.mnt);
3732exit1: 3731exit1:
3733 path_put(&nd.path); 3732 path_put(&nd.path);
@@ -3740,8 +3739,12 @@ exit1:
3740 return error; 3739 return error;
3741 3740
3742slashes: 3741slashes:
3743 error = !dentry->d_inode ? -ENOENT : 3742 if (d_is_negative(dentry))
3744 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 3743 error = -ENOENT;
3744 else if (d_is_directory(dentry) || d_is_autodir(dentry))
3745 error = -EISDIR;
3746 else
3747 error = -ENOTDIR;
3745 goto exit2; 3748 goto exit2;
3746} 3749}
3747 3750
@@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
3817 return sys_symlinkat(oldname, AT_FDCWD, newname); 3820 return sys_symlinkat(oldname, AT_FDCWD, newname);
3818} 3821}
3819 3822
3820int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 3823/**
3824 * vfs_link - create a new link
3825 * @old_dentry: object to be linked
3826 * @dir: new parent
3827 * @new_dentry: where to create the new link
3828 * @delegated_inode: returns inode needing a delegation break
3829 *
3830 * The caller must hold dir->i_mutex
3831 *
3832 * If vfs_link discovers a delegation on the to-be-linked file in need
3833 * of breaking, it will return -EWOULDBLOCK and return a reference to the
3834 * inode in delegated_inode. The caller should then break the delegation
3835 * and retry. Because breaking a delegation may take a long time, the
3836 * caller should drop the i_mutex before doing so.
3837 *
3838 * Alternatively, a caller may pass NULL for delegated_inode. This may
3839 * be appropriate for callers that expect the underlying filesystem not
3840 * to be NFS exported.
3841 */
3842int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
3821{ 3843{
3822 struct inode *inode = old_dentry->d_inode; 3844 struct inode *inode = old_dentry->d_inode;
3823 unsigned max_links = dir->i_sb->s_max_links; 3845 unsigned max_links = dir->i_sb->s_max_links;
@@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3853 error = -ENOENT; 3875 error = -ENOENT;
3854 else if (max_links && inode->i_nlink >= max_links) 3876 else if (max_links && inode->i_nlink >= max_links)
3855 error = -EMLINK; 3877 error = -EMLINK;
3856 else 3878 else {
3857 error = dir->i_op->link(old_dentry, dir, new_dentry); 3879 error = try_break_deleg(inode, delegated_inode);
3880 if (!error)
3881 error = dir->i_op->link(old_dentry, dir, new_dentry);
3882 }
3858 3883
3859 if (!error && (inode->i_state & I_LINKABLE)) { 3884 if (!error && (inode->i_state & I_LINKABLE)) {
3860 spin_lock(&inode->i_lock); 3885 spin_lock(&inode->i_lock);
@@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3881{ 3906{
3882 struct dentry *new_dentry; 3907 struct dentry *new_dentry;
3883 struct path old_path, new_path; 3908 struct path old_path, new_path;
3909 struct inode *delegated_inode = NULL;
3884 int how = 0; 3910 int how = 0;
3885 int error; 3911 int error;
3886 3912
@@ -3919,9 +3945,14 @@ retry:
3919 error = security_path_link(old_path.dentry, &new_path, new_dentry); 3945 error = security_path_link(old_path.dentry, &new_path, new_dentry);
3920 if (error) 3946 if (error)
3921 goto out_dput; 3947 goto out_dput;
3922 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); 3948 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
3923out_dput: 3949out_dput:
3924 done_path_create(&new_path, new_dentry); 3950 done_path_create(&new_path, new_dentry);
3951 if (delegated_inode) {
3952 error = break_deleg_wait(&delegated_inode);
3953 if (!error)
3954 goto retry;
3955 }
3925 if (retry_estale(error, how)) { 3956 if (retry_estale(error, how)) {
3926 how |= LOOKUP_REVAL; 3957 how |= LOOKUP_REVAL;
3927 goto retry; 3958 goto retry;
@@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
3946 * That's where 4.4 screws up. Current fix: serialization on 3977 * That's where 4.4 screws up. Current fix: serialization on
3947 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another 3978 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
3948 * story. 3979 * story.
3949 * c) we have to lock _three_ objects - parents and victim (if it exists). 3980 * c) we have to lock _four_ objects - parents and victim (if it exists),
3981 * and source (if it is not a directory).
3950 * And that - after we got ->i_mutex on parents (until then we don't know 3982 * And that - after we got ->i_mutex on parents (until then we don't know
3951 * whether the target exists). Solution: try to be smart with locking 3983 * whether the target exists). Solution: try to be smart with locking
3952 * order for inodes. We rely on the fact that tree topology may change 3984 * order for inodes. We rely on the fact that tree topology may change
@@ -4019,9 +4051,11 @@ out:
4019} 4051}
4020 4052
4021static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 4053static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4022 struct inode *new_dir, struct dentry *new_dentry) 4054 struct inode *new_dir, struct dentry *new_dentry,
4055 struct inode **delegated_inode)
4023{ 4056{
4024 struct inode *target = new_dentry->d_inode; 4057 struct inode *target = new_dentry->d_inode;
4058 struct inode *source = old_dentry->d_inode;
4025 int error; 4059 int error;
4026 4060
4027 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 4061 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4029 return error; 4063 return error;
4030 4064
4031 dget(new_dentry); 4065 dget(new_dentry);
4032 if (target) 4066 lock_two_nondirectories(source, target);
4033 mutex_lock(&target->i_mutex);
4034 4067
4035 error = -EBUSY; 4068 error = -EBUSY;
4036 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 4069 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
4037 goto out; 4070 goto out;
4038 4071
4072 error = try_break_deleg(source, delegated_inode);
4073 if (error)
4074 goto out;
4075 if (target) {
4076 error = try_break_deleg(target, delegated_inode);
4077 if (error)
4078 goto out;
4079 }
4039 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 4080 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
4040 if (error) 4081 if (error)
4041 goto out; 4082 goto out;
@@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4045 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 4086 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
4046 d_move(old_dentry, new_dentry); 4087 d_move(old_dentry, new_dentry);
4047out: 4088out:
4048 if (target) 4089 unlock_two_nondirectories(source, target);
4049 mutex_unlock(&target->i_mutex);
4050 dput(new_dentry); 4090 dput(new_dentry);
4051 return error; 4091 return error;
4052} 4092}
4053 4093
4094/**
4095 * vfs_rename - rename a filesystem object
4096 * @old_dir: parent of source
4097 * @old_dentry: source
4098 * @new_dir: parent of destination
4099 * @new_dentry: destination
4100 * @delegated_inode: returns an inode needing a delegation break
4101 *
 4102 * The caller must hold multiple mutexes--see lock_rename().
4103 *
4104 * If vfs_rename discovers a delegation in need of breaking at either
4105 * the source or destination, it will return -EWOULDBLOCK and return a
4106 * reference to the inode in delegated_inode. The caller should then
4107 * break the delegation and retry. Because breaking a delegation may
4108 * take a long time, the caller should drop all locks before doing
4109 * so.
4110 *
4111 * Alternatively, a caller may pass NULL for delegated_inode. This may
4112 * be appropriate for callers that expect the underlying filesystem not
4113 * to be NFS exported.
4114 */
4054int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 4115int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4055 struct inode *new_dir, struct dentry *new_dentry) 4116 struct inode *new_dir, struct dentry *new_dentry,
4117 struct inode **delegated_inode)
4056{ 4118{
4057 int error; 4119 int error;
4058 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 4120 int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
4059 const unsigned char *old_name; 4121 const unsigned char *old_name;
4060 4122
4061 if (old_dentry->d_inode == new_dentry->d_inode) 4123 if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4080 if (is_dir) 4142 if (is_dir)
4081 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 4143 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
4082 else 4144 else
4083 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 4145 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
4084 if (!error) 4146 if (!error)
4085 fsnotify_move(old_dir, new_dir, old_name, is_dir, 4147 fsnotify_move(old_dir, new_dir, old_name, is_dir,
4086 new_dentry->d_inode, old_dentry); 4148 new_dentry->d_inode, old_dentry);
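lock_two_nondirectories(), used in vfs_rename_other() above and added elsewhere in this series, is what satisfies the "lock _four_ objects" rule from the earlier comment: both inode mutexes are taken in a stable order so concurrent renames cannot deadlock. A hypothetical sketch of that idea (example_lock_two() is illustrative, not the real fs/inode.c helper, which also uses a dedicated lockdep subclass):

	static void example_lock_two(struct inode *a, struct inode *b)
	{
		if (!b || a == b) {
			mutex_lock(&a->i_mutex);
			return;
		}
		if (a > b)
			swap(a, b);	/* pointer order gives a global ordering */
		mutex_lock(&a->i_mutex);
		mutex_lock_nested(&b->i_mutex, SINGLE_DEPTH_NESTING);
	}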
@@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
4096 struct dentry *old_dentry, *new_dentry; 4158 struct dentry *old_dentry, *new_dentry;
4097 struct dentry *trap; 4159 struct dentry *trap;
4098 struct nameidata oldnd, newnd; 4160 struct nameidata oldnd, newnd;
4161 struct inode *delegated_inode = NULL;
4099 struct filename *from; 4162 struct filename *from;
4100 struct filename *to; 4163 struct filename *to;
4101 unsigned int lookup_flags = 0; 4164 unsigned int lookup_flags = 0;
@@ -4135,6 +4198,7 @@ retry:
4135 newnd.flags &= ~LOOKUP_PARENT; 4198 newnd.flags &= ~LOOKUP_PARENT;
4136 newnd.flags |= LOOKUP_RENAME_TARGET; 4199 newnd.flags |= LOOKUP_RENAME_TARGET;
4137 4200
4201retry_deleg:
4138 trap = lock_rename(new_dir, old_dir); 4202 trap = lock_rename(new_dir, old_dir);
4139 4203
4140 old_dentry = lookup_hash(&oldnd); 4204 old_dentry = lookup_hash(&oldnd);
@@ -4143,10 +4207,10 @@ retry:
4143 goto exit3; 4207 goto exit3;
4144 /* source must exist */ 4208 /* source must exist */
4145 error = -ENOENT; 4209 error = -ENOENT;
4146 if (!old_dentry->d_inode) 4210 if (d_is_negative(old_dentry))
4147 goto exit4; 4211 goto exit4;
4148 /* unless the source is a directory trailing slashes give -ENOTDIR */ 4212 /* unless the source is a directory trailing slashes give -ENOTDIR */
4149 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 4213 if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
4150 error = -ENOTDIR; 4214 error = -ENOTDIR;
4151 if (oldnd.last.name[oldnd.last.len]) 4215 if (oldnd.last.name[oldnd.last.len])
4152 goto exit4; 4216 goto exit4;
@@ -4171,13 +4235,19 @@ retry:
4171 if (error) 4235 if (error)
4172 goto exit5; 4236 goto exit5;
4173 error = vfs_rename(old_dir->d_inode, old_dentry, 4237 error = vfs_rename(old_dir->d_inode, old_dentry,
4174 new_dir->d_inode, new_dentry); 4238 new_dir->d_inode, new_dentry,
4239 &delegated_inode);
4175exit5: 4240exit5:
4176 dput(new_dentry); 4241 dput(new_dentry);
4177exit4: 4242exit4:
4178 dput(old_dentry); 4243 dput(old_dentry);
4179exit3: 4244exit3:
4180 unlock_rename(new_dir, old_dir); 4245 unlock_rename(new_dir, old_dir);
4246 if (delegated_inode) {
4247 error = break_deleg_wait(&delegated_inode);
4248 if (!error)
4249 goto retry_deleg;
4250 }
4181 mnt_drop_write(oldnd.path.mnt); 4251 mnt_drop_write(oldnd.path.mnt);
4182exit2: 4252exit2:
4183 if (retry_estale(error, lookup_flags)) 4253 if (retry_estale(error, lookup_flags))
diff --git a/fs/namespace.c b/fs/namespace.c
index da5c49483430..ac2ce8a766e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,7 +39,7 @@ static int mnt_group_start = 1;
39static struct list_head *mount_hashtable __read_mostly; 39static struct list_head *mount_hashtable __read_mostly;
40static struct list_head *mountpoint_hashtable __read_mostly; 40static struct list_head *mountpoint_hashtable __read_mostly;
41static struct kmem_cache *mnt_cache __read_mostly; 41static struct kmem_cache *mnt_cache __read_mostly;
42static struct rw_semaphore namespace_sem; 42static DECLARE_RWSEM(namespace_sem);
43 43
44/* /sys/fs */ 44/* /sys/fs */
45struct kobject *fs_kobj; 45struct kobject *fs_kobj;
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
53 * It should be taken for write in all cases where the vfsmount 53 * It should be taken for write in all cases where the vfsmount
54 * tree or hash is modified or when a vfsmount structure is modified. 54 * tree or hash is modified or when a vfsmount structure is modified.
55 */ 55 */
56DEFINE_BRLOCK(vfsmount_lock); 56__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
57 57
58static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 58static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
59{ 59{
@@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
63 return tmp & (HASH_SIZE - 1); 63 return tmp & (HASH_SIZE - 1);
64} 64}
65 65
66#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
67
68/* 66/*
69 * allocation is serialized by namespace_sem, but we need the spinlock to 67 * allocation is serialized by namespace_sem, but we need the spinlock to
70 * serialize with freeing. 68 * serialize with freeing.
@@ -458,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt)
458{ 456{
459 int ret = 0; 457 int ret = 0;
460 458
461 br_write_lock(&vfsmount_lock); 459 lock_mount_hash();
462 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; 460 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
463 /* 461 /*
464 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 462 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -492,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt)
492 */ 490 */
493 smp_wmb(); 491 smp_wmb();
494 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 492 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
495 br_write_unlock(&vfsmount_lock); 493 unlock_mount_hash();
496 return ret; 494 return ret;
497} 495}
498 496
499static void __mnt_unmake_readonly(struct mount *mnt) 497static void __mnt_unmake_readonly(struct mount *mnt)
500{ 498{
501 br_write_lock(&vfsmount_lock); 499 lock_mount_hash();
502 mnt->mnt.mnt_flags &= ~MNT_READONLY; 500 mnt->mnt.mnt_flags &= ~MNT_READONLY;
503 br_write_unlock(&vfsmount_lock); 501 unlock_mount_hash();
504} 502}
505 503
506int sb_prepare_remount_readonly(struct super_block *sb) 504int sb_prepare_remount_readonly(struct super_block *sb)
@@ -512,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
512 if (atomic_long_read(&sb->s_remove_count)) 510 if (atomic_long_read(&sb->s_remove_count))
513 return -EBUSY; 511 return -EBUSY;
514 512
515 br_write_lock(&vfsmount_lock); 513 lock_mount_hash();
516 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { 514 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
517 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { 515 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
518 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; 516 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -534,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
534 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) 532 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
535 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 533 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
536 } 534 }
537 br_write_unlock(&vfsmount_lock); 535 unlock_mount_hash();
538 536
539 return err; 537 return err;
540} 538}
@@ -549,30 +547,56 @@ static void free_vfsmnt(struct mount *mnt)
549 kmem_cache_free(mnt_cache, mnt); 547 kmem_cache_free(mnt_cache, mnt);
550} 548}
551 549
550/* call under rcu_read_lock */
551bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
552{
553 struct mount *mnt;
554 if (read_seqretry(&mount_lock, seq))
555 return false;
556 if (bastard == NULL)
557 return true;
558 mnt = real_mount(bastard);
559 mnt_add_count(mnt, 1);
560 if (likely(!read_seqretry(&mount_lock, seq)))
561 return true;
562 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
563 mnt_add_count(mnt, -1);
564 return false;
565 }
566 rcu_read_unlock();
567 mntput(bastard);
568 rcu_read_lock();
569 return false;
570}
571
552/* 572/*
553 * find the first or last mount at @dentry on vfsmount @mnt depending on 573 * find the first mount at @dentry on vfsmount @mnt.
554 * @dir. If @dir is set return the first mount else return the last mount. 574 * call under rcu_read_lock()
555 * vfsmount_lock must be held for read or write.
556 */ 575 */
557struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 576struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
558 int dir)
559{ 577{
560 struct list_head *head = mount_hashtable + hash(mnt, dentry); 578 struct list_head *head = mount_hashtable + hash(mnt, dentry);
561 struct list_head *tmp = head; 579 struct mount *p;
562 struct mount *p, *found = NULL;
563 580
564 for (;;) { 581 list_for_each_entry_rcu(p, head, mnt_hash)
565 tmp = dir ? tmp->next : tmp->prev; 582 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
566 p = NULL; 583 return p;
567 if (tmp == head) 584 return NULL;
568 break; 585}
569 p = list_entry(tmp, struct mount, mnt_hash); 586
570 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) { 587/*
571 found = p; 588 * find the last mount at @dentry on vfsmount @mnt.
572 break; 589 * mount_lock must be held.
573 } 590 */
574 } 591struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
575 return found; 592{
593 struct list_head *head = mount_hashtable + hash(mnt, dentry);
594 struct mount *p;
595
596 list_for_each_entry_reverse(p, head, mnt_hash)
597 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
598 return p;
599 return NULL;
576} 600}
577 601
578/* 602/*
@@ -594,17 +618,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
594struct vfsmount *lookup_mnt(struct path *path) 618struct vfsmount *lookup_mnt(struct path *path)
595{ 619{
596 struct mount *child_mnt; 620 struct mount *child_mnt;
621 struct vfsmount *m;
622 unsigned seq;
597 623
598 br_read_lock(&vfsmount_lock); 624 rcu_read_lock();
599 child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); 625 do {
600 if (child_mnt) { 626 seq = read_seqbegin(&mount_lock);
601 mnt_add_count(child_mnt, 1); 627 child_mnt = __lookup_mnt(path->mnt, path->dentry);
602 br_read_unlock(&vfsmount_lock); 628 m = child_mnt ? &child_mnt->mnt : NULL;
603 return &child_mnt->mnt; 629 } while (!legitimize_mnt(m, seq));
604 } else { 630 rcu_read_unlock();
605 br_read_unlock(&vfsmount_lock); 631 return m;
606 return NULL;
607 }
608} 632}
609 633
610static struct mountpoint *new_mountpoint(struct dentry *dentry) 634static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -796,9 +820,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
796 mnt->mnt.mnt_sb = root->d_sb; 820 mnt->mnt.mnt_sb = root->d_sb;
797 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 821 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
798 mnt->mnt_parent = mnt; 822 mnt->mnt_parent = mnt;
799 br_write_lock(&vfsmount_lock); 823 lock_mount_hash();
800 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); 824 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
801 br_write_unlock(&vfsmount_lock); 825 unlock_mount_hash();
802 return &mnt->mnt; 826 return &mnt->mnt;
803} 827}
804EXPORT_SYMBOL_GPL(vfs_kern_mount); 828EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -839,9 +863,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
839 mnt->mnt.mnt_root = dget(root); 863 mnt->mnt.mnt_root = dget(root);
840 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 864 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
841 mnt->mnt_parent = mnt; 865 mnt->mnt_parent = mnt;
842 br_write_lock(&vfsmount_lock); 866 lock_mount_hash();
843 list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 867 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
844 br_write_unlock(&vfsmount_lock); 868 unlock_mount_hash();
845 869
846 if ((flag & CL_SLAVE) || 870 if ((flag & CL_SLAVE) ||
847 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { 871 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
@@ -872,64 +896,66 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
872 return ERR_PTR(err); 896 return ERR_PTR(err);
873} 897}
874 898
875static inline void mntfree(struct mount *mnt) 899static void delayed_free(struct rcu_head *head)
876{ 900{
877 struct vfsmount *m = &mnt->mnt; 901 struct mount *mnt = container_of(head, struct mount, mnt_rcu);
878 struct super_block *sb = m->mnt_sb; 902 kfree(mnt->mnt_devname);
879 903#ifdef CONFIG_SMP
880 /* 904 free_percpu(mnt->mnt_pcp);
881 * This probably indicates that somebody messed 905#endif
882 * up a mnt_want/drop_write() pair. If this 906 kmem_cache_free(mnt_cache, mnt);
883 * happens, the filesystem was probably unable
884 * to make r/w->r/o transitions.
885 */
886 /*
887 * The locking used to deal with mnt_count decrement provides barriers,
888 * so mnt_get_writers() below is safe.
889 */
890 WARN_ON(mnt_get_writers(mnt));
891 fsnotify_vfsmount_delete(m);
892 dput(m->mnt_root);
893 free_vfsmnt(mnt);
894 deactivate_super(sb);
895} 907}
896 908
897static void mntput_no_expire(struct mount *mnt) 909static void mntput_no_expire(struct mount *mnt)
898{ 910{
899put_again: 911put_again:
900#ifdef CONFIG_SMP 912 rcu_read_lock();
901 br_read_lock(&vfsmount_lock); 913 mnt_add_count(mnt, -1);
902 if (likely(mnt->mnt_ns)) { 914 if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
903 /* shouldn't be the last one */ 915 rcu_read_unlock();
904 mnt_add_count(mnt, -1);
905 br_read_unlock(&vfsmount_lock);
906 return; 916 return;
907 } 917 }
908 br_read_unlock(&vfsmount_lock); 918 lock_mount_hash();
909
910 br_write_lock(&vfsmount_lock);
911 mnt_add_count(mnt, -1);
912 if (mnt_get_count(mnt)) { 919 if (mnt_get_count(mnt)) {
913 br_write_unlock(&vfsmount_lock); 920 rcu_read_unlock();
921 unlock_mount_hash();
914 return; 922 return;
915 } 923 }
916#else
917 mnt_add_count(mnt, -1);
918 if (likely(mnt_get_count(mnt)))
919 return;
920 br_write_lock(&vfsmount_lock);
921#endif
922 if (unlikely(mnt->mnt_pinned)) { 924 if (unlikely(mnt->mnt_pinned)) {
923 mnt_add_count(mnt, mnt->mnt_pinned + 1); 925 mnt_add_count(mnt, mnt->mnt_pinned + 1);
924 mnt->mnt_pinned = 0; 926 mnt->mnt_pinned = 0;
925 br_write_unlock(&vfsmount_lock); 927 rcu_read_unlock();
928 unlock_mount_hash();
926 acct_auto_close_mnt(&mnt->mnt); 929 acct_auto_close_mnt(&mnt->mnt);
927 goto put_again; 930 goto put_again;
928 } 931 }
932 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
933 rcu_read_unlock();
934 unlock_mount_hash();
935 return;
936 }
937 mnt->mnt.mnt_flags |= MNT_DOOMED;
938 rcu_read_unlock();
929 939
930 list_del(&mnt->mnt_instance); 940 list_del(&mnt->mnt_instance);
931 br_write_unlock(&vfsmount_lock); 941 unlock_mount_hash();
932 mntfree(mnt); 942
943 /*
944 * This probably indicates that somebody messed
945 * up a mnt_want/drop_write() pair. If this
946 * happens, the filesystem was probably unable
947 * to make r/w->r/o transitions.
948 */
949 /*
950 * The locking used to deal with mnt_count decrement provides barriers,
951 * so mnt_get_writers() below is safe.
952 */
953 WARN_ON(mnt_get_writers(mnt));
954 fsnotify_vfsmount_delete(&mnt->mnt);
955 dput(mnt->mnt.mnt_root);
956 deactivate_super(mnt->mnt.mnt_sb);
957 mnt_free_id(mnt);
958 call_rcu(&mnt->mnt_rcu, delayed_free);
933} 959}
934 960
935void mntput(struct vfsmount *mnt) 961void mntput(struct vfsmount *mnt)
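mntput_no_expire() now marks the dying mount MNT_DOOMED under the hash lock and defers the actual free to delayed_free() via call_rcu(), so the lockless __lookup_mnt()/legitimize_mnt() readers can never touch freed memory. The generic shape of that idiom, with a hypothetical object (not the real struct mount):

	struct example_obj {
		struct rcu_head rcu;
		struct list_head node;
	};

	static void example_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct example_obj, rcu));
	}

	static void example_release(struct example_obj *obj)
	{
		/* unhash under the writer lock first, then let readers drain */
		call_rcu(&obj->rcu, example_free_rcu);
	}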
@@ -954,21 +980,21 @@ EXPORT_SYMBOL(mntget);
954 980
955void mnt_pin(struct vfsmount *mnt) 981void mnt_pin(struct vfsmount *mnt)
956{ 982{
957 br_write_lock(&vfsmount_lock); 983 lock_mount_hash();
958 real_mount(mnt)->mnt_pinned++; 984 real_mount(mnt)->mnt_pinned++;
959 br_write_unlock(&vfsmount_lock); 985 unlock_mount_hash();
960} 986}
961EXPORT_SYMBOL(mnt_pin); 987EXPORT_SYMBOL(mnt_pin);
962 988
963void mnt_unpin(struct vfsmount *m) 989void mnt_unpin(struct vfsmount *m)
964{ 990{
965 struct mount *mnt = real_mount(m); 991 struct mount *mnt = real_mount(m);
966 br_write_lock(&vfsmount_lock); 992 lock_mount_hash();
967 if (mnt->mnt_pinned) { 993 if (mnt->mnt_pinned) {
968 mnt_add_count(mnt, 1); 994 mnt_add_count(mnt, 1);
969 mnt->mnt_pinned--; 995 mnt->mnt_pinned--;
970 } 996 }
971 br_write_unlock(&vfsmount_lock); 997 unlock_mount_hash();
972} 998}
973EXPORT_SYMBOL(mnt_unpin); 999EXPORT_SYMBOL(mnt_unpin);
974 1000
@@ -1085,12 +1111,12 @@ int may_umount_tree(struct vfsmount *m)
1085 BUG_ON(!m); 1111 BUG_ON(!m);
1086 1112
1087 /* write lock needed for mnt_get_count */ 1113 /* write lock needed for mnt_get_count */
1088 br_write_lock(&vfsmount_lock); 1114 lock_mount_hash();
1089 for (p = mnt; p; p = next_mnt(p, mnt)) { 1115 for (p = mnt; p; p = next_mnt(p, mnt)) {
1090 actual_refs += mnt_get_count(p); 1116 actual_refs += mnt_get_count(p);
1091 minimum_refs += 2; 1117 minimum_refs += 2;
1092 } 1118 }
1093 br_write_unlock(&vfsmount_lock); 1119 unlock_mount_hash();
1094 1120
1095 if (actual_refs > minimum_refs) 1121 if (actual_refs > minimum_refs)
1096 return 0; 1122 return 0;
@@ -1117,10 +1143,10 @@ int may_umount(struct vfsmount *mnt)
1117{ 1143{
1118 int ret = 1; 1144 int ret = 1;
1119 down_read(&namespace_sem); 1145 down_read(&namespace_sem);
1120 br_write_lock(&vfsmount_lock); 1146 lock_mount_hash();
1121 if (propagate_mount_busy(real_mount(mnt), 2)) 1147 if (propagate_mount_busy(real_mount(mnt), 2))
1122 ret = 0; 1148 ret = 0;
1123 br_write_unlock(&vfsmount_lock); 1149 unlock_mount_hash();
1124 up_read(&namespace_sem); 1150 up_read(&namespace_sem);
1125 return ret; 1151 return ret;
1126} 1152}
@@ -1142,23 +1168,13 @@ static void namespace_unlock(void)
1142 list_splice_init(&unmounted, &head); 1168 list_splice_init(&unmounted, &head);
1143 up_write(&namespace_sem); 1169 up_write(&namespace_sem);
1144 1170
1171 synchronize_rcu();
1172
1145 while (!list_empty(&head)) { 1173 while (!list_empty(&head)) {
1146 mnt = list_first_entry(&head, struct mount, mnt_hash); 1174 mnt = list_first_entry(&head, struct mount, mnt_hash);
1147 list_del_init(&mnt->mnt_hash); 1175 list_del_init(&mnt->mnt_hash);
1148 if (mnt_has_parent(mnt)) { 1176 if (mnt->mnt_ex_mountpoint.mnt)
1149 struct dentry *dentry; 1177 path_put(&mnt->mnt_ex_mountpoint);
1150 struct mount *m;
1151
1152 br_write_lock(&vfsmount_lock);
1153 dentry = mnt->mnt_mountpoint;
1154 m = mnt->mnt_parent;
1155 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1156 mnt->mnt_parent = mnt;
1157 m->mnt_ghosts--;
1158 br_write_unlock(&vfsmount_lock);
1159 dput(dentry);
1160 mntput(&m->mnt);
1161 }
1162 mntput(&mnt->mnt); 1178 mntput(&mnt->mnt);
1163 } 1179 }
1164} 1180}
@@ -1169,10 +1185,13 @@ static inline void namespace_lock(void)
1169} 1185}
1170 1186
1171/* 1187/*
1172 * vfsmount lock must be held for write 1188 * mount_lock must be held
1173 * namespace_sem must be held for write 1189 * namespace_sem must be held for write
1190 * how = 0 => just this tree, don't propagate
1191 * how = 1 => propagate; we know that nobody else has reference to any victims
1192 * how = 2 => lazy umount
1174 */ 1193 */
1175void umount_tree(struct mount *mnt, int propagate) 1194void umount_tree(struct mount *mnt, int how)
1176{ 1195{
1177 LIST_HEAD(tmp_list); 1196 LIST_HEAD(tmp_list);
1178 struct mount *p; 1197 struct mount *p;
@@ -1180,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
1180 for (p = mnt; p; p = next_mnt(p, mnt)) 1199 for (p = mnt; p; p = next_mnt(p, mnt))
1181 list_move(&p->mnt_hash, &tmp_list); 1200 list_move(&p->mnt_hash, &tmp_list);
1182 1201
1183 if (propagate) 1202 if (how)
1184 propagate_umount(&tmp_list); 1203 propagate_umount(&tmp_list);
1185 1204
1186 list_for_each_entry(p, &tmp_list, mnt_hash) { 1205 list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1188,10 +1207,16 @@ void umount_tree(struct mount *mnt, int propagate)
1188 list_del_init(&p->mnt_list); 1207 list_del_init(&p->mnt_list);
1189 __touch_mnt_namespace(p->mnt_ns); 1208 __touch_mnt_namespace(p->mnt_ns);
1190 p->mnt_ns = NULL; 1209 p->mnt_ns = NULL;
1210 if (how < 2)
1211 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1191 list_del_init(&p->mnt_child); 1212 list_del_init(&p->mnt_child);
1192 if (mnt_has_parent(p)) { 1213 if (mnt_has_parent(p)) {
1193 p->mnt_parent->mnt_ghosts++;
1194 put_mountpoint(p->mnt_mp); 1214 put_mountpoint(p->mnt_mp);
1215 /* move the reference to mountpoint into ->mnt_ex_mountpoint */
1216 p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
1217 p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
1218 p->mnt_mountpoint = p->mnt.mnt_root;
1219 p->mnt_parent = p;
1195 p->mnt_mp = NULL; 1220 p->mnt_mp = NULL;
1196 } 1221 }
1197 change_mnt_propagation(p, MS_PRIVATE); 1222 change_mnt_propagation(p, MS_PRIVATE);
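
In the umount_tree() hunk above, the second argument is renamed from "propagate" to "how" with three documented values (0: just this tree, 1: propagate, 2: lazy umount), MNT_SYNC_UMOUNT is set for the non-lazy cases, and instead of bumping the parent's mnt_ghosts the reference to the old mountpoint is parked in ->mnt_ex_mountpoint. The earlier namespace_unlock() hunk then drops that reference with path_put() after synchronize_rcu(). A small sketch of that stash-and-release idiom, with hypothetical names:

#include <linux/path.h>
#include <linux/dcache.h>
#include <linux/mount.h>

struct detached_mount {
	struct path ex_mountpoint;	/* parent mount + mountpoint dentry */
};

static void stash_mountpoint(struct detached_mount *d,
			     struct dentry *mountpoint, struct vfsmount *parent)
{
	/* the existing references are transferred, not re-taken */
	d->ex_mountpoint.dentry = mountpoint;
	d->ex_mountpoint.mnt = parent;
}

static void release_stashed(struct detached_mount *d)
{
	if (d->ex_mountpoint.mnt)
		path_put(&d->ex_mountpoint);	/* dput() + mntput() in one call */
}
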
@@ -1225,12 +1250,12 @@ static int do_umount(struct mount *mnt, int flags)
1225 * probably don't strictly need the lock here if we examined 1250 * probably don't strictly need the lock here if we examined
1226 * all race cases, but it's a slowpath. 1251 * all race cases, but it's a slowpath.
1227 */ 1252 */
1228 br_write_lock(&vfsmount_lock); 1253 lock_mount_hash();
1229 if (mnt_get_count(mnt) != 2) { 1254 if (mnt_get_count(mnt) != 2) {
1230 br_write_unlock(&vfsmount_lock); 1255 unlock_mount_hash();
1231 return -EBUSY; 1256 return -EBUSY;
1232 } 1257 }
1233 br_write_unlock(&vfsmount_lock); 1258 unlock_mount_hash();
1234 1259
1235 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1260 if (!xchg(&mnt->mnt_expiry_mark, 1))
1236 return -EAGAIN; 1261 return -EAGAIN;
@@ -1272,19 +1297,23 @@ static int do_umount(struct mount *mnt, int flags)
1272 } 1297 }
1273 1298
1274 namespace_lock(); 1299 namespace_lock();
1275 br_write_lock(&vfsmount_lock); 1300 lock_mount_hash();
1276 event++; 1301 event++;
1277 1302
1278 if (!(flags & MNT_DETACH)) 1303 if (flags & MNT_DETACH) {
1279 shrink_submounts(mnt);
1280
1281 retval = -EBUSY;
1282 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1283 if (!list_empty(&mnt->mnt_list)) 1304 if (!list_empty(&mnt->mnt_list))
1284 umount_tree(mnt, 1); 1305 umount_tree(mnt, 2);
1285 retval = 0; 1306 retval = 0;
1307 } else {
1308 shrink_submounts(mnt);
1309 retval = -EBUSY;
1310 if (!propagate_mount_busy(mnt, 2)) {
1311 if (!list_empty(&mnt->mnt_list))
1312 umount_tree(mnt, 1);
1313 retval = 0;
1314 }
1286 } 1315 }
1287 br_write_unlock(&vfsmount_lock); 1316 unlock_mount_hash();
1288 namespace_unlock(); 1317 namespace_unlock();
1289 return retval; 1318 return retval;
1290} 1319}
@@ -1427,18 +1456,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1427 q = clone_mnt(p, p->mnt.mnt_root, flag); 1456 q = clone_mnt(p, p->mnt.mnt_root, flag);
1428 if (IS_ERR(q)) 1457 if (IS_ERR(q))
1429 goto out; 1458 goto out;
1430 br_write_lock(&vfsmount_lock); 1459 lock_mount_hash();
1431 list_add_tail(&q->mnt_list, &res->mnt_list); 1460 list_add_tail(&q->mnt_list, &res->mnt_list);
1432 attach_mnt(q, parent, p->mnt_mp); 1461 attach_mnt(q, parent, p->mnt_mp);
1433 br_write_unlock(&vfsmount_lock); 1462 unlock_mount_hash();
1434 } 1463 }
1435 } 1464 }
1436 return res; 1465 return res;
1437out: 1466out:
1438 if (res) { 1467 if (res) {
1439 br_write_lock(&vfsmount_lock); 1468 lock_mount_hash();
1440 umount_tree(res, 0); 1469 umount_tree(res, 0);
1441 br_write_unlock(&vfsmount_lock); 1470 unlock_mount_hash();
1442 } 1471 }
1443 return q; 1472 return q;
1444} 1473}
@@ -1460,9 +1489,9 @@ struct vfsmount *collect_mounts(struct path *path)
1460void drop_collected_mounts(struct vfsmount *mnt) 1489void drop_collected_mounts(struct vfsmount *mnt)
1461{ 1490{
1462 namespace_lock(); 1491 namespace_lock();
1463 br_write_lock(&vfsmount_lock); 1492 lock_mount_hash();
1464 umount_tree(real_mount(mnt), 0); 1493 umount_tree(real_mount(mnt), 0);
1465 br_write_unlock(&vfsmount_lock); 1494 unlock_mount_hash();
1466 namespace_unlock(); 1495 namespace_unlock();
1467} 1496}
1468 1497
@@ -1589,7 +1618,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1589 if (err) 1618 if (err)
1590 goto out_cleanup_ids; 1619 goto out_cleanup_ids;
1591 1620
1592 br_write_lock(&vfsmount_lock); 1621 lock_mount_hash();
1593 1622
1594 if (IS_MNT_SHARED(dest_mnt)) { 1623 if (IS_MNT_SHARED(dest_mnt)) {
1595 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1624 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1608,7 +1637,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1608 list_del_init(&child->mnt_hash); 1637 list_del_init(&child->mnt_hash);
1609 commit_tree(child); 1638 commit_tree(child);
1610 } 1639 }
1611 br_write_unlock(&vfsmount_lock); 1640 unlock_mount_hash();
1612 1641
1613 return 0; 1642 return 0;
1614 1643
@@ -1710,10 +1739,10 @@ static int do_change_type(struct path *path, int flag)
1710 goto out_unlock; 1739 goto out_unlock;
1711 } 1740 }
1712 1741
1713 br_write_lock(&vfsmount_lock); 1742 lock_mount_hash();
1714 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1743 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1715 change_mnt_propagation(m, type); 1744 change_mnt_propagation(m, type);
1716 br_write_unlock(&vfsmount_lock); 1745 unlock_mount_hash();
1717 1746
1718 out_unlock: 1747 out_unlock:
1719 namespace_unlock(); 1748 namespace_unlock();
@@ -1785,9 +1814,9 @@ static int do_loopback(struct path *path, const char *old_name,
1785 1814
1786 err = graft_tree(mnt, parent, mp); 1815 err = graft_tree(mnt, parent, mp);
1787 if (err) { 1816 if (err) {
1788 br_write_lock(&vfsmount_lock); 1817 lock_mount_hash();
1789 umount_tree(mnt, 0); 1818 umount_tree(mnt, 0);
1790 br_write_unlock(&vfsmount_lock); 1819 unlock_mount_hash();
1791 } 1820 }
1792out2: 1821out2:
1793 unlock_mount(mp); 1822 unlock_mount(mp);
@@ -1846,17 +1875,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1846 else 1875 else
1847 err = do_remount_sb(sb, flags, data, 0); 1876 err = do_remount_sb(sb, flags, data, 0);
1848 if (!err) { 1877 if (!err) {
1849 br_write_lock(&vfsmount_lock); 1878 lock_mount_hash();
1850 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; 1879 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
1851 mnt->mnt.mnt_flags = mnt_flags; 1880 mnt->mnt.mnt_flags = mnt_flags;
1852 br_write_unlock(&vfsmount_lock);
1853 }
1854 up_write(&sb->s_umount);
1855 if (!err) {
1856 br_write_lock(&vfsmount_lock);
1857 touch_mnt_namespace(mnt->mnt_ns); 1881 touch_mnt_namespace(mnt->mnt_ns);
1858 br_write_unlock(&vfsmount_lock); 1882 unlock_mount_hash();
1859 } 1883 }
1884 up_write(&sb->s_umount);
1860 return err; 1885 return err;
1861} 1886}
1862 1887
@@ -1972,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1972 struct mount *parent; 1997 struct mount *parent;
1973 int err; 1998 int err;
1974 1999
1975 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); 2000 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
1976 2001
1977 mp = lock_mount(path); 2002 mp = lock_mount(path);
1978 if (IS_ERR(mp)) 2003 if (IS_ERR(mp))
@@ -2077,9 +2102,7 @@ fail:
2077 /* remove m from any expiration list it may be on */ 2102 /* remove m from any expiration list it may be on */
2078 if (!list_empty(&mnt->mnt_expire)) { 2103 if (!list_empty(&mnt->mnt_expire)) {
2079 namespace_lock(); 2104 namespace_lock();
2080 br_write_lock(&vfsmount_lock);
2081 list_del_init(&mnt->mnt_expire); 2105 list_del_init(&mnt->mnt_expire);
2082 br_write_unlock(&vfsmount_lock);
2083 namespace_unlock(); 2106 namespace_unlock();
2084 } 2107 }
2085 mntput(m); 2108 mntput(m);
@@ -2095,11 +2118,9 @@ fail:
2095void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) 2118void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2096{ 2119{
2097 namespace_lock(); 2120 namespace_lock();
2098 br_write_lock(&vfsmount_lock);
2099 2121
2100 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); 2122 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2101 2123
2102 br_write_unlock(&vfsmount_lock);
2103 namespace_unlock(); 2124 namespace_unlock();
2104} 2125}
2105EXPORT_SYMBOL(mnt_set_expiry); 2126EXPORT_SYMBOL(mnt_set_expiry);
@@ -2118,7 +2139,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2118 return; 2139 return;
2119 2140
2120 namespace_lock(); 2141 namespace_lock();
2121 br_write_lock(&vfsmount_lock); 2142 lock_mount_hash();
2122 2143
2123 /* extract from the expiration list every vfsmount that matches the 2144 /* extract from the expiration list every vfsmount that matches the
2124 * following criteria: 2145 * following criteria:
@@ -2137,7 +2158,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2137 touch_mnt_namespace(mnt->mnt_ns); 2158 touch_mnt_namespace(mnt->mnt_ns);
2138 umount_tree(mnt, 1); 2159 umount_tree(mnt, 1);
2139 } 2160 }
2140 br_write_unlock(&vfsmount_lock); 2161 unlock_mount_hash();
2141 namespace_unlock(); 2162 namespace_unlock();
2142} 2163}
2143 2164
@@ -2193,7 +2214,7 @@ resume:
2193 * process a list of expirable mountpoints with the intent of discarding any 2214 * process a list of expirable mountpoints with the intent of discarding any
2194 * submounts of a specific parent mountpoint 2215 * submounts of a specific parent mountpoint
2195 * 2216 *
2196 * vfsmount_lock must be held for write 2217 * mount_lock must be held for write
2197 */ 2218 */
2198static void shrink_submounts(struct mount *mnt) 2219static void shrink_submounts(struct mount *mnt)
2199{ 2220{
@@ -2414,20 +2435,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2414 return new_ns; 2435 return new_ns;
2415} 2436}
2416 2437
2417/* 2438struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2418 * Allocate a new namespace structure and populate it with contents 2439 struct user_namespace *user_ns, struct fs_struct *new_fs)
2419 * copied from the namespace of the passed in task structure.
2420 */
2421static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2422 struct user_namespace *user_ns, struct fs_struct *fs)
2423{ 2440{
2424 struct mnt_namespace *new_ns; 2441 struct mnt_namespace *new_ns;
2425 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2442 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2426 struct mount *p, *q; 2443 struct mount *p, *q;
2427 struct mount *old = mnt_ns->root; 2444 struct mount *old;
2428 struct mount *new; 2445 struct mount *new;
2429 int copy_flags; 2446 int copy_flags;
2430 2447
2448 BUG_ON(!ns);
2449
2450 if (likely(!(flags & CLONE_NEWNS))) {
2451 get_mnt_ns(ns);
2452 return ns;
2453 }
2454
2455 old = ns->root;
2456
2431 new_ns = alloc_mnt_ns(user_ns); 2457 new_ns = alloc_mnt_ns(user_ns);
2432 if (IS_ERR(new_ns)) 2458 if (IS_ERR(new_ns))
2433 return new_ns; 2459 return new_ns;
@@ -2435,7 +2461,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2435 namespace_lock(); 2461 namespace_lock();
2436 /* First pass: copy the tree topology */ 2462 /* First pass: copy the tree topology */
2437 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 2463 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2438 if (user_ns != mnt_ns->user_ns) 2464 if (user_ns != ns->user_ns)
2439 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2465 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2440 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2466 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2441 if (IS_ERR(new)) { 2467 if (IS_ERR(new)) {
@@ -2444,9 +2470,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2444 return ERR_CAST(new); 2470 return ERR_CAST(new);
2445 } 2471 }
2446 new_ns->root = new; 2472 new_ns->root = new;
2447 br_write_lock(&vfsmount_lock);
2448 list_add_tail(&new_ns->list, &new->mnt_list); 2473 list_add_tail(&new_ns->list, &new->mnt_list);
2449 br_write_unlock(&vfsmount_lock);
2450 2474
2451 /* 2475 /*
2452 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 2476 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2457,13 +2481,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2457 q = new; 2481 q = new;
2458 while (p) { 2482 while (p) {
2459 q->mnt_ns = new_ns; 2483 q->mnt_ns = new_ns;
2460 if (fs) { 2484 if (new_fs) {
2461 if (&p->mnt == fs->root.mnt) { 2485 if (&p->mnt == new_fs->root.mnt) {
2462 fs->root.mnt = mntget(&q->mnt); 2486 new_fs->root.mnt = mntget(&q->mnt);
2463 rootmnt = &p->mnt; 2487 rootmnt = &p->mnt;
2464 } 2488 }
2465 if (&p->mnt == fs->pwd.mnt) { 2489 if (&p->mnt == new_fs->pwd.mnt) {
2466 fs->pwd.mnt = mntget(&q->mnt); 2490 new_fs->pwd.mnt = mntget(&q->mnt);
2467 pwdmnt = &p->mnt; 2491 pwdmnt = &p->mnt;
2468 } 2492 }
2469 } 2493 }
@@ -2484,23 +2508,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2484 return new_ns; 2508 return new_ns;
2485} 2509}
2486 2510
2487struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2488 struct user_namespace *user_ns, struct fs_struct *new_fs)
2489{
2490 struct mnt_namespace *new_ns;
2491
2492 BUG_ON(!ns);
2493 get_mnt_ns(ns);
2494
2495 if (!(flags & CLONE_NEWNS))
2496 return ns;
2497
2498 new_ns = dup_mnt_ns(ns, user_ns, new_fs);
2499
2500 put_mnt_ns(ns);
2501 return new_ns;
2502}
2503
2504/** 2511/**
2505 * create_mnt_ns - creates a private namespace and adds a root filesystem 2512 * create_mnt_ns - creates a private namespace and adds a root filesystem
2506 * @mnt: pointer to the new root filesystem mountpoint 2513 * @mnt: pointer to the new root filesystem mountpoint
@@ -2593,7 +2600,7 @@ out_type:
2593/* 2600/*
2594 * Return true if path is reachable from root 2601 * Return true if path is reachable from root
2595 * 2602 *
2596 * namespace_sem or vfsmount_lock is held 2603 * namespace_sem or mount_lock is held
2597 */ 2604 */
2598bool is_path_reachable(struct mount *mnt, struct dentry *dentry, 2605bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2599 const struct path *root) 2606 const struct path *root)
@@ -2608,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2608int path_is_under(struct path *path1, struct path *path2) 2615int path_is_under(struct path *path1, struct path *path2)
2609{ 2616{
2610 int res; 2617 int res;
2611 br_read_lock(&vfsmount_lock); 2618 read_seqlock_excl(&mount_lock);
2612 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 2619 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2613 br_read_unlock(&vfsmount_lock); 2620 read_sequnlock_excl(&mount_lock);
2614 return res; 2621 return res;
2615} 2622}
2616EXPORT_SYMBOL(path_is_under); 2623EXPORT_SYMBOL(path_is_under);
@@ -2701,7 +2708,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2701 if (!is_path_reachable(old_mnt, old.dentry, &new)) 2708 if (!is_path_reachable(old_mnt, old.dentry, &new))
2702 goto out4; 2709 goto out4;
2703 root_mp->m_count++; /* pin it so it won't go away */ 2710 root_mp->m_count++; /* pin it so it won't go away */
2704 br_write_lock(&vfsmount_lock); 2711 lock_mount_hash();
2705 detach_mnt(new_mnt, &parent_path); 2712 detach_mnt(new_mnt, &parent_path);
2706 detach_mnt(root_mnt, &root_parent); 2713 detach_mnt(root_mnt, &root_parent);
2707 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { 2714 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -2713,7 +2720,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2713 /* mount new_root on / */ 2720 /* mount new_root on / */
2714 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); 2721 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2715 touch_mnt_namespace(current->nsproxy->mnt_ns); 2722 touch_mnt_namespace(current->nsproxy->mnt_ns);
2716 br_write_unlock(&vfsmount_lock); 2723 unlock_mount_hash();
2717 chroot_fs_refs(&root, &new); 2724 chroot_fs_refs(&root, &new);
2718 put_mountpoint(root_mp); 2725 put_mountpoint(root_mp);
2719 error = 0; 2726 error = 0;
@@ -2767,8 +2774,6 @@ void __init mnt_init(void)
2767 unsigned u; 2774 unsigned u;
2768 int err; 2775 int err;
2769 2776
2770 init_rwsem(&namespace_sem);
2771
2772 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 2777 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
2773 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2778 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2774 2779
@@ -2785,8 +2790,6 @@ void __init mnt_init(void)
2785 for (u = 0; u < HASH_SIZE; u++) 2790 for (u = 0; u < HASH_SIZE; u++)
2786 INIT_LIST_HEAD(&mountpoint_hashtable[u]); 2791 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2787 2792
2788 br_lock_init(&vfsmount_lock);
2789
2790 err = sysfs_init(); 2793 err = sysfs_init();
2791 if (err) 2794 if (err)
2792 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2795 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2802,11 +2805,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
2802{ 2805{
2803 if (!atomic_dec_and_test(&ns->count)) 2806 if (!atomic_dec_and_test(&ns->count))
2804 return; 2807 return;
2805 namespace_lock(); 2808 drop_collected_mounts(&ns->root->mnt);
2806 br_write_lock(&vfsmount_lock);
2807 umount_tree(ns->root, 0);
2808 br_write_unlock(&vfsmount_lock);
2809 namespace_unlock();
2810 free_mnt_ns(ns); 2809 free_mnt_ns(ns);
2811} 2810}
2812 2811
@@ -2829,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
2829{ 2828{
2830 /* release long term mount so mount point can be released */ 2829 /* release long term mount so mount point can be released */
2831 if (!IS_ERR_OR_NULL(mnt)) { 2830 if (!IS_ERR_OR_NULL(mnt)) {
2832 br_write_lock(&vfsmount_lock);
2833 real_mount(mnt)->mnt_ns = NULL; 2831 real_mount(mnt)->mnt_ns = NULL;
2834 br_write_unlock(&vfsmount_lock); 2832 synchronize_rcu(); /* yecchhh... */
2835 mntput(mnt); 2833 mntput(mnt);
2836 } 2834 }
2837} 2835}
@@ -2875,7 +2873,7 @@ bool fs_fully_visible(struct file_system_type *type)
2875 if (unlikely(!ns)) 2873 if (unlikely(!ns))
2876 return false; 2874 return false;
2877 2875
2878 namespace_lock(); 2876 down_read(&namespace_sem);
2879 list_for_each_entry(mnt, &ns->list, mnt_list) { 2877 list_for_each_entry(mnt, &ns->list, mnt_list) {
2880 struct mount *child; 2878 struct mount *child;
2881 if (mnt->mnt.mnt_sb->s_type != type) 2879 if (mnt->mnt.mnt_sb->s_type != type)
@@ -2896,7 +2894,7 @@ bool fs_fully_visible(struct file_system_type *type)
2896 next: ; 2894 next: ;
2897 } 2895 }
2898found: 2896found:
2899 namespace_unlock(); 2897 up_read(&namespace_sem);
2900 return visible; 2898 return visible;
2901} 2899}
2902 2900
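
The remaining fs/namespace.c hunks follow the same conversion: every br_write_lock/br_write_unlock pair on vfsmount_lock becomes lock_mount_hash()/unlock_mount_hash(), the exclusive read side (path_is_under()) switches to read_seqlock_excl() on mount_lock, dup_mnt_ns() is folded into copy_mnt_ns() so the CLONE_NEWNS check lives in one place, put_mnt_ns() is reduced to drop_collected_mounts(), and kern_unmount() gains an explicit synchronize_rcu() before the final mntput(). A minimal sketch of the writer/lockless-reader seqlock pattern that mount_lock is being moved to, using a hypothetical lock and payload (the hunks shown here only exercise the exclusive paths):

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(demo_lock);
static int demo_value;

static void demo_write(int v)
{
	write_seqlock(&demo_lock);	/* exclusive, bumps the sequence count */
	demo_value = v;
	write_sequnlock(&demo_lock);
}

static int demo_read(void)
{
	unsigned int seq;
	int v;

	do {
		seq = read_seqbegin(&demo_lock);	/* lockless snapshot */
		v = demo_value;
	} while (read_seqretry(&demo_lock, seq));	/* retry if a writer raced */

	return v;
}
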
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 3be047474bfc..c320ac52353e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,9 +339,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
339 if (val) 339 if (val)
340 goto finished; 340 goto finished;
341 341
342 DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n", 342 DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n",
343 dentry->d_parent->d_name.name, dentry->d_name.name, 343 dentry, NCP_GET_AGE(dentry));
344 NCP_GET_AGE(dentry));
345 344
346 len = sizeof(__name); 345 len = sizeof(__name);
347 if (ncp_is_server_root(dir)) { 346 if (ncp_is_server_root(dir)) {
@@ -359,8 +358,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
359 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 358 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
360 } 359 }
361 finfo.volume = finfo.i.volNumber; 360 finfo.volume = finfo.i.volNumber;
362 DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n", 361 DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n",
363 dentry->d_parent->d_name.name, __name, res); 362 dentry->d_parent, __name, res);
364 /* 363 /*
365 * If we didn't find it, or if it has a different dirEntNum to 364 * If we didn't find it, or if it has a different dirEntNum to
366 * what we remember, it's not valid any more. 365 * what we remember, it's not valid any more.
@@ -454,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
454 ctl.page = NULL; 453 ctl.page = NULL;
455 ctl.cache = NULL; 454 ctl.cache = NULL;
456 455
457 DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", 456 DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file,
458 dentry->d_parent->d_name.name, dentry->d_name.name,
459 (int) ctx->pos); 457 (int) ctx->pos);
460 458
461 result = -EIO; 459 result = -EIO;
@@ -740,12 +738,10 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
740 int more; 738 int more;
741 size_t bufsize; 739 size_t bufsize;
742 740
743 DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", 741 DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file,
744 dentry->d_parent->d_name.name, dentry->d_name.name,
745 (unsigned long) ctx->pos); 742 (unsigned long) ctx->pos);
746 PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", 743 PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n",
747 dentry->d_name.name, NCP_FINFO(dir)->volNumber, 744 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
748 NCP_FINFO(dir)->dirEntNum);
749 745
750 err = ncp_initialize_search(server, dir, &seq); 746 err = ncp_initialize_search(server, dir, &seq);
751 if (err) { 747 if (err) {
@@ -850,8 +846,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
850 if (!ncp_conn_valid(server)) 846 if (!ncp_conn_valid(server))
851 goto finished; 847 goto finished;
852 848
853 PPRINTK("ncp_lookup: server lookup for %s/%s\n", 849 PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry);
854 dentry->d_parent->d_name.name, dentry->d_name.name);
855 850
856 len = sizeof(__name); 851 len = sizeof(__name);
857 if (ncp_is_server_root(dir)) { 852 if (ncp_is_server_root(dir)) {
@@ -867,8 +862,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
867 if (!res) 862 if (!res)
868 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 863 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
869 } 864 }
870 PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n", 865 PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res);
871 dentry->d_parent->d_name.name, __name, res);
872 /* 866 /*
873 * If we didn't find an entry, make a negative dentry. 867 * If we didn't find an entry, make a negative dentry.
874 */ 868 */
@@ -915,8 +909,7 @@ out:
915 return error; 909 return error;
916 910
917out_close: 911out_close:
918 PPRINTK("ncp_instantiate: %s/%s failed, closing file\n", 912 PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry);
919 dentry->d_parent->d_name.name, dentry->d_name.name);
920 ncp_close_file(NCP_SERVER(dir), finfo->file_handle); 913 ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
921 goto out; 914 goto out;
922} 915}
@@ -930,8 +923,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
930 int opmode; 923 int opmode;
931 __u8 __name[NCP_MAXPATHLEN + 1]; 924 __u8 __name[NCP_MAXPATHLEN + 1];
932 925
933 PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n", 926 PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode);
934 dentry->d_parent->d_name.name, dentry->d_name.name, mode);
935 927
936 ncp_age_dentry(server, dentry); 928 ncp_age_dentry(server, dentry);
937 len = sizeof(__name); 929 len = sizeof(__name);
@@ -960,8 +952,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
960 error = -ENAMETOOLONG; 952 error = -ENAMETOOLONG;
961 else if (result < 0) 953 else if (result < 0)
962 error = result; 954 error = result;
963 DPRINTK("ncp_create: %s/%s failed\n", 955 DPRINTK("ncp_create: %pd2 failed\n", dentry);
964 dentry->d_parent->d_name.name, dentry->d_name.name);
965 goto out; 956 goto out;
966 } 957 }
967 opmode = O_WRONLY; 958 opmode = O_WRONLY;
@@ -994,8 +985,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
994 int error, len; 985 int error, len;
995 __u8 __name[NCP_MAXPATHLEN + 1]; 986 __u8 __name[NCP_MAXPATHLEN + 1];
996 987
997 DPRINTK("ncp_mkdir: making %s/%s\n", 988 DPRINTK("ncp_mkdir: making %pd2\n", dentry);
998 dentry->d_parent->d_name.name, dentry->d_name.name);
999 989
1000 ncp_age_dentry(server, dentry); 990 ncp_age_dentry(server, dentry);
1001 len = sizeof(__name); 991 len = sizeof(__name);
@@ -1032,8 +1022,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
1032 int error, result, len; 1022 int error, result, len;
1033 __u8 __name[NCP_MAXPATHLEN + 1]; 1023 __u8 __name[NCP_MAXPATHLEN + 1];
1034 1024
1035 DPRINTK("ncp_rmdir: removing %s/%s\n", 1025 DPRINTK("ncp_rmdir: removing %pd2\n", dentry);
1036 dentry->d_parent->d_name.name, dentry->d_name.name);
1037 1026
1038 len = sizeof(__name); 1027 len = sizeof(__name);
1039 error = ncp_io2vol(server, __name, &len, dentry->d_name.name, 1028 error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1078,8 +1067,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1078 int error; 1067 int error;
1079 1068
1080 server = NCP_SERVER(dir); 1069 server = NCP_SERVER(dir);
1081 DPRINTK("ncp_unlink: unlinking %s/%s\n", 1070 DPRINTK("ncp_unlink: unlinking %pd2\n", dentry);
1082 dentry->d_parent->d_name.name, dentry->d_name.name);
1083 1071
1084 /* 1072 /*
1085 * Check whether to close the file ... 1073 * Check whether to close the file ...
@@ -1099,8 +1087,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1099#endif 1087#endif
1100 switch (error) { 1088 switch (error) {
1101 case 0x00: 1089 case 0x00:
1102 DPRINTK("ncp: removed %s/%s\n", 1090 DPRINTK("ncp: removed %pd2\n", dentry);
1103 dentry->d_parent->d_name.name, dentry->d_name.name);
1104 break; 1091 break;
1105 case 0x85: 1092 case 0x85:
1106 case 0x8A: 1093 case 0x8A:
@@ -1133,9 +1120,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1133 int old_len, new_len; 1120 int old_len, new_len;
1134 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; 1121 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
1135 1122
1136 DPRINTK("ncp_rename: %s/%s to %s/%s\n", 1123 DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry);
1137 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1138 new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
1139 1124
1140 ncp_age_dentry(server, old_dentry); 1125 ncp_age_dentry(server, old_dentry);
1141 ncp_age_dentry(server, new_dentry); 1126 ncp_age_dentry(server, new_dentry);
@@ -1165,8 +1150,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1165#endif 1150#endif
1166 switch (error) { 1151 switch (error) {
1167 case 0x00: 1152 case 0x00:
1168 DPRINTK("ncp renamed %s -> %s.\n", 1153 DPRINTK("ncp renamed %pd -> %pd.\n",
1169 old_dentry->d_name.name,new_dentry->d_name.name); 1154 old_dentry, new_dentry);
1170 break; 1155 break;
1171 case 0x9E: 1156 case 0x9E:
1172 error = -ENAMETOOLONG; 1157 error = -ENAMETOOLONG;
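
The fs/ncpfs/dir.c changes above (and the ncpfs/nfs hunks that follow) are mechanical conversions to the %pd/%pD printk extensions: %pd prints a dentry's name, %pd2 prints parent/name, and %pD/%pD2 do the same starting from a struct file, so the call sites can drop the repeated d_parent->d_name.name / d_name.name arguments. A hypothetical helper showing the specifiers in use:

#include <linux/printk.h>
#include <linux/dcache.h>
#include <linux/fs.h>

static void debug_print_names(struct dentry *dentry, struct file *file)
{
	pr_debug("dentry: %pd (as parent/name: %pd2)\n", dentry, dentry);
	pr_debug("file:   %pD (as parent/name: %pD2)\n", file, file);
}
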
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 122e260247f5..8f5074e1ecb9 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -107,8 +107,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
107 void* freepage; 107 void* freepage;
108 size_t freelen; 108 size_t freelen;
109 109
110 DPRINTK("ncp_file_read: enter %s/%s\n", 110 DPRINTK("ncp_file_read: enter %pd2\n", dentry);
111 dentry->d_parent->d_name.name, dentry->d_name.name);
112 111
113 pos = *ppos; 112 pos = *ppos;
114 113
@@ -166,8 +165,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
166 165
167 file_accessed(file); 166 file_accessed(file);
168 167
169 DPRINTK("ncp_file_read: exit %s/%s\n", 168 DPRINTK("ncp_file_read: exit %pd2\n", dentry);
170 dentry->d_parent->d_name.name, dentry->d_name.name);
171outrel: 169outrel:
172 ncp_inode_close(inode); 170 ncp_inode_close(inode);
173 return already_read ? already_read : error; 171 return already_read ? already_read : error;
@@ -184,8 +182,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
184 int errno; 182 int errno;
185 void* bouncebuffer; 183 void* bouncebuffer;
186 184
187 DPRINTK("ncp_file_write: enter %s/%s\n", 185 DPRINTK("ncp_file_write: enter %pd2\n", dentry);
188 dentry->d_parent->d_name.name, dentry->d_name.name);
189 if ((ssize_t) count < 0) 186 if ((ssize_t) count < 0)
190 return -EINVAL; 187 return -EINVAL;
191 pos = *ppos; 188 pos = *ppos;
@@ -264,8 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
264 i_size_write(inode, pos); 261 i_size_write(inode, pos);
265 mutex_unlock(&inode->i_mutex); 262 mutex_unlock(&inode->i_mutex);
266 } 263 }
267 DPRINTK("ncp_file_write: exit %s/%s\n", 264 DPRINTK("ncp_file_write: exit %pd2\n", dentry);
268 dentry->d_parent->d_name.name, dentry->d_name.name);
269outrel: 265outrel:
270 ncp_inode_close(inode); 266 ncp_inode_close(inode);
271 return already_written ? already_written : errno; 267 return already_written ? already_written : errno;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 4659da67e7f6..2cf2ebecb55f 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -782,6 +782,17 @@ out:
782 return error; 782 return error;
783} 783}
784 784
785static void delayed_free(struct rcu_head *p)
786{
787 struct ncp_server *server = container_of(p, struct ncp_server, rcu);
788#ifdef CONFIG_NCPFS_NLS
789 /* unload the NLS charsets */
790 unload_nls(server->nls_vol);
791 unload_nls(server->nls_io);
792#endif /* CONFIG_NCPFS_NLS */
793 kfree(server);
794}
795
785static void ncp_put_super(struct super_block *sb) 796static void ncp_put_super(struct super_block *sb)
786{ 797{
787 struct ncp_server *server = NCP_SBP(sb); 798 struct ncp_server *server = NCP_SBP(sb);
@@ -792,11 +803,6 @@ static void ncp_put_super(struct super_block *sb)
792 803
793 ncp_stop_tasks(server); 804 ncp_stop_tasks(server);
794 805
795#ifdef CONFIG_NCPFS_NLS
796 /* unload the NLS charsets */
797 unload_nls(server->nls_vol);
798 unload_nls(server->nls_io);
799#endif /* CONFIG_NCPFS_NLS */
800 mutex_destroy(&server->rcv.creq_mutex); 806 mutex_destroy(&server->rcv.creq_mutex);
801 mutex_destroy(&server->root_setup_lock); 807 mutex_destroy(&server->root_setup_lock);
802 mutex_destroy(&server->mutex); 808 mutex_destroy(&server->mutex);
@@ -813,8 +819,7 @@ static void ncp_put_super(struct super_block *sb)
813 vfree(server->rxbuf); 819 vfree(server->rxbuf);
814 vfree(server->txbuf); 820 vfree(server->txbuf);
815 vfree(server->packet); 821 vfree(server->packet);
816 sb->s_fs_info = NULL; 822 call_rcu(&server->rcu, delayed_free);
817 kfree(server);
818} 823}
819 824
820static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) 825static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index c51b2c543539..b81e97adc5a9 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -38,7 +38,7 @@ struct ncp_mount_data_kernel {
38}; 38};
39 39
40struct ncp_server { 40struct ncp_server {
41 41 struct rcu_head rcu;
42 struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of 42 struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of
43 interest for us later, so we store 43 interest for us later, so we store
44 it completely. */ 44 it completely. */
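
The ncpfs hunks above make freeing of struct ncp_server RCU-delayed as well: an rcu_head is embedded in the structure and ncp_put_super() hands the NLS unload plus the final kfree() to call_rcu() via delayed_free(), presumably so that lookups still running under RCU can finish dereferencing the server first. Because that callback has to do more than free the object, kfree_rcu() cannot be used there; for comparison, a sketch of the simpler case where it can, with a hypothetical type:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct bar {
	struct rcu_head rcu;
	int payload;
};

static void bar_put(struct bar *b)
{
	/* queues kfree(b) after a grace period; no callback needed
	 * when plain kfree() is all the teardown required */
	kfree_rcu(b, rcu);
}
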
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9a8676f33350..812154aff981 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -98,9 +98,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
98 struct nfs_open_dir_context *ctx; 98 struct nfs_open_dir_context *ctx;
99 struct rpc_cred *cred; 99 struct rpc_cred *cred;
100 100
101 dfprintk(FILE, "NFS: open dir(%s/%s)\n", 101 dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
102 filp->f_path.dentry->d_parent->d_name.name,
103 filp->f_path.dentry->d_name.name);
104 102
105 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 103 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
106 104
@@ -297,11 +295,10 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
297 if (ctx->duped > 0 295 if (ctx->duped > 0
298 && ctx->dup_cookie == *desc->dir_cookie) { 296 && ctx->dup_cookie == *desc->dir_cookie) {
299 if (printk_ratelimit()) { 297 if (printk_ratelimit()) {
300 pr_notice("NFS: directory %s/%s contains a readdir loop." 298 pr_notice("NFS: directory %pD2 contains a readdir loop."
301 "Please contact your server vendor. " 299 "Please contact your server vendor. "
302 "The file: %s has duplicate cookie %llu\n", 300 "The file: %s has duplicate cookie %llu\n",
303 desc->file->f_dentry->d_parent->d_name.name, 301 desc->file,
304 desc->file->f_dentry->d_name.name,
305 array->array[i].string.name, 302 array->array[i].string.name,
306 *desc->dir_cookie); 303 *desc->dir_cookie);
307 } 304 }
@@ -822,9 +819,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
822 struct nfs_open_dir_context *dir_ctx = file->private_data; 819 struct nfs_open_dir_context *dir_ctx = file->private_data;
823 int res = 0; 820 int res = 0;
824 821
825 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 822 dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
826 dentry->d_parent->d_name.name, dentry->d_name.name, 823 file, (long long)ctx->pos);
827 (long long)ctx->pos);
828 nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); 824 nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
829 825
830 /* 826 /*
@@ -880,22 +876,17 @@ out:
880 nfs_unblock_sillyrename(dentry); 876 nfs_unblock_sillyrename(dentry);
881 if (res > 0) 877 if (res > 0)
882 res = 0; 878 res = 0;
883 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", 879 dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
884 dentry->d_parent->d_name.name, dentry->d_name.name,
885 res);
886 return res; 880 return res;
887} 881}
888 882
889static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) 883static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
890{ 884{
891 struct dentry *dentry = filp->f_path.dentry; 885 struct inode *inode = file_inode(filp);
892 struct inode *inode = dentry->d_inode;
893 struct nfs_open_dir_context *dir_ctx = filp->private_data; 886 struct nfs_open_dir_context *dir_ctx = filp->private_data;
894 887
895 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", 888 dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
896 dentry->d_parent->d_name.name, 889 filp, offset, whence);
897 dentry->d_name.name,
898 offset, whence);
899 890
900 mutex_lock(&inode->i_mutex); 891 mutex_lock(&inode->i_mutex);
901 switch (whence) { 892 switch (whence) {
@@ -925,15 +916,12 @@ out:
925static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, 916static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
926 int datasync) 917 int datasync)
927{ 918{
928 struct dentry *dentry = filp->f_path.dentry; 919 struct inode *inode = file_inode(filp);
929 struct inode *inode = dentry->d_inode;
930 920
931 dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", 921 dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
932 dentry->d_parent->d_name.name, dentry->d_name.name,
933 datasync);
934 922
935 mutex_lock(&inode->i_mutex); 923 mutex_lock(&inode->i_mutex);
936 nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); 924 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
937 mutex_unlock(&inode->i_mutex); 925 mutex_unlock(&inode->i_mutex);
938 return 0; 926 return 0;
939} 927}
@@ -1073,9 +1061,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1073 } 1061 }
1074 1062
1075 if (is_bad_inode(inode)) { 1063 if (is_bad_inode(inode)) {
1076 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", 1064 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1077 __func__, dentry->d_parent->d_name.name, 1065 __func__, dentry);
1078 dentry->d_name.name);
1079 goto out_bad; 1066 goto out_bad;
1080 } 1067 }
1081 1068
@@ -1125,9 +1112,8 @@ out_set_verifier:
1125 nfs_advise_use_readdirplus(dir); 1112 nfs_advise_use_readdirplus(dir);
1126 out_valid_noent: 1113 out_valid_noent:
1127 dput(parent); 1114 dput(parent);
1128 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", 1115 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1129 __func__, dentry->d_parent->d_name.name, 1116 __func__, dentry);
1130 dentry->d_name.name);
1131 return 1; 1117 return 1;
1132out_zap_parent: 1118out_zap_parent:
1133 nfs_zap_caches(dir); 1119 nfs_zap_caches(dir);
@@ -1153,18 +1139,16 @@ out_zap_parent:
1153 goto out_valid; 1139 goto out_valid;
1154 1140
1155 dput(parent); 1141 dput(parent);
1156 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", 1142 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1157 __func__, dentry->d_parent->d_name.name, 1143 __func__, dentry);
1158 dentry->d_name.name);
1159 return 0; 1144 return 0;
1160out_error: 1145out_error:
1161 nfs_free_fattr(fattr); 1146 nfs_free_fattr(fattr);
1162 nfs_free_fhandle(fhandle); 1147 nfs_free_fhandle(fhandle);
1163 nfs4_label_free(label); 1148 nfs4_label_free(label);
1164 dput(parent); 1149 dput(parent);
1165 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", 1150 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1166 __func__, dentry->d_parent->d_name.name, 1151 __func__, dentry, error);
1167 dentry->d_name.name, error);
1168 return error; 1152 return error;
1169} 1153}
1170 1154
@@ -1188,16 +1172,14 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1188 * eventually need to do something more here. 1172 * eventually need to do something more here.
1189 */ 1173 */
1190 if (!inode) { 1174 if (!inode) {
1191 dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n", 1175 dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1192 __func__, dentry->d_parent->d_name.name, 1176 __func__, dentry);
1193 dentry->d_name.name);
1194 return 1; 1177 return 1;
1195 } 1178 }
1196 1179
1197 if (is_bad_inode(inode)) { 1180 if (is_bad_inode(inode)) {
1198 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", 1181 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1199 __func__, dentry->d_parent->d_name.name, 1182 __func__, dentry);
1200 dentry->d_name.name);
1201 return 0; 1183 return 0;
1202 } 1184 }
1203 1185
@@ -1212,9 +1194,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1212 */ 1194 */
1213static int nfs_dentry_delete(const struct dentry *dentry) 1195static int nfs_dentry_delete(const struct dentry *dentry)
1214{ 1196{
1215 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", 1197 dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1216 dentry->d_parent->d_name.name, dentry->d_name.name, 1198 dentry, dentry->d_flags);
1217 dentry->d_flags);
1218 1199
1219 /* Unhash any dentry with a stale inode */ 1200 /* Unhash any dentry with a stale inode */
1220 if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) 1201 if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
@@ -1292,8 +1273,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
1292 struct nfs4_label *label = NULL; 1273 struct nfs4_label *label = NULL;
1293 int error; 1274 int error;
1294 1275
1295 dfprintk(VFS, "NFS: lookup(%s/%s)\n", 1276 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1296 dentry->d_parent->d_name.name, dentry->d_name.name);
1297 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); 1277 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1298 1278
1299 res = ERR_PTR(-ENAMETOOLONG); 1279 res = ERR_PTR(-ENAMETOOLONG);
@@ -1424,8 +1404,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1424 /* Expect a negative dentry */ 1404 /* Expect a negative dentry */
1425 BUG_ON(dentry->d_inode); 1405 BUG_ON(dentry->d_inode);
1426 1406
1427 dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", 1407 dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
1428 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1408 dir->i_sb->s_id, dir->i_ino, dentry);
1429 1409
1430 err = nfs_check_flags(open_flags); 1410 err = nfs_check_flags(open_flags);
1431 if (err) 1411 if (err)
@@ -1614,8 +1594,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
1614 int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; 1594 int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
1615 int error; 1595 int error;
1616 1596
1617 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1597 dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
1618 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1598 dir->i_sb->s_id, dir->i_ino, dentry);
1619 1599
1620 attr.ia_mode = mode; 1600 attr.ia_mode = mode;
1621 attr.ia_valid = ATTR_MODE; 1601 attr.ia_valid = ATTR_MODE;
@@ -1641,8 +1621,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1641 struct iattr attr; 1621 struct iattr attr;
1642 int status; 1622 int status;
1643 1623
1644 dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", 1624 dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
1645 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1625 dir->i_sb->s_id, dir->i_ino, dentry);
1646 1626
1647 if (!new_valid_dev(rdev)) 1627 if (!new_valid_dev(rdev))
1648 return -EINVAL; 1628 return -EINVAL;
@@ -1670,8 +1650,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1670 struct iattr attr; 1650 struct iattr attr;
1671 int error; 1651 int error;
1672 1652
1673 dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", 1653 dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
1674 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1654 dir->i_sb->s_id, dir->i_ino, dentry);
1675 1655
1676 attr.ia_valid = ATTR_MODE; 1656 attr.ia_valid = ATTR_MODE;
1677 attr.ia_mode = mode | S_IFDIR; 1657 attr.ia_mode = mode | S_IFDIR;
@@ -1698,8 +1678,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1698{ 1678{
1699 int error; 1679 int error;
1700 1680
1701 dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", 1681 dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
1702 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1682 dir->i_sb->s_id, dir->i_ino, dentry);
1703 1683
1704 trace_nfs_rmdir_enter(dir, dentry); 1684 trace_nfs_rmdir_enter(dir, dentry);
1705 if (dentry->d_inode) { 1685 if (dentry->d_inode) {
@@ -1734,8 +1714,7 @@ static int nfs_safe_remove(struct dentry *dentry)
1734 struct inode *inode = dentry->d_inode; 1714 struct inode *inode = dentry->d_inode;
1735 int error = -EBUSY; 1715 int error = -EBUSY;
1736 1716
1737 dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", 1717 dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
1738 dentry->d_parent->d_name.name, dentry->d_name.name);
1739 1718
1740 /* If the dentry was sillyrenamed, we simply call d_delete() */ 1719 /* If the dentry was sillyrenamed, we simply call d_delete() */
1741 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 1720 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1768,8 +1747,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
1768 int error; 1747 int error;
1769 int need_rehash = 0; 1748 int need_rehash = 0;
1770 1749
1771 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, 1750 dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
1772 dir->i_ino, dentry->d_name.name); 1751 dir->i_ino, dentry);
1773 1752
1774 trace_nfs_unlink_enter(dir, dentry); 1753 trace_nfs_unlink_enter(dir, dentry);
1775 spin_lock(&dentry->d_lock); 1754 spin_lock(&dentry->d_lock);
@@ -1819,8 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1819 unsigned int pathlen = strlen(symname); 1798 unsigned int pathlen = strlen(symname);
1820 int error; 1799 int error;
1821 1800
1822 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, 1801 dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
1823 dir->i_ino, dentry->d_name.name, symname); 1802 dir->i_ino, dentry, symname);
1824 1803
1825 if (pathlen > PAGE_SIZE) 1804 if (pathlen > PAGE_SIZE)
1826 return -ENAMETOOLONG; 1805 return -ENAMETOOLONG;
@@ -1842,9 +1821,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1842 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); 1821 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1843 trace_nfs_symlink_exit(dir, dentry, error); 1822 trace_nfs_symlink_exit(dir, dentry, error);
1844 if (error != 0) { 1823 if (error != 0) {
1845 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", 1824 dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
1846 dir->i_sb->s_id, dir->i_ino, 1825 dir->i_sb->s_id, dir->i_ino,
1847 dentry->d_name.name, symname, error); 1826 dentry, symname, error);
1848 d_drop(dentry); 1827 d_drop(dentry);
1849 __free_page(page); 1828 __free_page(page);
1850 return error; 1829 return error;
@@ -1871,9 +1850,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1871 struct inode *inode = old_dentry->d_inode; 1850 struct inode *inode = old_dentry->d_inode;
1872 int error; 1851 int error;
1873 1852
1874 dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", 1853 dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
1875 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1854 old_dentry, dentry);
1876 dentry->d_parent->d_name.name, dentry->d_name.name);
1877 1855
1878 trace_nfs_link_enter(inode, dir, dentry); 1856 trace_nfs_link_enter(inode, dir, dentry);
1879 NFS_PROTO(inode)->return_delegation(inode); 1857 NFS_PROTO(inode)->return_delegation(inode);
@@ -1921,9 +1899,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1921 struct dentry *dentry = NULL, *rehash = NULL; 1899 struct dentry *dentry = NULL, *rehash = NULL;
1922 int error = -EBUSY; 1900 int error = -EBUSY;
1923 1901
1924 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", 1902 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
1925 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1903 old_dentry, new_dentry,
1926 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1927 d_count(new_dentry)); 1904 d_count(new_dentry));
1928 1905
1929 trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); 1906 trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
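
Besides the %pd/%pD conversions, several fs/nfs/dir.c hunks above (nfs_llseek_dir, nfs_fsync_dir) also stop going through file->f_path.dentry->d_inode and use file_inode() instead. A hypothetical helper illustrating that replacement:

#include <linux/fs.h>

static loff_t current_file_size(struct file *filp)
{
	/* was: filp->f_path.dentry->d_inode */
	struct inode *inode = file_inode(filp);

	return i_size_read(inode);
}
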
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 91ff089d3412..d71d66c9e0a1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,9 +124,8 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
124ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 124ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
125{ 125{
126#ifndef CONFIG_NFS_SWAP 126#ifndef CONFIG_NFS_SWAP
127 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", 127 dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
128 iocb->ki_filp->f_path.dentry->d_name.name, 128 iocb->ki_filp, (long long) pos, nr_segs);
129 (long long) pos, nr_segs);
130 129
131 return -EINVAL; 130 return -EINVAL;
132#else 131#else
@@ -909,10 +908,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
909 count = iov_length(iov, nr_segs); 908 count = iov_length(iov, nr_segs);
910 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 909 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
911 910
912 dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", 911 dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
913 file->f_path.dentry->d_parent->d_name.name, 912 file, count, (long long) pos);
914 file->f_path.dentry->d_name.name,
915 count, (long long) pos);
916 913
917 retval = 0; 914 retval = 0;
918 if (!count) 915 if (!count)
@@ -965,10 +962,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
965 count = iov_length(iov, nr_segs); 962 count = iov_length(iov, nr_segs);
966 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 963 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
967 964
968 dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", 965 dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
969 file->f_path.dentry->d_parent->d_name.name, 966 file, count, (long long) pos);
970 file->f_path.dentry->d_name.name,
971 count, (long long) pos);
972 967
973 retval = generic_write_checks(file, &pos, &count, 0); 968 retval = generic_write_checks(file, &pos, &count, 0);
974 if (retval) 969 if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1e6bfdbc1aff..e2fcacf07de3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -65,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
65{ 65{
66 int res; 66 int res;
67 67
68 dprintk("NFS: open file(%s/%s)\n", 68 dprintk("NFS: open file(%pD2)\n", filp);
69 filp->f_path.dentry->d_parent->d_name.name,
70 filp->f_path.dentry->d_name.name);
71 69
72 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 70 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
73 res = nfs_check_flags(filp->f_flags); 71 res = nfs_check_flags(filp->f_flags);
@@ -81,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
81int 79int
82nfs_file_release(struct inode *inode, struct file *filp) 80nfs_file_release(struct inode *inode, struct file *filp)
83{ 81{
84 dprintk("NFS: release(%s/%s)\n", 82 dprintk("NFS: release(%pD2)\n", filp);
85 filp->f_path.dentry->d_parent->d_name.name,
86 filp->f_path.dentry->d_name.name);
87 83
88 nfs_inc_stats(inode, NFSIOS_VFSRELEASE); 84 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
89 return nfs_release(inode, filp); 85 return nfs_release(inode, filp);
@@ -123,10 +119,8 @@ force_reval:
123 119
124loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) 120loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
125{ 121{
126 dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", 122 dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
127 filp->f_path.dentry->d_parent->d_name.name, 123 filp, offset, whence);
128 filp->f_path.dentry->d_name.name,
129 offset, whence);
130 124
131 /* 125 /*
132 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate 126 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
@@ -150,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek);
150int 144int
151nfs_file_flush(struct file *file, fl_owner_t id) 145nfs_file_flush(struct file *file, fl_owner_t id)
152{ 146{
153 struct dentry *dentry = file->f_path.dentry; 147 struct inode *inode = file_inode(file);
154 struct inode *inode = dentry->d_inode;
155 148
156 dprintk("NFS: flush(%s/%s)\n", 149 dprintk("NFS: flush(%pD2)\n", file);
157 dentry->d_parent->d_name.name,
158 dentry->d_name.name);
159 150
160 nfs_inc_stats(inode, NFSIOS_VFSFLUSH); 151 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
161 if ((file->f_mode & FMODE_WRITE) == 0) 152 if ((file->f_mode & FMODE_WRITE) == 0)
@@ -177,15 +168,14 @@ ssize_t
177nfs_file_read(struct kiocb *iocb, const struct iovec *iov, 168nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
178 unsigned long nr_segs, loff_t pos) 169 unsigned long nr_segs, loff_t pos)
179{ 170{
180 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 171 struct inode *inode = file_inode(iocb->ki_filp);
181 struct inode * inode = dentry->d_inode;
182 ssize_t result; 172 ssize_t result;
183 173
184 if (iocb->ki_filp->f_flags & O_DIRECT) 174 if (iocb->ki_filp->f_flags & O_DIRECT)
185 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); 175 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
186 176
187 dprintk("NFS: read(%s/%s, %lu@%lu)\n", 177 dprintk("NFS: read(%pD2, %lu@%lu)\n",
188 dentry->d_parent->d_name.name, dentry->d_name.name, 178 iocb->ki_filp,
189 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); 179 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
190 180
191 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 181 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
@@ -203,13 +193,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
203 struct pipe_inode_info *pipe, size_t count, 193 struct pipe_inode_info *pipe, size_t count,
204 unsigned int flags) 194 unsigned int flags)
205{ 195{
206 struct dentry *dentry = filp->f_path.dentry; 196 struct inode *inode = file_inode(filp);
207 struct inode *inode = dentry->d_inode;
208 ssize_t res; 197 ssize_t res;
209 198
210 dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", 199 dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
211 dentry->d_parent->d_name.name, dentry->d_name.name, 200 filp, (unsigned long) count, (unsigned long long) *ppos);
212 (unsigned long) count, (unsigned long long) *ppos);
213 201
214 res = nfs_revalidate_mapping(inode, filp->f_mapping); 202 res = nfs_revalidate_mapping(inode, filp->f_mapping);
215 if (!res) { 203 if (!res) {
@@ -224,12 +212,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read);
224int 212int
225nfs_file_mmap(struct file * file, struct vm_area_struct * vma) 213nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
226{ 214{
227 struct dentry *dentry = file->f_path.dentry; 215 struct inode *inode = file_inode(file);
228 struct inode *inode = dentry->d_inode;
229 int status; 216 int status;
230 217
231 dprintk("NFS: mmap(%s/%s)\n", 218 dprintk("NFS: mmap(%pD2)\n", file);
232 dentry->d_parent->d_name.name, dentry->d_name.name);
233 219
234 /* Note: generic_file_mmap() returns ENOSYS on nommu systems 220 /* Note: generic_file_mmap() returns ENOSYS on nommu systems
235 * so we call that before revalidating the mapping 221 * so we call that before revalidating the mapping
@@ -258,15 +244,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
258int 244int
259nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) 245nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
260{ 246{
261 struct dentry *dentry = file->f_path.dentry;
262 struct nfs_open_context *ctx = nfs_file_open_context(file); 247 struct nfs_open_context *ctx = nfs_file_open_context(file);
263 struct inode *inode = dentry->d_inode; 248 struct inode *inode = file_inode(file);
264 int have_error, do_resend, status; 249 int have_error, do_resend, status;
265 int ret = 0; 250 int ret = 0;
266 251
267 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 252 dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
268 dentry->d_parent->d_name.name, dentry->d_name.name,
269 datasync);
270 253
271 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 254 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
272 do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); 255 do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
@@ -371,10 +354,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
371 struct page *page; 354 struct page *page;
372 int once_thru = 0; 355 int once_thru = 0;
373 356
374 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", 357 dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
375 file->f_path.dentry->d_parent->d_name.name, 358 file, mapping->host->i_ino, len, (long long) pos);
376 file->f_path.dentry->d_name.name,
377 mapping->host->i_ino, len, (long long) pos);
378 359
379start: 360start:
380 /* 361 /*
@@ -414,10 +395,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
414 struct nfs_open_context *ctx = nfs_file_open_context(file); 395 struct nfs_open_context *ctx = nfs_file_open_context(file);
415 int status; 396 int status;
416 397
417 dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", 398 dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
418 file->f_path.dentry->d_parent->d_name.name, 399 file, mapping->host->i_ino, len, (long long) pos);
419 file->f_path.dentry->d_name.name,
420 mapping->host->i_ino, len, (long long) pos);
421 400
422 /* 401 /*
423 * Zero any uninitialised parts of the page, and then mark the page 402 * Zero any uninitialised parts of the page, and then mark the page
@@ -601,22 +580,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
601{ 580{
602 struct page *page = vmf->page; 581 struct page *page = vmf->page;
603 struct file *filp = vma->vm_file; 582 struct file *filp = vma->vm_file;
604 struct dentry *dentry = filp->f_path.dentry; 583 struct inode *inode = file_inode(filp);
605 unsigned pagelen; 584 unsigned pagelen;
606 int ret = VM_FAULT_NOPAGE; 585 int ret = VM_FAULT_NOPAGE;
607 struct address_space *mapping; 586 struct address_space *mapping;
608 587
609 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", 588 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
610 dentry->d_parent->d_name.name, dentry->d_name.name, 589 filp, filp->f_mapping->host->i_ino,
611 filp->f_mapping->host->i_ino,
612 (long long)page_offset(page)); 590 (long long)page_offset(page));
613 591
614 /* make sure the cache has finished storing the page */ 592 /* make sure the cache has finished storing the page */
615 nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); 593 nfs_fscache_wait_on_page_write(NFS_I(inode), page);
616 594
617 lock_page(page); 595 lock_page(page);
618 mapping = page_file_mapping(page); 596 mapping = page_file_mapping(page);
619 if (mapping != dentry->d_inode->i_mapping) 597 if (mapping != inode->i_mapping)
620 goto out_unlock; 598 goto out_unlock;
621 599
622 wait_on_page_writeback(page); 600 wait_on_page_writeback(page);
@@ -659,22 +637,21 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
659ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 637ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
660 unsigned long nr_segs, loff_t pos) 638 unsigned long nr_segs, loff_t pos)
661{ 639{
662 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 640 struct file *file = iocb->ki_filp;
663 struct inode * inode = dentry->d_inode; 641 struct inode *inode = file_inode(file);
664 unsigned long written = 0; 642 unsigned long written = 0;
665 ssize_t result; 643 ssize_t result;
666 size_t count = iov_length(iov, nr_segs); 644 size_t count = iov_length(iov, nr_segs);
667 645
668 result = nfs_key_timeout_notify(iocb->ki_filp, inode); 646 result = nfs_key_timeout_notify(file, inode);
669 if (result) 647 if (result)
670 return result; 648 return result;
671 649
672 if (iocb->ki_filp->f_flags & O_DIRECT) 650 if (file->f_flags & O_DIRECT)
673 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); 651 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
674 652
675 dprintk("NFS: write(%s/%s, %lu@%Ld)\n", 653 dprintk("NFS: write(%pD2, %lu@%Ld)\n",
676 dentry->d_parent->d_name.name, dentry->d_name.name, 654 file, (unsigned long) count, (long long) pos);
677 (unsigned long) count, (long long) pos);
678 655
679 result = -EBUSY; 656 result = -EBUSY;
680 if (IS_SWAPFILE(inode)) 657 if (IS_SWAPFILE(inode))
@@ -682,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
682 /* 659 /*
683 * O_APPEND implies that we must revalidate the file length. 660 * O_APPEND implies that we must revalidate the file length.
684 */ 661 */
685 if (iocb->ki_filp->f_flags & O_APPEND) { 662 if (file->f_flags & O_APPEND) {
686 result = nfs_revalidate_file_size(inode, iocb->ki_filp); 663 result = nfs_revalidate_file_size(inode, file);
687 if (result) 664 if (result)
688 goto out; 665 goto out;
689 } 666 }
@@ -697,8 +674,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
697 written = result; 674 written = result;
698 675
699 /* Return error values for O_DSYNC and IS_SYNC() */ 676 /* Return error values for O_DSYNC and IS_SYNC() */
700 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 677 if (result >= 0 && nfs_need_sync_write(file, inode)) {
701 int err = vfs_fsync(iocb->ki_filp, 0); 678 int err = vfs_fsync(file, 0);
702 if (err < 0) 679 if (err < 0)
703 result = err; 680 result = err;
704 } 681 }
@@ -717,14 +694,12 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
717 struct file *filp, loff_t *ppos, 694 struct file *filp, loff_t *ppos,
718 size_t count, unsigned int flags) 695 size_t count, unsigned int flags)
719{ 696{
720 struct dentry *dentry = filp->f_path.dentry; 697 struct inode *inode = file_inode(filp);
721 struct inode *inode = dentry->d_inode;
722 unsigned long written = 0; 698 unsigned long written = 0;
723 ssize_t ret; 699 ssize_t ret;
724 700
725 dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", 701 dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
726 dentry->d_parent->d_name.name, dentry->d_name.name, 702 filp, (unsigned long) count, (unsigned long long) *ppos);
727 (unsigned long) count, (unsigned long long) *ppos);
728 703
729 /* 704 /*
730 * The combination of splice and an O_APPEND destination is disallowed. 705 * The combination of splice and an O_APPEND destination is disallowed.
@@ -883,10 +858,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
883 int ret = -ENOLCK; 858 int ret = -ENOLCK;
884 int is_local = 0; 859 int is_local = 0;
885 860
886 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", 861 dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
887 filp->f_path.dentry->d_parent->d_name.name, 862 filp, fl->fl_type, fl->fl_flags,
888 filp->f_path.dentry->d_name.name,
889 fl->fl_type, fl->fl_flags,
890 (long long)fl->fl_start, (long long)fl->fl_end); 863 (long long)fl->fl_start, (long long)fl->fl_end);
891 864
892 nfs_inc_stats(inode, NFSIOS_VFSLOCK); 865 nfs_inc_stats(inode, NFSIOS_VFSLOCK);
@@ -923,10 +896,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
923 struct inode *inode = filp->f_mapping->host; 896 struct inode *inode = filp->f_mapping->host;
924 int is_local = 0; 897 int is_local = 0;
925 898
926 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", 899 dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
927 filp->f_path.dentry->d_parent->d_name.name, 900 filp, fl->fl_type, fl->fl_flags);
928 filp->f_path.dentry->d_name.name,
929 fl->fl_type, fl->fl_flags);
930 901
931 if (!(fl->fl_flags & FL_FLOCK)) 902 if (!(fl->fl_flags & FL_FLOCK))
932 return -ENOLCK; 903 return -ENOLCK;
@@ -960,9 +931,7 @@ EXPORT_SYMBOL_GPL(nfs_flock);
960 */ 931 */
961int nfs_setlease(struct file *file, long arg, struct file_lock **fl) 932int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
962{ 933{
963 dprintk("NFS: setlease(%s/%s, arg=%ld)\n", 934 dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
964 file->f_path.dentry->d_parent->d_name.name,
965 file->f_path.dentry->d_name.name, arg);
966 return -EINVAL; 935 return -EINVAL;
967} 936}
968EXPORT_SYMBOL_GPL(nfs_setlease); 937EXPORT_SYMBOL_GPL(nfs_setlease);
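
The fs/nfs/file.c hunks above all make the same change: open-coded dentry->d_parent->d_name.name / d_name.name pairs become the new %pd/%pD vsprintf specifiers, and file->f_path.dentry->d_inode chains become file_inode(). A minimal sketch of the resulting idiom follows; example_flush() and its pr_debug() message are made up for illustration.

#include <linux/fs.h>
#include <linux/printk.h>

/* Illustrative only: %pD2 prints the last two path components of a
 * struct file (what the old parent-name/name pair printed by hand),
 * and file_inode() replaces the f_path.dentry->d_inode chain. */
static int example_flush(struct file *file, fl_owner_t id)
{
        struct inode *inode = file_inode(file);

        pr_debug("flush(%pD2) ino=%lu\n", file, inode->i_ino);
        return 0;
}

The %pd form takes a dentry instead of a file, which is why the NFS client paths that only have a dentry (create, mkdir, symlink) use %pd/%pd2 in the hunks below.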
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 348b535cd786..b5a0afc3ee10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -253,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
253 253
254 dprintk("--> nfs_do_submount()\n"); 254 dprintk("--> nfs_do_submount()\n");
255 255
256 dprintk("%s: submounting on %s/%s\n", __func__, 256 dprintk("%s: submounting on %pd2\n", __func__,
257 dentry->d_parent->d_name.name, 257 dentry);
258 dentry->d_name.name);
259 if (page == NULL) 258 if (page == NULL)
260 goto out; 259 goto out;
261 devname = nfs_devname(dentry, page, PAGE_SIZE); 260 devname = nfs_devname(dentry, page, PAGE_SIZE);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 90cb10d7b693..01b6f6a49d16 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
321 umode_t mode = sattr->ia_mode; 321 umode_t mode = sattr->ia_mode;
322 int status = -ENOMEM; 322 int status = -ENOMEM;
323 323
324 dprintk("NFS call create %s\n", dentry->d_name.name); 324 dprintk("NFS call create %pd\n", dentry);
325 325
326 data = nfs3_alloc_createdata(); 326 data = nfs3_alloc_createdata();
327 if (data == NULL) 327 if (data == NULL)
@@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
548 if (len > NFS3_MAXPATHLEN) 548 if (len > NFS3_MAXPATHLEN)
549 return -ENAMETOOLONG; 549 return -ENAMETOOLONG;
550 550
551 dprintk("NFS call symlink %s\n", dentry->d_name.name); 551 dprintk("NFS call symlink %pd\n", dentry);
552 552
553 data = nfs3_alloc_createdata(); 553 data = nfs3_alloc_createdata();
554 if (data == NULL) 554 if (data == NULL)
@@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
576 umode_t mode = sattr->ia_mode; 576 umode_t mode = sattr->ia_mode;
577 int status = -ENOMEM; 577 int status = -ENOMEM;
578 578
579 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 579 dprintk("NFS call mkdir %pd\n", dentry);
580 580
581 sattr->ia_mode &= ~current_umask(); 581 sattr->ia_mode &= ~current_umask();
582 582
@@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
695 umode_t mode = sattr->ia_mode; 695 umode_t mode = sattr->ia_mode;
696 int status = -ENOMEM; 696 int status = -ENOMEM;
697 697
698 dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, 698 dprintk("NFS call mknod %pd %u:%u\n", dentry,
699 MAJOR(rdev), MINOR(rdev)); 699 MAJOR(rdev), MINOR(rdev));
700 700
701 sattr->ia_mode &= ~current_umask(); 701 sattr->ia_mode &= ~current_umask();
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 1f01b55692ee..8de3407e0360 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -31,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
31 * -EOPENSTALE. The VFS will retry the lookup/create/open. 31 * -EOPENSTALE. The VFS will retry the lookup/create/open.
32 */ 32 */
33 33
34 dprintk("NFS: open file(%s/%s)\n", 34 dprintk("NFS: open file(%pd2)\n", dentry);
35 dentry->d_parent->d_name.name,
36 dentry->d_name.name);
37 35
38 if ((openflags & O_ACCMODE) == 3) 36 if ((openflags & O_ACCMODE) == 3)
39 openflags--; 37 openflags--;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index c08cbf40c59e..4e7f05d3e9db 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -292,8 +292,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
292 if (locations == NULL || locations->nlocations <= 0) 292 if (locations == NULL || locations->nlocations <= 0)
293 goto out; 293 goto out;
294 294
295 dprintk("%s: referral at %s/%s\n", __func__, 295 dprintk("%s: referral at %pd2\n", __func__, dentry);
296 dentry->d_parent->d_name.name, dentry->d_name.name);
297 296
298 page = (char *) __get_free_page(GFP_USER); 297 page = (char *) __get_free_page(GFP_USER);
299 if (!page) 298 if (!page)
@@ -357,8 +356,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
357 mnt = ERR_PTR(-ENOENT); 356 mnt = ERR_PTR(-ENOENT);
358 357
359 parent = dget_parent(dentry); 358 parent = dget_parent(dentry);
360 dprintk("%s: getting locations for %s/%s\n", 359 dprintk("%s: getting locations for %pd2\n",
361 __func__, parent->d_name.name, dentry->d_name.name); 360 __func__, dentry);
362 361
363 err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); 362 err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
364 dput(parent); 363 dput(parent);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5ab33c0792df..659990c0109e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3771,9 +3771,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
3771 }; 3771 };
3772 int status; 3772 int status;
3773 3773
3774 dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, 3774 dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
3775 dentry->d_parent->d_name.name, 3775 dentry,
3776 dentry->d_name.name,
3777 (unsigned long long)cookie); 3776 (unsigned long long)cookie);
3778 nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); 3777 nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
3779 res.pgbase = args.pgbase; 3778 res.pgbase = args.pgbase;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index a8f57c728df5..fddbba2d9eff 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
235 }; 235 };
236 int status = -ENOMEM; 236 int status = -ENOMEM;
237 237
238 dprintk("NFS call create %s\n", dentry->d_name.name); 238 dprintk("NFS call create %pd\n", dentry);
239 data = nfs_alloc_createdata(dir, dentry, sattr); 239 data = nfs_alloc_createdata(dir, dentry, sattr);
240 if (data == NULL) 240 if (data == NULL)
241 goto out; 241 goto out;
@@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
265 umode_t mode; 265 umode_t mode;
266 int status = -ENOMEM; 266 int status = -ENOMEM;
267 267
268 dprintk("NFS call mknod %s\n", dentry->d_name.name); 268 dprintk("NFS call mknod %pd\n", dentry);
269 269
270 mode = sattr->ia_mode; 270 mode = sattr->ia_mode;
271 if (S_ISFIFO(mode)) { 271 if (S_ISFIFO(mode)) {
@@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
423 }; 423 };
424 int status = -ENAMETOOLONG; 424 int status = -ENAMETOOLONG;
425 425
426 dprintk("NFS call symlink %s\n", dentry->d_name.name); 426 dprintk("NFS call symlink %pd\n", dentry);
427 427
428 if (len > NFS2_MAXPATHLEN) 428 if (len > NFS2_MAXPATHLEN)
429 goto out; 429 goto out;
@@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
462 }; 462 };
463 int status = -ENOMEM; 463 int status = -ENOMEM;
464 464
465 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 465 dprintk("NFS call mkdir %pd\n", dentry);
466 data = nfs_alloc_createdata(dir, dentry, sattr); 466 data = nfs_alloc_createdata(dir, dentry, sattr);
467 if (data == NULL) 467 if (data == NULL)
468 goto out; 468 goto out;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 0c29b1bb3936..11d78944de79 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -495,9 +495,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
495 struct rpc_task *task; 495 struct rpc_task *task;
496 int error = -EBUSY; 496 int error = -EBUSY;
497 497
498 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", 498 dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
499 dentry->d_parent->d_name.name, dentry->d_name.name, 499 dentry, d_count(dentry));
500 d_count(dentry));
501 nfs_inc_stats(dir, NFSIOS_SILLYRENAME); 500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
502 501
503 /* 502 /*
@@ -521,8 +520,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
521 SILLYNAME_FILEID_LEN, fileid, 520 SILLYNAME_FILEID_LEN, fileid,
522 SILLYNAME_COUNTER_LEN, sillycounter); 521 SILLYNAME_COUNTER_LEN, sillycounter);
523 522
524 dfprintk(VFS, "NFS: trying to rename %s to %s\n", 523 dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
525 dentry->d_name.name, silly); 524 dentry, silly);
526 525
527 sdentry = lookup_one_len(silly, dentry->d_parent, slen); 526 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
528 /* 527 /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ac1dc331ba31..c1d548211c31 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -954,10 +954,8 @@ int nfs_updatepage(struct file *file, struct page *page,
954 954
955 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); 955 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
956 956
957 dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", 957 dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
958 file->f_path.dentry->d_parent->d_name.name, 958 file, count, (long long)(page_file_offset(page) + offset));
959 file->f_path.dentry->d_name.name, count,
960 (long long)(page_file_offset(page) + offset));
961 959
962 if (nfs_can_extend_write(file, page, inode)) { 960 if (nfs_can_extend_write(file, page, inode)) {
963 count = max(count + offset, nfs_page_length(page)); 961 count = max(count + offset, nfs_page_length(page));
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e0a65a9e37e9..9c271f42604a 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
385 385
386 status = vfs_rmdir(parent->d_inode, child); 386 status = vfs_rmdir(parent->d_inode, child);
387 if (status) 387 if (status)
388 printk("failed to remove client recovery directory %s\n", 388 printk("failed to remove client recovery directory %pd\n",
389 child->d_name.name); 389 child);
390 /* Keep trying, success or failure: */ 390 /* Keep trying, success or failure: */
391 return 0; 391 return 0;
392} 392}
@@ -410,15 +410,15 @@ out:
410 nfs4_release_reclaim(nn); 410 nfs4_release_reclaim(nn);
411 if (status) 411 if (status)
412 printk("nfsd4: failed to purge old clients from recovery" 412 printk("nfsd4: failed to purge old clients from recovery"
413 " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); 413 " directory %pD\n", nn->rec_file);
414} 414}
415 415
416static int 416static int
417load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) 417load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
418{ 418{
419 if (child->d_name.len != HEXDIR_LEN - 1) { 419 if (child->d_name.len != HEXDIR_LEN - 1) {
420 printk("nfsd4: illegal name %s in recovery directory\n", 420 printk("nfsd4: illegal name %pd in recovery directory\n",
421 child->d_name.name); 421 child);
422 /* Keep trying; maybe the others are OK: */ 422 /* Keep trying; maybe the others are OK: */
423 return 0; 423 return 0;
424 } 424 }
@@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) {
437 status = nfsd4_list_rec_dir(load_recdir, nn); 437 status = nfsd4_list_rec_dir(load_recdir, nn);
438 if (status) 438 if (status)
439 printk("nfsd4: failed loading clients from recovery" 439 printk("nfsd4: failed loading clients from recovery"
440 " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); 440 " directory %pD\n", nn->rec_file);
441 return status; 441 return status;
442} 442}
443 443
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0874998a49cd..f36a30a9f2d1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
3008 return NULL; 3008 return NULL;
3009 locks_init_lock(fl); 3009 locks_init_lock(fl);
3010 fl->fl_lmops = &nfsd_lease_mng_ops; 3010 fl->fl_lmops = &nfsd_lease_mng_ops;
3011 fl->fl_flags = FL_LEASE; 3011 fl->fl_flags = FL_DELEG;
3012 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 3012 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
3013 fl->fl_end = OFFSET_MAX; 3013 fl->fl_end = OFFSET_MAX;
3014 fl->fl_owner = (fl_owner_t)(dp->dl_file); 3014 fl->fl_owner = (fl_owner_t)(dp->dl_file);
@@ -3843,9 +3843,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3843 struct nfs4_ol_stateid *stp; 3843 struct nfs4_ol_stateid *stp;
3844 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 3844 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3845 3845
3846 dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", 3846 dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
3847 (int)cstate->current_fh.fh_dentry->d_name.len, 3847 cstate->current_fh.fh_dentry);
3848 cstate->current_fh.fh_dentry->d_name.name);
3849 3848
3850 status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); 3849 status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
3851 if (status) 3850 if (status)
@@ -3922,9 +3921,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
3922 struct nfs4_ol_stateid *stp; 3921 struct nfs4_ol_stateid *stp;
3923 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 3922 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3924 3923
3925 dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 3924 dprintk("NFSD: nfsd4_open_downgrade on file %pd\n",
3926 (int)cstate->current_fh.fh_dentry->d_name.len, 3925 cstate->current_fh.fh_dentry);
3927 cstate->current_fh.fh_dentry->d_name.name);
3928 3926
3929 /* We don't yet support WANT bits: */ 3927 /* We don't yet support WANT bits: */
3930 if (od->od_deleg_want) 3928 if (od->od_deleg_want)
@@ -3980,9 +3978,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3980 struct net *net = SVC_NET(rqstp); 3978 struct net *net = SVC_NET(rqstp);
3981 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 3979 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
3982 3980
3983 dprintk("NFSD: nfsd4_close on file %.*s\n", 3981 dprintk("NFSD: nfsd4_close on file %pd\n",
3984 (int)cstate->current_fh.fh_dentry->d_name.len, 3982 cstate->current_fh.fh_dentry);
3985 cstate->current_fh.fh_dentry->d_name.name);
3986 3983
3987 nfs4_lock_state(); 3984 nfs4_lock_state();
3988 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, 3985 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 814afaa4458a..3d0e15ae6f72 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
47 tdentry = parent; 47 tdentry = parent;
48 } 48 }
49 if (tdentry != exp->ex_path.dentry) 49 if (tdentry != exp->ex_path.dentry)
50 dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); 50 dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
51 rv = (tdentry == exp->ex_path.dentry); 51 rv = (tdentry == exp->ex_path.dentry);
52 dput(tdentry); 52 dput(tdentry);
53 return rv; 53 return rv;
@@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
253 253
254 if (S_ISDIR(dentry->d_inode->i_mode) && 254 if (S_ISDIR(dentry->d_inode->i_mode) &&
255 (dentry->d_flags & DCACHE_DISCONNECTED)) { 255 (dentry->d_flags & DCACHE_DISCONNECTED)) {
256 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 256 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
257 dentry->d_parent->d_name.name, dentry->d_name.name); 257 dentry);
258 } 258 }
259 259
260 fhp->fh_dentry = dentry; 260 fhp->fh_dentry = dentry;
@@ -361,10 +361,9 @@ skip_pseudoflavor_check:
361 error = nfsd_permission(rqstp, exp, dentry, access); 361 error = nfsd_permission(rqstp, exp, dentry, access);
362 362
363 if (error) { 363 if (error) {
364 dprintk("fh_verify: %s/%s permission failure, " 364 dprintk("fh_verify: %pd2 permission failure, "
365 "acc=%x, error=%d\n", 365 "acc=%x, error=%d\n",
366 dentry->d_parent->d_name.name, 366 dentry,
367 dentry->d_name.name,
368 access, ntohl(error)); 367 access, ntohl(error));
369 } 368 }
370out: 369out:
@@ -514,14 +513,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
514 */ 513 */
515 514
516 struct inode * inode = dentry->d_inode; 515 struct inode * inode = dentry->d_inode;
517 struct dentry *parent = dentry->d_parent;
518 __u32 *datap; 516 __u32 *datap;
519 dev_t ex_dev = exp_sb(exp)->s_dev; 517 dev_t ex_dev = exp_sb(exp)->s_dev;
520 518
521 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", 519 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
522 MAJOR(ex_dev), MINOR(ex_dev), 520 MAJOR(ex_dev), MINOR(ex_dev),
523 (long) exp->ex_path.dentry->d_inode->i_ino, 521 (long) exp->ex_path.dentry->d_inode->i_ino,
524 parent->d_name.name, dentry->d_name.name, 522 dentry,
525 (inode ? inode->i_ino : 0)); 523 (inode ? inode->i_ino : 0));
526 524
527 /* Choose filehandle version and fsid type based on 525 /* Choose filehandle version and fsid type based on
@@ -534,13 +532,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
534 fh_put(ref_fh); 532 fh_put(ref_fh);
535 533
536 if (fhp->fh_locked || fhp->fh_dentry) { 534 if (fhp->fh_locked || fhp->fh_dentry) {
537 printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", 535 printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
538 parent->d_name.name, dentry->d_name.name); 536 dentry);
539 } 537 }
540 if (fhp->fh_maxsize < NFS_FHSIZE) 538 if (fhp->fh_maxsize < NFS_FHSIZE)
541 printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", 539 printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
542 fhp->fh_maxsize, 540 fhp->fh_maxsize,
543 parent->d_name.name, dentry->d_name.name); 541 dentry);
544 542
545 fhp->fh_dentry = dget(dentry); /* our internal copy */ 543 fhp->fh_dentry = dget(dentry); /* our internal copy */
546 fhp->fh_export = exp; 544 fhp->fh_export = exp;
@@ -613,8 +611,8 @@ out_bad:
613 printk(KERN_ERR "fh_update: fh not verified!\n"); 611 printk(KERN_ERR "fh_update: fh not verified!\n");
614 goto out; 612 goto out;
615out_negative: 613out_negative:
616 printk(KERN_ERR "fh_update: %s/%s still negative!\n", 614 printk(KERN_ERR "fh_update: %pd2 still negative!\n",
617 dentry->d_parent->d_name.name, dentry->d_name.name); 615 dentry);
618 goto out; 616 goto out;
619} 617}
620 618
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index e5e6707ba687..4775bc4896c8 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
173 BUG_ON(!dentry); 173 BUG_ON(!dentry);
174 174
175 if (fhp->fh_locked) { 175 if (fhp->fh_locked) {
176 printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", 176 printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
177 dentry->d_parent->d_name.name, dentry->d_name.name); 177 dentry);
178 return; 178 return;
179 } 179 }
180 180
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c827acb0e943..94b5f5d2bfed 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
427 goto out_nfserr; 427 goto out_nfserr;
428 fh_lock(fhp); 428 fh_lock(fhp);
429 429
430 host_err = notify_change(dentry, iap); 430 host_err = notify_change(dentry, iap, NULL);
431 err = nfserrno(host_err); 431 err = nfserrno(host_err);
432 fh_unlock(fhp); 432 fh_unlock(fhp);
433 } 433 }
@@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry)
988 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 988 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
989 989
990 mutex_lock(&dentry->d_inode->i_mutex); 990 mutex_lock(&dentry->d_inode->i_mutex);
991 notify_change(dentry, &ia); 991 /*
992 * Note we call this on write, so notify_change will not
993 * encounter any conflicting delegations:
994 */
995 notify_change(dentry, &ia, NULL);
992 mutex_unlock(&dentry->d_inode->i_mutex); 996 mutex_unlock(&dentry->d_inode->i_mutex);
993} 997}
994 998
@@ -1317,9 +1321,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1317 if (!fhp->fh_locked) { 1321 if (!fhp->fh_locked) {
1318 /* not actually possible */ 1322 /* not actually possible */
1319 printk(KERN_ERR 1323 printk(KERN_ERR
1320 "nfsd_create: parent %s/%s not locked!\n", 1324 "nfsd_create: parent %pd2 not locked!\n",
1321 dentry->d_parent->d_name.name, 1325 dentry);
1322 dentry->d_name.name);
1323 err = nfserr_io; 1326 err = nfserr_io;
1324 goto out; 1327 goto out;
1325 } 1328 }
@@ -1329,8 +1332,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1329 */ 1332 */
1330 err = nfserr_exist; 1333 err = nfserr_exist;
1331 if (dchild->d_inode) { 1334 if (dchild->d_inode) {
1332 dprintk("nfsd_create: dentry %s/%s not negative!\n", 1335 dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
1333 dentry->d_name.name, dchild->d_name.name); 1336 dentry, dchild);
1334 goto out; 1337 goto out;
1335 } 1338 }
1336 1339
@@ -1737,7 +1740,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1737 err = nfserrno(host_err); 1740 err = nfserrno(host_err);
1738 goto out_dput; 1741 goto out_dput;
1739 } 1742 }
1740 host_err = vfs_link(dold, dirp, dnew); 1743 host_err = vfs_link(dold, dirp, dnew, NULL);
1741 if (!host_err) { 1744 if (!host_err) {
1742 err = nfserrno(commit_metadata(ffhp)); 1745 err = nfserrno(commit_metadata(ffhp));
1743 if (!err) 1746 if (!err)
@@ -1838,7 +1841,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1838 if (host_err) 1841 if (host_err)
1839 goto out_dput_new; 1842 goto out_dput_new;
1840 } 1843 }
1841 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1844 host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
1842 if (!host_err) { 1845 if (!host_err) {
1843 host_err = commit_metadata(tfhp); 1846 host_err = commit_metadata(tfhp);
1844 if (!host_err) 1847 if (!host_err)
@@ -1911,7 +1914,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1911 if (host_err) 1914 if (host_err)
1912 goto out_put; 1915 goto out_put;
1913 if (type != S_IFDIR) 1916 if (type != S_IFDIR)
1914 host_err = vfs_unlink(dirp, rdentry); 1917 host_err = vfs_unlink(dirp, rdentry, NULL);
1915 else 1918 else
1916 host_err = vfs_rmdir(dirp, rdentry); 1919 host_err = vfs_rmdir(dirp, rdentry);
1917 if (!host_err) 1920 if (!host_err)
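
The nfsd hunks also pick up the new final argument that this series adds to notify_change(), vfs_link(), vfs_rename() and vfs_unlink(); nfsd passes NULL because it resolves NFSv4 delegation conflicts through its own lease machinery before reaching these calls. A hedged sketch of such an opt-out caller; example_unlink_locked() is hypothetical and assumes the parent's i_mutex is already held.

#include <linux/fs.h>

/* Illustrative only: passing NULL for the new delegated_inode
 * argument tells vfs_unlink() not to report a conflicting
 * delegation back to the caller. */
static int example_unlink_locked(struct inode *dir, struct dentry *victim)
{
        if (S_ISDIR(victim->d_inode->i_mode))
                return vfs_rmdir(dir, victim);
        return vfs_unlink(dir, victim, NULL);
}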
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2778b0255dc6..ffb9b3675736 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -55,7 +55,7 @@
55 * 55 *
56 * Return 1 if the attributes match and 0 if not. 56 * Return 1 if the attributes match and 0 if not.
57 * 57 *
58 * NOTE: This function runs with the inode->i_lock spin lock held so it is not 58 * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
59 * allowed to sleep. 59 * allowed to sleep.
60 */ 60 */
61int ntfs_test_inode(struct inode *vi, ntfs_attr *na) 61int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f87f9bd1edff..f29a90fde619 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -386,19 +386,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
386 u32 generation = 0; 386 u32 generation = 0;
387 387
388 status = -EINVAL; 388 status = -EINVAL;
389 if (inode == NULL || inode->i_sb == NULL) {
390 mlog(ML_ERROR, "bad inode\n");
391 return status;
392 }
393 sb = inode->i_sb; 389 sb = inode->i_sb;
394 osb = OCFS2_SB(sb); 390 osb = OCFS2_SB(sb);
395 391
396 if (!args) {
397 mlog(ML_ERROR, "bad inode args\n");
398 make_bad_inode(inode);
399 return status;
400 }
401
402 /* 392 /*
403 * To improve performance of cold-cache inode stats, we take 393 * To improve performance of cold-cache inode stats, we take
404 * the cluster lock here if possible. 394 * the cluster lock here if possible.
diff --git a/fs/open.c b/fs/open.c
index d420331ca32a..4b3e1edf2fe4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
57 newattrs.ia_valid |= ret | ATTR_FORCE; 57 newattrs.ia_valid |= ret | ATTR_FORCE;
58 58
59 mutex_lock(&dentry->d_inode->i_mutex); 59 mutex_lock(&dentry->d_inode->i_mutex);
60 ret = notify_change(dentry, &newattrs); 60 /* Note any delegations or leases have already been broken: */
61 ret = notify_change(dentry, &newattrs, NULL);
61 mutex_unlock(&dentry->d_inode->i_mutex); 62 mutex_unlock(&dentry->d_inode->i_mutex);
62 return ret; 63 return ret;
63} 64}
@@ -464,21 +465,28 @@ out:
464static int chmod_common(struct path *path, umode_t mode) 465static int chmod_common(struct path *path, umode_t mode)
465{ 466{
466 struct inode *inode = path->dentry->d_inode; 467 struct inode *inode = path->dentry->d_inode;
468 struct inode *delegated_inode = NULL;
467 struct iattr newattrs; 469 struct iattr newattrs;
468 int error; 470 int error;
469 471
470 error = mnt_want_write(path->mnt); 472 error = mnt_want_write(path->mnt);
471 if (error) 473 if (error)
472 return error; 474 return error;
475retry_deleg:
473 mutex_lock(&inode->i_mutex); 476 mutex_lock(&inode->i_mutex);
474 error = security_path_chmod(path, mode); 477 error = security_path_chmod(path, mode);
475 if (error) 478 if (error)
476 goto out_unlock; 479 goto out_unlock;
477 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 480 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
478 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 481 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
479 error = notify_change(path->dentry, &newattrs); 482 error = notify_change(path->dentry, &newattrs, &delegated_inode);
480out_unlock: 483out_unlock:
481 mutex_unlock(&inode->i_mutex); 484 mutex_unlock(&inode->i_mutex);
485 if (delegated_inode) {
486 error = break_deleg_wait(&delegated_inode);
487 if (!error)
488 goto retry_deleg;
489 }
482 mnt_drop_write(path->mnt); 490 mnt_drop_write(path->mnt);
483 return error; 491 return error;
484} 492}
@@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
522static int chown_common(struct path *path, uid_t user, gid_t group) 530static int chown_common(struct path *path, uid_t user, gid_t group)
523{ 531{
524 struct inode *inode = path->dentry->d_inode; 532 struct inode *inode = path->dentry->d_inode;
533 struct inode *delegated_inode = NULL;
525 int error; 534 int error;
526 struct iattr newattrs; 535 struct iattr newattrs;
527 kuid_t uid; 536 kuid_t uid;
@@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
546 if (!S_ISDIR(inode->i_mode)) 555 if (!S_ISDIR(inode->i_mode))
547 newattrs.ia_valid |= 556 newattrs.ia_valid |=
548 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 557 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
558retry_deleg:
549 mutex_lock(&inode->i_mutex); 559 mutex_lock(&inode->i_mutex);
550 error = security_path_chown(path, uid, gid); 560 error = security_path_chown(path, uid, gid);
551 if (!error) 561 if (!error)
552 error = notify_change(path->dentry, &newattrs); 562 error = notify_change(path->dentry, &newattrs, &delegated_inode);
553 mutex_unlock(&inode->i_mutex); 563 mutex_unlock(&inode->i_mutex);
554 564 if (delegated_inode) {
565 error = break_deleg_wait(&delegated_inode);
566 if (!error)
567 goto retry_deleg;
568 }
555 return error; 569 return error;
556} 570}
557 571
@@ -685,7 +699,6 @@ static int do_dentry_open(struct file *f,
685 } 699 }
686 700
687 f->f_mapping = inode->i_mapping; 701 f->f_mapping = inode->i_mapping;
688 file_sb_list_add(f, inode->i_sb);
689 702
690 if (unlikely(f->f_mode & FMODE_PATH)) { 703 if (unlikely(f->f_mode & FMODE_PATH)) {
691 f->f_op = &empty_fops; 704 f->f_op = &empty_fops;
@@ -693,6 +706,10 @@ static int do_dentry_open(struct file *f,
693 } 706 }
694 707
695 f->f_op = fops_get(inode->i_fop); 708 f->f_op = fops_get(inode->i_fop);
709 if (unlikely(WARN_ON(!f->f_op))) {
710 error = -ENODEV;
711 goto cleanup_all;
712 }
696 713
697 error = security_file_open(f, cred); 714 error = security_file_open(f, cred);
698 if (error) 715 if (error)
@@ -702,7 +719,7 @@ static int do_dentry_open(struct file *f,
702 if (error) 719 if (error)
703 goto cleanup_all; 720 goto cleanup_all;
704 721
705 if (!open && f->f_op) 722 if (!open)
706 open = f->f_op->open; 723 open = f->f_op->open;
707 if (open) { 724 if (open) {
708 error = open(inode, f); 725 error = open(inode, f);
@@ -720,7 +737,6 @@ static int do_dentry_open(struct file *f,
720 737
721cleanup_all: 738cleanup_all:
722 fops_put(f->f_op); 739 fops_put(f->f_op);
723 file_sb_list_del(f);
724 if (f->f_mode & FMODE_WRITE) { 740 if (f->f_mode & FMODE_WRITE) {
725 put_write_access(inode); 741 put_write_access(inode);
726 if (!special_file(inode->i_mode)) { 742 if (!special_file(inode->i_mode)) {
@@ -1023,7 +1039,7 @@ int filp_close(struct file *filp, fl_owner_t id)
1023 return 0; 1039 return 0;
1024 } 1040 }
1025 1041
1026 if (filp->f_op && filp->f_op->flush) 1042 if (filp->f_op->flush)
1027 retval = filp->f_op->flush(filp, id); 1043 retval = filp->f_op->flush(filp, id);
1028 1044
1029 if (likely(!(filp->f_mode & FMODE_PATH))) { 1045 if (likely(!(filp->f_mode & FMODE_PATH))) {
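
chmod_common() and chown_common() above show the pattern intended for ordinary callers of the new argument: pass a NULL-initialised delegated_inode pointer, and if notify_change() reports a conflicting delegation, drop i_mutex, wait for the delegation to be broken, and retry. A condensed sketch of that loop; example_setattr_retry() is an illustrative name, not a kernel function.

#include <linux/fs.h>
#include <linux/mutex.h>

/* Sketch of the retry_deleg pattern used by chmod_common() and
 * chown_common() in the hunks above. */
static int example_setattr_retry(struct dentry *dentry, struct iattr *attr)
{
        struct inode *inode = dentry->d_inode;
        struct inode *delegated_inode = NULL;
        int error;

retry_deleg:
        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, attr, &delegated_inode);
        mutex_unlock(&inode->i_mutex);
        if (delegated_inode) {
                /* Sleeps until the delegation has been recalled. */
                error = break_deleg_wait(&delegated_inode);
                if (!error)
                        goto retry_deleg;
        }
        return error;
}

The important detail is that the wait happens outside i_mutex, so the delegation holder can come back in and return its delegation without deadlocking against the caller.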
diff --git a/fs/pnode.c b/fs/pnode.c
index 9af0df15256e..c7221bb19801 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
264 prev_src_mnt = child; 264 prev_src_mnt = child;
265 } 265 }
266out: 266out:
267 br_write_lock(&vfsmount_lock); 267 lock_mount_hash();
268 while (!list_empty(&tmp_list)) { 268 while (!list_empty(&tmp_list)) {
269 child = list_first_entry(&tmp_list, struct mount, mnt_hash); 269 child = list_first_entry(&tmp_list, struct mount, mnt_hash);
270 umount_tree(child, 0); 270 umount_tree(child, 0);
271 } 271 }
272 br_write_unlock(&vfsmount_lock); 272 unlock_mount_hash();
273 return ret; 273 return ret;
274} 274}
275 275
@@ -278,8 +278,7 @@ out:
278 */ 278 */
279static inline int do_refcount_check(struct mount *mnt, int count) 279static inline int do_refcount_check(struct mount *mnt, int count)
280{ 280{
281 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; 281 return mnt_get_count(mnt) > count;
282 return (mycount > count);
283} 282}
284 283
285/* 284/*
@@ -311,7 +310,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
311 310
312 for (m = propagation_next(parent, parent); m; 311 for (m = propagation_next(parent, parent); m;
313 m = propagation_next(m, parent)) { 312 m = propagation_next(m, parent)) {
314 child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0); 313 child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
315 if (child && list_empty(&child->mnt_mounts) && 314 if (child && list_empty(&child->mnt_mounts) &&
316 (ret = do_refcount_check(child, 1))) 315 (ret = do_refcount_check(child, 1)))
317 break; 316 break;
@@ -333,8 +332,8 @@ static void __propagate_umount(struct mount *mnt)
333 for (m = propagation_next(parent, parent); m; 332 for (m = propagation_next(parent, parent); m;
334 m = propagation_next(m, parent)) { 333 m = propagation_next(m, parent)) {
335 334
336 struct mount *child = __lookup_mnt(&m->mnt, 335 struct mount *child = __lookup_mnt_last(&m->mnt,
337 mnt->mnt_mountpoint, 0); 336 mnt->mnt_mountpoint);
338 /* 337 /*
339 * umount the child only if the child has no 338 * umount the child only if the child has no
340 * other children 339 * other children
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6b6a993b5c25..ffeb202ec942 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
36 return NULL; 36 return NULL;
37} 37}
38 38
39static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
40 void *cookie)
41{
42 char *s = nd_get_link(nd);
43 if (!IS_ERR(s))
44 kfree(s);
45}
46
47static const struct inode_operations proc_self_inode_operations = { 39static const struct inode_operations proc_self_inode_operations = {
48 .readlink = proc_self_readlink, 40 .readlink = proc_self_readlink,
49 .follow_link = proc_self_follow_link, 41 .follow_link = proc_self_follow_link,
50 .put_link = proc_self_put_link, 42 .put_link = kfree_put_link,
51}; 43};
52 44
53static unsigned self_inum; 45static unsigned self_inum;
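
proc_self_put_link() could be deleted because the series adds a generic kfree_put_link() helper for symlinks whose ->follow_link() hands the VFS a kmalloc'ed string. A sketch of a filesystem using it; everything named example_* is hypothetical, and the signatures are the 3.13-era follow_link/put_link ones.

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical in-memory symlink: hand a kmalloc'ed string to the
 * VFS via nd_set_link() and let the generic kfree_put_link() free
 * it, instead of a one-off ->put_link() like the removed
 * proc_self_put_link(). */
static void *example_follow_link(struct dentry *dentry, struct nameidata *nd)
{
        char *target = kstrdup("example/target", GFP_KERNEL);

        nd_set_link(nd, target ? target : ERR_PTR(-ENOMEM));
        return NULL;
}

static const struct inode_operations example_link_iops = {
        .readlink       = generic_readlink,
        .follow_link    = example_follow_link,
        .put_link       = kfree_put_link,
};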
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5fe34c355e85..439406e081af 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
20 struct proc_mounts *p = proc_mounts(file->private_data); 20 struct proc_mounts *p = proc_mounts(file->private_data);
21 struct mnt_namespace *ns = p->ns; 21 struct mnt_namespace *ns = p->ns;
22 unsigned res = POLLIN | POLLRDNORM; 22 unsigned res = POLLIN | POLLRDNORM;
23 int event;
23 24
24 poll_wait(file, &p->ns->poll, wait); 25 poll_wait(file, &p->ns->poll, wait);
25 26
26 br_read_lock(&vfsmount_lock); 27 event = ACCESS_ONCE(ns->event);
27 if (p->m.poll_event != ns->event) { 28 if (p->m.poll_event != event) {
28 p->m.poll_event = ns->event; 29 p->m.poll_event = event;
29 res |= POLLERR | POLLPRI; 30 res |= POLLERR | POLLPRI;
30 } 31 }
31 br_read_unlock(&vfsmount_lock);
32 32
33 return res; 33 return res;
34} 34}
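
mounts_poll() now samples ns->event once with ACCESS_ONCE() instead of taking the global vfsmount_lock around a single integer read. A small sketch of the same lockless pattern; struct example_watch and example_poll_event() are invented for illustration.

#include <linux/compiler.h>
#include <linux/poll.h>

/* Illustrative only: a watched event counter bumped by writers. */
struct example_watch {
        int event;
};

/* Sample the counter exactly once so the comparison and the cached
 * update see the same value; no lock is needed for one int read. */
static unsigned int example_poll_event(struct example_watch *w, int *cached)
{
        int event = ACCESS_ONCE(w->event);

        if (*cached != event) {
                *cached = event;
                return POLLERR | POLLPRI;
        }
        return 0;
}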
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index d024505ba007..e62c8183777a 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -60,10 +60,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
60 struct buffer_head *bh; 60 struct buffer_head *bh;
61 61
62 *res_dir = NULL; 62 *res_dir = NULL;
63 if (!dir->i_sb) {
64 printk(KERN_WARNING "qnx4: no superblock on dir.\n");
65 return NULL;
66 }
67 bh = NULL; 63 bh = NULL;
68 block = offset = blkofs = 0; 64 block = offset = blkofs = 0;
69 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { 65 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index dea86e8967ee..2b363e23f36e 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -117,6 +117,7 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
117 117
118static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) 118static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
119{ 119{
120 memset(dst, 0, sizeof(*dst));
120 dst->dqb_bhardlimit = src->d_blk_hardlimit; 121 dst->dqb_bhardlimit = src->d_blk_hardlimit;
121 dst->dqb_bsoftlimit = src->d_blk_softlimit; 122 dst->dqb_bsoftlimit = src->d_blk_softlimit;
122 dst->dqb_curspace = src->d_bcount; 123 dst->dqb_curspace = src->d_bcount;
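
The one-line quota fix zeroes the output structure before filling it, so fields the function does not set (and any padding) cannot leak kernel stack contents when the caller later copies the struct to userspace. A generic sketch of that hardening; the example_* names are illustrative.

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* Illustrative only: clear the whole struct first, then set the
 * fields you mean to report, then copy it out. */
struct example_info {
        __u64 limit;
        __u32 flags;
};

static int example_fill_and_copy(struct example_info __user *uptr, __u64 limit)
{
        struct example_info info;

        memset(&info, 0, sizeof(info));
        info.limit = limit;
        return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
}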
diff --git a/fs/read_write.c b/fs/read_write.c
index e3cd280b158c..58e440df1bc6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
257 257
258 fn = no_llseek; 258 fn = no_llseek;
259 if (file->f_mode & FMODE_LSEEK) { 259 if (file->f_mode & FMODE_LSEEK) {
260 if (file->f_op && file->f_op->llseek) 260 if (file->f_op->llseek)
261 fn = file->f_op->llseek; 261 fn = file->f_op->llseek;
262 } 262 }
263 return fn(file, offset, whence); 263 return fn(file, offset, whence);
@@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
384 384
385 if (!(file->f_mode & FMODE_READ)) 385 if (!(file->f_mode & FMODE_READ))
386 return -EBADF; 386 return -EBADF;
387 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 387 if (!file->f_op->read && !file->f_op->aio_read)
388 return -EINVAL; 388 return -EINVAL;
389 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 389 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
390 return -EFAULT; 390 return -EFAULT;
@@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
433 const char __user *p; 433 const char __user *p;
434 ssize_t ret; 434 ssize_t ret;
435 435
436 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 436 if (!file->f_op->write && !file->f_op->aio_write)
437 return -EINVAL; 437 return -EINVAL;
438 438
439 old_fs = get_fs(); 439 old_fs = get_fs();
@@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
460 460
461 if (!(file->f_mode & FMODE_WRITE)) 461 if (!(file->f_mode & FMODE_WRITE))
462 return -EBADF; 462 return -EBADF;
463 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 463 if (!file->f_op->write && !file->f_op->aio_write)
464 return -EINVAL; 464 return -EINVAL;
465 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 465 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
466 return -EFAULT; 466 return -EFAULT;
@@ -727,11 +727,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
727 io_fn_t fn; 727 io_fn_t fn;
728 iov_fn_t fnv; 728 iov_fn_t fnv;
729 729
730 if (!file->f_op) {
731 ret = -EINVAL;
732 goto out;
733 }
734
735 ret = rw_copy_check_uvector(type, uvector, nr_segs, 730 ret = rw_copy_check_uvector(type, uvector, nr_segs,
736 ARRAY_SIZE(iovstack), iovstack, &iov); 731 ARRAY_SIZE(iovstack), iovstack, &iov);
737 if (ret <= 0) 732 if (ret <= 0)
@@ -778,7 +773,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
778{ 773{
779 if (!(file->f_mode & FMODE_READ)) 774 if (!(file->f_mode & FMODE_READ))
780 return -EBADF; 775 return -EBADF;
781 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 776 if (!file->f_op->aio_read && !file->f_op->read)
782 return -EINVAL; 777 return -EINVAL;
783 778
784 return do_readv_writev(READ, file, vec, vlen, pos); 779 return do_readv_writev(READ, file, vec, vlen, pos);
@@ -791,7 +786,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
791{ 786{
792 if (!(file->f_mode & FMODE_WRITE)) 787 if (!(file->f_mode & FMODE_WRITE))
793 return -EBADF; 788 return -EBADF;
794 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 789 if (!file->f_op->aio_write && !file->f_op->write)
795 return -EINVAL; 790 return -EINVAL;
796 791
797 return do_readv_writev(WRITE, file, vec, vlen, pos); 792 return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -906,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
906 io_fn_t fn; 901 io_fn_t fn;
907 iov_fn_t fnv; 902 iov_fn_t fnv;
908 903
909 ret = -EINVAL;
910 if (!file->f_op)
911 goto out;
912
913 ret = -EFAULT; 904 ret = -EFAULT;
914 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) 905 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
915 goto out; 906 goto out;
@@ -965,7 +956,7 @@ static size_t compat_readv(struct file *file,
965 goto out; 956 goto out;
966 957
967 ret = -EINVAL; 958 ret = -EINVAL;
968 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 959 if (!file->f_op->aio_read && !file->f_op->read)
969 goto out; 960 goto out;
970 961
971 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 962 ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1032,7 +1023,7 @@ static size_t compat_writev(struct file *file,
1032 goto out; 1023 goto out;
1033 1024
1034 ret = -EINVAL; 1025 ret = -EINVAL;
1035 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 1026 if (!file->f_op->aio_write && !file->f_op->write)
1036 goto out; 1027 goto out;
1037 1028
1038 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1029 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/fs/readdir.c b/fs/readdir.c
index 93d71e574310..5b53d995cae6 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
24{ 24{
25 struct inode *inode = file_inode(file); 25 struct inode *inode = file_inode(file);
26 int res = -ENOTDIR; 26 int res = -ENOTDIR;
27 if (!file->f_op || !file->f_op->iterate) 27 if (!file->f_op->iterate)
28 goto out; 28 goto out;
29 29
30 res = security_file_permission(file, MAY_READ); 30 res = security_file_permission(file, MAY_READ);
diff --git a/fs/select.c b/fs/select.c
index dfd5cb18c012..467bb1cb3ea5 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
454 const struct file_operations *f_op; 454 const struct file_operations *f_op;
455 f_op = f.file->f_op; 455 f_op = f.file->f_op;
456 mask = DEFAULT_POLLMASK; 456 mask = DEFAULT_POLLMASK;
457 if (f_op && f_op->poll) { 457 if (f_op->poll) {
458 wait_key_set(wait, in, out, 458 wait_key_set(wait, in, out,
459 bit, busy_flag); 459 bit, busy_flag);
460 mask = (*f_op->poll)(f.file, wait); 460 mask = (*f_op->poll)(f.file, wait);
@@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
761 mask = POLLNVAL; 761 mask = POLLNVAL;
762 if (f.file) { 762 if (f.file) {
763 mask = DEFAULT_POLLMASK; 763 mask = DEFAULT_POLLMASK;
764 if (f.file->f_op && f.file->f_op->poll) { 764 if (f.file->f_op->poll) {
765 pwait->_key = pollfd->events|POLLERR|POLLHUP; 765 pwait->_key = pollfd->events|POLLERR|POLLHUP;
766 pwait->_key |= busy_flag; 766 pwait->_key |= busy_flag;
767 mask = f.file->f_op->poll(f.file, pwait); 767 mask = f.file->f_op->poll(f.file, pwait);
diff --git a/fs/splice.c b/fs/splice.c
index 3b7ee656f3aa..46a08f772d7d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
695 loff_t pos = sd->pos; 695 loff_t pos = sd->pos;
696 int more; 696 int more;
697 697
698 if (!likely(file->f_op && file->f_op->sendpage)) 698 if (!likely(file->f_op->sendpage))
699 return -EINVAL; 699 return -EINVAL;
700 700
701 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 701 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
@@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1099 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, 1099 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
1100 loff_t *, size_t, unsigned int); 1100 loff_t *, size_t, unsigned int);
1101 1101
1102 if (out->f_op && out->f_op->splice_write) 1102 if (out->f_op->splice_write)
1103 splice_write = out->f_op->splice_write; 1103 splice_write = out->f_op->splice_write;
1104 else 1104 else
1105 splice_write = default_file_splice_write; 1105 splice_write = default_file_splice_write;
@@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
1125 if (unlikely(ret < 0)) 1125 if (unlikely(ret < 0))
1126 return ret; 1126 return ret;
1127 1127
1128 if (in->f_op && in->f_op->splice_read) 1128 if (in->f_op->splice_read)
1129 splice_read = in->f_op->splice_read; 1129 splice_read = in->f_op->splice_read;
1130 else 1130 else
1131 splice_read = default_file_splice_read; 1131 splice_read = default_file_splice_read;
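
The repeated removal of "file->f_op &&" in read_write.c, readdir.c, select.c and splice.c relies on do_dentry_open() now failing with -ENODEV when no file operations are found, so ->f_op is never NULL on an opened file. A sketch of method dispatch under that guarantee; example_llseek() is illustrative.

#include <linux/fs.h>

/* With ->f_op guaranteed non-NULL after open, callers only test for
 * the individual method they need. */
static loff_t example_llseek(struct file *file, loff_t offset, int whence)
{
        if (file->f_op->llseek)
                return file->f_op->llseek(file, offset, whence);
        return noop_llseek(file, offset, whence);
}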
diff --git a/fs/stat.c b/fs/stat.c
index d0ea7ef75e26..ae0c3cef9927 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);
39 39
40int vfs_getattr(struct path *path, struct kstat *stat) 40/**
41 * vfs_getattr_nosec - getattr without security checks
42 * @path: file to get attributes from
43 * @stat: structure to return attributes in
44 *
45 * Get attributes without calling security_inode_getattr.
46 *
47 * Currently the only caller other than vfs_getattr is internal to the
48 * filehandle lookup code, which uses only the inode number and returns
49 * no attributes to any user. Any other code probably wants
50 * vfs_getattr.
51 */
52int vfs_getattr_nosec(struct path *path, struct kstat *stat)
41{ 53{
42 struct inode *inode = path->dentry->d_inode; 54 struct inode *inode = path->dentry->d_inode;
43 int retval;
44
45 retval = security_inode_getattr(path->mnt, path->dentry);
46 if (retval)
47 return retval;
48 55
49 if (inode->i_op->getattr) 56 if (inode->i_op->getattr)
50 return inode->i_op->getattr(path->mnt, path->dentry, stat); 57 return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat)
53 return 0; 60 return 0;
54} 61}
55 62
63EXPORT_SYMBOL(vfs_getattr_nosec);
64
65int vfs_getattr(struct path *path, struct kstat *stat)
66{
67 int retval;
68
69 retval = security_inode_getattr(path->mnt, path->dentry);
70 if (retval)
71 return retval;
72 return vfs_getattr_nosec(path, stat);
73}
74
56EXPORT_SYMBOL(vfs_getattr); 75EXPORT_SYMBOL(vfs_getattr);
57 76
58int vfs_fstat(unsigned int fd, struct kstat *stat) 77int vfs_fstat(unsigned int fd, struct kstat *stat)
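
vfs_getattr() is split so that the attribute fetch itself lives in vfs_getattr_nosec() and only the exported vfs_getattr() performs the security_inode_getattr() check; the filehandle lookup code can then stat an inode without consulting the LSM. A hedged sketch of such an internal caller; example_get_ino() is not a real kernel function.

#include <linux/fs.h>
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/types.h>

/* Illustrative only: internal code (e.g. filehandle encoding) that
 * needs an inode number but must not trigger an LSM check.
 * Ordinary callers keep using vfs_getattr(). */
static int example_get_ino(struct path *path, u64 *ino)
{
        struct kstat stat;
        int err = vfs_getattr_nosec(path, &stat);

        if (!err)
                *ino = stat.ino;
        return err;
}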
diff --git a/fs/super.c b/fs/super.c
index 0225c20f8770..e5f6c2cfac38 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -129,33 +129,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
129 return total_objects; 129 return total_objects;
130} 130}
131 131
132static int init_sb_writers(struct super_block *s, struct file_system_type *type) 132/**
133{ 133 * destroy_super - frees a superblock
134 int err; 134 * @s: superblock to free
135 int i; 135 *
136 136 * Frees a superblock.
137 for (i = 0; i < SB_FREEZE_LEVELS; i++) { 137 */
138 err = percpu_counter_init(&s->s_writers.counter[i], 0); 138static void destroy_super(struct super_block *s)
139 if (err < 0)
140 goto err_out;
141 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
142 &type->s_writers_key[i], 0);
143 }
144 init_waitqueue_head(&s->s_writers.wait);
145 init_waitqueue_head(&s->s_writers.wait_unfrozen);
146 return 0;
147err_out:
148 while (--i >= 0)
149 percpu_counter_destroy(&s->s_writers.counter[i]);
150 return err;
151}
152
153static void destroy_sb_writers(struct super_block *s)
154{ 139{
155 int i; 140 int i;
156 141 list_lru_destroy(&s->s_dentry_lru);
142 list_lru_destroy(&s->s_inode_lru);
157 for (i = 0; i < SB_FREEZE_LEVELS; i++) 143 for (i = 0; i < SB_FREEZE_LEVELS; i++)
158 percpu_counter_destroy(&s->s_writers.counter[i]); 144 percpu_counter_destroy(&s->s_writers.counter[i]);
145 security_sb_free(s);
146 WARN_ON(!list_empty(&s->s_mounts));
147 kfree(s->s_subtype);
148 kfree(s->s_options);
149 kfree_rcu(s, rcu);
159} 150}
160 151
161/** 152/**
@@ -170,111 +161,74 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
170{ 161{
171 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 162 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
172 static const struct super_operations default_op; 163 static const struct super_operations default_op;
164 int i;
173 165
174 if (s) { 166 if (!s)
175 if (security_sb_alloc(s)) 167 return NULL;
176 goto out_free_sb;
177 168
178#ifdef CONFIG_SMP 169 if (security_sb_alloc(s))
179 s->s_files = alloc_percpu(struct list_head); 170 goto fail;
180 if (!s->s_files)
181 goto err_out;
182 else {
183 int i;
184 171
185 for_each_possible_cpu(i) 172 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
186 INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); 173 if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
187 } 174 goto fail;
188#else 175 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
189 INIT_LIST_HEAD(&s->s_files); 176 &type->s_writers_key[i], 0);
190#endif
191 if (init_sb_writers(s, type))
192 goto err_out;
193 s->s_flags = flags;
194 s->s_bdi = &default_backing_dev_info;
195 INIT_HLIST_NODE(&s->s_instances);
196 INIT_HLIST_BL_HEAD(&s->s_anon);
197 INIT_LIST_HEAD(&s->s_inodes);
198
199 if (list_lru_init(&s->s_dentry_lru))
200 goto err_out;
201 if (list_lru_init(&s->s_inode_lru))
202 goto err_out_dentry_lru;
203
204 INIT_LIST_HEAD(&s->s_mounts);
205 init_rwsem(&s->s_umount);
206 lockdep_set_class(&s->s_umount, &type->s_umount_key);
207 /*
208 * sget() can have s_umount recursion.
209 *
210 * When it cannot find a suitable sb, it allocates a new
211 * one (this one), and tries again to find a suitable old
212 * one.
213 *
214 * In case that succeeds, it will acquire the s_umount
215 * lock of the old one. Since these are clearly distinct
216 * locks, and this object isn't exposed yet, there's no
217 * risk of deadlocks.
218 *
219 * Annotate this by putting this lock in a different
220 * subclass.
221 */
222 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
223 s->s_count = 1;
224 atomic_set(&s->s_active, 1);
225 mutex_init(&s->s_vfs_rename_mutex);
226 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
227 mutex_init(&s->s_dquot.dqio_mutex);
228 mutex_init(&s->s_dquot.dqonoff_mutex);
229 init_rwsem(&s->s_dquot.dqptr_sem);
230 s->s_maxbytes = MAX_NON_LFS;
231 s->s_op = &default_op;
232 s->s_time_gran = 1000000000;
233 s->cleancache_poolid = -1;
234
235 s->s_shrink.seeks = DEFAULT_SEEKS;
236 s->s_shrink.scan_objects = super_cache_scan;
237 s->s_shrink.count_objects = super_cache_count;
238 s->s_shrink.batch = 1024;
239 s->s_shrink.flags = SHRINKER_NUMA_AWARE;
240 } 177 }
241out: 178 init_waitqueue_head(&s->s_writers.wait);
179 init_waitqueue_head(&s->s_writers.wait_unfrozen);
180 s->s_flags = flags;
181 s->s_bdi = &default_backing_dev_info;
182 INIT_HLIST_NODE(&s->s_instances);
183 INIT_HLIST_BL_HEAD(&s->s_anon);
184 INIT_LIST_HEAD(&s->s_inodes);
185
186 if (list_lru_init(&s->s_dentry_lru))
187 goto fail;
188 if (list_lru_init(&s->s_inode_lru))
189 goto fail;
190
191 INIT_LIST_HEAD(&s->s_mounts);
192 init_rwsem(&s->s_umount);
193 lockdep_set_class(&s->s_umount, &type->s_umount_key);
194 /*
195 * sget() can have s_umount recursion.
196 *
197 * When it cannot find a suitable sb, it allocates a new
198 * one (this one), and tries again to find a suitable old
199 * one.
200 *
201 * In case that succeeds, it will acquire the s_umount
202 * lock of the old one. Since these are clearly distinct
203 * locks, and this object isn't exposed yet, there's no
204 * risk of deadlocks.
205 *
206 * Annotate this by putting this lock in a different
207 * subclass.
208 */
209 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
210 s->s_count = 1;
211 atomic_set(&s->s_active, 1);
212 mutex_init(&s->s_vfs_rename_mutex);
213 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
214 mutex_init(&s->s_dquot.dqio_mutex);
215 mutex_init(&s->s_dquot.dqonoff_mutex);
216 init_rwsem(&s->s_dquot.dqptr_sem);
217 s->s_maxbytes = MAX_NON_LFS;
218 s->s_op = &default_op;
219 s->s_time_gran = 1000000000;
220 s->cleancache_poolid = -1;
221
222 s->s_shrink.seeks = DEFAULT_SEEKS;
223 s->s_shrink.scan_objects = super_cache_scan;
224 s->s_shrink.count_objects = super_cache_count;
225 s->s_shrink.batch = 1024;
226 s->s_shrink.flags = SHRINKER_NUMA_AWARE;
242 return s; 227 return s;
243 228
244err_out_dentry_lru: 229fail:
245 list_lru_destroy(&s->s_dentry_lru); 230 destroy_super(s);
246err_out: 231 return NULL;
247 security_sb_free(s);
248#ifdef CONFIG_SMP
249 if (s->s_files)
250 free_percpu(s->s_files);
251#endif
252 destroy_sb_writers(s);
253out_free_sb:
254 kfree(s);
255 s = NULL;
256 goto out;
257}
258
259/**
260 * destroy_super - frees a superblock
261 * @s: superblock to free
262 *
263 * Frees a superblock.
264 */
265static inline void destroy_super(struct super_block *s)
266{
267 list_lru_destroy(&s->s_dentry_lru);
268 list_lru_destroy(&s->s_inode_lru);
269#ifdef CONFIG_SMP
270 free_percpu(s->s_files);
271#endif
272 destroy_sb_writers(s);
273 security_sb_free(s);
274 WARN_ON(!list_empty(&s->s_mounts));
275 kfree(s->s_subtype);
276 kfree(s->s_options);
277 kfree(s);
278} 232}
279 233
280/* Superblock refcounting */ 234/* Superblock refcounting */
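The rework above collapses the cascade of error labels into a single fail: path that calls destroy_super(), which therefore has to tolerate a partially (zero-)initialised superblock. A hedged, self-contained illustration of that error-handling shape, using hypothetical types rather than kernel API:

#include <linux/slab.h>

/* hypothetical object, only to show the "one teardown path" shape */
struct example_obj {
        void *a;
        void *b;
};

static void example_destroy(struct example_obj *o)
{
        /* kfree(NULL) is a no-op, so partial initialisation is harmless */
        kfree(o->b);
        kfree(o->a);
        kfree(o);
}

static struct example_obj *example_alloc(void)
{
        struct example_obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

        if (!o)
                return NULL;
        o->a = kmalloc(16, GFP_KERNEL);
        if (!o->a)
                goto fail;
        o->b = kmalloc(16, GFP_KERNEL);
        if (!o->b)
                goto fail;
        return o;
fail:
        example_destroy(o);
        return NULL;
}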
@@ -756,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
756 make sure there are no rw files opened */ 710 make sure there are no rw files opened */
757 if (remount_ro) { 711 if (remount_ro) {
758 if (force) { 712 if (force) {
759 mark_files_ro(sb); 713 sb->s_readonly_remount = 1;
714 smp_wmb();
760 } else { 715 } else {
761 retval = sb_prepare_remount_readonly(sb); 716 retval = sb_prepare_remount_readonly(sb);
762 if (retval) 717 if (retval)
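Instead of walking every open file with mark_files_ro(), a forced read-only remount now just publishes sb->s_readonly_remount; the smp_wmb() is meant to pair with a read barrier on the write-access side before the flag is tested. A hedged sketch of the observing side (the helper name is hypothetical; the real checks live in the mount-write path):

#include <linux/fs.h>
#include <linux/errno.h>

static int example_want_write(struct super_block *sb)
{
        smp_rmb();      /* pairs with the smp_wmb() after setting the flag */
        if (sb->s_readonly_remount)
                return -EROFS;
        return 0;
}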
diff --git a/fs/sync.c b/fs/sync.c
index ff96f99fef64..f15537452231 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -180,7 +180,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
180 */ 180 */
181int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) 181int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
182{ 182{
183 if (!file->f_op || !file->f_op->fsync) 183 if (!file->f_op->fsync)
184 return -EINVAL; 184 return -EINVAL;
185 return file->f_op->fsync(file, start, end, datasync); 185 return file->f_op->fsync(file, start, end, datasync);
186} 186}
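The NULL check on file->f_op is dropped, presumably because an open file can no longer have a NULL ->f_op elsewhere in this series; only the missing ->fsync method remains an error. Minimal usage sketch, assuming filp is a valid open file:

#include <linux/fs.h>
#include <linux/kernel.h>

static int example_sync_whole_file(struct file *filp)
{
        /* flush the whole range; datasync == 0 also syncs metadata */
        return vfs_fsync_range(filp, 0, LLONG_MAX, 0);
}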
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 6e025e02ffde..cc1febd8fadf 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2563,9 +2563,9 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
2563 unsigned int from, to, ffs = chance(1, 2); 2563 unsigned int from, to, ffs = chance(1, 2);
2564 unsigned char *p = (void *)buf; 2564 unsigned char *p = (void *)buf;
2565 2565
2566 from = prandom_u32() % (len + 1); 2566 from = prandom_u32() % len;
2567 /* Corruption may only span one max. write unit */ 2567 /* Corruption may span at most to the end of the max. write unit */
2568 to = min(len, ALIGN(from, c->max_write_size)); 2568 to = min(len, ALIGN(from + 1, c->max_write_size));
2569 2569
2570 ubifs_warn("filled bytes %u-%u with %s", from, to - 1, 2570 ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
2571 ffs ? "0xFFs" : "random data"); 2571 ffs ? "0xFFs" : "random data");
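The old code could pick from == len and corrupt nothing; the new bounds guarantee at least one byte inside the buffer is hit while still capping the range at the max. write unit boundary. A small sketch of the same arithmetic, assuming the kernel's min()/ALIGN() macros:

#include <linux/kernel.h>

static unsigned int example_corrupt_to(unsigned int from, unsigned int len,
                                       unsigned int max_write_size)
{
        /* e.g. len = 4096, max_write_size = 512, from = 511 -> to = 512,
         * so byte 511 is always corrupted */
        return min(len, ALIGN(from + 1, max_write_size));
}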
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 6b4947f75af7..ea41649e4ca5 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -192,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
192 struct ubifs_dent_node *dent; 192 struct ubifs_dent_node *dent;
193 struct ubifs_info *c = dir->i_sb->s_fs_info; 193 struct ubifs_info *c = dir->i_sb->s_fs_info;
194 194
195 dbg_gen("'%.*s' in dir ino %lu", 195 dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
196 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
197 196
198 if (dentry->d_name.len > UBIFS_MAX_NLEN) 197 if (dentry->d_name.len > UBIFS_MAX_NLEN)
199 return ERR_PTR(-ENAMETOOLONG); 198 return ERR_PTR(-ENAMETOOLONG);
@@ -225,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
225 * checking. 224 * checking.
226 */ 225 */
227 err = PTR_ERR(inode); 226 err = PTR_ERR(inode);
228 ubifs_err("dead directory entry '%.*s', error %d", 227 ubifs_err("dead directory entry '%pd', error %d",
229 dentry->d_name.len, dentry->d_name.name, err); 228 dentry, err);
230 ubifs_ro_mode(c, err); 229 ubifs_ro_mode(c, err);
231 goto out; 230 goto out;
232 } 231 }
@@ -260,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
260 * parent directory inode. 259 * parent directory inode.
261 */ 260 */
262 261
263 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", 262 dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
264 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 263 dentry, mode, dir->i_ino);
265 264
266 err = ubifs_budget_space(c, &req); 265 err = ubifs_budget_space(c, &req);
267 if (err) 266 if (err)
@@ -509,8 +508,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
509 * changing the parent inode. 508 * changing the parent inode.
510 */ 509 */
511 510
512 dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", 511 dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
513 dentry->d_name.len, dentry->d_name.name, inode->i_ino, 512 dentry, inode->i_ino,
514 inode->i_nlink, dir->i_ino); 513 inode->i_nlink, dir->i_ino);
515 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 514 ubifs_assert(mutex_is_locked(&dir->i_mutex));
516 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 515 ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -566,8 +565,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
566 * deletions. 565 * deletions.
567 */ 566 */
568 567
569 dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", 568 dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
570 dentry->d_name.len, dentry->d_name.name, inode->i_ino, 569 dentry, inode->i_ino,
571 inode->i_nlink, dir->i_ino); 570 inode->i_nlink, dir->i_ino);
572 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 571 ubifs_assert(mutex_is_locked(&dir->i_mutex));
573 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 572 ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -656,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
656 * because we have extra space reserved for deletions. 655 * because we have extra space reserved for deletions.
657 */ 656 */
658 657
659 dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, 658 dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
660 dentry->d_name.name, inode->i_ino, dir->i_ino); 659 inode->i_ino, dir->i_ino);
661 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 660 ubifs_assert(mutex_is_locked(&dir->i_mutex));
662 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 661 ubifs_assert(mutex_is_locked(&inode->i_mutex));
663 err = check_dir_empty(c, dentry->d_inode); 662 err = check_dir_empty(c, dentry->d_inode);
@@ -716,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
716 * directory inode. 715 * directory inode.
717 */ 716 */
718 717
719 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", 718 dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
720 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 719 dentry, mode, dir->i_ino);
721 720
722 err = ubifs_budget_space(c, &req); 721 err = ubifs_budget_space(c, &req);
723 if (err) 722 if (err)
@@ -778,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
778 * directory inode. 777 * directory inode.
779 */ 778 */
780 779
781 dbg_gen("dent '%.*s' in dir ino %lu", 780 dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
782 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
783 781
784 if (!new_valid_dev(rdev)) 782 if (!new_valid_dev(rdev))
785 return -EINVAL; 783 return -EINVAL;
@@ -853,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
853 * directory inode. 851 * directory inode.
854 */ 852 */
855 853
856 dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, 854 dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
857 dentry->d_name.name, symname, dir->i_ino); 855 symname, dir->i_ino);
858 856
859 if (len > UBIFS_MAX_INO_DATA) 857 if (len > UBIFS_MAX_INO_DATA)
860 return -ENAMETOOLONG; 858 return -ENAMETOOLONG;
@@ -979,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
979 * separately. 977 * separately.
980 */ 978 */
981 979
982 dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", 980 dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
983 old_dentry->d_name.len, old_dentry->d_name.name, 981 old_dentry, old_inode->i_ino, old_dir->i_ino,
984 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, 982 new_dentry, new_dir->i_ino);
985 new_dentry->d_name.name, new_dir->i_ino);
986 ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); 983 ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
987 ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); 984 ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
988 if (unlink) 985 if (unlink)
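All the dbg_gen()/ubifs_err() call sites above switch to the '%pd' format extension (introduced in the same series), which prints a dentry's name from a single pointer and replaces the "'%.*s'", d_name.len, d_name.name triple. A minimal sketch with a hypothetical helper:

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/printk.h>

static void example_log_lookup(const struct inode *dir,
                               const struct dentry *dentry)
{
        /* one vararg instead of the old len/name pair */
        pr_debug("'%pd' in dir ino %lu\n", dentry, dir->i_ino);
}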
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 76ca53cd3eee..9718da86ad01 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -668,8 +668,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
668 ubifs_assert(!wbuf->used); 668 ubifs_assert(!wbuf->used);
669 669
670 for (i = 0; ; i++) { 670 for (i = 0; ; i++) {
671 int space_before = c->leb_size - wbuf->offs - wbuf->used; 671 int space_before, space_after;
672 int space_after;
673 672
674 cond_resched(); 673 cond_resched();
675 674
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index afaad07f3b29..0e045e75abd8 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -933,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
933 int move = (old_dir != new_dir); 933 int move = (old_dir != new_dir);
934 struct ubifs_inode *uninitialized_var(new_ui); 934 struct ubifs_inode *uninitialized_var(new_ui);
935 935
936 dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", 936 dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
937 old_dentry->d_name.len, old_dentry->d_name.name, 937 old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
938 old_dir->i_ino, new_dentry->d_name.len,
939 new_dentry->d_name.name, new_dir->i_ino);
940 ubifs_assert(ubifs_inode(old_dir)->data_len == 0); 938 ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
941 ubifs_assert(ubifs_inode(new_dir)->data_len == 0); 939 ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
942 ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); 940 ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3e4aa7281e04..f69daa514a57 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1630,8 +1630,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1630 } 1630 }
1631 1631
1632 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); 1632 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
1633 if (!c->write_reserve_buf) 1633 if (!c->write_reserve_buf) {
1634 err = -ENOMEM;
1634 goto out; 1635 goto out;
1636 }
1635 1637
1636 err = ubifs_lpt_init(c, 0, 1); 1638 err = ubifs_lpt_init(c, 0, 1);
1637 if (err) 1639 if (err)
@@ -2064,8 +2066,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2064 } 2066 }
2065 2067
2066 sb->s_root = d_make_root(root); 2068 sb->s_root = d_make_root(root);
2067 if (!sb->s_root) 2069 if (!sb->s_root) {
2070 err = -ENOMEM;
2068 goto out_umount; 2071 goto out_umount;
2072 }
2069 2073
2070 mutex_unlock(&c->umount_mutex); 2074 mutex_unlock(&c->umount_mutex);
2071 return 0; 2075 return 0;
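Both hunks fix the same bug shape: a failed allocation jumped to the unwind label without setting err, so the function could return 0 (or a stale value) on failure. The general corrected pattern, as a hedged standalone sketch:

#include <linux/slab.h>
#include <linux/errno.h>

static int example_setup(void **bufp, size_t size)
{
        void *buf = kmalloc(size, GFP_KERNEL);

        if (!buf)
                return -ENOMEM;         /* propagate the error explicitly */
        *bufp = buf;
        return 0;
}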
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 0f7139bdb2c2..5e0a63b1b0d5 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
303 union ubifs_key key; 303 union ubifs_key key;
304 int err, type; 304 int err, type;
305 305
306 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, 306 dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
307 host->i_ino, dentry->d_name.len, dentry->d_name.name, size); 307 host->i_ino, dentry, size);
308 ubifs_assert(mutex_is_locked(&host->i_mutex)); 308 ubifs_assert(mutex_is_locked(&host->i_mutex));
309 309
310 if (size > UBIFS_MAX_INO_DATA) 310 if (size > UBIFS_MAX_INO_DATA)
@@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
367 union ubifs_key key; 367 union ubifs_key key;
368 int err; 368 int err;
369 369
370 dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, 370 dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
371 host->i_ino, dentry->d_name.len, dentry->d_name.name, size); 371 host->i_ino, dentry, size);
372 372
373 err = check_namespace(&nm); 373 err = check_namespace(&nm);
374 if (err < 0) 374 if (err < 0)
@@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
426 int err, len, written = 0; 426 int err, len, written = 0;
427 struct qstr nm = { .name = NULL }; 427 struct qstr nm = { .name = NULL };
428 428
429 dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, 429 dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
430 dentry->d_name.len, dentry->d_name.name, size); 430 dentry, size);
431 431
432 len = host_ui->xattr_names + host_ui->xattr_cnt; 432 len = host_ui->xattr_names + host_ui->xattr_cnt;
433 if (!buffer) 433 if (!buffer)
@@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
529 union ubifs_key key; 529 union ubifs_key key;
530 int err; 530 int err;
531 531
532 dbg_gen("xattr '%s', ino %lu ('%.*s')", name, 532 dbg_gen("xattr '%s', ino %lu ('%pd')", name,
533 host->i_ino, dentry->d_name.len, dentry->d_name.name); 533 host->i_ino, dentry);
534 ubifs_assert(mutex_is_locked(&host->i_mutex)); 534 ubifs_assert(mutex_is_locked(&host->i_mutex));
535 535
536 err = check_namespace(&nm); 536 err = check_namespace(&nm);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 91219385691d..3306b9f69bed 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -76,6 +76,9 @@
76 76
77#define UDF_DEFAULT_BLOCKSIZE 2048 77#define UDF_DEFAULT_BLOCKSIZE 2048
78 78
79#define VSD_FIRST_SECTOR_OFFSET 32768
80#define VSD_MAX_SECTOR_OFFSET 0x800000
81
79enum { UDF_MAX_LINKS = 0xffff }; 82enum { UDF_MAX_LINKS = 0xffff };
80 83
81/* These are the "meat" - everything else is stuffing */ 84/* These are the "meat" - everything else is stuffing */
@@ -685,7 +688,7 @@ out_unlock:
685static loff_t udf_check_vsd(struct super_block *sb) 688static loff_t udf_check_vsd(struct super_block *sb)
686{ 689{
687 struct volStructDesc *vsd = NULL; 690 struct volStructDesc *vsd = NULL;
688 loff_t sector = 32768; 691 loff_t sector = VSD_FIRST_SECTOR_OFFSET;
689 int sectorsize; 692 int sectorsize;
690 struct buffer_head *bh = NULL; 693 struct buffer_head *bh = NULL;
691 int nsr02 = 0; 694 int nsr02 = 0;
@@ -703,8 +706,18 @@ static loff_t udf_check_vsd(struct super_block *sb)
703 udf_debug("Starting at sector %u (%ld byte sectors)\n", 706 udf_debug("Starting at sector %u (%ld byte sectors)\n",
704 (unsigned int)(sector >> sb->s_blocksize_bits), 707 (unsigned int)(sector >> sb->s_blocksize_bits),
705 sb->s_blocksize); 708 sb->s_blocksize);
706 /* Process the sequence (if applicable) */ 709 /* Process the sequence (if applicable). The hard limit on the sector
707 for (; !nsr02 && !nsr03; sector += sectorsize) { 710 * offset is arbitrary, hopefully large enough so that all valid UDF
711 * filesystems will be recognised. There is no mention of an upper
712 * bound to the size of the volume recognition area in the standard.
713 * The limit prevents the code from reading all the sectors of a
714 * specially crafted image (like a bluray disc full of CD001 sectors),
715 * potentially causing minutes or even hours of uninterruptible I/O
716 * activity. This actually happened with uninitialised SSD partitions
717 * (all 0xFF) before the check for the limit and all valid IDs were
718 * added */
719 for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET;
720 sector += sectorsize) {
708 /* Read a block */ 721 /* Read a block */
709 bh = udf_tread(sb, sector >> sb->s_blocksize_bits); 722 bh = udf_tread(sb, sector >> sb->s_blocksize_bits);
710 if (!bh) 723 if (!bh)
@@ -714,10 +727,7 @@ static loff_t udf_check_vsd(struct super_block *sb)
714 vsd = (struct volStructDesc *)(bh->b_data + 727 vsd = (struct volStructDesc *)(bh->b_data +
715 (sector & (sb->s_blocksize - 1))); 728 (sector & (sb->s_blocksize - 1)));
716 729
717 if (vsd->stdIdent[0] == 0) { 730 if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
718 brelse(bh);
719 break;
720 } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
721 VSD_STD_ID_LEN)) { 731 VSD_STD_ID_LEN)) {
722 switch (vsd->structType) { 732 switch (vsd->structType) {
723 case 0: 733 case 0:
@@ -753,6 +763,17 @@ static loff_t udf_check_vsd(struct super_block *sb)
753 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, 763 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03,
754 VSD_STD_ID_LEN)) 764 VSD_STD_ID_LEN))
755 nsr03 = sector; 765 nsr03 = sector;
766 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2,
767 VSD_STD_ID_LEN))
768 ; /* nothing */
769 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02,
770 VSD_STD_ID_LEN))
771 ; /* nothing */
772 else {
773 /* invalid id : end of volume recognition area */
774 brelse(bh);
775 break;
776 }
756 brelse(bh); 777 brelse(bh);
757 } 778 }
758 779
@@ -760,7 +781,8 @@ static loff_t udf_check_vsd(struct super_block *sb)
760 return nsr03; 781 return nsr03;
761 else if (nsr02) 782 else if (nsr02)
762 return nsr02; 783 return nsr02;
763 else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768) 784 else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) ==
785 VSD_FIRST_SECTOR_OFFSET)
764 return -1; 786 return -1;
765 else 787 else
766 return 0; 788 return 0;
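With the VSD_MAX_SECTOR_OFFSET cap, the recognition loop now inspects a bounded number of blocks regardless of device size. A hedged helper (reusing the constants defined earlier in this file) showing the bound; for 2048-byte sectors it works out to (0x800000 - 32768) / 2048 = 4080 reads at most:

static unsigned long example_max_vsd_reads(unsigned int sectorsize)
{
        /* upper bound on sectors inspected by udf_check_vsd() */
        return (VSD_MAX_SECTOR_OFFSET - VSD_FIRST_SECTOR_OFFSET) / sectorsize;
}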
@@ -1270,6 +1292,9 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1270 * PHYSICAL partitions are already set up 1292 * PHYSICAL partitions are already set up
1271 */ 1293 */
1272 type1_idx = i; 1294 type1_idx = i;
1295#ifdef UDFFS_DEBUG
1296 map = NULL; /* suppress 'maybe used uninitialized' warning */
1297#endif
1273 for (i = 0; i < sbi->s_partitions; i++) { 1298 for (i = 0; i < sbi->s_partitions; i++) {
1274 map = &sbi->s_partmaps[i]; 1299 map = &sbi->s_partmaps[i];
1275 1300
@@ -1891,7 +1916,9 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1891 return 0; 1916 return 0;
1892 } 1917 }
1893 if (nsr_off == -1) 1918 if (nsr_off == -1)
1894 udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); 1919 udf_debug("Failed to read sector at offset %d. "
1920 "Assuming open disc. Skipping validity "
1921 "check\n", VSD_FIRST_SECTOR_OFFSET);
1895 if (!sbi->s_last_block) 1922 if (!sbi->s_last_block)
1896 sbi->s_last_block = udf_get_last_block(sb); 1923 sbi->s_last_block = udf_get_last_block(sb);
1897 } else { 1924 } else {
diff --git a/fs/utimes.c b/fs/utimes.c
index f4fb7eca10e8..aa138d64560a 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times)
53 int error; 53 int error;
54 struct iattr newattrs; 54 struct iattr newattrs;
55 struct inode *inode = path->dentry->d_inode; 55 struct inode *inode = path->dentry->d_inode;
56 struct inode *delegated_inode = NULL;
56 57
57 error = mnt_want_write(path->mnt); 58 error = mnt_want_write(path->mnt);
58 if (error) 59 if (error)
@@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times)
101 goto mnt_drop_write_and_out; 102 goto mnt_drop_write_and_out;
102 } 103 }
103 } 104 }
105retry_deleg:
104 mutex_lock(&inode->i_mutex); 106 mutex_lock(&inode->i_mutex);
105 error = notify_change(path->dentry, &newattrs); 107 error = notify_change(path->dentry, &newattrs, &delegated_inode);
106 mutex_unlock(&inode->i_mutex); 108 mutex_unlock(&inode->i_mutex);
109 if (delegated_inode) {
110 error = break_deleg_wait(&delegated_inode);
111 if (!error)
112 goto retry_deleg;
113 }
107 114
108mnt_drop_write_and_out: 115mnt_drop_write_and_out:
109 mnt_drop_write(path->mnt); 116 mnt_drop_write(path->mnt);
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index b685d3bd32e2..3d1a3af5cf59 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -32,6 +32,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
32 32
33#endif 33#endif
34 34
35extern int copy_siginfo_to_user(struct siginfo __user *to, struct siginfo *from); 35extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
36 36
37#endif 37#endif
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index cf573c22b81e..8013a45242fe 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -13,9 +13,6 @@ struct file_operations;
13struct file *anon_inode_getfile(const char *name, 13struct file *anon_inode_getfile(const char *name,
14 const struct file_operations *fops, 14 const struct file_operations *fops,
15 void *priv, int flags); 15 void *priv, int flags);
16struct file *anon_inode_getfile_private(const char *name,
17 const struct file_operations *fops,
18 void *priv, int flags);
19int anon_inode_getfd(const char *name, const struct file_operations *fops, 16int anon_inode_getfd(const char *name, const struct file_operations *fops,
20 void *priv, int flags); 17 void *priv, int flags);
21 18
diff --git a/include/linux/ata.h b/include/linux/ata.h
index bf4c69ca76df..f2f4d8da97c0 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -219,6 +219,7 @@ enum {
219 ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ 219 ATA_CMD_IDLE = 0xE3, /* place in idle power mode */
220 ATA_CMD_EDD = 0x90, /* execute device diagnostic */ 220 ATA_CMD_EDD = 0x90, /* execute device diagnostic */
221 ATA_CMD_DOWNLOAD_MICRO = 0x92, 221 ATA_CMD_DOWNLOAD_MICRO = 0x92,
222 ATA_CMD_DOWNLOAD_MICRO_DMA = 0x93,
222 ATA_CMD_NOP = 0x00, 223 ATA_CMD_NOP = 0x00,
223 ATA_CMD_FLUSH = 0xE7, 224 ATA_CMD_FLUSH = 0xE7,
224 ATA_CMD_FLUSH_EXT = 0xEA, 225 ATA_CMD_FLUSH_EXT = 0xEA,
@@ -268,12 +269,15 @@ enum {
268 ATA_CMD_WRITE_LOG_EXT = 0x3F, 269 ATA_CMD_WRITE_LOG_EXT = 0x3F,
269 ATA_CMD_READ_LOG_DMA_EXT = 0x47, 270 ATA_CMD_READ_LOG_DMA_EXT = 0x47,
270 ATA_CMD_WRITE_LOG_DMA_EXT = 0x57, 271 ATA_CMD_WRITE_LOG_DMA_EXT = 0x57,
272 ATA_CMD_TRUSTED_NONDATA = 0x5B,
271 ATA_CMD_TRUSTED_RCV = 0x5C, 273 ATA_CMD_TRUSTED_RCV = 0x5C,
272 ATA_CMD_TRUSTED_RCV_DMA = 0x5D, 274 ATA_CMD_TRUSTED_RCV_DMA = 0x5D,
273 ATA_CMD_TRUSTED_SND = 0x5E, 275 ATA_CMD_TRUSTED_SND = 0x5E,
274 ATA_CMD_TRUSTED_SND_DMA = 0x5F, 276 ATA_CMD_TRUSTED_SND_DMA = 0x5F,
275 ATA_CMD_PMP_READ = 0xE4, 277 ATA_CMD_PMP_READ = 0xE4,
278 ATA_CMD_PMP_READ_DMA = 0xE9,
276 ATA_CMD_PMP_WRITE = 0xE8, 279 ATA_CMD_PMP_WRITE = 0xE8,
280 ATA_CMD_PMP_WRITE_DMA = 0xEB,
277 ATA_CMD_CONF_OVERLAY = 0xB1, 281 ATA_CMD_CONF_OVERLAY = 0xB1,
278 ATA_CMD_SEC_SET_PASS = 0xF1, 282 ATA_CMD_SEC_SET_PASS = 0xF1,
279 ATA_CMD_SEC_UNLOCK = 0xF2, 283 ATA_CMD_SEC_UNLOCK = 0xF2,
@@ -292,6 +296,9 @@ enum {
292 ATA_CMD_CFA_TRANS_SECT = 0x87, 296 ATA_CMD_CFA_TRANS_SECT = 0x87,
293 ATA_CMD_CFA_ERASE = 0xC0, 297 ATA_CMD_CFA_ERASE = 0xC0,
294 ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, 298 ATA_CMD_CFA_WRITE_MULT_NE = 0xCD,
299 ATA_CMD_REQ_SENSE_DATA = 0x0B,
300 ATA_CMD_SANITIZE_DEVICE = 0xB4,
301
295 /* marked obsolete in the ATA/ATAPI-7 spec */ 302 /* marked obsolete in the ATA/ATAPI-7 spec */
296 ATA_CMD_RESTORE = 0x10, 303 ATA_CMD_RESTORE = 0x10,
297 304
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 7554fd410bcc..fd8bf3219ef7 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -56,11 +56,12 @@ struct linux_binprm {
56 56
57/* Function parameter for binfmt->coredump */ 57/* Function parameter for binfmt->coredump */
58struct coredump_params { 58struct coredump_params {
59 siginfo_t *siginfo; 59 const siginfo_t *siginfo;
60 struct pt_regs *regs; 60 struct pt_regs *regs;
61 struct file *file; 61 struct file *file;
62 unsigned long limit; 62 unsigned long limit;
63 unsigned long mm_flags; 63 unsigned long mm_flags;
64 loff_t written;
64}; 65};
65 66
66/* 67/*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 3561d305b1e0..39c1d9469677 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -612,11 +612,6 @@ struct cgroup_subsys {
612 int subsys_id; 612 int subsys_id;
613 int disabled; 613 int disabled;
614 int early_init; 614 int early_init;
615 /*
616 * True if this subsys uses ID. ID is not available before cgroup_init()
617 * (not available in early_init time.)
618 */
619 bool use_id;
620 615
621 /* 616 /*
622 * If %false, this subsystem is properly hierarchical - 617 * If %false, this subsystem is properly hierarchical -
@@ -642,9 +637,6 @@ struct cgroup_subsys {
642 */ 637 */
643 struct cgroupfs_root *root; 638 struct cgroupfs_root *root;
644 struct list_head sibling; 639 struct list_head sibling;
645 /* used when use_id == true */
646 struct idr idr;
647 spinlock_t id_lock;
648 640
649 /* list of cftype_sets */ 641 /* list of cftype_sets */
650 struct list_head cftsets; 642 struct list_head cftsets;
@@ -875,35 +867,6 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
875int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 867int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
876int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); 868int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
877 869
878/*
879 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
880 * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
881 * CSS ID is assigned at cgroup allocation (create) automatically
882 * and removed when subsys calls free_css_id() function. This is because
883 * the lifetime of cgroup_subsys_state is subsys's matter.
884 *
885 * Looking up and scanning function should be called under rcu_read_lock().
886 * Taking cgroup_mutex is not necessary for following calls.
887 * But the css returned by this routine can be "not populated yet" or "being
888 * destroyed". The caller should check css and cgroup's status.
889 */
890
891/*
892 * Typically Called at ->destroy(), or somewhere the subsys frees
893 * cgroup_subsys_state.
894 */
895void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
896
897/* Find a cgroup_subsys_state which has given ID */
898
899struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
900
901/* Returns true if root is ancestor of cg */
902bool css_is_ancestor(struct cgroup_subsys_state *cg,
903 const struct cgroup_subsys_state *root);
904
905/* Get id and depth of css */
906unsigned short css_id(struct cgroup_subsys_state *css);
907struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, 870struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
908 struct cgroup_subsys *ss); 871 struct cgroup_subsys *ss);
909 872
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ada34c92b684..eb8a49d75ab3 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -362,7 +362,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
362long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, 362long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
363 unsigned long bitmap_size); 363 unsigned long bitmap_size);
364int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); 364int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
365int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); 365int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from);
366int get_compat_sigevent(struct sigevent *event, 366int get_compat_sigevent(struct sigevent *event,
367 const struct compat_sigevent __user *u_event); 367 const struct compat_sigevent __user *u_event);
368long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, 368long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index a98f1ca60407..d016a121a8c4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -10,12 +10,14 @@
10 * These are the only things you should do on a core-file: use only these 10 * These are the only things you should do on a core-file: use only these
11 * functions to write out all the necessary info. 11 * functions to write out all the necessary info.
12 */ 12 */
13extern int dump_write(struct file *file, const void *addr, int nr); 13struct coredump_params;
14extern int dump_seek(struct file *file, loff_t off); 14extern int dump_skip(struct coredump_params *cprm, size_t nr);
15extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
16extern int dump_align(struct coredump_params *cprm, int align);
15#ifdef CONFIG_COREDUMP 17#ifdef CONFIG_COREDUMP
16extern void do_coredump(siginfo_t *siginfo); 18extern void do_coredump(const siginfo_t *siginfo);
17#else 19#else
18static inline void do_coredump(siginfo_t *siginfo) {} 20static inline void do_coredump(const siginfo_t *siginfo) {}
19#endif 21#endif
20 22
21#endif /* _LINUX_COREDUMP_H */ 23#endif /* _LINUX_COREDUMP_H */
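dump_write()/dump_seek() took a bare struct file plus caller-maintained offsets; the replacements carry all state in coredump_params (including the new cprm->written field added to binfmts.h below). A hedged sketch of a writer using them; the return convention (non-zero on success) is assumed from the binfmt_elf callers in the same series:

#include <linux/coredump.h>
#include <linux/binfmts.h>

static int example_emit_blob(struct coredump_params *cprm,
                             const void *data, int len)
{
        if (!dump_emit(cprm, data, len))
                return 0;
        /* pad the dump out to a 4-byte boundary */
        return dump_align(cprm, 4);
}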
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59066e0b4ff1..57e87e749a48 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -169,13 +169,13 @@ struct dentry_operations {
169 */ 169 */
170 170
171/* d_flags entries */ 171/* d_flags entries */
172#define DCACHE_OP_HASH 0x0001 172#define DCACHE_OP_HASH 0x00000001
173#define DCACHE_OP_COMPARE 0x0002 173#define DCACHE_OP_COMPARE 0x00000002
174#define DCACHE_OP_REVALIDATE 0x0004 174#define DCACHE_OP_REVALIDATE 0x00000004
175#define DCACHE_OP_DELETE 0x0008 175#define DCACHE_OP_DELETE 0x00000008
176#define DCACHE_OP_PRUNE 0x0010 176#define DCACHE_OP_PRUNE 0x00000010
177 177
178#define DCACHE_DISCONNECTED 0x0020 178#define DCACHE_DISCONNECTED 0x00000020
179 /* This dentry is possibly not currently connected to the dcache tree, in 179 /* This dentry is possibly not currently connected to the dcache tree, in
180 * which case its parent will either be itself, or will have this flag as 180 * which case its parent will either be itself, or will have this flag as
181 * well. nfsd will not use a dentry with this bit set, but will first 181 * well. nfsd will not use a dentry with this bit set, but will first
@@ -186,30 +186,38 @@ struct dentry_operations {
186 * dentry into place and return that dentry rather than the passed one, 186 * dentry into place and return that dentry rather than the passed one,
187 * typically using d_splice_alias. */ 187 * typically using d_splice_alias. */
188 188
189#define DCACHE_REFERENCED 0x0040 /* Recently used, don't discard. */ 189#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */
190#define DCACHE_RCUACCESS 0x0080 /* Entry has ever been RCU-visible */ 190#define DCACHE_RCUACCESS 0x00000080 /* Entry has ever been RCU-visible */
191 191
192#define DCACHE_CANT_MOUNT 0x0100 192#define DCACHE_CANT_MOUNT 0x00000100
193#define DCACHE_GENOCIDE 0x0200 193#define DCACHE_GENOCIDE 0x00000200
194#define DCACHE_SHRINK_LIST 0x0400 194#define DCACHE_SHRINK_LIST 0x00000400
195 195
196#define DCACHE_OP_WEAK_REVALIDATE 0x0800 196#define DCACHE_OP_WEAK_REVALIDATE 0x00000800
197 197
198#define DCACHE_NFSFS_RENAMED 0x1000 198#define DCACHE_NFSFS_RENAMED 0x00001000
199 /* this dentry has been "silly renamed" and has to be deleted on the last 199 /* this dentry has been "silly renamed" and has to be deleted on the last
200 * dput() */ 200 * dput() */
201#define DCACHE_COOKIE 0x2000 /* For use by dcookie subsystem */ 201#define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */
202#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000 202#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000
203 /* Parent inode is watched by some fsnotify listener */ 203 /* Parent inode is watched by some fsnotify listener */
204 204
205#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ 205#define DCACHE_DENTRY_KILLED 0x00008000
206#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ 206
207#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ 207#define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */
208#define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */
209#define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */
208#define DCACHE_MANAGED_DENTRY \ 210#define DCACHE_MANAGED_DENTRY \
209 (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) 211 (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
210 212
211#define DCACHE_LRU_LIST 0x80000 213#define DCACHE_LRU_LIST 0x00080000
212#define DCACHE_DENTRY_KILLED 0x100000 214
215#define DCACHE_ENTRY_TYPE 0x00700000
216#define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry */
217#define DCACHE_DIRECTORY_TYPE 0x00100000 /* Normal directory */
218#define DCACHE_AUTODIR_TYPE 0x00200000 /* Lookupless directory (presumed automount) */
219#define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */
220#define DCACHE_FILE_TYPE 0x00400000 /* Other file type */
213 221
214extern seqlock_t rename_lock; 222extern seqlock_t rename_lock;
215 223
@@ -224,6 +232,7 @@ static inline int dname_external(const struct dentry *dentry)
224extern void d_instantiate(struct dentry *, struct inode *); 232extern void d_instantiate(struct dentry *, struct inode *);
225extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); 233extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
226extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); 234extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
235extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
227extern void __d_drop(struct dentry *dentry); 236extern void __d_drop(struct dentry *dentry);
228extern void d_drop(struct dentry *dentry); 237extern void d_drop(struct dentry *dentry);
229extern void d_delete(struct dentry *); 238extern void d_delete(struct dentry *);
@@ -393,6 +402,61 @@ static inline bool d_mountpoint(const struct dentry *dentry)
393 return dentry->d_flags & DCACHE_MOUNTED; 402 return dentry->d_flags & DCACHE_MOUNTED;
394} 403}
395 404
405/*
406 * Directory cache entry type accessor functions.
407 */
408static inline void __d_set_type(struct dentry *dentry, unsigned type)
409{
410 dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type;
411}
412
413static inline void __d_clear_type(struct dentry *dentry)
414{
415 __d_set_type(dentry, DCACHE_MISS_TYPE);
416}
417
418static inline void d_set_type(struct dentry *dentry, unsigned type)
419{
420 spin_lock(&dentry->d_lock);
421 __d_set_type(dentry, type);
422 spin_unlock(&dentry->d_lock);
423}
424
425static inline unsigned __d_entry_type(const struct dentry *dentry)
426{
427 return dentry->d_flags & DCACHE_ENTRY_TYPE;
428}
429
430static inline bool d_is_directory(const struct dentry *dentry)
431{
432 return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
433}
434
435static inline bool d_is_autodir(const struct dentry *dentry)
436{
437 return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE;
438}
439
440static inline bool d_is_symlink(const struct dentry *dentry)
441{
442 return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE;
443}
444
445static inline bool d_is_file(const struct dentry *dentry)
446{
447 return __d_entry_type(dentry) == DCACHE_FILE_TYPE;
448}
449
450static inline bool d_is_negative(const struct dentry *dentry)
451{
452 return __d_entry_type(dentry) == DCACHE_MISS_TYPE;
453}
454
455static inline bool d_is_positive(const struct dentry *dentry)
456{
457 return !d_is_negative(dentry);
458}
459
396extern int sysctl_vfs_cache_pressure; 460extern int sysctl_vfs_cache_pressure;
397 461
398static inline unsigned long vfs_pressure_ratio(unsigned long val) 462static inline unsigned long vfs_pressure_ratio(unsigned long val)
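The DCACHE_ENTRY_TYPE bits cache the inode type in d_flags so callers can classify a dentry without dereferencing d_inode. A hedged usage sketch built only on the accessors defined above (the helper itself is hypothetical):

#include <linux/dcache.h>
#include <linux/types.h>

static bool example_can_descend(const struct dentry *dentry)
{
        if (d_is_negative(dentry) || d_is_symlink(dentry) || d_is_file(dentry))
                return false;
        /* both real and lookupless (automount) directories qualify */
        return d_is_directory(dentry) || d_is_autodir(dentry);
}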
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 40a3c0e01b2b..67a5fa7830c4 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC [];
39 39
40/* Optional callbacks to write extra ELF notes. */ 40/* Optional callbacks to write extra ELF notes. */
41struct file; 41struct file;
42struct coredump_params;
42 43
43#ifndef ARCH_HAVE_EXTRA_ELF_NOTES 44#ifndef ARCH_HAVE_EXTRA_ELF_NOTES
44static inline int elf_coredump_extra_notes_size(void) { return 0; } 45static inline int elf_coredump_extra_notes_size(void) { return 0; }
45static inline int elf_coredump_extra_notes_write(struct file *file, 46static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; }
46 loff_t *foffset) { return 0; }
47#else 47#else
48extern int elf_coredump_extra_notes_size(void); 48extern int elf_coredump_extra_notes_size(void);
49extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset); 49extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
50#endif 50#endif
51#endif /* _LINUX_ELF_H */ 51#endif /* _LINUX_ELF_H */
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index cdd3d13efce7..698d51a0eea3 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -6,6 +6,8 @@
6#include <asm/elf.h> 6#include <asm/elf.h>
7#include <uapi/linux/elfcore.h> 7#include <uapi/linux/elfcore.h>
8 8
9struct coredump_params;
10
9static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) 11static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
10{ 12{
11#ifdef ELF_CORE_COPY_REGS 13#ifdef ELF_CORE_COPY_REGS
@@ -63,10 +65,9 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse
63 */ 65 */
64extern Elf_Half elf_core_extra_phdrs(void); 66extern Elf_Half elf_core_extra_phdrs(void);
65extern int 67extern int
66elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, 68elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
67 unsigned long limit);
68extern int 69extern int
69elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); 70elf_core_write_extra_data(struct coredump_params *cprm);
70extern size_t elf_core_extra_data_size(void); 71extern size_t elf_core_extra_data_size(void);
71 72
72#endif /* _LINUX_ELFCORE_H */ 73#endif /* _LINUX_ELFCORE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 955dff5da56a..bf5d574ebdf4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,10 +623,13 @@ static inline int inode_unhashed(struct inode *inode)
623 * 0: the object of the current VFS operation 623 * 0: the object of the current VFS operation
624 * 1: parent 624 * 1: parent
625 * 2: child/target 625 * 2: child/target
626 * 3: quota file 626 * 3: xattr
627 * 4: second non-directory
628 * The last is for certain operations (such as rename) which lock two
629 * non-directories at once.
627 * 630 *
628 * The locking order between these classes is 631 * The locking order between these classes is
629 * parent -> child -> normal -> xattr -> quota 632 * parent -> child -> normal -> xattr -> second non-directory
630 */ 633 */
631enum inode_i_mutex_lock_class 634enum inode_i_mutex_lock_class
632{ 635{
@@ -634,9 +637,12 @@ enum inode_i_mutex_lock_class
634 I_MUTEX_PARENT, 637 I_MUTEX_PARENT,
635 I_MUTEX_CHILD, 638 I_MUTEX_CHILD,
636 I_MUTEX_XATTR, 639 I_MUTEX_XATTR,
637 I_MUTEX_QUOTA 640 I_MUTEX_NONDIR2
638}; 641};
639 642
643void lock_two_nondirectories(struct inode *, struct inode*);
644void unlock_two_nondirectories(struct inode *, struct inode*);
645
640/* 646/*
641 * NOTE: in a 32bit arch with a preemptable kernel and 647 * NOTE: in a 32bit arch with a preemptable kernel and
642 * an UP compile the i_size_read/write must be atomic 648 * an UP compile the i_size_read/write must be atomic
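I_MUTEX_QUOTA gives way to I_MUTEX_NONDIR2, used when two non-directory inodes must be held at once (e.g. rename over an existing target), and the new helper pair encapsulates the lock ordering. A hedged usage sketch:

#include <linux/fs.h>

static void example_update_pair(struct inode *a, struct inode *b)
{
        lock_two_nondirectories(a, b);
        /* ... modify both inodes while neither can change underneath ... */
        unlock_two_nondirectories(a, b);
}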
@@ -764,12 +770,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
764#define FILE_MNT_WRITE_RELEASED 2 770#define FILE_MNT_WRITE_RELEASED 2
765 771
766struct file { 772struct file {
767 /*
768 * fu_list becomes invalid after file_free is called and queued via
769 * fu_rcuhead for RCU freeing
770 */
771 union { 773 union {
772 struct list_head fu_list;
773 struct llist_node fu_llist; 774 struct llist_node fu_llist;
774 struct rcu_head fu_rcuhead; 775 struct rcu_head fu_rcuhead;
775 } f_u; 776 } f_u;
@@ -783,9 +784,6 @@ struct file {
783 * Must not be taken from IRQ context. 784 * Must not be taken from IRQ context.
784 */ 785 */
785 spinlock_t f_lock; 786 spinlock_t f_lock;
786#ifdef CONFIG_SMP
787 int f_sb_list_cpu;
788#endif
789 atomic_long_t f_count; 787 atomic_long_t f_count;
790 unsigned int f_flags; 788 unsigned int f_flags;
791 fmode_t f_mode; 789 fmode_t f_mode;
@@ -882,6 +880,7 @@ static inline int file_check_writeable(struct file *filp)
882 880
883#define FL_POSIX 1 881#define FL_POSIX 1
884#define FL_FLOCK 2 882#define FL_FLOCK 2
883#define FL_DELEG 4 /* NFSv4 delegation */
885#define FL_ACCESS 8 /* not trying to lock, just looking */ 884#define FL_ACCESS 8 /* not trying to lock, just looking */
886#define FL_EXISTS 16 /* when unlocking, test for existence */ 885#define FL_EXISTS 16 /* when unlocking, test for existence */
887#define FL_LEASE 32 /* lease held on this file */ 886#define FL_LEASE 32 /* lease held on this file */
@@ -1023,7 +1022,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *);
1023extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); 1022extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
1024extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); 1023extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
1025extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 1024extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
1026extern int __break_lease(struct inode *inode, unsigned int flags); 1025extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
1027extern void lease_get_mtime(struct inode *, struct timespec *time); 1026extern void lease_get_mtime(struct inode *, struct timespec *time);
1028extern int generic_setlease(struct file *, long, struct file_lock **); 1027extern int generic_setlease(struct file *, long, struct file_lock **);
1029extern int vfs_setlease(struct file *, long, struct file_lock **); 1028extern int vfs_setlease(struct file *, long, struct file_lock **);
@@ -1132,7 +1131,7 @@ static inline int flock_lock_file_wait(struct file *filp,
1132 return -ENOLCK; 1131 return -ENOLCK;
1133} 1132}
1134 1133
1135static inline int __break_lease(struct inode *inode, unsigned int mode) 1134static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1136{ 1135{
1137 return 0; 1136 return 0;
1138} 1137}
@@ -1264,11 +1263,6 @@ struct super_block {
1264 1263
1265 struct list_head s_inodes; /* all inodes */ 1264 struct list_head s_inodes; /* all inodes */
1266 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ 1265 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
1267#ifdef CONFIG_SMP
1268 struct list_head __percpu *s_files;
1269#else
1270 struct list_head s_files;
1271#endif
1272 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1266 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1273 struct block_device *s_bdev; 1267 struct block_device *s_bdev;
1274 struct backing_dev_info *s_bdi; 1268 struct backing_dev_info *s_bdi;
@@ -1330,6 +1324,7 @@ struct super_block {
1330 */ 1324 */
1331 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; 1325 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
1332 struct list_lru s_inode_lru ____cacheline_aligned_in_smp; 1326 struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
1327 struct rcu_head rcu;
1333}; 1328};
1334 1329
1335extern struct timespec current_fs_time(struct super_block *sb); 1330extern struct timespec current_fs_time(struct super_block *sb);
@@ -1458,10 +1453,10 @@ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
1458extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); 1453extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
1459extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); 1454extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
1460extern int vfs_symlink(struct inode *, struct dentry *, const char *); 1455extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1461extern int vfs_link(struct dentry *, struct inode *, struct dentry *); 1456extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
1462extern int vfs_rmdir(struct inode *, struct dentry *); 1457extern int vfs_rmdir(struct inode *, struct dentry *);
1463extern int vfs_unlink(struct inode *, struct dentry *); 1458extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
1464extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 1459extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
1465 1460
1466/* 1461/*
1467 * VFS dentry helper functions. 1462 * VFS dentry helper functions.
@@ -1875,6 +1870,17 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *,
1875 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) 1870 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
1876#define fops_put(fops) \ 1871#define fops_put(fops) \
1877 do { if (fops) module_put((fops)->owner); } while(0) 1872 do { if (fops) module_put((fops)->owner); } while(0)
1873/*
1874 * This one is to be used *ONLY* from ->open() instances.
1875 * fops must be non-NULL, pinned down *and* module dependencies
1876 * should be sufficient to pin the caller down as well.
1877 */
1878#define replace_fops(f, fops) \
1879 do { \
1880 struct file *__file = (f); \
1881 fops_put(__file->f_op); \
1882 BUG_ON(!(__file->f_op = (fops))); \
1883 } while(0)
1878 1884
1879extern int register_filesystem(struct file_system_type *); 1885extern int register_filesystem(struct file_system_type *);
1880extern int unregister_filesystem(struct file_system_type *); 1886extern int unregister_filesystem(struct file_system_type *);
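As the comment above states, replace_fops() is only valid from an ->open() instance: the incoming fops must be non-NULL and already pinned, with module dependencies keeping the caller alive. A hedged sketch of the intended call shape; example_real_fops is hypothetical and assumed pinned:

#include <linux/fs.h>
#include <linux/module.h>

static const struct file_operations example_real_fops; /* hypothetical */

static int example_open(struct inode *inode, struct file *filp)
{
        /* hand the file over to the real operations, then re-open */
        replace_fops(filp, &example_real_fops);
        if (filp->f_op->open)
                return filp->f_op->open(inode, filp);
        return 0;
}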
@@ -1899,6 +1905,9 @@ extern bool fs_fully_visible(struct file_system_type *);
1899 1905
1900extern int current_umask(void); 1906extern int current_umask(void);
1901 1907
1908extern void ihold(struct inode * inode);
1909extern void iput(struct inode *);
1910
1902/* /sys/fs */ 1911/* /sys/fs */
1903extern struct kobject *fs_kobj; 1912extern struct kobject *fs_kobj;
1904 1913
@@ -1955,9 +1964,39 @@ static inline int locks_verify_truncate(struct inode *inode,
1955static inline int break_lease(struct inode *inode, unsigned int mode) 1964static inline int break_lease(struct inode *inode, unsigned int mode)
1956{ 1965{
1957 if (inode->i_flock) 1966 if (inode->i_flock)
1958 return __break_lease(inode, mode); 1967 return __break_lease(inode, mode, FL_LEASE);
1968 return 0;
1969}
1970
1971static inline int break_deleg(struct inode *inode, unsigned int mode)
1972{
1973 if (inode->i_flock)
1974 return __break_lease(inode, mode, FL_DELEG);
1959 return 0; 1975 return 0;
1960} 1976}
1977
1978static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
1979{
1980 int ret;
1981
1982 ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
1983 if (ret == -EWOULDBLOCK && delegated_inode) {
1984 *delegated_inode = inode;
1985 ihold(inode);
1986 }
1987 return ret;
1988}
1989
1990static inline int break_deleg_wait(struct inode **delegated_inode)
1991{
1992 int ret;
1993
1994 ret = break_deleg(*delegated_inode, O_WRONLY);
1995 iput(*delegated_inode);
1996 *delegated_inode = NULL;
1997 return ret;
1998}
1999
1961#else /* !CONFIG_FILE_LOCKING */ 2000#else /* !CONFIG_FILE_LOCKING */
1962static inline int locks_mandatory_locked(struct inode *inode) 2001static inline int locks_mandatory_locked(struct inode *inode)
1963{ 2002{
@@ -1997,6 +2036,22 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
1997 return 0; 2036 return 0;
1998} 2037}
1999 2038
2039static inline int break_deleg(struct inode *inode, unsigned int mode)
2040{
2041 return 0;
2042}
2043
2044static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
2045{
2046 return 0;
2047}
2048
2049static inline int break_deleg_wait(struct inode **delegated_inode)
2050{
2051 BUG();
2052 return 0;
2053}
2054
2000#endif /* CONFIG_FILE_LOCKING */ 2055#endif /* CONFIG_FILE_LOCKING */
2001 2056
2002/* fs/open.c */ 2057/* fs/open.c */
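try_break_deleg() returns -EWOULDBLOCK and hands back a referenced delegated inode when a delegation cannot be broken without blocking under i_mutex; break_deleg_wait() then blocks outside the lock and drops the reference. A hedged sketch of the caller-side retry loop (mirroring the fs/utimes.c hunk above; example_op is a hypothetical stand-in for notify_change(), vfs_unlink(), and friends):

#include <linux/fs.h>
#include <linux/mutex.h>

static int example_op(struct dentry *dentry, struct inode **delegated)
{
        return 0;       /* stand-in for notify_change(), vfs_unlink(), ... */
}

static int example_with_deleg_retry(struct dentry *dentry)
{
        struct inode *inode = dentry->d_inode;
        struct inode *delegated_inode = NULL;
        int error;

retry_deleg:
        mutex_lock(&inode->i_mutex);
        error = example_op(dentry, &delegated_inode);
        mutex_unlock(&inode->i_mutex);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
                        goto retry_deleg;
        }
        return error;
}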
@@ -2223,7 +2278,7 @@ extern void emergency_remount(void);
2223#ifdef CONFIG_BLOCK 2278#ifdef CONFIG_BLOCK
2224extern sector_t bmap(struct inode *, sector_t); 2279extern sector_t bmap(struct inode *, sector_t);
2225#endif 2280#endif
2226extern int notify_change(struct dentry *, struct iattr *); 2281extern int notify_change(struct dentry *, struct iattr *, struct inode **);
2227extern int inode_permission(struct inode *, int); 2282extern int inode_permission(struct inode *, int);
2228extern int generic_permission(struct inode *, int); 2283extern int generic_permission(struct inode *, int);
2229 2284
@@ -2337,8 +2392,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
2337extern int inode_init_always(struct super_block *, struct inode *); 2392extern int inode_init_always(struct super_block *, struct inode *);
2338extern void inode_init_once(struct inode *); 2393extern void inode_init_once(struct inode *);
2339extern void address_space_init_once(struct address_space *mapping); 2394extern void address_space_init_once(struct address_space *mapping);
2340extern void ihold(struct inode * inode);
2341extern void iput(struct inode *);
2342extern struct inode * igrab(struct inode *); 2395extern struct inode * igrab(struct inode *);
2343extern ino_t iunique(struct super_block *, ino_t); 2396extern ino_t iunique(struct super_block *, ino_t);
2344extern int inode_needs_sync(struct inode *inode); 2397extern int inode_needs_sync(struct inode *inode);
@@ -2507,8 +2560,10 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
2507 int nofs); 2560 int nofs);
2508extern int page_symlink(struct inode *inode, const char *symname, int len); 2561extern int page_symlink(struct inode *inode, const char *symname, int len);
2509extern const struct inode_operations page_symlink_inode_operations; 2562extern const struct inode_operations page_symlink_inode_operations;
2563extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
2510extern int generic_readlink(struct dentry *, char __user *, int); 2564extern int generic_readlink(struct dentry *, char __user *, int);
2511extern void generic_fillattr(struct inode *, struct kstat *); 2565extern void generic_fillattr(struct inode *, struct kstat *);
2566int vfs_getattr_nosec(struct path *path, struct kstat *stat);
2512extern int vfs_getattr(struct path *, struct kstat *); 2567extern int vfs_getattr(struct path *, struct kstat *);
2513void __inode_add_bytes(struct inode *inode, loff_t bytes); 2568void __inode_add_bytes(struct inode *inode, loff_t bytes);
2514void inode_add_bytes(struct inode *inode, loff_t bytes); 2569void inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -2567,6 +2622,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
2567extern int simple_write_end(struct file *file, struct address_space *mapping, 2622extern int simple_write_end(struct file *file, struct address_space *mapping,
2568 loff_t pos, unsigned len, unsigned copied, 2623 loff_t pos, unsigned len, unsigned copied,
2569 struct page *page, void *fsdata); 2624 struct page *page, void *fsdata);
2625extern struct inode *alloc_anon_inode(struct super_block *);
2570 2626
2571extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); 2627extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
2572extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); 2628extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0d24e932db0b..96549abe8842 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,16 +25,6 @@
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/notifier.h> 26#include <linux/notifier.h>
27 27
28/* can make br locks by using local lock for read side, global lock for write */
29#define br_lock_init(name) lg_lock_init(name, #name)
30#define br_read_lock(name) lg_local_lock(name)
31#define br_read_unlock(name) lg_local_unlock(name)
32#define br_write_lock(name) lg_global_lock(name)
33#define br_write_unlock(name) lg_global_unlock(name)
34
35#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name)
36#define DEFINE_STATIC_BRLOCK(name) DEFINE_STATIC_LGLOCK(name)
37
38#ifdef CONFIG_DEBUG_LOCK_ALLOC 28#ifdef CONFIG_DEBUG_LOCK_ALLOC
39#define LOCKDEP_INIT_MAP lockdep_init_map 29#define LOCKDEP_INIT_MAP lockdep_init_map
40#else 30#else
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index cb358355ef43..f7eaf2d60083 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
31#define I2O_MINOR 166 31#define I2O_MINOR 166
32#define MICROCODE_MINOR 184 32#define MICROCODE_MINOR 184
33#define TUN_MINOR 200 33#define TUN_MINOR 200
34#define CUSE_MINOR 203
34#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ 35#define MWAVE_MINOR 219 /* ACP/Mwave Modem */
35#define MPT_MINOR 220 36#define MPT_MINOR 220
36#define MPT2SAS_MINOR 221 37#define MPT2SAS_MINOR 221
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 38cd98f112a0..371d346fa270 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -49,6 +49,8 @@ struct mnt_namespace;
49 49
50#define MNT_LOCK_READONLY 0x400000 50#define MNT_LOCK_READONLY 0x400000
51#define MNT_LOCKED 0x800000 51#define MNT_LOCKED 0x800000
52#define MNT_DOOMED 0x1000000
53#define MNT_SYNC_UMOUNT 0x2000000
52 54
53struct vfsmount { 55struct vfsmount {
54 struct dentry *mnt_root; /* root of the mounted tree */ 56 struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8e47bc7a1665..492de72560fa 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,7 +16,7 @@ struct nameidata {
16 struct path root; 16 struct path root;
17 struct inode *inode; /* path.dentry.d_inode */ 17 struct inode *inode; /* path.dentry.d_inode */
18 unsigned int flags; 18 unsigned int flags;
19 unsigned seq; 19 unsigned seq, m_seq;
20 int last_type; 20 int last_type;
21 unsigned depth; 21 unsigned depth;
22 char *saved_names[MAX_NESTED_LINKS + 1]; 22 char *saved_names[MAX_NESTED_LINKS + 1];
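
The extra m_seq field lets the RCU path walk notice concurrent changes to the mount table and retry or fall back instead of trusting a stale view. It follows the usual seqlock sampling pattern; a minimal, self-contained sketch with illustrative names (not the lock path walking actually samples):

        #include <linux/seqlock.h>

        static DEFINE_SEQLOCK(example_lock);
        static int example_value;

        static int example_lockless_read(void)
        {
                unsigned seq;
                int val;

                do {
                        seq = read_seqbegin(&example_lock);
                        val = example_value;            /* speculative read */
                } while (read_seqretry(&example_lock, seq));

                return val;
        }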
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index c74088ab103b..9e4761caa80c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -375,22 +375,6 @@ do { \
375# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) 375# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val))
376#endif 376#endif
377 377
378#ifndef this_cpu_xor
379# ifndef this_cpu_xor_1
380# define this_cpu_xor_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=)
381# endif
382# ifndef this_cpu_xor_2
383# define this_cpu_xor_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=)
384# endif
385# ifndef this_cpu_xor_4
386# define this_cpu_xor_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=)
387# endif
388# ifndef this_cpu_xor_8
389# define this_cpu_xor_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=)
390# endif
391# define this_cpu_xor(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val))
392#endif
393
394#define _this_cpu_generic_add_return(pcp, val) \ 378#define _this_cpu_generic_add_return(pcp, val) \
395({ \ 379({ \
396 typeof(pcp) ret__; \ 380 typeof(pcp) ret__; \
@@ -629,22 +613,6 @@ do { \
629# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) 613# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val))
630#endif 614#endif
631 615
632#ifndef __this_cpu_xor
633# ifndef __this_cpu_xor_1
634# define __this_cpu_xor_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=)
635# endif
636# ifndef __this_cpu_xor_2
637# define __this_cpu_xor_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=)
638# endif
639# ifndef __this_cpu_xor_4
640# define __this_cpu_xor_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=)
641# endif
642# ifndef __this_cpu_xor_8
643# define __this_cpu_xor_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=)
644# endif
645# define __this_cpu_xor(pcp, val) __pcpu_size_call(__this_cpu_xor_, (pcp), (val))
646#endif
647
648#define __this_cpu_generic_add_return(pcp, val) \ 616#define __this_cpu_generic_add_return(pcp, val) \
649({ \ 617({ \
650 __this_cpu_add(pcp, val); \ 618 __this_cpu_add(pcp, val); \
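
Both this_cpu_xor() fallbacks are removed because nothing uses them; note that the deleted generic this_cpu_xor() even dispatched to this_cpu_or_, so it never worked as named. The surviving operations keep their usual shape; a minimal sketch with an illustrative per-cpu counter:

        #include <linux/cpumask.h>
        #include <linux/percpu.h>

        static DEFINE_PER_CPU(unsigned long, example_hits);

        static void example_count_hit(void)
        {
                this_cpu_inc(example_hits);     /* preemption-safe update on this CPU */
        }

        static unsigned long example_total_hits(void)
        {
                unsigned long sum = 0;
                int cpu;

                for_each_possible_cpu(cpu)
                        sum += per_cpu(example_hits, cpu);
                return sum;
        }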
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index e2772666f004..7246ef3d4455 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -23,6 +23,7 @@ struct bsd_acct_struct;
23struct pid_namespace { 23struct pid_namespace {
24 struct kref kref; 24 struct kref kref;
25 struct pidmap pidmap[PIDMAP_ENTRIES]; 25 struct pidmap pidmap[PIDMAP_ENTRIES];
26 struct rcu_head rcu;
26 int last_pid; 27 int last_pid;
27 unsigned int nr_hashed; 28 unsigned int nr_hashed;
28 struct task_struct *child_reaper; 29 struct task_struct *child_reaper;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 52ae54828eda..e0dc355fa317 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -36,6 +36,11 @@
36 { CURSEG_COLD_NODE, "Cold NODE" }, \ 36 { CURSEG_COLD_NODE, "Cold NODE" }, \
37 { NO_CHECK_TYPE, "No TYPE" }) 37 { NO_CHECK_TYPE, "No TYPE" })
38 38
39#define show_file_type(type) \
40 __print_symbolic(type, \
41 { 0, "FILE" }, \
42 { 1, "DIR" })
43
39#define show_gc_type(type) \ 44#define show_gc_type(type) \
40 __print_symbolic(type, \ 45 __print_symbolic(type, \
41 { FG_GC, "Foreground GC" }, \ 46 { FG_GC, "Foreground GC" }, \
@@ -623,6 +628,52 @@ TRACE_EVENT(f2fs_do_submit_bio,
623 __entry->size) 628 __entry->size)
624); 629);
625 630
631DECLARE_EVENT_CLASS(f2fs__page,
632
633 TP_PROTO(struct page *page, int type),
634
635 TP_ARGS(page, type),
636
637 TP_STRUCT__entry(
638 __field(dev_t, dev)
639 __field(ino_t, ino)
640 __field(int, type)
641 __field(int, dir)
642 __field(pgoff_t, index)
643 __field(int, dirty)
644 ),
645
646 TP_fast_assign(
647 __entry->dev = page->mapping->host->i_sb->s_dev;
648 __entry->ino = page->mapping->host->i_ino;
649 __entry->type = type;
650 __entry->dir = S_ISDIR(page->mapping->host->i_mode);
651 __entry->index = page->index;
652 __entry->dirty = PageDirty(page);
653 ),
654
655 TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d",
656 show_dev_ino(__entry),
657 show_block_type(__entry->type),
658 show_file_type(__entry->dir),
659 (unsigned long)__entry->index,
660 __entry->dirty)
661);
662
663DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
664
665 TP_PROTO(struct page *page, int type),
666
667 TP_ARGS(page, type)
668);
669
670DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
671
672 TP_PROTO(struct page *page, int type),
673
674 TP_ARGS(page, type)
675);
676
626TRACE_EVENT(f2fs_submit_write_page, 677TRACE_EVENT(f2fs_submit_write_page,
627 678
628 TP_PROTO(struct page *page, block_t blk_addr, int type), 679 TP_PROTO(struct page *page, block_t blk_addr, int type),
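
The f2fs__page event class and its two DEFINE_EVENT users generate trace_f2fs_set_page_dirty() and trace_f2fs_vm_page_mkwrite() stubs that f2fs calls like ordinary functions. A minimal sketch of a call site, assuming f2fs's DATA block type and an illustrative wrapper function:

        #include <linux/mm.h>
        #include <trace/events/f2fs.h>

        static int examplefs_set_data_page_dirty(struct page *page)
        {
                trace_f2fs_set_page_dirty(page, DATA);
                return __set_page_dirty_nobuffers(page);
        }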
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ae1996d3c539..95827ce2f3c7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
886 err = -ENOENT; 886 err = -ENOENT;
887 } else { 887 } else {
888 ihold(inode); 888 ihold(inode);
889 err = vfs_unlink(dentry->d_parent->d_inode, dentry); 889 err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
890 } 890 }
891 dput(dentry); 891 dput(dentry);
892 892
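
vfs_unlink() gained a third argument in this series: a struct inode ** through which a pending NFSv4 delegation can be reported so the caller can break it and retry. Callers that cannot (or need not) handle a delegation, such as mq_unlink() above, pass NULL. A minimal sketch of that simple case, with illustrative names:

        #include <linux/fs.h>

        static int example_unlink(struct inode *dir, struct dentry *victim)
        {
                /* NULL: this caller does no delegation break-and-retry loop */
                return vfs_unlink(dir, victim, NULL);
        }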
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8bd9cfdc70d7..e0839bcd48c8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -125,38 +125,6 @@ struct cfent {
125}; 125};
126 126
127/* 127/*
128 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
129 * cgroup_subsys->use_id != 0.
130 */
131#define CSS_ID_MAX (65535)
132struct css_id {
133 /*
134 * The css to which this ID points. This pointer is set to valid value
135 * after cgroup is populated. If cgroup is removed, this will be NULL.
136 * This pointer is expected to be RCU-safe because destroy()
137 * is called after synchronize_rcu(). But for safe use, css_tryget()
138 * should be used for avoiding race.
139 */
140 struct cgroup_subsys_state __rcu *css;
141 /*
142 * ID of this css.
143 */
144 unsigned short id;
145 /*
146 * Depth in hierarchy which this ID belongs to.
147 */
148 unsigned short depth;
149 /*
150 * ID is freed by RCU. (and lookup routine is RCU safe.)
151 */
152 struct rcu_head rcu_head;
153 /*
154 * Hierarchy of CSS ID belongs to.
155 */
156 unsigned short stack[0]; /* Array of Length (depth+1) */
157};
158
159/*
160 * cgroup_event represents events which userspace want to receive. 128 * cgroup_event represents events which userspace want to receive.
161 */ 129 */
162struct cgroup_event { 130struct cgroup_event {
@@ -387,9 +355,6 @@ struct cgrp_cset_link {
387static struct css_set init_css_set; 355static struct css_set init_css_set;
388static struct cgrp_cset_link init_cgrp_cset_link; 356static struct cgrp_cset_link init_cgrp_cset_link;
389 357
390static int cgroup_init_idr(struct cgroup_subsys *ss,
391 struct cgroup_subsys_state *css);
392
393/* 358/*
394 * css_set_lock protects the list of css_set objects, and the chain of 359 * css_set_lock protects the list of css_set objects, and the chain of
395 * tasks off each css_set. Nests outside task->alloc_lock due to 360 * tasks off each css_set. Nests outside task->alloc_lock due to
@@ -841,8 +806,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
841 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 806 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
842}; 807};
843 808
844static int alloc_css_id(struct cgroup_subsys_state *child_css);
845
846static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) 809static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
847{ 810{
848 struct inode *inode = new_inode(sb); 811 struct inode *inode = new_inode(sb);
@@ -4240,21 +4203,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
4240 goto err; 4203 goto err;
4241 } 4204 }
4242 } 4205 }
4243
4244 /* This cgroup is ready now */
4245 for_each_root_subsys(cgrp->root, ss) {
4246 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
4247 struct css_id *id = rcu_dereference_protected(css->id, true);
4248
4249 /*
4250 * Update id->css pointer and make this css visible from
4251 * CSS ID functions. This pointer will be dereferened
4252 * from RCU-read-side without locks.
4253 */
4254 if (id)
4255 rcu_assign_pointer(id->css, css);
4256 }
4257
4258 return 0; 4206 return 0;
4259err: 4207err:
4260 cgroup_clear_dir(cgrp, subsys_mask); 4208 cgroup_clear_dir(cgrp, subsys_mask);
@@ -4323,7 +4271,6 @@ static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
4323 css->cgroup = cgrp; 4271 css->cgroup = cgrp;
4324 css->ss = ss; 4272 css->ss = ss;
4325 css->flags = 0; 4273 css->flags = 0;
4326 css->id = NULL;
4327 4274
4328 if (cgrp->parent) 4275 if (cgrp->parent)
4329 css->parent = cgroup_css(cgrp->parent, ss); 4276 css->parent = cgroup_css(cgrp->parent, ss);
@@ -4455,12 +4402,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4455 goto err_free_all; 4402 goto err_free_all;
4456 4403
4457 init_css(css, ss, cgrp); 4404 init_css(css, ss, cgrp);
4458
4459 if (ss->use_id) {
4460 err = alloc_css_id(css);
4461 if (err)
4462 goto err_free_all;
4463 }
4464 } 4405 }
4465 4406
4466 /* 4407 /*
@@ -4925,12 +4866,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4925 4866
4926 /* our new subsystem will be attached to the dummy hierarchy. */ 4867 /* our new subsystem will be attached to the dummy hierarchy. */
4927 init_css(css, ss, cgroup_dummy_top); 4868 init_css(css, ss, cgroup_dummy_top);
4928 /* init_idr must be after init_css() because it sets css->id. */
4929 if (ss->use_id) {
4930 ret = cgroup_init_idr(ss, css);
4931 if (ret)
4932 goto err_unload;
4933 }
4934 4869
4935 /* 4870 /*
4936 * Now we need to entangle the css into the existing css_sets. unlike 4871 * Now we need to entangle the css into the existing css_sets. unlike
@@ -4996,9 +4931,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
4996 4931
4997 offline_css(cgroup_css(cgroup_dummy_top, ss)); 4932 offline_css(cgroup_css(cgroup_dummy_top, ss));
4998 4933
4999 if (ss->use_id)
5000 idr_destroy(&ss->idr);
5001
5002 /* deassign the subsys_id */ 4934 /* deassign the subsys_id */
5003 cgroup_subsys[ss->subsys_id] = NULL; 4935 cgroup_subsys[ss->subsys_id] = NULL;
5004 4936
@@ -5025,8 +4957,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
5025 /* 4957 /*
5026 * remove subsystem's css from the cgroup_dummy_top and free it - 4958 * remove subsystem's css from the cgroup_dummy_top and free it -
5027 * need to free before marking as null because ss->css_free needs 4959 * need to free before marking as null because ss->css_free needs
5028 * the cgrp->subsys pointer to find their state. note that this 4960 * the cgrp->subsys pointer to find their state.
5029 * also takes care of freeing the css_id.
5030 */ 4961 */
5031 ss->css_free(cgroup_css(cgroup_dummy_top, ss)); 4962 ss->css_free(cgroup_css(cgroup_dummy_top, ss));
5032 RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); 4963 RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
@@ -5097,8 +5028,6 @@ int __init cgroup_init(void)
5097 for_each_builtin_subsys(ss, i) { 5028 for_each_builtin_subsys(ss, i) {
5098 if (!ss->early_init) 5029 if (!ss->early_init)
5099 cgroup_init_subsys(ss); 5030 cgroup_init_subsys(ss);
5100 if (ss->use_id)
5101 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
5102 } 5031 }
5103 5032
5104 /* allocate id for the dummy hierarchy */ 5033 /* allocate id for the dummy hierarchy */
@@ -5518,181 +5447,6 @@ static int __init cgroup_disable(char *str)
5518} 5447}
5519__setup("cgroup_disable=", cgroup_disable); 5448__setup("cgroup_disable=", cgroup_disable);
5520 5449
5521/*
5522 * Functons for CSS ID.
5523 */
5524
5525/* to get ID other than 0, this should be called when !cgroup_is_dead() */
5526unsigned short css_id(struct cgroup_subsys_state *css)
5527{
5528 struct css_id *cssid;
5529
5530 /*
5531 * This css_id() can return correct value when somone has refcnt
5532 * on this or this is under rcu_read_lock(). Once css->id is allocated,
5533 * it's unchanged until freed.
5534 */
5535 cssid = rcu_dereference_raw(css->id);
5536
5537 if (cssid)
5538 return cssid->id;
5539 return 0;
5540}
5541EXPORT_SYMBOL_GPL(css_id);
5542
5543/**
5544 * css_is_ancestor - test "root" css is an ancestor of "child"
5545 * @child: the css to be tested.
5546 * @root: the css supporsed to be an ancestor of the child.
5547 *
5548 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
5549 * this function reads css->id, the caller must hold rcu_read_lock().
5550 * But, considering usual usage, the csses should be valid objects after test.
5551 * Assuming that the caller will do some action to the child if this returns
5552 * returns true, the caller must take "child";s reference count.
5553 * If "child" is valid object and this returns true, "root" is valid, too.
5554 */
5555
5556bool css_is_ancestor(struct cgroup_subsys_state *child,
5557 const struct cgroup_subsys_state *root)
5558{
5559 struct css_id *child_id;
5560 struct css_id *root_id;
5561
5562 child_id = rcu_dereference(child->id);
5563 if (!child_id)
5564 return false;
5565 root_id = rcu_dereference(root->id);
5566 if (!root_id)
5567 return false;
5568 if (child_id->depth < root_id->depth)
5569 return false;
5570 if (child_id->stack[root_id->depth] != root_id->id)
5571 return false;
5572 return true;
5573}
5574
5575void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
5576{
5577 struct css_id *id = rcu_dereference_protected(css->id, true);
5578
5579 /* When this is called before css_id initialization, id can be NULL */
5580 if (!id)
5581 return;
5582
5583 BUG_ON(!ss->use_id);
5584
5585 rcu_assign_pointer(id->css, NULL);
5586 rcu_assign_pointer(css->id, NULL);
5587 spin_lock(&ss->id_lock);
5588 idr_remove(&ss->idr, id->id);
5589 spin_unlock(&ss->id_lock);
5590 kfree_rcu(id, rcu_head);
5591}
5592EXPORT_SYMBOL_GPL(free_css_id);
5593
5594/*
5595 * This is called by init or create(). Then, calls to this function are
5596 * always serialized (By cgroup_mutex() at create()).
5597 */
5598
5599static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
5600{
5601 struct css_id *newid;
5602 int ret, size;
5603
5604 BUG_ON(!ss->use_id);
5605
5606 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
5607 newid = kzalloc(size, GFP_KERNEL);
5608 if (!newid)
5609 return ERR_PTR(-ENOMEM);
5610
5611 idr_preload(GFP_KERNEL);
5612 spin_lock(&ss->id_lock);
5613 /* Don't use 0. allocates an ID of 1-65535 */
5614 ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
5615 spin_unlock(&ss->id_lock);
5616 idr_preload_end();
5617
5618 /* Returns error when there are no free spaces for new ID.*/
5619 if (ret < 0)
5620 goto err_out;
5621
5622 newid->id = ret;
5623 newid->depth = depth;
5624 return newid;
5625err_out:
5626 kfree(newid);
5627 return ERR_PTR(ret);
5628
5629}
5630
5631static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
5632 struct cgroup_subsys_state *rootcss)
5633{
5634 struct css_id *newid;
5635
5636 spin_lock_init(&ss->id_lock);
5637 idr_init(&ss->idr);
5638
5639 newid = get_new_cssid(ss, 0);
5640 if (IS_ERR(newid))
5641 return PTR_ERR(newid);
5642
5643 newid->stack[0] = newid->id;
5644 RCU_INIT_POINTER(newid->css, rootcss);
5645 RCU_INIT_POINTER(rootcss->id, newid);
5646 return 0;
5647}
5648
5649static int alloc_css_id(struct cgroup_subsys_state *child_css)
5650{
5651 struct cgroup_subsys_state *parent_css = css_parent(child_css);
5652 struct css_id *child_id, *parent_id;
5653 int i, depth;
5654
5655 parent_id = rcu_dereference_protected(parent_css->id, true);
5656 depth = parent_id->depth + 1;
5657
5658 child_id = get_new_cssid(child_css->ss, depth);
5659 if (IS_ERR(child_id))
5660 return PTR_ERR(child_id);
5661
5662 for (i = 0; i < depth; i++)
5663 child_id->stack[i] = parent_id->stack[i];
5664 child_id->stack[depth] = child_id->id;
5665 /*
5666 * child_id->css pointer will be set after this cgroup is available
5667 * see cgroup_populate_dir()
5668 */
5669 rcu_assign_pointer(child_css->id, child_id);
5670
5671 return 0;
5672}
5673
5674/**
5675 * css_lookup - lookup css by id
5676 * @ss: cgroup subsys to be looked into.
5677 * @id: the id
5678 *
5679 * Returns pointer to cgroup_subsys_state if there is valid one with id.
5680 * NULL if not. Should be called under rcu_read_lock()
5681 */
5682struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5683{
5684 struct css_id *cssid = NULL;
5685
5686 BUG_ON(!ss->use_id);
5687 cssid = idr_find(&ss->idr, id);
5688
5689 if (unlikely(!cssid))
5690 return NULL;
5691
5692 return rcu_dereference(cssid->css);
5693}
5694EXPORT_SYMBOL_GPL(css_lookup);
5695
5696/** 5450/**
5697 * css_from_dir - get corresponding css from the dentry of a cgroup dir 5451 * css_from_dir - get corresponding css from the dentry of a cgroup dir
5698 * @dentry: directory dentry of interest 5452 * @dentry: directory dentry of interest
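
With this removal the CSS ID allocator (css_id(), css_lookup(), css_is_ancestor(), the per-subsystem idr and ->use_id flag) is gone; the only remaining user, memcg, is converted below to the cgroup core's own id and to cgroup_is_descendant(). A sketch of the replacement calls, with illustrative wrapper names:

        #include <linux/cgroup.h>

        static bool example_is_or_descends_from(struct cgroup_subsys_state *child,
                                                struct cgroup_subsys_state *root)
        {
                /* replaces css_is_ancestor(child, root) */
                return cgroup_is_descendant(child->cgroup, root->cgroup);
        }

        static unsigned short example_short_id(struct cgroup_subsys_state *css)
        {
                /* replaces css_id(); memcg shifts by one so 0 can still mean "none" */
                return css->cgroup->id + 1;
        }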
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
index ff915efef66d..e556751d15d9 100644
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -1,23 +1,19 @@
1#include <linux/elf.h> 1#include <linux/elf.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4 4#include <linux/binfmts.h>
5#include <asm/elf.h>
6
7 5
8Elf_Half __weak elf_core_extra_phdrs(void) 6Elf_Half __weak elf_core_extra_phdrs(void)
9{ 7{
10 return 0; 8 return 0;
11} 9}
12 10
13int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, 11int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
14 unsigned long limit)
15{ 12{
16 return 1; 13 return 1;
17} 14}
18 15
19int __weak elf_core_write_extra_data(struct file *file, size_t *size, 16int __weak elf_core_write_extra_data(struct coredump_params *cprm)
20 unsigned long limit)
21{ 17{
22 return 1; 18 return 1;
23} 19}
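
The weak elf_core_write_extra_*() hooks now take the coredump_params directly instead of a file/size/limit triple, matching the binfmt_elf conversion to dump_emit() elsewhere in this series. A minimal sketch of what an architecture override could look like under the new prototype; the payload is illustrative and the 1-on-success/0-on-failure return convention is assumed from the existing hooks:

        #include <linux/binfmts.h>
        #include <linux/coredump.h>
        #include <linux/elfcore.h>

        int elf_core_write_extra_data(struct coredump_params *cprm)
        {
                static const char example_payload[] = "extra";

                /* dump_emit() writes into the core file and tracks the size limit */
                return dump_emit(cprm, example_payload, sizeof(example_payload));
        }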
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 42086551a24a..06c62de9c711 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -132,6 +132,12 @@ out:
132 return ERR_PTR(err); 132 return ERR_PTR(err);
133} 133}
134 134
135static void delayed_free_pidns(struct rcu_head *p)
136{
137 kmem_cache_free(pid_ns_cachep,
138 container_of(p, struct pid_namespace, rcu));
139}
140
135static void destroy_pid_namespace(struct pid_namespace *ns) 141static void destroy_pid_namespace(struct pid_namespace *ns)
136{ 142{
137 int i; 143 int i;
@@ -140,7 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
140 for (i = 0; i < PIDMAP_ENTRIES; i++) 146 for (i = 0; i < PIDMAP_ENTRIES; i++)
141 kfree(ns->pidmap[i].page); 147 kfree(ns->pidmap[i].page);
142 put_user_ns(ns->user_ns); 148 put_user_ns(ns->user_ns);
143 kmem_cache_free(pid_ns_cachep, ns); 149 call_rcu(&ns->rcu, delayed_free_pidns);
144} 150}
145 151
146struct pid_namespace *copy_pid_ns(unsigned long flags, 152struct pid_namespace *copy_pid_ns(unsigned long flags,
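
destroy_pid_namespace() switches from an immediate kmem_cache_free() to call_rcu(), using the rcu_head added to struct pid_namespace above, so lockless readers that still hold a pointer get a full grace period before the memory goes away. A minimal sketch of the same call_rcu()/container_of() idiom with an illustrative structure:

        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct example_obj {
                int payload;
                struct rcu_head rcu;
        };

        static void example_free_rcu(struct rcu_head *head)
        {
                kfree(container_of(head, struct example_obj, rcu));
        }

        static void example_release(struct example_obj *obj)
        {
                /* defer the free until after a grace period */
                call_rcu(&obj->rcu, example_free_rcu);
        }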
diff --git a/kernel/signal.c b/kernel/signal.c
index ded28b91fa53..940b30ee9a30 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2723,7 +2723,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
2723 2723
2724#ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER 2724#ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2725 2725
2726int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) 2726int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
2727{ 2727{
2728 int err; 2728 int err;
2729 2729
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3d4bb07c7679..f20a57b7faf2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -500,6 +500,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
500 return (memcg == root_mem_cgroup); 500 return (memcg == root_mem_cgroup);
501} 501}
502 502
503/*
504 * We restrict the id in the range of [1, 65535], so it can fit into
505 * an unsigned short.
506 */
507#define MEM_CGROUP_ID_MAX USHRT_MAX
508
509static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
510{
511 /*
512 * The ID of the root cgroup is 0, but memcg treat 0 as an
513 * invalid ID, so we return (cgroup_id + 1).
514 */
515 return memcg->css.cgroup->id + 1;
516}
517
518static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
519{
520 struct cgroup_subsys_state *css;
521
522 css = css_from_id(id - 1, &mem_cgroup_subsys);
523 return mem_cgroup_from_css(css);
524}
525
503/* Writing them here to avoid exposing memcg's inner layout */ 526/* Writing them here to avoid exposing memcg's inner layout */
504#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) 527#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
505 528
@@ -571,16 +594,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
571#ifdef CONFIG_MEMCG_KMEM 594#ifdef CONFIG_MEMCG_KMEM
572/* 595/*
573 * This will be the memcg's index in each cache's ->memcg_params->memcg_caches. 596 * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
574 * There are two main reasons for not using the css_id for this: 597 * The main reason for not using cgroup id for this:
575 * 1) this works better in sparse environments, where we have a lot of memcgs, 598 * this works better in sparse environments, where we have a lot of memcgs,
576 * but only a few kmem-limited. Or also, if we have, for instance, 200 599 * but only a few kmem-limited. Or also, if we have, for instance, 200
577 * memcgs, and none but the 200th is kmem-limited, we'd have to have a 600 * memcgs, and none but the 200th is kmem-limited, we'd have to have a
578 * 200 entry array for that. 601 * 200 entry array for that.
579 *
580 * 2) In order not to violate the cgroup API, we would like to do all memory
581 * allocation in ->create(). At that point, we haven't yet allocated the
582 * css_id. Having a separate index prevents us from messing with the cgroup
583 * core for this
584 * 602 *
585 * The current size of the caches array is stored in 603 * The current size of the caches array is stored in
586 * memcg_limited_groups_array_size. It will double each time we have to 604 * memcg_limited_groups_array_size. It will double each time we have to
@@ -595,14 +613,14 @@ int memcg_limited_groups_array_size;
595 * cgroups is a reasonable guess. In the future, it could be a parameter or 613 * cgroups is a reasonable guess. In the future, it could be a parameter or
596 * tunable, but that is strictly not necessary. 614 * tunable, but that is strictly not necessary.
597 * 615 *
598 * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get 616 * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
599 * this constant directly from cgroup, but it is understandable that this is 617 * this constant directly from cgroup, but it is understandable that this is
600 * better kept as an internal representation in cgroup.c. In any case, the 618 * better kept as an internal representation in cgroup.c. In any case, the
601 * css_id space is not getting any smaller, and we don't have to necessarily 619 * cgrp_id space is not getting any smaller, and we don't have to necessarily
602 * increase ours as well if it increases. 620 * increase ours as well if it increases.
603 */ 621 */
604#define MEMCG_CACHES_MIN_SIZE 4 622#define MEMCG_CACHES_MIN_SIZE 4
605#define MEMCG_CACHES_MAX_SIZE 65535 623#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
606 624
607/* 625/*
608 * A lot of the calls to the cache allocation functions are expected to be 626 * A lot of the calls to the cache allocation functions are expected to be
@@ -1409,7 +1427,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1409 return true; 1427 return true;
1410 if (!root_memcg->use_hierarchy || !memcg) 1428 if (!root_memcg->use_hierarchy || !memcg)
1411 return false; 1429 return false;
1412 return css_is_ancestor(&memcg->css, &root_memcg->css); 1430 return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
1413} 1431}
1414 1432
1415static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, 1433static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -2827,15 +2845,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2827 */ 2845 */
2828static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) 2846static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2829{ 2847{
2830 struct cgroup_subsys_state *css;
2831
2832 /* ID 0 is unused ID */ 2848 /* ID 0 is unused ID */
2833 if (!id) 2849 if (!id)
2834 return NULL; 2850 return NULL;
2835 css = css_lookup(&mem_cgroup_subsys, id); 2851 return mem_cgroup_from_id(id);
2836 if (!css)
2837 return NULL;
2838 return mem_cgroup_from_css(css);
2839} 2852}
2840 2853
2841struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) 2854struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -4344,7 +4357,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
4344 * css_get() was called in uncharge(). 4357 * css_get() was called in uncharge().
4345 */ 4358 */
4346 if (do_swap_account && swapout && memcg) 4359 if (do_swap_account && swapout && memcg)
4347 swap_cgroup_record(ent, css_id(&memcg->css)); 4360 swap_cgroup_record(ent, mem_cgroup_id(memcg));
4348} 4361}
4349#endif 4362#endif
4350 4363
@@ -4396,8 +4409,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
4396{ 4409{
4397 unsigned short old_id, new_id; 4410 unsigned short old_id, new_id;
4398 4411
4399 old_id = css_id(&from->css); 4412 old_id = mem_cgroup_id(from);
4400 new_id = css_id(&to->css); 4413 new_id = mem_cgroup_id(to);
4401 4414
4402 if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) { 4415 if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
4403 mem_cgroup_swap_statistics(from, false); 4416 mem_cgroup_swap_statistics(from, false);
@@ -6165,7 +6178,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
6165 size_t size = memcg_size(); 6178 size_t size = memcg_size();
6166 6179
6167 mem_cgroup_remove_from_trees(memcg); 6180 mem_cgroup_remove_from_trees(memcg);
6168 free_css_id(&mem_cgroup_subsys, &memcg->css);
6169 6181
6170 for_each_node(node) 6182 for_each_node(node)
6171 free_mem_cgroup_per_zone_info(memcg, node); 6183 free_mem_cgroup_per_zone_info(memcg, node);
@@ -6268,6 +6280,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
6268 struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); 6280 struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
6269 int error = 0; 6281 int error = 0;
6270 6282
6283 if (css->cgroup->id > MEM_CGROUP_ID_MAX)
6284 return -ENOSPC;
6285
6271 if (!parent) 6286 if (!parent)
6272 return 0; 6287 return 0;
6273 6288
@@ -6539,7 +6554,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
6539 } 6554 }
6540 /* There is a swap entry and a page doesn't exist or isn't charged */ 6555 /* There is a swap entry and a page doesn't exist or isn't charged */
6541 if (ent.val && !ret && 6556 if (ent.val && !ret &&
6542 css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) { 6557 mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
6543 ret = MC_TARGET_SWAP; 6558 ret = MC_TARGET_SWAP;
6544 if (target) 6559 if (target)
6545 target->ent = ent; 6560 target->ent = ent;
@@ -6959,7 +6974,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
6959 .bind = mem_cgroup_bind, 6974 .bind = mem_cgroup_bind,
6960 .base_cftypes = mem_cgroup_files, 6975 .base_cftypes = mem_cgroup_files,
6961 .early_init = 0, 6976 .early_init = 0,
6962 .use_id = 1,
6963}; 6977};
6964 6978
6965#ifdef CONFIG_MEMCG_SWAP 6979#ifdef CONFIG_MEMCG_SWAP
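
memcg now derives its unsigned-short identifier from the cgroup core id (shifted by one so 0 keeps meaning "no cgroup") and resolves it back through css_from_id(), which is why css_online rejects cgroup ids above MEM_CGROUP_ID_MAX. A minimal sketch of the round trip, with illustrative wrappers:

        static unsigned short example_record_id(struct mem_cgroup *memcg)
        {
                return mem_cgroup_id(memcg);                    /* css.cgroup->id + 1 */
        }

        static struct mem_cgroup *example_resolve_id(unsigned short id)
        {
                return id ? mem_cgroup_from_id(id) : NULL;      /* 0 means none */
        }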
diff --git a/mm/memory.c b/mm/memory.c
index 15744b2cf919..bf8665849a5f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -679,7 +679,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
679 if (vma->vm_ops) 679 if (vma->vm_ops)
680 printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n", 680 printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
681 vma->vm_ops->fault); 681 vma->vm_ops->fault);
682 if (vma->vm_file && vma->vm_file->f_op) 682 if (vma->vm_file)
683 printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n", 683 printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
684 vma->vm_file->f_op->mmap); 684 vma->vm_file->f_op->mmap);
685 dump_stack(); 685 dump_stack();
diff --git a/mm/mmap.c b/mm/mmap.c
index 803048e9c568..5a6baddde15d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1297,7 +1297,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1297 vm_flags &= ~VM_MAYEXEC; 1297 vm_flags &= ~VM_MAYEXEC;
1298 } 1298 }
1299 1299
1300 if (!file->f_op || !file->f_op->mmap) 1300 if (!file->f_op->mmap)
1301 return -ENODEV; 1301 return -ENODEV;
1302 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) 1302 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1303 return -EINVAL; 1303 return -EINVAL;
@@ -1949,7 +1949,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1949 return -ENOMEM; 1949 return -ENOMEM;
1950 1950
1951 get_area = current->mm->get_unmapped_area; 1951 get_area = current->mm->get_unmapped_area;
1952 if (file && file->f_op && file->f_op->get_unmapped_area) 1952 if (file && file->f_op->get_unmapped_area)
1953 get_area = file->f_op->get_unmapped_area; 1953 get_area = file->f_op->get_unmapped_area;
1954 addr = get_area(file, addr, len, pgoff, flags); 1954 addr = get_area(file, addr, len, pgoff, flags);
1955 if (IS_ERR_VALUE(addr)) 1955 if (IS_ERR_VALUE(addr))
diff --git a/mm/nommu.c b/mm/nommu.c
index d8a957bb9e31..fec093adad9c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -937,7 +937,7 @@ static int validate_mmap_request(struct file *file,
937 struct address_space *mapping; 937 struct address_space *mapping;
938 938
939 /* files must support mmap */ 939 /* files must support mmap */
940 if (!file->f_op || !file->f_op->mmap) 940 if (!file->f_op->mmap)
941 return -ENODEV; 941 return -ENODEV;
942 942
943 /* work out if what we've got could possibly be shared 943 /* work out if what we've got could possibly be shared
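
mm/memory.c, mm/mmap.c and mm/nommu.c (and net/9p below) all drop the same test: after this series an opened struct file is guaranteed a non-NULL ->f_op, so only the individual method pointers need checking. A minimal sketch of the resulting pattern, with an illustrative helper:

        #include <linux/fs.h>
        #include <linux/mm.h>

        static int example_do_mmap(struct file *file, struct vm_area_struct *vma)
        {
                if (!file->f_op->mmap)          /* f_op itself needs no NULL check */
                        return -ENODEV;
                return file->f_op->mmap(file, vma);
        }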
diff --git a/mm/percpu.c b/mm/percpu.c
index 8c8e08f3a692..0d10defe951e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1706,8 +1706,9 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
1706 1706
1707out_free_areas: 1707out_free_areas:
1708 for (group = 0; group < ai->nr_groups; group++) 1708 for (group = 0; group < ai->nr_groups; group++)
1709 free_fn(areas[group], 1709 if (areas[group])
1710 ai->groups[group].nr_units * ai->unit_size); 1710 free_fn(areas[group],
1711 ai->groups[group].nr_units * ai->unit_size);
1711out_free: 1712out_free:
1712 pcpu_free_alloc_info(ai); 1713 pcpu_free_alloc_info(ai);
1713 if (areas) 1714 if (areas)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 3ffda1b3799b..9321a7763067 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -244,10 +244,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
244 if (!ts) 244 if (!ts)
245 return -EREMOTEIO; 245 return -EREMOTEIO;
246 246
247 if (!ts->rd->f_op || !ts->rd->f_op->poll) 247 if (!ts->rd->f_op->poll)
248 return -EIO; 248 return -EIO;
249 249
250 if (!ts->wr->f_op || !ts->wr->f_op->poll) 250 if (!ts->wr->f_op->poll)
251 return -EIO; 251 return -EIO;
252 252
253 ret = ts->rd->f_op->poll(ts->rd, pt); 253 ret = ts->rd->f_op->poll(ts->rd, pt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index f94567b45bb3..d0d14a04dce1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -519,8 +519,8 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
519 d_add(dentry, inode); 519 d_add(dentry, inode);
520 return 0; 520 return 0;
521out_err: 521out_err:
522 printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", 522 printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
523 __FILE__, __func__, dentry->d_name.name); 523 __FILE__, __func__, dentry);
524 dput(dentry); 524 dput(dentry);
525 return -ENOMEM; 525 return -ENOMEM;
526} 526}
@@ -755,8 +755,8 @@ static int rpc_populate(struct dentry *parent,
755out_bad: 755out_bad:
756 __rpc_depopulate(parent, files, start, eof); 756 __rpc_depopulate(parent, files, start, eof);
757 mutex_unlock(&dir->i_mutex); 757 mutex_unlock(&dir->i_mutex);
758 printk(KERN_WARNING "%s: %s failed to populate directory %s\n", 758 printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
759 __FILE__, __func__, parent->d_name.name); 759 __FILE__, __func__, parent);
760 return err; 760 return err;
761} 761}
762 762
@@ -852,8 +852,8 @@ out:
852 return dentry; 852 return dentry;
853out_err: 853out_err:
854 dentry = ERR_PTR(err); 854 dentry = ERR_PTR(err);
855 printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", 855 printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
856 __FILE__, __func__, parent->d_name.name, name, 856 __FILE__, __func__, parent, name,
857 err); 857 err);
858 goto out; 858 goto out;
859} 859}
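
The rpc_pipe messages switch to the new %pd printk specifier, which takes a struct dentry * and prints its name, instead of passing ->d_name.name by hand. A minimal sketch with an illustrative message:

        #include <linux/dcache.h>
        #include <linux/printk.h>

        static void example_complain(struct dentry *dentry, int err)
        {
                pr_warn("examplefs: operation on %pd failed (errno = %d)\n",
                        dentry, err);
        }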
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index c123628d3f84..7c2a0a71049e 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -63,16 +63,6 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
63 63
64struct cgroup_subsys devices_subsys; 64struct cgroup_subsys devices_subsys;
65 65
66static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
67 struct cgroup_taskset *set)
68{
69 struct task_struct *task = cgroup_taskset_first(set);
70
71 if (current != task && !capable(CAP_SYS_ADMIN))
72 return -EPERM;
73 return 0;
74}
75
76/* 66/*
77 * called under devcgroup_mutex 67 * called under devcgroup_mutex
78 */ 68 */
@@ -697,7 +687,6 @@ static struct cftype dev_cgroup_files[] = {
697 687
698struct cgroup_subsys devices_subsys = { 688struct cgroup_subsys devices_subsys = {
699 .name = "devices", 689 .name = "devices",
700 .can_attach = devcgroup_can_attach,
701 .css_alloc = devcgroup_css_alloc, 690 .css_alloc = devcgroup_css_alloc,
702 .css_free = devcgroup_css_free, 691 .css_free = devcgroup_css_free,
703 .css_online = devcgroup_online, 692 .css_online = devcgroup_online,
diff --git a/sound/core/sound.c b/sound/core/sound.c
index f002bd911dae..437c25ea6403 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -153,7 +153,7 @@ static int snd_open(struct inode *inode, struct file *file)
153{ 153{
154 unsigned int minor = iminor(inode); 154 unsigned int minor = iminor(inode);
155 struct snd_minor *mptr = NULL; 155 struct snd_minor *mptr = NULL;
156 const struct file_operations *old_fops; 156 const struct file_operations *new_fops;
157 int err = 0; 157 int err = 0;
158 158
159 if (minor >= ARRAY_SIZE(snd_minors)) 159 if (minor >= ARRAY_SIZE(snd_minors))
@@ -167,24 +167,14 @@ static int snd_open(struct inode *inode, struct file *file)
167 return -ENODEV; 167 return -ENODEV;
168 } 168 }
169 } 169 }
170 old_fops = file->f_op; 170 new_fops = fops_get(mptr->f_ops);
171 file->f_op = fops_get(mptr->f_ops);
172 if (file->f_op == NULL) {
173 file->f_op = old_fops;
174 err = -ENODEV;
175 }
176 mutex_unlock(&sound_mutex); 171 mutex_unlock(&sound_mutex);
177 if (err < 0) 172 if (!new_fops)
178 return err; 173 return -ENODEV;
174 replace_fops(file, new_fops);
179 175
180 if (file->f_op->open) { 176 if (file->f_op->open)
181 err = file->f_op->open(inode, file); 177 err = file->f_op->open(inode, file);
182 if (err) {
183 fops_put(file->f_op);
184 file->f_op = fops_get(old_fops);
185 }
186 }
187 fops_put(old_fops);
188 return err; 178 return err;
189} 179}
190 180
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 45759f4cca75..11e953a1fa45 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -626,31 +626,20 @@ static int soundcore_open(struct inode *inode, struct file *file)
626 if (s) 626 if (s)
627 new_fops = fops_get(s->unit_fops); 627 new_fops = fops_get(s->unit_fops);
628 } 628 }
629 spin_unlock(&sound_loader_lock);
629 if (new_fops) { 630 if (new_fops) {
630 /* 631 /*
631 * We rely upon the fact that we can't be unloaded while the 632 * We rely upon the fact that we can't be unloaded while the
632 * subdriver is there, so if ->open() is successful we can 633 * subdriver is there.
633 * safely drop the reference counter and if it is not we can
634 * revert to old ->f_op. Ugly, indeed, but that's the cost of
635 * switching ->f_op in the first place.
636 */ 634 */
637 int err = 0; 635 int err = 0;
638 const struct file_operations *old_fops = file->f_op; 636 replace_fops(file, new_fops);
639 file->f_op = new_fops;
640 spin_unlock(&sound_loader_lock);
641 637
642 if (file->f_op->open) 638 if (file->f_op->open)
643 err = file->f_op->open(inode,file); 639 err = file->f_op->open(inode,file);
644 640
645 if (err) {
646 fops_put(file->f_op);
647 file->f_op = fops_get(old_fops);
648 }
649
650 fops_put(old_fops);
651 return err; 641 return err;
652 } 642 }
653 spin_unlock(&sound_loader_lock);
654 return -ENODEV; 643 return -ENODEV;
655} 644}
656 645
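
Both sound open() paths now use replace_fops(), added in this series, instead of juggling old_fops by hand: the file_operations reference obtained with fops_get() is installed on the file (and the file's previous f_op reference dropped), and the failure path no longer has to swap the old ops back. A minimal sketch of the idiom; example_lookup_fops() is a hypothetical stand-in for the minor-table lookup and is assumed to return a reference taken with fops_get():

        #include <linux/fs.h>

        static const struct file_operations *example_lookup_fops(struct inode *inode);

        static int example_proxy_open(struct inode *inode, struct file *file)
        {
                const struct file_operations *new_fops = example_lookup_fops(inode);
                int err = 0;

                if (!new_fops)
                        return -ENODEV;

                replace_fops(file, new_fops);   /* new reference now travels with the file */
                if (file->f_op->open)
                        err = file->f_op->open(inode, file);
                return err;
        }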