aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig74
-rw-r--r--fs/aio.c2
-rw-r--r--fs/anon_inodes.c11
-rw-r--r--fs/autofs4/autofs_i.h28
-rw-r--r--fs/autofs4/expire.c91
-rw-r--r--fs/autofs4/inode.c33
-rw-r--r--fs/autofs4/root.c589
-rw-r--r--fs/autofs4/waitq.c267
-rw-r--r--fs/binfmt_elf.c71
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_misc.c20
-rw-r--r--fs/coda/coda_linux.c6
-rw-r--r--fs/coda/psdev.c4
-rw-r--r--fs/coda/upcall.c15
-rw-r--r--fs/compat.c22
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/dcache.c335
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/dquot.c129
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c37
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h23
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c31
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/kthread.c203
-rw-r--r--fs/ecryptfs/main.c79
-rw-r--r--fs/ecryptfs/miscdev.c59
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/eventfd.c17
-rw-r--r--fs/eventpoll.c30
-rw-r--r--fs/exec.c143
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c4
-rw-r--r--fs/ext2/xattr_user.c4
-rw-r--r--fs/ext3/dir.c14
-rw-r--r--fs/ext3/ialloc.c9
-rw-r--r--fs/ext3/inode.c46
-rw-r--r--fs/ext3/namei.c26
-rw-r--r--fs/ext3/super.c78
-rw-r--r--fs/ext3/xattr_security.c2
-rw-r--r--fs/ext3/xattr_trusted.c4
-rw-r--r--fs/ext3/xattr_user.c4
-rw-r--r--fs/fat/dir.c229
-rw-r--r--fs/fat/inode.c34
-rw-r--r--fs/fat/misc.c10
-rw-r--r--fs/fcntl.c15
-rw-r--r--fs/fuse/dir.c139
-rw-r--r--fs/fuse/file.c11
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c177
-rw-r--r--fs/hfs/bitmap.c8
-rw-r--r--fs/hfs/btree.c2
-rw-r--r--fs/hfs/extent.c14
-rw-r--r--fs/hfs/hfs_fs.h5
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/extents.c14
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hugetlbfs/inode.c101
-rw-r--r--fs/inotify_user.c18
-rw-r--r--fs/isofs/rock.c22
-rw-r--r--fs/jbd/commit.c64
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/revoke.c163
-rw-r--r--fs/jbd/transaction.c57
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/lockd/clntproc.c10
-rw-r--r--fs/lockd/svclock.c13
-rw-r--r--fs/locks.c90
-rw-r--r--fs/minix/inode.c3
-rw-r--r--fs/minix/minix.h6
-rw-r--r--fs/minix/namei.c24
-rw-r--r--fs/msdos/namei.c21
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/lockd.c13
-rw-r--r--fs/open.c3
-rw-r--r--fs/partitions/check.c38
-rw-r--r--fs/partitions/efi.c42
-rw-r--r--fs/partitions/ldm.c70
-rw-r--r--fs/partitions/ldm.h5
-rw-r--r--fs/pipe.c35
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/base.c86
-rw-r--r--fs/proc/generic.c14
-rw-r--r--fs/proc/inode.c81
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_misc.c19
-rw-r--r--fs/quota.c18
-rw-r--r--fs/quota_v1.c1
-rw-r--r--fs/quota_v2.c1
-rw-r--r--fs/reiserfs/journal.c42
-rw-r--r--fs/reiserfs/super.c124
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/signalfd.c19
-rw-r--r--fs/smbfs/cache.c1
-rw-r--r--fs/smbfs/proc.c1
-rw-r--r--fs/super.c1
-rw-r--r--fs/sync.c3
-rw-r--r--fs/timerfd.c9
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/vfat/namei.c2
109 files changed, 2814 insertions, 1771 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..97e3bdedb1e6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
902 902
903menu "Pseudo filesystems" 903menu "Pseudo filesystems"
904 904
905config PROC_FS 905source "fs/proc/Kconfig"
906 bool "/proc file system support" if EMBEDDED
907 default y
908 help
909 This is a virtual file system providing information about the status
910 of the system. "Virtual" means that it doesn't take up any space on
911 your hard disk: the files are created on the fly by the kernel when
912 you try to access them. Also, you cannot read the files with older
913 version of the program less: you need to use more or cat.
914
915 It's totally cool; for example, "cat /proc/interrupts" gives
916 information about what the different IRQs are used for at the moment
917 (there is a small number of Interrupt ReQuest lines in your computer
918 that are used by the attached devices to gain the CPU's attention --
919 often a source of trouble if two devices are mistakenly configured
920 to use the same IRQ). The program procinfo to display some
921 information about your system gathered from the /proc file system.
922
923 Before you can use the /proc file system, it has to be mounted,
924 meaning it has to be given a location in the directory hierarchy.
925 That location should be /proc. A command such as "mount -t proc proc
926 /proc" or the equivalent line in /etc/fstab does the job.
927
928 The /proc file system is explained in the file
929 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
930 ("man 5 proc").
931
932 This option will enlarge your kernel by about 67 KB. Several
933 programs depend on this, so everyone should say Y here.
934
935config PROC_KCORE
936 bool "/proc/kcore support" if !ARM
937 depends on PROC_FS && MMU
938
939config PROC_VMCORE
940 bool "/proc/vmcore support (EXPERIMENTAL)"
941 depends on PROC_FS && CRASH_DUMP
942 default y
943 help
944 Exports the dump image of crashed kernel in ELF format.
945
946config PROC_SYSCTL
947 bool "Sysctl support (/proc/sys)" if EMBEDDED
948 depends on PROC_FS
949 select SYSCTL
950 default y
951 ---help---
952 The sysctl interface provides a means of dynamically changing
953 certain kernel parameters and variables on the fly without requiring
954 a recompile of the kernel or reboot of the system. The primary
955 interface is through /proc/sys. If you say Y here a tree of
956 modifiable sysctl entries will be generated beneath the
957 /proc/sys directory. They are explained in the files
958 in <file:Documentation/sysctl/>. Note that enabling this
959 option will enlarge the kernel by at least 8 KB.
960
961 As it is generally a good thing, you should say Y here unless
962 building a kernel for install/rescue disks or your system is very
963 limited in memory.
964 906
965config SYSFS 907config SYSFS
966 bool "sysfs file system support" if EMBEDDED 908 bool "sysfs file system support" if EMBEDDED
@@ -2093,20 +2035,6 @@ config CODA_FS
2093 To compile the coda client support as a module, choose M here: the 2035 To compile the coda client support as a module, choose M here: the
2094 module will be called coda. 2036 module will be called coda.
2095 2037
2096config CODA_FS_OLD_API
2097 bool "Use 96-bit Coda file identifiers"
2098 depends on CODA_FS
2099 help
2100 A new kernel-userspace API had to be introduced for Coda v6.0
2101 to support larger 128-bit file identifiers as needed by the
2102 new realms implementation.
2103
2104 However this new API is not backward compatible with older
2105 clients. If you really need to run the old Coda userspace
2106 cache manager then say Y.
2107
2108 For most cases you probably want to say N.
2109
2110config AFS_FS 2038config AFS_FS
2111 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2039 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
2112 depends on INET && EXPERIMENTAL 2040 depends on INET && EXPERIMENTAL
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..0051fd94b44e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
586 struct task_struct *tsk = current; 586 struct task_struct *tsk = current;
587 587
588 task_lock(tsk); 588 task_lock(tsk);
589 tsk->flags |= PF_BORROWED_MM;
590 active_mm = tsk->active_mm; 589 active_mm = tsk->active_mm;
591 atomic_inc(&mm->mm_count); 590 atomic_inc(&mm->mm_count);
592 tsk->mm = mm; 591 tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
610 struct task_struct *tsk = current; 609 struct task_struct *tsk = current;
611 610
612 task_lock(tsk); 611 task_lock(tsk);
613 tsk->flags &= ~PF_BORROWED_MM;
614 tsk->mm = NULL; 612 tsk->mm = NULL;
615 /* active_mm is still 'mm' */ 613 /* active_mm is still 'mm' */
616 enter_lazy_tlb(mm, tsk); 614 enter_lazy_tlb(mm, tsk);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..3662dd44896b 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
58 * of the file 58 * of the file
59 * 59 *
60 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
61 * @fops [in] file operations for the new file 61 * @fops: [in] file operations for the new file
62 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv: [in] private data for the new file (will be file's private_data)
63 * @flags: [in] flags
63 * 64 *
64 * Creates a new file by hooking it on a single inode. This is useful for files 65 * Creates a new file by hooking it on a single inode. This is useful for files
65 * that do not need to have a full-fledged inode in order to operate correctly. 66 * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * setup. Returns new descriptor or -error. 69 * setup. Returns new descriptor or -error.
69 */ 70 */
70int anon_inode_getfd(const char *name, const struct file_operations *fops, 71int anon_inode_getfd(const char *name, const struct file_operations *fops,
71 void *priv) 72 void *priv, int flags)
72{ 73{
73 struct qstr this; 74 struct qstr this;
74 struct dentry *dentry; 75 struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
78 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
79 return -ENODEV; 80 return -ENODEV;
80 81
81 error = get_unused_fd(); 82 error = get_unused_fd_flags(flags);
82 if (error < 0) 83 if (error < 0)
83 return error; 84 return error;
84 fd = error; 85 fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
115 file->f_mapping = anon_inode_inode->i_mapping; 116 file->f_mapping = anon_inode_inode->i_mapping;
116 117
117 file->f_pos = 0; 118 file->f_pos = 0;
118 file->f_flags = O_RDWR; 119 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
119 file->f_version = 0; 120 file->f_version = 0;
120 file->private_data = priv; 121 file->private_data = priv;
121 122
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa93..69a2f5c92319 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
52 52
53 int flags; 53 int flags;
54 54
55 struct list_head rehash; 55 struct completion expire_complete;
56
57 struct list_head active;
58 struct list_head expiring;
56 59
57 struct autofs_sb_info *sbi; 60 struct autofs_sb_info *sbi;
58 unsigned long last_used; 61 unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
68}; 71};
69 72
70#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 73#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
74#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
71 75
72struct autofs_wait_queue { 76struct autofs_wait_queue {
73 wait_queue_head_t queue; 77 wait_queue_head_t queue;
74 struct autofs_wait_queue *next; 78 struct autofs_wait_queue *next;
75 autofs_wqt_t wait_queue_token; 79 autofs_wqt_t wait_queue_token;
76 /* We use the following to see what we are waiting for */ 80 /* We use the following to see what we are waiting for */
77 unsigned int hash; 81 struct qstr name;
78 unsigned int len;
79 char *name;
80 u32 dev; 82 u32 dev;
81 u64 ino; 83 u64 ino;
82 uid_t uid; 84 uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
85 pid_t tgid; 87 pid_t tgid;
86 /* This is for status reporting upon return */ 88 /* This is for status reporting upon return */
87 int status; 89 int status;
88 atomic_t wait_ctr; 90 unsigned int wait_ctr;
89}; 91};
90 92
91#define AUTOFS_SBI_MAGIC 0x6d4a556d 93#define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
112 struct mutex wq_mutex; 114 struct mutex wq_mutex;
113 spinlock_t fs_lock; 115 spinlock_t fs_lock;
114 struct autofs_wait_queue *queues; /* Wait queue pointer */ 116 struct autofs_wait_queue *queues; /* Wait queue pointer */
115 spinlock_t rehash_lock; 117 spinlock_t lookup_lock;
116 struct list_head rehash_list; 118 struct list_head active_list;
119 struct list_head expiring_list;
117}; 120};
118 121
119static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 122static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
138static inline int autofs4_ispending(struct dentry *dentry) 141static inline int autofs4_ispending(struct dentry *dentry)
139{ 142{
140 struct autofs_info *inf = autofs4_dentry_ino(dentry); 143 struct autofs_info *inf = autofs4_dentry_ino(dentry);
141 int pending = 0;
142 144
143 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 145 if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
144 return 1; 146 return 1;
145 147
146 if (inf) { 148 if (inf->flags & AUTOFS_INF_EXPIRING)
147 spin_lock(&inf->sbi->fs_lock); 149 return 1;
148 pending = inf->flags & AUTOFS_INF_EXPIRING;
149 spin_unlock(&inf->sbi->fs_lock);
150 }
151 150
152 return pending; 151 return 0;
153} 152}
154 153
155static inline void autofs4_copy_atime(struct file *src, struct file *dst) 154static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
164 163
165/* Expiration */ 164/* Expiration */
166int is_autofs4_dentry(struct dentry *); 165int is_autofs4_dentry(struct dentry *);
166int autofs4_expire_wait(struct dentry *dentry);
167int autofs4_expire_run(struct super_block *, struct vfsmount *, 167int autofs4_expire_run(struct super_block *, struct vfsmount *,
168 struct autofs_sb_info *, 168 struct autofs_sb_info *,
169 struct autofs_packet_expire __user *); 169 struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d8..cdabb796ff01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
259 now = jiffies; 259 now = jiffies;
260 timeout = sbi->exp_timeout; 260 timeout = sbi->exp_timeout;
261 261
262 /* Lock the tree as we must expire as a whole */
263 spin_lock(&sbi->fs_lock); 262 spin_lock(&sbi->fs_lock);
264 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 263 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
265 struct autofs_info *ino = autofs4_dentry_ino(root); 264 struct autofs_info *ino = autofs4_dentry_ino(root);
266 265 if (d_mountpoint(root)) {
267 /* Set this flag early to catch sys_chdir and the like */ 266 ino->flags |= AUTOFS_INF_MOUNTPOINT;
267 root->d_mounted--;
268 }
268 ino->flags |= AUTOFS_INF_EXPIRING; 269 ino->flags |= AUTOFS_INF_EXPIRING;
270 init_completion(&ino->expire_complete);
269 spin_unlock(&sbi->fs_lock); 271 spin_unlock(&sbi->fs_lock);
270 return root; 272 return root;
271 } 273 }
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
292 struct list_head *next; 294 struct list_head *next;
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 295 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 296 int exp_leaves = how & AUTOFS_EXP_LEAVES;
297 struct autofs_info *ino;
298 unsigned int ino_count;
295 299
296 if (!root) 300 if (!root)
297 return NULL; 301 return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
316 dentry = dget(dentry); 320 dentry = dget(dentry);
317 spin_unlock(&dcache_lock); 321 spin_unlock(&dcache_lock);
318 322
323 spin_lock(&sbi->fs_lock);
324 ino = autofs4_dentry_ino(dentry);
325
319 /* 326 /*
320 * Case 1: (i) indirect mount or top level pseudo direct mount 327 * Case 1: (i) indirect mount or top level pseudo direct mount
321 * (autofs-4.1). 328 * (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
326 DPRINTK("checking mountpoint %p %.*s", 333 DPRINTK("checking mountpoint %p %.*s",
327 dentry, (int)dentry->d_name.len, dentry->d_name.name); 334 dentry, (int)dentry->d_name.len, dentry->d_name.name);
328 335
336 /* Path walk currently on this dentry? */
337 ino_count = atomic_read(&ino->count) + 2;
338 if (atomic_read(&dentry->d_count) > ino_count)
339 goto next;
340
329 /* Can we umount this guy */ 341 /* Can we umount this guy */
330 if (autofs4_mount_busy(mnt, dentry)) 342 if (autofs4_mount_busy(mnt, dentry))
331 goto next; 343 goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
343 355
344 /* Case 2: tree mount, expire iff entire tree is not busy */ 356 /* Case 2: tree mount, expire iff entire tree is not busy */
345 if (!exp_leaves) { 357 if (!exp_leaves) {
346 /* Lock the tree as we must expire as a whole */ 358 /* Path walk currently on this dentry? */
347 spin_lock(&sbi->fs_lock); 359 ino_count = atomic_read(&ino->count) + 1;
348 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 360 if (atomic_read(&dentry->d_count) > ino_count)
349 struct autofs_info *inf = autofs4_dentry_ino(dentry); 361 goto next;
350 362
351 /* Set this flag early to catch sys_chdir and the like */ 363 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 364 expired = dentry;
355 goto found; 365 goto found;
356 } 366 }
357 spin_unlock(&sbi->fs_lock);
358 /* 367 /*
359 * Case 3: pseudo direct mount, expire individual leaves 368 * Case 3: pseudo direct mount, expire individual leaves
360 * (autofs-4.1). 369 * (autofs-4.1).
361 */ 370 */
362 } else { 371 } else {
372 /* Path walk currently on this dentry? */
373 ino_count = atomic_read(&ino->count) + 1;
374 if (atomic_read(&dentry->d_count) > ino_count)
375 goto next;
376
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 377 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 378 if (expired) {
365 dput(dentry); 379 dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
367 } 381 }
368 } 382 }
369next: 383next:
384 spin_unlock(&sbi->fs_lock);
370 dput(dentry); 385 dput(dentry);
371 spin_lock(&dcache_lock); 386 spin_lock(&dcache_lock);
372 next = next->next; 387 next = next->next;
@@ -377,12 +392,45 @@ next:
377found: 392found:
378 DPRINTK("returning %p %.*s", 393 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name); 394 expired, (int)expired->d_name.len, expired->d_name.name);
395 ino = autofs4_dentry_ino(expired);
396 ino->flags |= AUTOFS_INF_EXPIRING;
397 init_completion(&ino->expire_complete);
398 spin_unlock(&sbi->fs_lock);
380 spin_lock(&dcache_lock); 399 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 400 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock); 401 spin_unlock(&dcache_lock);
383 return expired; 402 return expired;
384} 403}
385 404
405int autofs4_expire_wait(struct dentry *dentry)
406{
407 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
408 struct autofs_info *ino = autofs4_dentry_ino(dentry);
409 int status;
410
411 /* Block on any pending expire */
412 spin_lock(&sbi->fs_lock);
413 if (ino->flags & AUTOFS_INF_EXPIRING) {
414 spin_unlock(&sbi->fs_lock);
415
416 DPRINTK("waiting for expire %p name=%.*s",
417 dentry, dentry->d_name.len, dentry->d_name.name);
418
419 status = autofs4_wait(sbi, dentry, NFY_NONE);
420 wait_for_completion(&ino->expire_complete);
421
422 DPRINTK("expire done status=%d", status);
423
424 if (d_unhashed(dentry))
425 return -EAGAIN;
426
427 return status;
428 }
429 spin_unlock(&sbi->fs_lock);
430
431 return 0;
432}
433
386/* Perform an expiry operation */ 434/* Perform an expiry operation */
387int autofs4_expire_run(struct super_block *sb, 435int autofs4_expire_run(struct super_block *sb,
388 struct vfsmount *mnt, 436 struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
390 struct autofs_packet_expire __user *pkt_p) 438 struct autofs_packet_expire __user *pkt_p)
391{ 439{
392 struct autofs_packet_expire pkt; 440 struct autofs_packet_expire pkt;
441 struct autofs_info *ino;
393 struct dentry *dentry; 442 struct dentry *dentry;
443 int ret = 0;
394 444
395 memset(&pkt,0,sizeof pkt); 445 memset(&pkt,0,sizeof pkt);
396 446
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
406 dput(dentry); 456 dput(dentry);
407 457
408 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) 458 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
409 return -EFAULT; 459 ret = -EFAULT;
410 460
411 return 0; 461 spin_lock(&sbi->fs_lock);
462 ino = autofs4_dentry_ino(dentry);
463 ino->flags &= ~AUTOFS_INF_EXPIRING;
464 complete_all(&ino->expire_complete);
465 spin_unlock(&sbi->fs_lock);
466
467 return ret;
412} 468}
413 469
414/* Call repeatedly until it returns -EAGAIN, meaning there's nothing 470/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
433 489
434 /* This is synchronous because it makes the daemon a 490 /* This is synchronous because it makes the daemon a
435 little easier */ 491 little easier */
436 ino->flags |= AUTOFS_INF_EXPIRING;
437 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); 492 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
493
494 spin_lock(&sbi->fs_lock);
495 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
496 sb->s_root->d_mounted++;
497 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
498 }
438 ino->flags &= ~AUTOFS_INF_EXPIRING; 499 ino->flags &= ~AUTOFS_INF_EXPIRING;
500 complete_all(&ino->expire_complete);
501 spin_unlock(&sbi->fs_lock);
439 dput(dentry); 502 dput(dentry);
440 } 503 }
441 504
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d236..7bb3e5ba0537 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
24 24
25static void ino_lnkfree(struct autofs_info *ino) 25static void ino_lnkfree(struct autofs_info *ino)
26{ 26{
27 kfree(ino->u.symlink); 27 if (ino->u.symlink) {
28 ino->u.symlink = NULL; 28 kfree(ino->u.symlink);
29 ino->u.symlink = NULL;
30 }
29} 31}
30 32
31struct autofs_info *autofs4_init_ino(struct autofs_info *ino, 33struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
41 if (ino == NULL) 43 if (ino == NULL)
42 return NULL; 44 return NULL;
43 45
44 ino->flags = 0; 46 if (!reinit) {
45 ino->mode = mode; 47 ino->flags = 0;
46 ino->inode = NULL; 48 ino->inode = NULL;
47 ino->dentry = NULL; 49 ino->dentry = NULL;
48 ino->size = 0; 50 ino->size = 0;
49 51 INIT_LIST_HEAD(&ino->active);
50 INIT_LIST_HEAD(&ino->rehash); 52 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0);
54 }
51 55
56 ino->mode = mode;
52 ino->last_used = jiffies; 57 ino->last_used = jiffies;
53 atomic_set(&ino->count, 0);
54 58
55 ino->sbi = sbi; 59 ino->sbi = sbi;
56 60
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
159 if (!sbi) 163 if (!sbi)
160 goto out_kill_sb; 164 goto out_kill_sb;
161 165
162 if (!sbi->catatonic) 166 /* Free wait queues, close pipe */
163 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 167 autofs4_catatonic_mode(sbi);
164 168
165 /* Clean up and release dangling references */ 169 /* Clean up and release dangling references */
166 autofs4_force_release(sbi); 170 autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
338 mutex_init(&sbi->wq_mutex); 342 mutex_init(&sbi->wq_mutex);
339 spin_lock_init(&sbi->fs_lock); 343 spin_lock_init(&sbi->fs_lock);
340 sbi->queues = NULL; 344 sbi->queues = NULL;
341 spin_lock_init(&sbi->rehash_lock); 345 spin_lock_init(&sbi->lookup_lock);
342 INIT_LIST_HEAD(&sbi->rehash_list); 346 INIT_LIST_HEAD(&sbi->active_list);
347 INIT_LIST_HEAD(&sbi->expiring_list);
343 s->s_blocksize = 1024; 348 s->s_blocksize = 1024;
344 s->s_blocksize_bits = 10; 349 s->s_blocksize_bits = 10;
345 s->s_magic = AUTOFS_SUPER_MAGIC; 350 s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb52..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
27static int autofs4_dir_open(struct inode *inode, struct file *file); 27static int autofs4_dir_open(struct inode *inode, struct file *file);
28static int autofs4_dir_close(struct inode *inode, struct file *file);
29static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
30static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
31static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 28static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
32static void *autofs4_follow_link(struct dentry *, struct nameidata *); 29static void *autofs4_follow_link(struct dentry *, struct nameidata *);
33 30
31#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
32#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
33
34const struct file_operations autofs4_root_operations = { 34const struct file_operations autofs4_root_operations = {
35 .open = dcache_dir_open, 35 .open = dcache_dir_open,
36 .release = dcache_dir_close, 36 .release = dcache_dir_close,
37 .read = generic_read_dir, 37 .read = generic_read_dir,
38 .readdir = autofs4_root_readdir, 38 .readdir = dcache_readdir,
39 .ioctl = autofs4_root_ioctl, 39 .ioctl = autofs4_root_ioctl,
40}; 40};
41 41
42const struct file_operations autofs4_dir_operations = { 42const struct file_operations autofs4_dir_operations = {
43 .open = autofs4_dir_open, 43 .open = autofs4_dir_open,
44 .release = autofs4_dir_close, 44 .release = dcache_dir_close,
45 .read = generic_read_dir, 45 .read = generic_read_dir,
46 .readdir = autofs4_dir_readdir, 46 .readdir = dcache_readdir,
47}; 47};
48 48
49const struct inode_operations autofs4_indirect_root_inode_operations = { 49const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
70 .rmdir = autofs4_dir_rmdir, 70 .rmdir = autofs4_dir_rmdir,
71}; 71};
72 72
73static int autofs4_root_readdir(struct file *file, void *dirent,
74 filldir_t filldir)
75{
76 struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
77 int oz_mode = autofs4_oz_mode(sbi);
78
79 DPRINTK("called, filp->f_pos = %lld", file->f_pos);
80
81 /*
82 * Don't set reghost flag if:
83 * 1) f_pos is larger than zero -- we've already been here.
84 * 2) we haven't even enabled reghosting in the 1st place.
85 * 3) this is the daemon doing a readdir
86 */
87 if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
88 sbi->needs_reghost = 1;
89
90 DPRINTK("needs_reghost = %d", sbi->needs_reghost);
91
92 return dcache_readdir(file, dirent, filldir);
93}
94
95static int autofs4_dir_open(struct inode *inode, struct file *file) 73static int autofs4_dir_open(struct inode *inode, struct file *file)
96{ 74{
97 struct dentry *dentry = file->f_path.dentry; 75 struct dentry *dentry = file->f_path.dentry;
98 struct vfsmount *mnt = file->f_path.mnt;
99 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 76 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
100 struct dentry *cursor;
101 int status;
102
103 status = dcache_dir_open(inode, file);
104 if (status)
105 goto out;
106
107 cursor = file->private_data;
108 cursor->d_fsdata = NULL;
109 77
110 DPRINTK("file=%p dentry=%p %.*s", 78 DPRINTK("file=%p dentry=%p %.*s",
111 file, dentry, dentry->d_name.len, dentry->d_name.name); 79 file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
113 if (autofs4_oz_mode(sbi)) 81 if (autofs4_oz_mode(sbi))
114 goto out; 82 goto out;
115 83
116 if (autofs4_ispending(dentry)) { 84 /*
117 DPRINTK("dentry busy"); 85 * An empty directory in an autofs file system is always a
118 dcache_dir_close(inode, file); 86 * mount point. The daemon must have failed to mount this
119 status = -EBUSY; 87 * during lookup so it doesn't exist. This can happen, for
120 goto out; 88 * example, if user space returns an incorrect status for a
121 } 89 * mount request. Otherwise we're doing a readdir on the
122 90 * autofs file system so just let the libfs routines handle
123 status = -ENOENT; 91 * it.
124 if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { 92 */
125 struct nameidata nd; 93 spin_lock(&dcache_lock);
126 int empty, ret; 94 if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
127
128 /* In case there are stale directory dentrys from a failed mount */
129 spin_lock(&dcache_lock);
130 empty = list_empty(&dentry->d_subdirs);
131 spin_unlock(&dcache_lock); 95 spin_unlock(&dcache_lock);
132 96 return -ENOENT;
133 if (!empty)
134 d_invalidate(dentry);
135
136 nd.flags = LOOKUP_DIRECTORY;
137 ret = (dentry->d_op->d_revalidate)(dentry, &nd);
138
139 if (ret <= 0) {
140 if (ret < 0)
141 status = ret;
142 dcache_dir_close(inode, file);
143 goto out;
144 }
145 } 97 }
98 spin_unlock(&dcache_lock);
146 99
147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL;
149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150
151 path_get(&fp_path);
152
153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file);
156 goto out;
157 }
158
159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file);
163 goto out;
164 }
165 cursor->d_fsdata = fp;
166 }
167 return 0;
168out:
169 return status;
170}
171
172static int autofs4_dir_close(struct inode *inode, struct file *file)
173{
174 struct dentry *dentry = file->f_path.dentry;
175 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
176 struct dentry *cursor = file->private_data;
177 int status = 0;
178
179 DPRINTK("file=%p dentry=%p %.*s",
180 file, dentry, dentry->d_name.len, dentry->d_name.name);
181
182 if (autofs4_oz_mode(sbi))
183 goto out;
184
185 if (autofs4_ispending(dentry)) {
186 DPRINTK("dentry busy");
187 status = -EBUSY;
188 goto out;
189 }
190
191 if (d_mountpoint(dentry)) {
192 struct file *fp = cursor->d_fsdata;
193 if (!fp) {
194 status = -ENOENT;
195 goto out;
196 }
197 filp_close(fp, current->files);
198 }
199out:
200 dcache_dir_close(inode, file);
201 return status;
202}
203
204static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
205{
206 struct dentry *dentry = file->f_path.dentry;
207 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
208 struct dentry *cursor = file->private_data;
209 int status;
210
211 DPRINTK("file=%p dentry=%p %.*s",
212 file, dentry, dentry->d_name.len, dentry->d_name.name);
213
214 if (autofs4_oz_mode(sbi))
215 goto out;
216
217 if (autofs4_ispending(dentry)) {
218 DPRINTK("dentry busy");
219 return -EBUSY;
220 }
221
222 if (d_mountpoint(dentry)) {
223 struct file *fp = cursor->d_fsdata;
224
225 if (!fp)
226 return -ENOENT;
227
228 if (!fp->f_op || !fp->f_op->readdir)
229 goto out;
230
231 status = vfs_readdir(fp, filldir, dirent);
232 file->f_pos = fp->f_pos;
233 if (status)
234 autofs4_copy_atime(file, fp);
235 return status;
236 }
237out: 100out:
238 return dcache_readdir(file, dirent, filldir); 101 return dcache_dir_open(inode, file);
239} 102}
240 103
241static int try_to_fill_dentry(struct dentry *dentry, int flags) 104static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 105{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 106 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 107 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 struct dentry *new;
246 int status; 108 int status;
247 109
248 /* Block on any pending expiry here; invalidate the dentry
249 when expiration is done to trigger mount request with a new
250 dentry */
251 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
252 DPRINTK("waiting for expire %p name=%.*s",
253 dentry, dentry->d_name.len, dentry->d_name.name);
254
255 status = autofs4_wait(sbi, dentry, NFY_NONE);
256
257 DPRINTK("expire done status=%d", status);
258
259 /*
260 * If the directory still exists the mount request must
261 * continue otherwise it can't be followed at the right
262 * time during the walk.
263 */
264 status = d_invalidate(dentry);
265 if (status != -EBUSY)
266 return -EAGAIN;
267 }
268
269 DPRINTK("dentry=%p %.*s ino=%p", 110 DPRINTK("dentry=%p %.*s ino=%p",
270 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 111 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
271 112
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
292 return status; 133 return status;
293 } 134 }
294 /* Trigger mount for path component or follow link */ 135 /* Trigger mount for path component or follow link */
295 } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || 136 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
137 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
296 current->link_count) { 138 current->link_count) {
297 DPRINTK("waiting for mount name=%.*s", 139 DPRINTK("waiting for mount name=%.*s",
298 dentry->d_name.len, dentry->d_name.name); 140 dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 162 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
321 spin_unlock(&dentry->d_lock); 163 spin_unlock(&dentry->d_lock);
322 164
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0; 165 return 0;
344} 166}
345 167
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
355 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", 177 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
356 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, 178 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
357 nd->flags); 179 nd->flags);
358 180 /*
359 /* If it's our master or we shouldn't trigger a mount we're done */ 181 * For an expire of a covered direct or offset mount we need
360 lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); 182 * to break out of follow_down() at the autofs mount trigger
361 if (oz_mode || !lookup_type) 183 * (d_mounted--), so we can see the expiring flag, and manage
184 * the blocking and following here until the expire is completed.
185 */
186 if (oz_mode) {
187 spin_lock(&sbi->fs_lock);
188 if (ino->flags & AUTOFS_INF_EXPIRING) {
189 spin_unlock(&sbi->fs_lock);
190 /* Follow down to our covering mount. */
191 if (!follow_down(&nd->path.mnt, &nd->path.dentry))
192 goto done;
193 goto follow;
194 }
195 spin_unlock(&sbi->fs_lock);
362 goto done; 196 goto done;
197 }
363 198
364 /* If an expire request is pending wait for it. */ 199 /* If an expire request is pending everyone must wait. */
365 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 200 autofs4_expire_wait(dentry);
366 DPRINTK("waiting for active request %p name=%.*s",
367 dentry, dentry->d_name.len, dentry->d_name.name);
368
369 status = autofs4_wait(sbi, dentry, NFY_NONE);
370 201
371 DPRINTK("request done status=%d", status); 202 /* We trigger a mount for almost all flags */
372 } 203 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
204 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
205 goto follow;
373 206
374 /* 207 /*
375 * If the dentry contains directories then it is an 208 * If the dentry contains directories then it is an autofs
376 * autofs multi-mount with no root mount offset. So 209 * multi-mount with no root mount offset. So don't try to
377 * don't try to mount it again. 210 * mount it again.
378 */ 211 */
379 spin_lock(&dcache_lock); 212 spin_lock(&dcache_lock);
380 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 213 if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
214 (!d_mountpoint(dentry) && __simple_empty(dentry))) {
381 spin_unlock(&dcache_lock); 215 spin_unlock(&dcache_lock);
382 216
383 status = try_to_fill_dentry(dentry, 0); 217 status = try_to_fill_dentry(dentry, 0);
384 if (status) 218 if (status)
385 goto out_error; 219 goto out_error;
386 220
387 /* 221 goto follow;
388 * The mount succeeded but if there is no root mount
389 * it must be an autofs multi-mount with no root offset
390 * so we don't need to follow the mount.
391 */
392 if (d_mountpoint(dentry)) {
393 if (!autofs4_follow_mount(&nd->path.mnt,
394 &nd->path.dentry)) {
395 status = -ENOENT;
396 goto out_error;
397 }
398 }
399
400 goto done;
401 } 222 }
402 spin_unlock(&dcache_lock); 223 spin_unlock(&dcache_lock);
224follow:
225 /*
226 * If there is no root mount it must be an autofs
227 * multi-mount with no root offset so we don't need
228 * to follow it.
229 */
230 if (d_mountpoint(dentry)) {
231 if (!autofs4_follow_mount(&nd->path.mnt,
232 &nd->path.dentry)) {
233 status = -ENOENT;
234 goto out_error;
235 }
236 }
403 237
404done: 238done:
405 return NULL; 239 return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
424 int status = 1; 258 int status = 1;
425 259
426 /* Pending dentry */ 260 /* Pending dentry */
261 spin_lock(&sbi->fs_lock);
427 if (autofs4_ispending(dentry)) { 262 if (autofs4_ispending(dentry)) {
428 /* The daemon never causes a mount to trigger */ 263 /* The daemon never causes a mount to trigger */
264 spin_unlock(&sbi->fs_lock);
265
429 if (oz_mode) 266 if (oz_mode)
430 return 1; 267 return 1;
431 268
432 /* 269 /*
270 * If the directory has gone away due to an expire
271 * we have been called as ->d_revalidate() and so
272 * we need to return false and proceed to ->lookup().
273 */
274 if (autofs4_expire_wait(dentry) == -EAGAIN)
275 return 0;
276
277 /*
433 * A zero status is success otherwise we have a 278 * A zero status is success otherwise we have a
434 * negative error code. 279 * negative error code.
435 */ 280 */
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
437 if (status == 0) 282 if (status == 0)
438 return 1; 283 return 1;
439 284
440 /*
441 * A status of EAGAIN here means that the dentry has gone
442 * away while waiting for an expire to complete. If we are
443 * racing with expire lookup will wait for it so this must
444 * be a revalidate and we need to send it to lookup.
445 */
446 if (status == -EAGAIN)
447 return 0;
448
449 return status; 285 return status;
450 } 286 }
287 spin_unlock(&sbi->fs_lock);
451 288
452 /* Negative dentry.. invalidate if "old" */ 289 /* Negative dentry.. invalidate if "old" */
453 if (dentry->d_inode == NULL) 290 if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
461 DPRINTK("dentry=%p %.*s, emptydir", 298 DPRINTK("dentry=%p %.*s, emptydir",
462 dentry, dentry->d_name.len, dentry->d_name.name); 299 dentry, dentry->d_name.len, dentry->d_name.name);
463 spin_unlock(&dcache_lock); 300 spin_unlock(&dcache_lock);
301
464 /* The daemon never causes a mount to trigger */ 302 /* The daemon never causes a mount to trigger */
465 if (oz_mode) 303 if (oz_mode)
466 return 1; 304 return 1;
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
493 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); 331 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
494 332
495 if (sbi) { 333 if (sbi) {
496 spin_lock(&sbi->rehash_lock); 334 spin_lock(&sbi->lookup_lock);
497 if (!list_empty(&inf->rehash)) 335 if (!list_empty(&inf->active))
498 list_del(&inf->rehash); 336 list_del(&inf->active);
499 spin_unlock(&sbi->rehash_lock); 337 if (!list_empty(&inf->expiring))
338 list_del(&inf->expiring);
339 spin_unlock(&sbi->lookup_lock);
500 } 340 }
501 341
502 inf->dentry = NULL; 342 inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
518 .d_release = autofs4_dentry_release, 358 .d_release = autofs4_dentry_release,
519}; 359};
520 360
521static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 361static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
522{ 362{
523 unsigned int len = name->len; 363 unsigned int len = name->len;
524 unsigned int hash = name->hash; 364 unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
526 struct list_head *p, *head; 366 struct list_head *p, *head;
527 367
528 spin_lock(&dcache_lock); 368 spin_lock(&dcache_lock);
529 spin_lock(&sbi->rehash_lock); 369 spin_lock(&sbi->lookup_lock);
530 head = &sbi->rehash_list; 370 head = &sbi->active_list;
531 list_for_each(p, head) { 371 list_for_each(p, head) {
532 struct autofs_info *ino; 372 struct autofs_info *ino;
533 struct dentry *dentry; 373 struct dentry *dentry;
534 struct qstr *qstr; 374 struct qstr *qstr;
535 375
536 ino = list_entry(p, struct autofs_info, rehash); 376 ino = list_entry(p, struct autofs_info, active);
377 dentry = ino->dentry;
378
379 spin_lock(&dentry->d_lock);
380
381 /* Already gone? */
382 if (atomic_read(&dentry->d_count) == 0)
383 goto next;
384
385 qstr = &dentry->d_name;
386
387 if (dentry->d_name.hash != hash)
388 goto next;
389 if (dentry->d_parent != parent)
390 goto next;
391
392 if (qstr->len != len)
393 goto next;
394 if (memcmp(qstr->name, str, len))
395 goto next;
396
397 if (d_unhashed(dentry)) {
398 dget(dentry);
399 spin_unlock(&dentry->d_lock);
400 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&dcache_lock);
402 return dentry;
403 }
404next:
405 spin_unlock(&dentry->d_lock);
406 }
407 spin_unlock(&sbi->lookup_lock);
408 spin_unlock(&dcache_lock);
409
410 return NULL;
411}
412
413static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
414{
415 unsigned int len = name->len;
416 unsigned int hash = name->hash;
417 const unsigned char *str = name->name;
418 struct list_head *p, *head;
419
420 spin_lock(&dcache_lock);
421 spin_lock(&sbi->lookup_lock);
422 head = &sbi->expiring_list;
423 list_for_each(p, head) {
424 struct autofs_info *ino;
425 struct dentry *dentry;
426 struct qstr *qstr;
427
428 ino = list_entry(p, struct autofs_info, expiring);
537 dentry = ino->dentry; 429 dentry = ino->dentry;
538 430
539 spin_lock(&dentry->d_lock); 431 spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
555 goto next; 447 goto next;
556 448
557 if (d_unhashed(dentry)) { 449 if (d_unhashed(dentry)) {
558 struct inode *inode = dentry->d_inode;
559
560 ino = autofs4_dentry_ino(dentry);
561 list_del_init(&ino->rehash);
562 dget(dentry); 450 dget(dentry);
563 /*
564 * Make the rehashed dentry negative so the VFS
565 * behaves as it should.
566 */
567 if (inode) {
568 dentry->d_inode = NULL;
569 list_del_init(&dentry->d_alias);
570 spin_unlock(&dentry->d_lock);
571 spin_unlock(&sbi->rehash_lock);
572 spin_unlock(&dcache_lock);
573 iput(inode);
574 return dentry;
575 }
576 spin_unlock(&dentry->d_lock); 451 spin_unlock(&dentry->d_lock);
577 spin_unlock(&sbi->rehash_lock); 452 spin_unlock(&sbi->lookup_lock);
578 spin_unlock(&dcache_lock); 453 spin_unlock(&dcache_lock);
579 return dentry; 454 return dentry;
580 } 455 }
581next: 456next:
582 spin_unlock(&dentry->d_lock); 457 spin_unlock(&dentry->d_lock);
583 } 458 }
584 spin_unlock(&sbi->rehash_lock); 459 spin_unlock(&sbi->lookup_lock);
585 spin_unlock(&dcache_lock); 460 spin_unlock(&dcache_lock);
586 461
587 return NULL; 462 return NULL;
@@ -591,7 +466,8 @@ next:
591static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 466static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
592{ 467{
593 struct autofs_sb_info *sbi; 468 struct autofs_sb_info *sbi;
594 struct dentry *unhashed; 469 struct autofs_info *ino;
470 struct dentry *expiring, *unhashed;
595 int oz_mode; 471 int oz_mode;
596 472
597 DPRINTK("name = %.*s", 473 DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
607 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 483 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
608 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 484 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
609 485
610 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); 486 expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
611 if (!unhashed) { 487 if (expiring) {
488 /*
489 * If we are racing with expire the request might not
490 * be quite complete but the directory has been removed
491 * so it must have been successful, so just wait for it.
492 */
493 ino = autofs4_dentry_ino(expiring);
494 autofs4_expire_wait(expiring);
495 spin_lock(&sbi->lookup_lock);
496 if (!list_empty(&ino->expiring))
497 list_del_init(&ino->expiring);
498 spin_unlock(&sbi->lookup_lock);
499 dput(expiring);
500 }
501
502 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
503 if (unhashed)
504 dentry = unhashed;
505 else {
612 /* 506 /*
613 * Mark the dentry incomplete but don't hash it. We do this 507 * Mark the dentry incomplete but don't hash it. We do this
614 * to serialize our inode creation operations (symlink and 508 * to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
622 */ 516 */
623 dentry->d_op = &autofs4_root_dentry_operations; 517 dentry->d_op = &autofs4_root_dentry_operations;
624 518
625 dentry->d_fsdata = NULL;
626 d_instantiate(dentry, NULL);
627 } else {
628 struct autofs_info *ino = autofs4_dentry_ino(unhashed);
629 DPRINTK("rehash %p with %p", dentry, unhashed);
630 /* 519 /*
631 * If we are racing with expire the request might not 520 * And we need to ensure that the same dentry is used for
632 * be quite complete but the directory has been removed 521 * all following lookup calls until it is hashed so that
633 * so it must have been successful, so just wait for it. 522 * the dentry flags are persistent throughout the request.
634 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
635 * before continuing as revalidate may fail when calling
636 * try_to_fill_dentry (returning EAGAIN) if we don't.
637 */ 523 */
638 while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 524 ino = autofs4_init_ino(NULL, sbi, 0555);
639 DPRINTK("wait for incomplete expire %p name=%.*s", 525 if (!ino)
640 unhashed, unhashed->d_name.len, 526 return ERR_PTR(-ENOMEM);
641 unhashed->d_name.name); 527
642 autofs4_wait(sbi, unhashed, NFY_NONE); 528 dentry->d_fsdata = ino;
643 DPRINTK("request completed"); 529 ino->dentry = dentry;
644 } 530
645 dentry = unhashed; 531 spin_lock(&sbi->lookup_lock);
532 list_add(&ino->active, &sbi->active_list);
533 spin_unlock(&sbi->lookup_lock);
534
535 d_instantiate(dentry, NULL);
646 } 536 }
647 537
648 if (!oz_mode) { 538 if (!oz_mode) {
649 spin_lock(&dentry->d_lock); 539 spin_lock(&dentry->d_lock);
650 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 540 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
651 spin_unlock(&dentry->d_lock); 541 spin_unlock(&dentry->d_lock);
652 } 542 if (dentry->d_op && dentry->d_op->d_revalidate) {
653 543 mutex_unlock(&dir->i_mutex);
654 if (dentry->d_op && dentry->d_op->d_revalidate) { 544 (dentry->d_op->d_revalidate)(dentry, nd);
655 mutex_unlock(&dir->i_mutex); 545 mutex_lock(&dir->i_mutex);
656 (dentry->d_op->d_revalidate)(dentry, nd); 546 }
657 mutex_lock(&dir->i_mutex);
658 } 547 }
659 548
660 /* 549 /*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
673 return ERR_PTR(-ERESTARTNOINTR); 562 return ERR_PTR(-ERESTARTNOINTR);
674 } 563 }
675 } 564 }
676 spin_lock(&dentry->d_lock); 565 if (!oz_mode) {
677 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 566 spin_lock(&dentry->d_lock);
678 spin_unlock(&dentry->d_lock); 567 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
568 spin_unlock(&dentry->d_lock);
569 }
679 } 570 }
680 571
681 /* 572 /*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
706 } 597 }
707 598
708 if (unhashed) 599 if (unhashed)
709 return dentry; 600 return unhashed;
710 601
711 return NULL; 602 return NULL;
712} 603}
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
728 return -EACCES; 619 return -EACCES;
729 620
730 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); 621 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
731 if (ino == NULL) 622 if (!ino)
732 return -ENOSPC; 623 return -ENOMEM;
733 624
734 ino->size = strlen(symname); 625 spin_lock(&sbi->lookup_lock);
735 ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); 626 if (!list_empty(&ino->active))
627 list_del_init(&ino->active);
628 spin_unlock(&sbi->lookup_lock);
736 629
737 if (cp == NULL) { 630 ino->size = strlen(symname);
738 kfree(ino); 631 cp = kmalloc(ino->size + 1, GFP_KERNEL);
739 return -ENOSPC; 632 if (!cp) {
633 if (!dentry->d_fsdata)
634 kfree(ino);
635 return -ENOMEM;
740 } 636 }
741 637
742 strcpy(cp, symname); 638 strcpy(cp, symname);
743 639
744 inode = autofs4_get_inode(dir->i_sb, ino); 640 inode = autofs4_get_inode(dir->i_sb, ino);
641 if (!inode) {
642 kfree(cp);
643 if (!dentry->d_fsdata)
644 kfree(ino);
645 return -ENOMEM;
646 }
745 d_add(dentry, inode); 647 d_add(dentry, inode);
746 648
747 if (dir == dir->i_sb->s_root->d_inode) 649 if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
757 atomic_inc(&p_ino->count); 659 atomic_inc(&p_ino->count);
758 ino->inode = inode; 660 ino->inode = inode;
759 661
662 ino->u.symlink = cp;
760 dir->i_mtime = CURRENT_TIME; 663 dir->i_mtime = CURRENT_TIME;
761 664
762 return 0; 665 return 0;
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
769 * that the file no longer exists. However, doing that means that the 672 * that the file no longer exists. However, doing that means that the
770 * VFS layer can turn the dentry into a negative dentry. We don't want 673 * VFS layer can turn the dentry into a negative dentry. We don't want
771 * this, because the unlink is probably the result of an expire. 674 * this, because the unlink is probably the result of an expire.
772 * We simply d_drop it and add it to a rehash candidates list in the 675 * We simply d_drop it and add it to an expiring list in the super block,
773 * super block, which allows the dentry lookup to reuse it retaining 676 * which allows the dentry lookup to check for an incomplete expire.
774 * the flags, such as expire in progress, in case we're racing with expire.
775 * 677 *
776 * If a process is blocked on the dentry waiting for the expire to finish, 678 * If a process is blocked on the dentry waiting for the expire to finish,
777 * it will invalidate the dentry and try to mount with a new one. 679 * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
801 dir->i_mtime = CURRENT_TIME; 703 dir->i_mtime = CURRENT_TIME;
802 704
803 spin_lock(&dcache_lock); 705 spin_lock(&dcache_lock);
804 spin_lock(&sbi->rehash_lock); 706 spin_lock(&sbi->lookup_lock);
805 list_add(&ino->rehash, &sbi->rehash_list); 707 if (list_empty(&ino->expiring))
806 spin_unlock(&sbi->rehash_lock); 708 list_add(&ino->expiring, &sbi->expiring_list);
709 spin_unlock(&sbi->lookup_lock);
807 spin_lock(&dentry->d_lock); 710 spin_lock(&dentry->d_lock);
808 __d_drop(dentry); 711 __d_drop(dentry);
809 spin_unlock(&dentry->d_lock); 712 spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
829 spin_unlock(&dcache_lock); 732 spin_unlock(&dcache_lock);
830 return -ENOTEMPTY; 733 return -ENOTEMPTY;
831 } 734 }
832 spin_lock(&sbi->rehash_lock); 735 spin_lock(&sbi->lookup_lock);
833 list_add(&ino->rehash, &sbi->rehash_list); 736 if (list_empty(&ino->expiring))
834 spin_unlock(&sbi->rehash_lock); 737 list_add(&ino->expiring, &sbi->expiring_list);
738 spin_unlock(&sbi->lookup_lock);
835 spin_lock(&dentry->d_lock); 739 spin_lock(&dentry->d_lock);
836 __d_drop(dentry); 740 __d_drop(dentry);
837 spin_unlock(&dentry->d_lock); 741 spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
866 dentry, dentry->d_name.len, dentry->d_name.name); 770 dentry, dentry->d_name.len, dentry->d_name.name);
867 771
868 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); 772 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
869 if (ino == NULL) 773 if (!ino)
870 return -ENOSPC; 774 return -ENOMEM;
775
776 spin_lock(&sbi->lookup_lock);
777 if (!list_empty(&ino->active))
778 list_del_init(&ino->active);
779 spin_unlock(&sbi->lookup_lock);
871 780
872 inode = autofs4_get_inode(dir->i_sb, ino); 781 inode = autofs4_get_inode(dir->i_sb, ino);
782 if (!inode) {
783 if (!dentry->d_fsdata)
784 kfree(ino);
785 return -ENOMEM;
786 }
873 d_add(dentry, inode); 787 d_add(dentry, inode);
874 788
875 if (dir == dir->i_sb->s_root->d_inode) 789 if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
922} 836}
923 837
924/* 838/*
925 * Tells the daemon whether we need to reghost or not. Also, clears
926 * the reghost_needed flag.
927 */
928static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
929{
930 int status;
931
932 DPRINTK("returning %d", sbi->needs_reghost);
933
934 status = put_user(sbi->needs_reghost, p);
935 if (status)
936 return status;
937
938 sbi->needs_reghost = 0;
939 return 0;
940}
941
942/*
943 * Enable / Disable reghosting ioctl() operation
944 */
945static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
946{
947 int status;
948 int val;
949
950 status = get_user(val, p);
951
952 DPRINTK("reghost = %d", val);
953
954 if (status)
955 return status;
956
957 /* turn on/off reghosting, with the val */
958 sbi->reghost_enabled = val;
959 return 0;
960}
961
962/*
963* Tells the daemon whether it can umount the autofs mount. 839* Tells the daemon whether it can umount the autofs mount.
964*/ 840*/
965static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) 841static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
1023 case AUTOFS_IOC_SETTIMEOUT: 899 case AUTOFS_IOC_SETTIMEOUT:
1024 return autofs4_get_set_timeout(sbi, p); 900 return autofs4_get_set_timeout(sbi, p);
1025 901
1026 case AUTOFS_IOC_TOGGLEREGHOST:
1027 return autofs4_toggle_reghost(sbi, p);
1028 case AUTOFS_IOC_ASKREGHOST:
1029 return autofs4_ask_reghost(sbi, p);
1030
1031 case AUTOFS_IOC_ASKUMOUNT: 902 case AUTOFS_IOC_ASKUMOUNT:
1032 return autofs4_ask_umount(filp->f_path.mnt, p); 903 return autofs4_ask_umount(filp->f_path.mnt, p);
1033 904
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6d..35216d18d8b5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
28{ 28{
29 struct autofs_wait_queue *wq, *nwq; 29 struct autofs_wait_queue *wq, *nwq;
30 30
31 mutex_lock(&sbi->wq_mutex);
32 if (sbi->catatonic) {
33 mutex_unlock(&sbi->wq_mutex);
34 return;
35 }
36
31 DPRINTK("entering catatonic mode"); 37 DPRINTK("entering catatonic mode");
32 38
33 sbi->catatonic = 1; 39 sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
36 while (wq) { 42 while (wq) {
37 nwq = wq->next; 43 nwq = wq->next;
38 wq->status = -ENOENT; /* Magic is gone - report failure */ 44 wq->status = -ENOENT; /* Magic is gone - report failure */
39 kfree(wq->name); 45 if (wq->name.name) {
40 wq->name = NULL; 46 kfree(wq->name.name);
47 wq->name.name = NULL;
48 }
49 wq->wait_ctr--;
41 wake_up_interruptible(&wq->queue); 50 wake_up_interruptible(&wq->queue);
42 wq = nwq; 51 wq = nwq;
43 } 52 }
44 fput(sbi->pipe); /* Close the pipe */ 53 fput(sbi->pipe); /* Close the pipe */
45 sbi->pipe = NULL; 54 sbi->pipe = NULL;
55 sbi->pipefd = -1;
56 mutex_unlock(&sbi->wq_mutex);
46} 57}
47 58
48static int autofs4_write(struct file *file, const void *addr, int bytes) 59static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
89 union autofs_packet_union v4_pkt; 100 union autofs_packet_union v4_pkt;
90 union autofs_v5_packet_union v5_pkt; 101 union autofs_v5_packet_union v5_pkt;
91 } pkt; 102 } pkt;
103 struct file *pipe = NULL;
92 size_t pktsz; 104 size_t pktsz;
93 105
94 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", 106 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
95 wq->wait_queue_token, wq->len, wq->name, type); 107 wq->wait_queue_token, wq->name.len, wq->name.name, type);
96 108
97 memset(&pkt,0,sizeof pkt); /* For security reasons */ 109 memset(&pkt,0,sizeof pkt); /* For security reasons */
98 110
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
107 pktsz = sizeof(*mp); 119 pktsz = sizeof(*mp);
108 120
109 mp->wait_queue_token = wq->wait_queue_token; 121 mp->wait_queue_token = wq->wait_queue_token;
110 mp->len = wq->len; 122 mp->len = wq->name.len;
111 memcpy(mp->name, wq->name, wq->len); 123 memcpy(mp->name, wq->name.name, wq->name.len);
112 mp->name[wq->len] = '\0'; 124 mp->name[wq->name.len] = '\0';
113 break; 125 break;
114 } 126 }
115 case autofs_ptype_expire_multi: 127 case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
119 pktsz = sizeof(*ep); 131 pktsz = sizeof(*ep);
120 132
121 ep->wait_queue_token = wq->wait_queue_token; 133 ep->wait_queue_token = wq->wait_queue_token;
122 ep->len = wq->len; 134 ep->len = wq->name.len;
123 memcpy(ep->name, wq->name, wq->len); 135 memcpy(ep->name, wq->name.name, wq->name.len);
124 ep->name[wq->len] = '\0'; 136 ep->name[wq->name.len] = '\0';
125 break; 137 break;
126 } 138 }
127 /* 139 /*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
138 pktsz = sizeof(*packet); 150 pktsz = sizeof(*packet);
139 151
140 packet->wait_queue_token = wq->wait_queue_token; 152 packet->wait_queue_token = wq->wait_queue_token;
141 packet->len = wq->len; 153 packet->len = wq->name.len;
142 memcpy(packet->name, wq->name, wq->len); 154 memcpy(packet->name, wq->name.name, wq->name.len);
143 packet->name[wq->len] = '\0'; 155 packet->name[wq->name.len] = '\0';
144 packet->dev = wq->dev; 156 packet->dev = wq->dev;
145 packet->ino = wq->ino; 157 packet->ino = wq->ino;
146 packet->uid = wq->uid; 158 packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 return; 166 return;
155 } 167 }
156 168
157 if (autofs4_write(sbi->pipe, &pkt, pktsz)) 169 /* Check if we have become catatonic */
158 autofs4_catatonic_mode(sbi); 170 mutex_lock(&sbi->wq_mutex);
171 if (!sbi->catatonic) {
172 pipe = sbi->pipe;
173 get_file(pipe);
174 }
175 mutex_unlock(&sbi->wq_mutex);
176
177 if (pipe) {
178 if (autofs4_write(pipe, &pkt, pktsz))
179 autofs4_catatonic_mode(sbi);
180 fput(pipe);
181 }
159} 182}
160 183
161static int autofs4_getpath(struct autofs_sb_info *sbi, 184static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
191} 214}
192 215
193static struct autofs_wait_queue * 216static struct autofs_wait_queue *
194autofs4_find_wait(struct autofs_sb_info *sbi, 217autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
195 char *name, unsigned int hash, unsigned int len)
196{ 218{
197 struct autofs_wait_queue *wq; 219 struct autofs_wait_queue *wq;
198 220
199 for (wq = sbi->queues; wq; wq = wq->next) { 221 for (wq = sbi->queues; wq; wq = wq->next) {
200 if (wq->hash == hash && 222 if (wq->name.hash == qstr->hash &&
201 wq->len == len && 223 wq->name.len == qstr->len &&
202 wq->name && !memcmp(wq->name, name, len)) 224 wq->name.name &&
225 !memcmp(wq->name.name, qstr->name, qstr->len))
203 break; 226 break;
204 } 227 }
205 return wq; 228 return wq;
206} 229}
207 230
208int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, 231/*
209 enum autofs_notify notify) 232 * Check if we have a valid request.
233 * Returns
234 * 1 if the request should continue.
235 * In this case we can return an autofs_wait_queue entry if one is
236 * found or NULL to indicate a new wait needs to be created.
237 * 0 or a negative errno if the request shouldn't continue.
238 */
239static int validate_request(struct autofs_wait_queue **wait,
240 struct autofs_sb_info *sbi,
241 struct qstr *qstr,
242 struct dentry*dentry, enum autofs_notify notify)
210{ 243{
211 struct autofs_info *ino;
212 struct autofs_wait_queue *wq; 244 struct autofs_wait_queue *wq;
213 char *name; 245 struct autofs_info *ino;
214 unsigned int len = 0;
215 unsigned int hash = 0;
216 int status, type;
217
218 /* In catatonic mode, we don't wait for nobody */
219 if (sbi->catatonic)
220 return -ENOENT;
221
222 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
223 if (!name)
224 return -ENOMEM;
225 246
226 /* If this is a direct mount request create a dummy name */ 247 /* Wait in progress, continue; */
227 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) 248 wq = autofs4_find_wait(sbi, qstr);
228 len = sprintf(name, "%p", dentry); 249 if (wq) {
229 else { 250 *wait = wq;
230 len = autofs4_getpath(sbi, dentry, &name); 251 return 1;
231 if (!len) {
232 kfree(name);
233 return -ENOENT;
234 }
235 } 252 }
236 hash = full_name_hash(name, len);
237 253
238 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 254 *wait = NULL;
239 kfree(name);
240 return -EINTR;
241 }
242 255
243 wq = autofs4_find_wait(sbi, name, hash, len); 256 /* If we don't yet have any info this is a new request */
244 ino = autofs4_dentry_ino(dentry); 257 ino = autofs4_dentry_ino(dentry);
245 if (!wq && ino && notify == NFY_NONE) { 258 if (!ino)
259 return 1;
260
261 /*
262 * If we've been asked to wait on an existing expire (NFY_NONE)
263 * but there is no wait in the queue ...
264 */
265 if (notify == NFY_NONE) {
246 /* 266 /*
247 * Either we've beaten the pending expire to post its 267 * Either we've beaten the pending expire to post its
248 * wait or it finished while we waited on the mutex. 268 * wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
253 while (ino->flags & AUTOFS_INF_EXPIRING) { 273 while (ino->flags & AUTOFS_INF_EXPIRING) {
254 mutex_unlock(&sbi->wq_mutex); 274 mutex_unlock(&sbi->wq_mutex);
255 schedule_timeout_interruptible(HZ/10); 275 schedule_timeout_interruptible(HZ/10);
256 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 276 if (mutex_lock_interruptible(&sbi->wq_mutex))
257 kfree(name);
258 return -EINTR; 277 return -EINTR;
278
279 wq = autofs4_find_wait(sbi, qstr);
280 if (wq) {
281 *wait = wq;
282 return 1;
259 } 283 }
260 wq = autofs4_find_wait(sbi, name, hash, len);
261 if (wq)
262 break;
263 } 284 }
264 285
265 /* 286 /*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
267 * cases where we wait on NFY_NONE neither depend on the 288 * cases where we wait on NFY_NONE neither depend on the
268 * return status of the wait. 289 * return status of the wait.
269 */ 290 */
270 if (!wq) { 291 return 0;
292 }
293
294 /*
295 * If we've been asked to trigger a mount and the request
296 * completed while we waited on the mutex ...
297 */
298 if (notify == NFY_MOUNT) {
299 /*
300 * If the dentry isn't hashed just go ahead and try the
301 * mount again with a new wait (not much else we can do).
302 */
303 if (!d_unhashed(dentry)) {
304 /*
305 * But if the dentry is hashed, that means that we
306 * got here through the revalidate path. Thus, we
307 * need to check if the dentry has been mounted
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 }
315
316 return 1;
317}
318
319int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
320 enum autofs_notify notify)
321{
322 struct autofs_wait_queue *wq;
323 struct qstr qstr;
324 char *name;
325 int status, ret, type;
326
327 /* In catatonic mode, we don't wait for nobody */
328 if (sbi->catatonic)
329 return -ENOENT;
330
331 if (!dentry->d_inode) {
332 /*
333 * A wait for a negative dentry is invalid for certain
334 * cases. A direct or offset mount "always" has its mount
335 * point directory created and so the request dentry must
336 * be positive or the map key doesn't exist. The situation
337 * is very similar for indirect mounts except only dentrys
338 * in the root of the autofs file system may be negative.
339 */
340 if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT;
344 }
345
346 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
347 if (!name)
348 return -ENOMEM;
349
350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
352 qstr.len = sprintf(name, "%p", dentry);
353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name);
355 if (!qstr.len) {
271 kfree(name); 356 kfree(name);
272 mutex_unlock(&sbi->wq_mutex); 357 return -ENOENT;
273 return 0;
274 } 358 }
275 } 359 }
360 qstr.name = name;
361 qstr.hash = full_name_hash(name, qstr.len);
362
363 if (mutex_lock_interruptible(&sbi->wq_mutex)) {
364 kfree(qstr.name);
365 return -EINTR;
366 }
367
368 ret = validate_request(&wq, sbi, &qstr, dentry, notify);
369 if (ret <= 0) {
370 if (ret == 0)
371 mutex_unlock(&sbi->wq_mutex);
372 kfree(qstr.name);
373 return ret;
374 }
276 375
277 if (!wq) { 376 if (!wq) {
278 /* Create a new wait queue */ 377 /* Create a new wait queue */
279 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); 378 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
280 if (!wq) { 379 if (!wq) {
281 kfree(name); 380 kfree(qstr.name);
282 mutex_unlock(&sbi->wq_mutex); 381 mutex_unlock(&sbi->wq_mutex);
283 return -ENOMEM; 382 return -ENOMEM;
284 } 383 }
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
289 wq->next = sbi->queues; 388 wq->next = sbi->queues;
290 sbi->queues = wq; 389 sbi->queues = wq;
291 init_waitqueue_head(&wq->queue); 390 init_waitqueue_head(&wq->queue);
292 wq->hash = hash; 391 memcpy(&wq->name, &qstr, sizeof(struct qstr));
293 wq->name = name;
294 wq->len = len;
295 wq->dev = autofs4_get_dev(sbi); 392 wq->dev = autofs4_get_dev(sbi);
296 wq->ino = autofs4_get_ino(sbi); 393 wq->ino = autofs4_get_ino(sbi);
297 wq->uid = current->uid; 394 wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
299 wq->pid = current->pid; 396 wq->pid = current->pid;
300 wq->tgid = current->tgid; 397 wq->tgid = current->tgid;
301 wq->status = -EINTR; /* Status return if interrupted */ 398 wq->status = -EINTR; /* Status return if interrupted */
302 atomic_set(&wq->wait_ctr, 2); 399 wq->wait_ctr = 2;
303 mutex_unlock(&sbi->wq_mutex); 400 mutex_unlock(&sbi->wq_mutex);
304 401
305 if (sbi->version < 5) { 402 if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
319 } 416 }
320 417
321 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", 418 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
322 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 419 (unsigned long) wq->wait_queue_token, wq->name.len,
420 wq->name.name, notify);
323 421
324 /* autofs4_notify_daemon() may block */ 422 /* autofs4_notify_daemon() may block */
325 autofs4_notify_daemon(sbi, wq, type); 423 autofs4_notify_daemon(sbi, wq, type);
326 } else { 424 } else {
327 atomic_inc(&wq->wait_ctr); 425 wq->wait_ctr++;
328 mutex_unlock(&sbi->wq_mutex); 426 mutex_unlock(&sbi->wq_mutex);
329 kfree(name); 427 kfree(qstr.name);
330 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", 428 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
331 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 429 (unsigned long) wq->wait_queue_token, wq->name.len,
332 } 430 wq->name.name, notify);
333
334 /* wq->name is NULL if and only if the lock is already released */
335
336 if (sbi->catatonic) {
337 /* We might have slept, so check again for catatonic mode */
338 wq->status = -ENOENT;
339 kfree(wq->name);
340 wq->name = NULL;
341 } 431 }
342 432
343 if (wq->name) { 433 /*
434 * wq->name.name is NULL iff the lock is already released
435 * or the mount has been made catatonic.
436 */
437 if (wq->name.name) {
344 /* Block all but "shutdown" signals while waiting */ 438 /* Block all but "shutdown" signals while waiting */
345 sigset_t oldset; 439 sigset_t oldset;
346 unsigned long irqflags; 440 unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
351 recalc_sigpending(); 445 recalc_sigpending();
352 spin_unlock_irqrestore(&current->sighand->siglock, irqflags); 446 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
353 447
354 wait_event_interruptible(wq->queue, wq->name == NULL); 448 wait_event_interruptible(wq->queue, wq->name.name == NULL);
355 449
356 spin_lock_irqsave(&current->sighand->siglock, irqflags); 450 spin_lock_irqsave(&current->sighand->siglock, irqflags);
357 current->blocked = oldset; 451 current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
364 status = wq->status; 458 status = wq->status;
365 459
366 /* Are we the last process to need status? */ 460 /* Are we the last process to need status? */
367 if (atomic_dec_and_test(&wq->wait_ctr)) 461 mutex_lock(&sbi->wq_mutex);
462 if (!--wq->wait_ctr)
368 kfree(wq); 463 kfree(wq);
464 mutex_unlock(&sbi->wq_mutex);
369 465
370 return status; 466 return status;
371} 467}
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
387 } 483 }
388 484
389 *wql = wq->next; /* Unlink from chain */ 485 *wql = wq->next; /* Unlink from chain */
390 mutex_unlock(&sbi->wq_mutex); 486 kfree(wq->name.name);
391 kfree(wq->name); 487 wq->name.name = NULL; /* Do not wait on this queue */
392 wq->name = NULL; /* Do not wait on this queue */
393
394 wq->status = status; 488 wq->status = status;
395 489 wake_up_interruptible(&wq->queue);
396 if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */ 490 if (!--wq->wait_ctr)
397 kfree(wq); 491 kfree(wq);
398 else 492 mutex_unlock(&sbi->wq_mutex);
399 wake_up_interruptible(&wq->queue);
400 493
401 return 0; 494 return 0;
402} 495}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 639d2d8b5710..88d180306cf9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1478,7 +1478,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1478 const struct user_regset_view *view = task_user_regset_view(dump_task); 1478 const struct user_regset_view *view = task_user_regset_view(dump_task);
1479 struct elf_thread_core_info *t; 1479 struct elf_thread_core_info *t;
1480 struct elf_prpsinfo *psinfo; 1480 struct elf_prpsinfo *psinfo;
1481 struct task_struct *g, *p; 1481 struct core_thread *ct;
1482 unsigned int i; 1482 unsigned int i;
1483 1483
1484 info->size = 0; 1484 info->size = 0;
@@ -1517,31 +1517,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1517 /* 1517 /*
1518 * Allocate a structure for each thread. 1518 * Allocate a structure for each thread.
1519 */ 1519 */
1520 rcu_read_lock(); 1520 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1521 do_each_thread(g, p) 1521 t = kzalloc(offsetof(struct elf_thread_core_info,
1522 if (p->mm == dump_task->mm) { 1522 notes[info->thread_notes]),
1523 t = kzalloc(offsetof(struct elf_thread_core_info, 1523 GFP_KERNEL);
1524 notes[info->thread_notes]), 1524 if (unlikely(!t))
1525 GFP_ATOMIC); 1525 return 0;
1526 if (unlikely(!t)) { 1526
1527 rcu_read_unlock(); 1527 t->task = ct->task;
1528 return 0; 1528 if (ct->task == dump_task || !info->thread) {
1529 } 1529 t->next = info->thread;
1530 t->task = p; 1530 info->thread = t;
1531 if (p == dump_task || !info->thread) { 1531 } else {
1532 t->next = info->thread; 1532 /*
1533 info->thread = t; 1533 * Make sure to keep the original task at
1534 } else { 1534 * the head of the list.
1535 /* 1535 */
1536 * Make sure to keep the original task at 1536 t->next = info->thread->next;
1537 * the head of the list. 1537 info->thread->next = t;
1538 */
1539 t->next = info->thread->next;
1540 info->thread->next = t;
1541 }
1542 } 1538 }
1543 while_each_thread(g, p); 1539 }
1544 rcu_read_unlock();
1545 1540
1546 /* 1541 /*
1547 * Now fill in each thread's information. 1542 * Now fill in each thread's information.
@@ -1688,7 +1683,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1688{ 1683{
1689#define NUM_NOTES 6 1684#define NUM_NOTES 6
1690 struct list_head *t; 1685 struct list_head *t;
1691 struct task_struct *g, *p;
1692 1686
1693 info->notes = NULL; 1687 info->notes = NULL;
1694 info->prstatus = NULL; 1688 info->prstatus = NULL;
@@ -1720,20 +1714,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1720 1714
1721 info->thread_status_size = 0; 1715 info->thread_status_size = 0;
1722 if (signr) { 1716 if (signr) {
1717 struct core_thread *ct;
1723 struct elf_thread_status *ets; 1718 struct elf_thread_status *ets;
1724 rcu_read_lock(); 1719
1725 do_each_thread(g, p) 1720 for (ct = current->mm->core_state->dumper.next;
1726 if (current->mm == p->mm && current != p) { 1721 ct; ct = ct->next) {
1727 ets = kzalloc(sizeof(*ets), GFP_ATOMIC); 1722 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1728 if (!ets) { 1723 if (!ets)
1729 rcu_read_unlock(); 1724 return 0;
1730 return 0; 1725
1731 } 1726 ets->thread = ct->task;
1732 ets->thread = p; 1727 list_add(&ets->list, &info->thread_list);
1733 list_add(&ets->list, &info->thread_list); 1728 }
1734 } 1729
1735 while_each_thread(g, p);
1736 rcu_read_unlock();
1737 list_for_each(t, &info->thread_list) { 1730 list_for_each(t, &info->thread_list) {
1738 int sz; 1731 int sz;
1739 1732
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e6270..1b59b1edf26d 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1573 struct memelfnote *notes = NULL; 1573 struct memelfnote *notes = NULL;
1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ 1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ 1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1576 struct task_struct *g, *p;
1577 LIST_HEAD(thread_list); 1576 LIST_HEAD(thread_list);
1578 struct list_head *t; 1577 struct list_head *t;
1579 elf_fpregset_t *fpu = NULL; 1578 elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1622#endif 1621#endif
1623 1622
1624 if (signr) { 1623 if (signr) {
1624 struct core_thread *ct;
1625 struct elf_thread_status *tmp; 1625 struct elf_thread_status *tmp;
1626 rcu_read_lock(); 1626
1627 do_each_thread(g,p) 1627 for (ct = current->mm->core_state->dumper.next;
1628 if (current->mm == p->mm && current != p) { 1628 ct; ct = ct->next) {
1629 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1629 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1630 if (!tmp) { 1630 if (!tmp)
1631 rcu_read_unlock(); 1631 goto cleanup;
1632 goto cleanup; 1632
1633 } 1633 tmp->thread = ct->task;
1634 tmp->thread = p; 1634 list_add(&tmp->list, &thread_list);
1635 list_add(&tmp->list, &thread_list); 1635 }
1636 } 1636
1637 while_each_thread(g,p);
1638 rcu_read_unlock();
1639 list_for_each(t, &thread_list) { 1637 list_for_each(t, &thread_list) {
1640 struct elf_thread_status *tmp; 1638 struct elf_thread_status *tmp;
1641 int sz; 1639 int sz;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c5..756205314c24 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/fs.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32 33
@@ -535,31 +536,16 @@ static ssize_t
535bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 536bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
536{ 537{
537 Node *e = file->f_path.dentry->d_inode->i_private; 538 Node *e = file->f_path.dentry->d_inode->i_private;
538 loff_t pos = *ppos;
539 ssize_t res; 539 ssize_t res;
540 char *page; 540 char *page;
541 int len;
542 541
543 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 542 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
544 return -ENOMEM; 543 return -ENOMEM;
545 544
546 entry_status(e, page); 545 entry_status(e, page);
547 len = strlen(page);
548 546
549 res = -EINVAL; 547 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
550 if (pos < 0) 548
551 goto out;
552 res = 0;
553 if (pos >= len)
554 goto out;
555 if (len < pos + nbytes)
556 nbytes = len - pos;
557 res = -EFAULT;
558 if (copy_to_user(buf, page + pos, nbytes))
559 goto out;
560 *ppos = pos + nbytes;
561 res = nbytes;
562out:
563 free_page((unsigned long) page); 549 free_page((unsigned long) page);
564 return res; 550 return res;
565} 551}
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
28char * coda_f2s(struct CodaFid *f) 28char * coda_f2s(struct CodaFid *f)
29{ 29{
30 static char s[60]; 30 static char s[60];
31#ifdef CONFIG_CODA_FS_OLD_API 31
32 sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
33#else
34 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); 32 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
35#endif 33
36 return s; 34 return s;
37} 35}
38 36
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 40c36f7352a6..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
378MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); 378MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); 379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
380MODULE_LICENSE("GPL"); 380MODULE_LICENSE("GPL");
381#ifdef CONFIG_CODA_FS_OLD_API
382MODULE_VERSION("5.3.21");
383#else
384MODULE_VERSION("6.6"); 381MODULE_VERSION("6.6");
385#endif
386 382
387static int __init init_coda(void) 383static int __init init_coda(void)
388{ 384{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55#ifdef CONFIG_CODA_FS_OLD_API
56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
57 inp->ih.cred.cr_fsuid = current->fsuid;
58#else
59 inp->ih.uid = current->fsuid; 55 inp->ih.uid = current->fsuid;
60#endif 56
61 return (void*)inp; 57 return (void*)inp;
62} 58}
63 59
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 union inputArgs *inp; 162 union inputArgs *inp;
167 union outputArgs *outp; 163 union outputArgs *outp;
168 int insize, outsize, error; 164 int insize, outsize, error;
169#ifdef CONFIG_CODA_FS_OLD_API
170 struct coda_cred cred = { 0, };
171 cred.cr_fsuid = uid;
172#endif
173 165
174 insize = SIZE(release); 166 insize = SIZE(release);
175 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
176 168
177#ifdef CONFIG_CODA_FS_OLD_API
178 memcpy(&(inp->ih.cred), &cred, sizeof(cred));
179#else
180 inp->ih.uid = uid; 169 inp->ih.uid = uid;
181#endif
182
183 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
184 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
185 172
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc6..106eba28ec5a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
197{ 197{
198 198
199 if (sizeof ubuf->f_blocks == 4) { 199 if (sizeof ubuf->f_blocks == 4) {
200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
201 0xffffffff00000000ULL) 201 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
202 return -EOVERFLOW; 202 return -EOVERFLOW;
203 /* f_files and f_ffree may be -1; it's okay 203 /* f_files and f_ffree may be -1; it's okay
204 * to stuff that into 32 bits */ 204 * to stuff that into 32 bits */
@@ -271,8 +271,8 @@ out:
271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) 271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
272{ 272{
273 if (sizeof ubuf->f_blocks == 4) { 273 if (sizeof ubuf->f_blocks == 4) {
274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
275 0xffffffff00000000ULL) 275 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
276 return -EOVERFLOW; 276 return -EOVERFLOW;
277 /* f_files and f_ffree may be -1; it's okay 277 /* f_files and f_ffree may be -1; it's okay
278 * to stuff that into 32 bits */ 278 * to stuff that into 32 bits */
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2131 2131
2132#ifdef CONFIG_SIGNALFD 2132#ifdef CONFIG_SIGNALFD
2133 2133
2134asmlinkage long compat_sys_signalfd(int ufd, 2134asmlinkage long compat_sys_signalfd4(int ufd,
2135 const compat_sigset_t __user *sigmask, 2135 const compat_sigset_t __user *sigmask,
2136 compat_size_t sigsetsize) 2136 compat_size_t sigsetsize, int flags)
2137{ 2137{
2138 compat_sigset_t ss32; 2138 compat_sigset_t ss32;
2139 sigset_t tmp; 2139 sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) 2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
2149 return -EFAULT; 2149 return -EFAULT;
2150 2150
2151 return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); 2151 return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
2152} 2152}
2153 2153
2154asmlinkage long compat_sys_signalfd(int ufd,
2155 const compat_sigset_t __user *sigmask,
2156 compat_size_t sigsetsize)
2157{
2158 return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
2159}
2154#endif /* CONFIG_SIGNALFD */ 2160#endif /* CONFIG_SIGNALFD */
2155 2161
2156#ifdef CONFIG_TIMERFD 2162#ifdef CONFIG_TIMERFD
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a9..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/raid/md.h> 26#include <linux/raid/md.h>
27#include <linux/kd.h> 27#include <linux/kd.h>
28#include <linux/dirent.h>
29#include <linux/route.h> 28#include <linux/route.h>
30#include <linux/in6.h> 29#include <linux/in6.h>
31#include <linux/ipv6_route.h> 30#include <linux/ipv6_route.h>
@@ -2297,8 +2296,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 2296COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
2298COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) 2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
2299COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) 2298COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
2300COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
2301COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
2302COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) 2299COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
2303/* Raw devices */ 2300/* Raw devices */
2304COMPATIBLE_IOCTL(RAW_SETBIND) 2301COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..3818d6ab76ca 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
61static unsigned int d_hash_mask __read_mostly; 61static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 62static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 63static struct hlist_head *dentry_hashtable __read_mostly;
64static LIST_HEAD(dentry_unused);
65 64
66/* Statistics gathering. */ 65/* Statistics gathering. */
67struct dentry_stat_t dentry_stat = { 66struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 95 call_rcu(&dentry->d_u.d_rcu, d_callback);
97} 96}
98 97
99static void dentry_lru_remove(struct dentry *dentry)
100{
101 if (!list_empty(&dentry->d_lru)) {
102 list_del_init(&dentry->d_lru);
103 dentry_stat.nr_unused--;
104 }
105}
106
107/* 98/*
108 * Release the dentry's inode, using the filesystem 99 * Release the dentry's inode, using the filesystem
109 * d_iput() operation if defined. 100 * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
130 } 121 }
131} 122}
132 123
124/*
125 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
126 */
127static void dentry_lru_add(struct dentry *dentry)
128{
129 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
130 dentry->d_sb->s_nr_dentry_unused++;
131 dentry_stat.nr_unused++;
132}
133
134static void dentry_lru_add_tail(struct dentry *dentry)
135{
136 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
137 dentry->d_sb->s_nr_dentry_unused++;
138 dentry_stat.nr_unused++;
139}
140
141static void dentry_lru_del(struct dentry *dentry)
142{
143 if (!list_empty(&dentry->d_lru)) {
144 list_del(&dentry->d_lru);
145 dentry->d_sb->s_nr_dentry_unused--;
146 dentry_stat.nr_unused--;
147 }
148}
149
150static void dentry_lru_del_init(struct dentry *dentry)
151{
152 if (likely(!list_empty(&dentry->d_lru))) {
153 list_del_init(&dentry->d_lru);
154 dentry->d_sb->s_nr_dentry_unused--;
155 dentry_stat.nr_unused--;
156 }
157}
158
133/** 159/**
134 * d_kill - kill dentry and return parent 160 * d_kill - kill dentry and return parent
135 * @dentry: dentry to kill 161 * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
212 goto kill_it; 238 goto kill_it;
213 if (list_empty(&dentry->d_lru)) { 239 if (list_empty(&dentry->d_lru)) {
214 dentry->d_flags |= DCACHE_REFERENCED; 240 dentry->d_flags |= DCACHE_REFERENCED;
215 list_add(&dentry->d_lru, &dentry_unused); 241 dentry_lru_add(dentry);
216 dentry_stat.nr_unused++;
217 } 242 }
218 spin_unlock(&dentry->d_lock); 243 spin_unlock(&dentry->d_lock);
219 spin_unlock(&dcache_lock); 244 spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
222unhash_it: 247unhash_it:
223 __d_drop(dentry); 248 __d_drop(dentry);
224kill_it: 249kill_it:
225 dentry_lru_remove(dentry); 250 /* if dentry was on the d_lru list delete it from there */
251 dentry_lru_del(dentry);
226 dentry = d_kill(dentry); 252 dentry = d_kill(dentry);
227 if (dentry) 253 if (dentry)
228 goto repeat; 254 goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
290static inline struct dentry * __dget_locked(struct dentry *dentry) 316static inline struct dentry * __dget_locked(struct dentry *dentry)
291{ 317{
292 atomic_inc(&dentry->d_count); 318 atomic_inc(&dentry->d_count);
293 dentry_lru_remove(dentry); 319 dentry_lru_del_init(dentry);
294 return dentry; 320 return dentry;
295} 321}
296 322
@@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry)
406 432
407 if (dentry->d_op && dentry->d_op->d_delete) 433 if (dentry->d_op && dentry->d_op->d_delete)
408 dentry->d_op->d_delete(dentry); 434 dentry->d_op->d_delete(dentry);
409 dentry_lru_remove(dentry); 435 dentry_lru_del_init(dentry);
410 __d_drop(dentry); 436 __d_drop(dentry);
411 dentry = d_kill(dentry); 437 dentry = d_kill(dentry);
412 spin_lock(&dcache_lock); 438 spin_lock(&dcache_lock);
413 } 439 }
414} 440}
415 441
416/** 442/*
417 * prune_dcache - shrink the dcache 443 * Shrink the dentry LRU on a given superblock.
418 * @count: number of entries to try and free 444 * @sb : superblock to shrink dentry LRU.
419 * @sb: if given, ignore dentries for other superblocks 445 * @count: If count is NULL, we prune all dentries on superblock.
420 * which are being unmounted. 446 * @flags: If flags is non-zero, we need to do special processing based on
421 * 447 * which flags are set. This means we don't need to maintain multiple
422 * Shrink the dcache. This is done when we need 448 * similar copies of this loop.
423 * more memory, or simply when we need to unmount
424 * something (at which point we need to unuse
425 * all dentries).
426 *
427 * This function may fail to free any resources if
428 * all the dentries are in use.
429 */ 449 */
430 450static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
431static void prune_dcache(int count, struct super_block *sb)
432{ 451{
433 spin_lock(&dcache_lock); 452 LIST_HEAD(referenced);
434 for (; count ; count--) { 453 LIST_HEAD(tmp);
435 struct dentry *dentry; 454 struct dentry *dentry;
436 struct list_head *tmp; 455 int cnt = 0;
437 struct rw_semaphore *s_umount;
438
439 cond_resched_lock(&dcache_lock);
440 456
441 tmp = dentry_unused.prev; 457 BUG_ON(!sb);
442 if (sb) { 458 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
443 /* Try to find a dentry for this sb, but don't try 459 spin_lock(&dcache_lock);
444 * too hard, if they aren't near the tail they will 460 if (count != NULL)
445 * be moved down again soon 461 /* called from prune_dcache() and shrink_dcache_parent() */
462 cnt = *count;
463restart:
464 if (count == NULL)
465 list_splice_init(&sb->s_dentry_lru, &tmp);
466 else {
467 while (!list_empty(&sb->s_dentry_lru)) {
468 dentry = list_entry(sb->s_dentry_lru.prev,
469 struct dentry, d_lru);
470 BUG_ON(dentry->d_sb != sb);
471
472 spin_lock(&dentry->d_lock);
473 /*
474 * If we are honouring the DCACHE_REFERENCED flag and
475 * the dentry has this flag set, don't free it. Clear
476 * the flag and put it back on the LRU.
446 */ 477 */
447 int skip = count; 478 if ((flags & DCACHE_REFERENCED)
448 while (skip && tmp != &dentry_unused && 479 && (dentry->d_flags & DCACHE_REFERENCED)) {
449 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { 480 dentry->d_flags &= ~DCACHE_REFERENCED;
450 skip--; 481 list_move_tail(&dentry->d_lru, &referenced);
451 tmp = tmp->prev; 482 spin_unlock(&dentry->d_lock);
483 } else {
484 list_move_tail(&dentry->d_lru, &tmp);
485 spin_unlock(&dentry->d_lock);
486 cnt--;
487 if (!cnt)
488 break;
452 } 489 }
453 } 490 }
454 if (tmp == &dentry_unused) 491 }
455 break; 492 while (!list_empty(&tmp)) {
456 list_del_init(tmp); 493 dentry = list_entry(tmp.prev, struct dentry, d_lru);
457 prefetch(dentry_unused.prev); 494 dentry_lru_del_init(dentry);
458 dentry_stat.nr_unused--; 495 spin_lock(&dentry->d_lock);
459 dentry = list_entry(tmp, struct dentry, d_lru);
460
461 spin_lock(&dentry->d_lock);
462 /* 496 /*
463 * We found an inuse dentry which was not removed from 497 * We found an inuse dentry which was not removed from
464 * dentry_unused because of laziness during lookup. Do not free 498 * the LRU because of laziness during lookup. Do not free
465 * it - just keep it off the dentry_unused list. 499 * it - just keep it off the LRU list.
466 */ 500 */
467 if (atomic_read(&dentry->d_count)) { 501 if (atomic_read(&dentry->d_count)) {
468 spin_unlock(&dentry->d_lock); 502 spin_unlock(&dentry->d_lock);
469 continue; 503 continue;
470 } 504 }
471 /* If the dentry was recently referenced, don't free it. */ 505 prune_one_dentry(dentry);
472 if (dentry->d_flags & DCACHE_REFERENCED) { 506 /* dentry->d_lock was dropped in prune_one_dentry() */
473 dentry->d_flags &= ~DCACHE_REFERENCED; 507 cond_resched_lock(&dcache_lock);
474 list_add(&dentry->d_lru, &dentry_unused); 508 }
475 dentry_stat.nr_unused++; 509 if (count == NULL && !list_empty(&sb->s_dentry_lru))
476 spin_unlock(&dentry->d_lock); 510 goto restart;
511 if (count != NULL)
512 *count = cnt;
513 if (!list_empty(&referenced))
514 list_splice(&referenced, &sb->s_dentry_lru);
515 spin_unlock(&dcache_lock);
516}
517
518/**
519 * prune_dcache - shrink the dcache
520 * @count: number of entries to try to free
521 *
522 * Shrink the dcache. This is done when we need more memory, or simply when we
523 * need to unmount something (at which point we need to unuse all dentries).
524 *
525 * This function may fail to free any resources if all the dentries are in use.
526 */
527static void prune_dcache(int count)
528{
529 struct super_block *sb;
530 int w_count;
531 int unused = dentry_stat.nr_unused;
532 int prune_ratio;
533 int pruned;
534
535 if (unused == 0 || count == 0)
536 return;
537 spin_lock(&dcache_lock);
538restart:
539 if (count >= unused)
540 prune_ratio = 1;
541 else
542 prune_ratio = unused / count;
543 spin_lock(&sb_lock);
544 list_for_each_entry(sb, &super_blocks, s_list) {
545 if (sb->s_nr_dentry_unused == 0)
477 continue; 546 continue;
478 } 547 sb->s_count++;
479 /* 548 /* Now, we reclaim unused dentrins with fairness.
480 * If the dentry is not DCACHED_REFERENCED, it is time 549 * We reclaim them same percentage from each superblock.
481 * to remove it from the dcache, provided the super block is 550 * We calculate number of dentries to scan on this sb
482 * NULL (which means we are trying to reclaim memory) 551 * as follows, but the implementation is arranged to avoid
483 * or this dentry belongs to the same super block that 552 * overflows:
484 * we want to shrink. 553 * number of dentries to scan on this sb =
485 */ 554 * count * (number of dentries on this sb /
486 /* 555 * number of dentries in the machine)
487 * If this dentry is for "my" filesystem, then I can prune it
488 * without taking the s_umount lock (I already hold it).
489 */ 556 */
490 if (sb && dentry->d_sb == sb) { 557 spin_unlock(&sb_lock);
491 prune_one_dentry(dentry); 558 if (prune_ratio != 1)
492 continue; 559 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
493 } 560 else
561 w_count = sb->s_nr_dentry_unused;
562 pruned = w_count;
494 /* 563 /*
495 * ...otherwise we need to be sure this filesystem isn't being 564 * We need to be sure this filesystem isn't being unmounted,
496 * unmounted, otherwise we could race with 565 * otherwise we could race with generic_shutdown_super(), and
497 * generic_shutdown_super(), and end up holding a reference to 566 * end up holding a reference to an inode while the filesystem
498 * an inode while the filesystem is unmounted. 567 * is unmounted. So we try to get s_umount, and make sure
499 * So we try to get s_umount, and make sure s_root isn't NULL. 568 * s_root isn't NULL.
500 * (Take a local copy of s_umount to avoid a use-after-free of
501 * `dentry').
502 */ 569 */
503 s_umount = &dentry->d_sb->s_umount; 570 if (down_read_trylock(&sb->s_umount)) {
504 if (down_read_trylock(s_umount)) { 571 if ((sb->s_root != NULL) &&
505 if (dentry->d_sb->s_root != NULL) { 572 (!list_empty(&sb->s_dentry_lru))) {
506 prune_one_dentry(dentry); 573 spin_unlock(&dcache_lock);
507 up_read(s_umount); 574 __shrink_dcache_sb(sb, &w_count,
508 continue; 575 DCACHE_REFERENCED);
576 pruned -= w_count;
577 spin_lock(&dcache_lock);
509 } 578 }
510 up_read(s_umount); 579 up_read(&sb->s_umount);
511 } 580 }
512 spin_unlock(&dentry->d_lock); 581 spin_lock(&sb_lock);
582 count -= pruned;
513 /* 583 /*
514 * Insert dentry at the head of the list as inserting at the 584 * restart only when sb is no longer on the list and
515 * tail leads to a cycle. 585 * we have more work to do.
516 */ 586 */
517 list_add(&dentry->d_lru, &dentry_unused); 587 if (__put_super_and_need_restart(sb) && count > 0) {
518 dentry_stat.nr_unused++; 588 spin_unlock(&sb_lock);
589 goto restart;
590 }
519 } 591 }
592 spin_unlock(&sb_lock);
520 spin_unlock(&dcache_lock); 593 spin_unlock(&dcache_lock);
521} 594}
522 595
523/*
524 * Shrink the dcache for the specified super block.
525 * This allows us to unmount a device without disturbing
526 * the dcache for the other devices.
527 *
528 * This implementation makes just two traversals of the
529 * unused list. On the first pass we move the selected
530 * dentries to the most recent end, and on the second
531 * pass we free them. The second pass must restart after
532 * each dput(), but since the target dentries are all at
533 * the end, it's really just a single traversal.
534 */
535
536/** 596/**
537 * shrink_dcache_sb - shrink dcache for a superblock 597 * shrink_dcache_sb - shrink dcache for a superblock
538 * @sb: superblock 598 * @sb: superblock
@@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb)
541 * is used to free the dcache before unmounting a file 601 * is used to free the dcache before unmounting a file
542 * system 602 * system
543 */ 603 */
544
/*
 * Side-by-side hunk for shrink_dcache_sb().
 *
 * Left column (removed): a two-pass walk over the global dentry_unused list
 * under dcache_lock.  Pass one moves every dentry whose d_sb matches @sb to
 * the tail; pass two unlinks matching dentries, skips those with a non-zero
 * d_count, prunes the rest via prune_one_dentry(), and restarts the walk
 * after each prune.
 *
 * Right column (added): the whole body becomes a single call,
 * __shrink_dcache_sb(sb, NULL, 0).
 *
 * NOTE(review): __shrink_dcache_sb() is defined outside this excerpt; a NULL
 * count with flags 0 presumably means "release every unused dentry of @sb" -
 * confirm against its definition.
 */
545void shrink_dcache_sb(struct super_block * sb) 604void shrink_dcache_sb(struct super_block * sb)
546{ 605{
547 struct list_head *tmp, *next; 606 __shrink_dcache_sb(sb, NULL, 0);
548 struct dentry *dentry;
549
550 /*
551 * Pass one ... move the dentries for the specified
552 * superblock to the most recent end of the unused list.
553 */
554 spin_lock(&dcache_lock);
555 list_for_each_prev_safe(tmp, next, &dentry_unused) {
556 dentry = list_entry(tmp, struct dentry, d_lru);
557 if (dentry->d_sb != sb)
558 continue;
559 list_move_tail(tmp, &dentry_unused);
560 }
561
562 /*
563 * Pass two ... free the dentries for this superblock.
564 */
565repeat:
566 list_for_each_prev_safe(tmp, next, &dentry_unused) {
567 dentry = list_entry(tmp, struct dentry, d_lru);
568 if (dentry->d_sb != sb)
569 continue;
570 dentry_stat.nr_unused--;
571 list_del_init(tmp);
572 spin_lock(&dentry->d_lock);
573 if (atomic_read(&dentry->d_count)) {
574 spin_unlock(&dentry->d_lock);
575 continue;
576 }
577 prune_one_dentry(dentry);
578 cond_resched_lock(&dcache_lock);
579 goto repeat;
580 }
581 spin_unlock(&dcache_lock);
582} 607}
583 608
584/* 609/*
@@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
595 620
596 /* detach this root from the system */ 621 /* detach this root from the system */
597 spin_lock(&dcache_lock); 622 spin_lock(&dcache_lock);
598 dentry_lru_remove(dentry); 623 dentry_lru_del_init(dentry);
599 __d_drop(dentry); 624 __d_drop(dentry);
600 spin_unlock(&dcache_lock); 625 spin_unlock(&dcache_lock);
601 626
@@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
609 spin_lock(&dcache_lock); 634 spin_lock(&dcache_lock);
610 list_for_each_entry(loop, &dentry->d_subdirs, 635 list_for_each_entry(loop, &dentry->d_subdirs,
611 d_u.d_child) { 636 d_u.d_child) {
612 dentry_lru_remove(loop); 637 dentry_lru_del_init(loop);
613 __d_drop(loop); 638 __d_drop(loop);
614 cond_resched_lock(&dcache_lock); 639 cond_resched_lock(&dcache_lock);
615 } 640 }
@@ -791,14 +816,13 @@ resume:
791 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 816 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
792 next = tmp->next; 817 next = tmp->next;
793 818
794 dentry_lru_remove(dentry); 819 dentry_lru_del_init(dentry);
795 /* 820 /*
796 * move only zero ref count dentries to the end 821 * move only zero ref count dentries to the end
797 * of the unused list for prune_dcache 822 * of the unused list for prune_dcache
798 */ 823 */
799 if (!atomic_read(&dentry->d_count)) { 824 if (!atomic_read(&dentry->d_count)) {
800 list_add_tail(&dentry->d_lru, &dentry_unused); 825 dentry_lru_add_tail(dentry);
801 dentry_stat.nr_unused++;
802 found++; 826 found++;
803 } 827 }
804 828
@@ -840,10 +864,11 @@ out:
840 864
/*
 * Side-by-side hunk for shrink_dcache_parent().
 *
 * Both versions loop for as long as select_parent(parent) returns a non-zero
 * count in `found`.  The removed side handed that count to
 * prune_dcache(found, parent->d_sb); the added side reads parent->d_sb once
 * into `sb` and calls __shrink_dcache_sb(sb, &found, 0) instead.
 *
 * NOTE(review): `found` is now passed by address, so the callee may modify
 * it; the loop reassigns it from select_parent() on every iteration anyway.
 */
841void shrink_dcache_parent(struct dentry * parent) 865void shrink_dcache_parent(struct dentry * parent)
842{ 866{
867 struct super_block *sb = parent->d_sb;
843 int found; 868 int found;
844 869
845 while ((found = select_parent(parent)) != 0) 870 while ((found = select_parent(parent)) != 0)
846 prune_dcache(found, parent->d_sb); 871 __shrink_dcache_sb(sb, &found, 0);
847} 872}
848 873
849/* 874/*
@@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
863 if (nr) { 888 if (nr) {
864 if (!(gfp_mask & __GFP_FS)) 889 if (!(gfp_mask & __GFP_FS))
865 return -1; 890 return -1;
866 prune_dcache(nr, NULL); 891 prune_dcache(nr);
867 } 892 }
868 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 893 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
869} 894}
@@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1215 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while 1240 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1216 * lookup is going on. 1241 * lookup is going on.
1217 * 1242 *
1218 * dentry_unused list is not updated even if lookup finds the required dentry 1243 * The dentry unused LRU is not updated even if lookup finds the required dentry
1219 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1244 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1220 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1245 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1221 * acquisition. 1246 * acquisition.
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
116 if (xop->callback == NULL) 116 if (xop->callback == NULL)
117 wait_event(recv_wq, (op->done != 0)); 117 wait_event(recv_wq, (op->done != 0));
118 else { 118 else {
119 rv = -EINPROGRESS; 119 rv = FILE_LOCK_DEFERRED;
120 goto out; 120 goto out;
121 } 121 }
122 122
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da19959..1346eebe74ce 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
562 */ 562 */
563static void dqput(struct dquot *dquot) 563static void dqput(struct dquot *dquot)
564{ 564{
565 int ret;
566
565 if (!dquot) 567 if (!dquot)
566 return; 568 return;
567#ifdef __DQUOT_PARANOIA 569#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
594 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { 596 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
595 spin_unlock(&dq_list_lock); 597 spin_unlock(&dq_list_lock);
596 /* Commit dquot before releasing */ 598 /* Commit dquot before releasing */
597 dquot->dq_sb->dq_op->write_dquot(dquot); 599 ret = dquot->dq_sb->dq_op->write_dquot(dquot);
600 if (ret < 0) {
601 printk(KERN_ERR "VFS: cannot write quota structure on "
602 "device %s (error %d). Quota may get out of "
603 "sync!\n", dquot->dq_sb->s_id, ret);
604 /*
605 * We clear dirty bit anyway, so that we avoid
606 * infinite loop here
607 */
608 spin_lock(&dq_list_lock);
609 clear_dquot_dirty(dquot);
610 spin_unlock(&dq_list_lock);
611 }
598 goto we_slept; 612 goto we_slept;
599 } 613 }
600 /* Clear flag in case dquot was inactive (something bad happened) */ 614 /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
875 char *msg = NULL; 889 char *msg = NULL;
876 struct tty_struct *tty; 890 struct tty_struct *tty;
877 891
878 if (!need_print_warning(dquot)) 892 if (warntype == QUOTA_NL_IHARDBELOW ||
893 warntype == QUOTA_NL_ISOFTBELOW ||
894 warntype == QUOTA_NL_BHARDBELOW ||
895 warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
879 return; 896 return;
880 897
881 mutex_lock(&tty_mutex); 898 mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1083 return QUOTA_OK; 1100 return QUOTA_OK;
1084} 1101}
1085 1102
/*
 * info_idq_free - choose the "dropped below a limit" notification code for
 * releasing @inodes inodes charged to @dquot.
 *
 * Only inspects @dquot; nothing is modified here.  The callers visible in
 * this diff invoke it immediately before dquot_decr_inodes().
 *
 * Returns:
 *   QUOTA_NL_NOWARN     - DQ_FAKE_B is set, usage is already at or below the
 *                         soft limit, or no limit is crossed by the release;
 *   QUOTA_NL_ISOFTBELOW - usage minus @inodes is at or below the soft limit;
 *   QUOTA_NL_IHARDBELOW - usage is currently at or above the hard limit and
 *                         strictly below it after subtracting @inodes.
 *
 * NOTE(review): if dqb_curinodes is an unsigned type, `curinodes - inodes`
 * wraps when @inodes exceeds the current usage - confirm callers never free
 * more than is charged.
 */
1103static int info_idq_free(struct dquot *dquot, ulong inodes)
1104{
1105 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1106 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
1107 return QUOTA_NL_NOWARN;
1108
/* The soft-limit check comes first, so it wins when both limits are crossed. */
1109 if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
1110 return QUOTA_NL_ISOFTBELOW;
1111 if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
1112 dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
1113 return QUOTA_NL_IHARDBELOW;
1114 return QUOTA_NL_NOWARN;
1115}
1116
/*
 * info_bdq_free - choose the "dropped below a limit" notification code for
 * releasing @space units of space charged to @dquot.
 *
 * Space counterpart of info_idq_free(): the same decision ladder, but every
 * usage value is passed through toqb() before being compared with the block
 * soft/hard limits.  Only inspects @dquot; the callers visible in this diff
 * invoke it immediately before dquot_decr_space().
 *
 * Returns QUOTA_NL_NOWARN, QUOTA_NL_BSOFTBELOW or QUOTA_NL_BHARDBELOW.
 *
 * NOTE(review): toqb() is defined outside this excerpt; presumably it
 * converts a byte count into quota blocks - confirm.
 */
1117static int info_bdq_free(struct dquot *dquot, qsize_t space)
1118{
1119 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1120 toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
1121 return QUOTA_NL_NOWARN;
1122
/* The soft-limit check comes first, so it wins when both limits are crossed. */
1123 if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
1124 dquot->dq_dqb.dqb_bsoftlimit)
1125 return QUOTA_NL_BSOFTBELOW;
1126 if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
1127 toqb(dquot->dq_dqb.dqb_curspace - space) <
1128 dquot->dq_dqb.dqb_bhardlimit)
1129 return QUOTA_NL_BHARDBELOW;
1130 return QUOTA_NL_NOWARN;
1131}
1086/* 1132/*
1087 * Initialize quota pointers in inode 1133 * Initialize quota pointers in inode
1088 * Transaction must be started at entry 1134 * Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
1139 return 0; 1185 return 0;
1140} 1186}
1141 1187
1188/* Wrapper to remove references to quota structures from inode */
/*
 * Calls inode->i_sb->dq_op->drop(inode) only when all of the following hold:
 * the inode is not IS_NOQUOTA, it has a superblock, that superblock has a
 * dq_op table with a drop method, and at least one inode->i_dquot[] slot
 * differs from NODQUOT.  Otherwise it returns without doing anything.
 */
1189void vfs_dq_drop(struct inode *inode)
1190{
1191 /* Here we can get arbitrary inode from clear_inode() so we have
1192 * to be careful. OTOH we don't need locking as quota operations
1193 * are allowed to change only at mount time */
1194 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
1195 && inode->i_sb->dq_op->drop) {
1196 int cnt;
1197 /* Test before calling to rule out calls from proc and such
1198 * where we are not allowed to block. Note that this is
1199 * actually reliable test even without the lock - the caller
1200 * must assure that nobody can come after the DQUOT_DROP and
1201 * add quota pointers back anyway */
1202 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1203 if (inode->i_dquot[cnt] != NODQUOT)
1204 break;
/* cnt < MAXQUOTAS only if the scan broke out, i.e. some quota pointer is set. */
1205 if (cnt < MAXQUOTAS)
1206 inode->i_sb->dq_op->drop(inode);
1207 }
1208}
1209
1142/* 1210/*
1143 * Following four functions update i_blocks+i_bytes fields and 1211 * Following four functions update i_blocks+i_bytes fields and
1144 * quota information (together with appropriate checks) 1212 * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
1248int dquot_free_space(struct inode *inode, qsize_t number) 1316int dquot_free_space(struct inode *inode, qsize_t number)
1249{ 1317{
1250 unsigned int cnt; 1318 unsigned int cnt;
1319 char warntype[MAXQUOTAS];
1251 1320
1252 /* First test before acquiring mutex - solves deadlocks when we 1321 /* First test before acquiring mutex - solves deadlocks when we
1253 * re-enter the quota code and are already holding the mutex */ 1322 * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
1256 inode_sub_bytes(inode, number); 1325 inode_sub_bytes(inode, number);
1257 return QUOTA_OK; 1326 return QUOTA_OK;
1258 } 1327 }
1328
1259 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1329 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1260 /* Now recheck reliably when holding dqptr_sem */ 1330 /* Now recheck reliably when holding dqptr_sem */
1261 if (IS_NOQUOTA(inode)) { 1331 if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
1266 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1336 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1267 if (inode->i_dquot[cnt] == NODQUOT) 1337 if (inode->i_dquot[cnt] == NODQUOT)
1268 continue; 1338 continue;
1339 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1269 dquot_decr_space(inode->i_dquot[cnt], number); 1340 dquot_decr_space(inode->i_dquot[cnt], number);
1270 } 1341 }
1271 inode_sub_bytes(inode, number); 1342 inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
1274 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1345 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1275 if (inode->i_dquot[cnt]) 1346 if (inode->i_dquot[cnt])
1276 mark_dquot_dirty(inode->i_dquot[cnt]); 1347 mark_dquot_dirty(inode->i_dquot[cnt]);
1348 flush_warnings(inode->i_dquot, warntype);
1277 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1349 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1278 return QUOTA_OK; 1350 return QUOTA_OK;
1279} 1351}
@@ -1284,11 +1356,13 @@ out_sub:
1284int dquot_free_inode(const struct inode *inode, unsigned long number) 1356int dquot_free_inode(const struct inode *inode, unsigned long number)
1285{ 1357{
1286 unsigned int cnt; 1358 unsigned int cnt;
1359 char warntype[MAXQUOTAS];
1287 1360
1288 /* First test before acquiring mutex - solves deadlocks when we 1361 /* First test before acquiring mutex - solves deadlocks when we
1289 * re-enter the quota code and are already holding the mutex */ 1362 * re-enter the quota code and are already holding the mutex */
1290 if (IS_NOQUOTA(inode)) 1363 if (IS_NOQUOTA(inode))
1291 return QUOTA_OK; 1364 return QUOTA_OK;
1365
1292 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1366 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1293 /* Now recheck reliably when holding dqptr_sem */ 1367 /* Now recheck reliably when holding dqptr_sem */
1294 if (IS_NOQUOTA(inode)) { 1368 if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1299 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1373 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1300 if (inode->i_dquot[cnt] == NODQUOT) 1374 if (inode->i_dquot[cnt] == NODQUOT)
1301 continue; 1375 continue;
1376 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
1302 dquot_decr_inodes(inode->i_dquot[cnt], number); 1377 dquot_decr_inodes(inode->i_dquot[cnt], number);
1303 } 1378 }
1304 spin_unlock(&dq_data_lock); 1379 spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1306 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1381 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1307 if (inode->i_dquot[cnt]) 1382 if (inode->i_dquot[cnt])
1308 mark_dquot_dirty(inode->i_dquot[cnt]); 1383 mark_dquot_dirty(inode->i_dquot[cnt]);
1384 flush_warnings(inode->i_dquot, warntype);
1309 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1385 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1310 return QUOTA_OK; 1386 return QUOTA_OK;
1311} 1387}
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1323 struct dquot *transfer_to[MAXQUOTAS]; 1399 struct dquot *transfer_to[MAXQUOTAS];
1324 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, 1400 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
1325 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; 1401 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
1326 char warntype[MAXQUOTAS]; 1402 char warntype_to[MAXQUOTAS];
1403 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1327 1404
1328 /* First test before acquiring mutex - solves deadlocks when we 1405 /* First test before acquiring mutex - solves deadlocks when we
1329 * re-enter the quota code and are already holding the mutex */ 1406 * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1332 /* Clear the arrays */ 1409 /* Clear the arrays */
1333 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1410 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1334 transfer_to[cnt] = transfer_from[cnt] = NODQUOT; 1411 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
1335 warntype[cnt] = QUOTA_NL_NOWARN; 1412 warntype_to[cnt] = QUOTA_NL_NOWARN;
1336 } 1413 }
1337 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1414 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1338 /* Now recheck reliably when holding dqptr_sem */ 1415 /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1364 if (transfer_to[cnt] == NODQUOT) 1441 if (transfer_to[cnt] == NODQUOT)
1365 continue; 1442 continue;
1366 transfer_from[cnt] = inode->i_dquot[cnt]; 1443 transfer_from[cnt] = inode->i_dquot[cnt];
1367 if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || 1444 if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
1368 check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) 1445 NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
1446 warntype_to + cnt) == NO_QUOTA)
1369 goto warn_put_all; 1447 goto warn_put_all;
1370 } 1448 }
1371 1449
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1381 1459
1382 /* Due to IO error we might not have transfer_from[] structure */ 1460 /* Due to IO error we might not have transfer_from[] structure */
1383 if (transfer_from[cnt]) { 1461 if (transfer_from[cnt]) {
1462 warntype_from_inodes[cnt] =
1463 info_idq_free(transfer_from[cnt], 1);
1464 warntype_from_space[cnt] =
1465 info_bdq_free(transfer_from[cnt], space);
1384 dquot_decr_inodes(transfer_from[cnt], 1); 1466 dquot_decr_inodes(transfer_from[cnt], 1);
1385 dquot_decr_space(transfer_from[cnt], space); 1467 dquot_decr_space(transfer_from[cnt], space);
1386 } 1468 }
@@ -1400,7 +1482,9 @@ warn_put_all:
1400 if (transfer_to[cnt]) 1482 if (transfer_to[cnt])
1401 mark_dquot_dirty(transfer_to[cnt]); 1483 mark_dquot_dirty(transfer_to[cnt]);
1402 } 1484 }
1403 flush_warnings(transfer_to, warntype); 1485 flush_warnings(transfer_to, warntype_to);
1486 flush_warnings(transfer_from, warntype_from_inodes);
1487 flush_warnings(transfer_from, warntype_from_space);
1404 1488
1405 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1489 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1406 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) 1490 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
1412 return ret; 1496 return ret;
1413} 1497}
1414 1498
1499/* Wrapper for transferring ownership of an inode */
/*
 * When any quota is enabled on the inode's superblock and the inode is not
 * IS_NOQUOTA, runs vfs_dq_init(inode) and then the superblock's
 * dq_op->transfer(inode, iattr).
 *
 * Returns 1 if that transfer call returns NO_QUOTA, and 0 in every other
 * case (including when quotas are disabled or the inode is exempt).
 *
 * NOTE(review): dq_op and dq_op->transfer are dereferenced without a NULL
 * check; presumably sb_any_quota_enabled() implies both are set - confirm.
 */
1500int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
1501{
1502 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
1503 vfs_dq_init(inode);
1504 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
1505 return 1;
1506 }
1507 return 0;
1508}
1509
1510
1415/* 1511/*
1416 * Write info of quota file to disk 1512 * Write info of quota file to disk
1417 */ 1513 */
@@ -1752,6 +1848,22 @@ out:
1752 return error; 1848 return error;
1753} 1849}
1754 1850
1851/* Wrapper to turn on quotas when remounting rw */
/*
 * Returns -ENOSYS when the superblock has no s_qcop table or no quota_on
 * method.  Otherwise calls sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1) for
 * every quota type cnt in [0, MAXQUOTAS).  A failing type does not stop the
 * loop; the function returns the first negative error seen, or 0 if every
 * call returned a non-negative value.
 *
 * NOTE(review): the meaning of the trailing arguments (0, NULL, 1) is set by
 * the quota_on prototype outside this excerpt; the final 1 presumably flags
 * a remount - confirm.
 */
1852int vfs_dq_quota_on_remount(struct super_block *sb)
1853{
1854 int cnt;
1855 int ret = 0, err;
1856
1857 if (!sb->s_qcop || !sb->s_qcop->quota_on)
1858 return -ENOSYS;
1859 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1860 err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
/* Remember only the first failure; keep trying the remaining types. */
1861 if (err < 0 && !ret)
1862 ret = err;
1863 }
1864 return ret;
1865}
1866
1755/* Generic routine for getting common part of quota structure */ 1867/* Generic routine for getting common part of quota structure */
1756static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 1868static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
1757{ 1869{
@@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release);
2087EXPORT_SYMBOL(dquot_mark_dquot_dirty); 2199EXPORT_SYMBOL(dquot_mark_dquot_dirty);
2088EXPORT_SYMBOL(dquot_initialize); 2200EXPORT_SYMBOL(dquot_initialize);
2089EXPORT_SYMBOL(dquot_drop); 2201EXPORT_SYMBOL(dquot_drop);
2202EXPORT_SYMBOL(vfs_dq_drop);
2090EXPORT_SYMBOL(dquot_alloc_space); 2203EXPORT_SYMBOL(dquot_alloc_space);
2091EXPORT_SYMBOL(dquot_alloc_inode); 2204EXPORT_SYMBOL(dquot_alloc_inode);
2092EXPORT_SYMBOL(dquot_free_space); 2205EXPORT_SYMBOL(dquot_free_space);
2093EXPORT_SYMBOL(dquot_free_inode); 2206EXPORT_SYMBOL(dquot_free_inode);
2094EXPORT_SYMBOL(dquot_transfer); 2207EXPORT_SYMBOL(dquot_transfer);
2208EXPORT_SYMBOL(vfs_dq_transfer);
2209EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd4884..b4755a85996e 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869a..7b99917ffadc 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
38static int 39static int
@@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data)
1032{ 1033{
1033 u32 m_1, m_2; 1034 u32 m_1, m_2;
1034 1035
1035 memcpy(&m_1, data, 4); 1036 m_1 = get_unaligned_be32(data);
1036 m_1 = be32_to_cpu(m_1); 1037 m_2 = get_unaligned_be32(data + 4);
1037 memcpy(&m_2, (data + 4), 4);
1038 m_2 = be32_to_cpu(m_2);
1039 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) 1038 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
1040 return 1; 1039 return 1;
1041 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " 1040 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
1073 int i; 1072 int i;
1074 u32 flags; 1073 u32 flags;
1075 1074
1076 memcpy(&flags, page_virt, 4); 1075 flags = get_unaligned_be32(page_virt);
1077 flags = be32_to_cpu(flags);
1078 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1076 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1079 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1077 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1080 if (flags & ecryptfs_flag_map[i].file_flag) { 1078 if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1100 1098
1101 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1099 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1102 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); 1100 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1103 m_1 = cpu_to_be32(m_1); 1101 put_unaligned_be32(m_1, page_virt);
1104 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1102 page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
1105 m_2 = cpu_to_be32(m_2); 1103 put_unaligned_be32(m_2, page_virt);
1106 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1107 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1104 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1105}
1110 1106
@@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1121 flags |= ecryptfs_flag_map[i].file_flag; 1117 flags |= ecryptfs_flag_map[i].file_flag;
1122 /* Version is in top 8 bits of the 32-bit flag vector */ 1118 /* Version is in top 8 bits of the 32-bit flag vector */
1123 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); 1119 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1124 flags = cpu_to_be32(flags); 1120 put_unaligned_be32(flags, page_virt);
1125 memcpy(page_virt, &flags, 4);
1126 (*written) = 4; 1121 (*written) = 4;
1127} 1122}
1128 1123
@@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt,
1238 num_header_extents_at_front = 1233 num_header_extents_at_front =
1239 (u16)(crypt_stat->num_header_bytes_at_front 1234 (u16)(crypt_stat->num_header_bytes_at_front
1240 / crypt_stat->extent_size); 1235 / crypt_stat->extent_size);
1241 header_extent_size = cpu_to_be32(header_extent_size); 1236 put_unaligned_be32(header_extent_size, virt);
1242 memcpy(virt, &header_extent_size, 4);
1243 virt += 4; 1237 virt += 4;
1244 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); 1238 put_unaligned_be16(num_header_extents_at_front, virt);
1245 memcpy(virt, &num_header_extents_at_front, 2);
1246 (*written) = 6; 1239 (*written) = 6;
1247} 1240}
1248 1241
@@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1410 u32 header_extent_size; 1403 u32 header_extent_size;
1411 u16 num_header_extents_at_front; 1404 u16 num_header_extents_at_front;
1412 1405
1413 memcpy(&header_extent_size, virt, sizeof(u32)); 1406 header_extent_size = get_unaligned_be32(virt);
1414 header_extent_size = be32_to_cpu(header_extent_size); 1407 virt += sizeof(__be32);
1415 virt += sizeof(u32); 1408 num_header_extents_at_front = get_unaligned_be16(virt);
1416 memcpy(&num_header_extents_at_front, virt, sizeof(u16));
1417 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1418 crypt_stat->num_header_bytes_at_front = 1409 crypt_stat->num_header_bytes_at_front =
1419 (((size_t)num_header_extents_at_front 1410 (((size_t)num_header_extents_at_front
1420 * (size_t)header_extent_size)); 1411 * (size_t)header_extent_size));
1421 (*bytes_read) = (sizeof(u32) + sizeof(u16)); 1412 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1422 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1413 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1423 && (crypt_stat->num_header_bytes_at_front 1414 && (crypt_stat->num_header_bytes_at_front
1424 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1415 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e05..b73fb752c5f8 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
559extern struct kmem_cache *ecryptfs_key_sig_cache; 559extern struct kmem_cache *ecryptfs_key_sig_cache;
560extern struct kmem_cache *ecryptfs_global_auth_tok_cache; 560extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
561extern struct kmem_cache *ecryptfs_key_tfm_cache; 561extern struct kmem_cache *ecryptfs_key_tfm_cache;
562extern struct kmem_cache *ecryptfs_open_req_cache;
562 563
/*
 * struct ecryptfs_open_req - one request to open a lower file.
 *
 * The lower_file / lower_dentry / lower_mnt members have the same types and
 * names as the three parameters of ecryptfs_privileged_open(), which is
 * declared later in this header.
 *
 * NOTE(review): presumably these requests are queued to the eCryptfs kernel
 * thread (kthread.c is added to the Makefile by the same diff) through
 * kthread_ctl_list, with `wait` used to wake the requester and `mux`
 * guarding the request - confirm against fs/ecryptfs/kthread.c.
 */
564struct ecryptfs_open_req {
/* Bit values for the `flags` member below (presumably - confirm). */
565#define ECRYPTFS_REQ_PROCESSED 0x00000001
566#define ECRYPTFS_REQ_DROPPED 0x00000002
567#define ECRYPTFS_REQ_ZOMBIE 0x00000004
568 u32 flags;
/* Pointer to a struct file pointer: an out-slot for the opened lower file (presumably - confirm). */
569 struct file **lower_file;
570 struct dentry *lower_dentry;
571 struct vfsmount *lower_mnt;
572 wait_queue_head_t wait;
573 struct mutex mux;
574 struct list_head kthread_ctl_list;
575};
576
577#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001
563int ecryptfs_interpose(struct dentry *hidden_dentry, 578int ecryptfs_interpose(struct dentry *hidden_dentry,
564 struct dentry *this_dentry, struct super_block *sb, 579 struct dentry *this_dentry, struct super_block *sb,
565 int flag); 580 u32 flags);
566int ecryptfs_fill_zeros(struct file *file, loff_t new_length); 581int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
567int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, 582int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
568 const char *name, int length, 583 const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
690int 705int
691ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, 706ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
692 struct user_namespace *user_ns, struct pid *pid); 707 struct user_namespace *user_ns, struct pid *pid);
708int ecryptfs_init_kthread(void);
709void ecryptfs_destroy_kthread(void);
710int ecryptfs_privileged_open(struct file **lower_file,
711 struct dentry *lower_dentry,
712 struct vfsmount *lower_mnt);
713int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
693 714
694#endif /* #ifndef ECRYPTFS_KERNEL_H */ 715#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668f..9244d653743e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
192 | ECRYPTFS_ENCRYPTED); 192 | ECRYPTFS_ENCRYPTED);
193 } 193 }
194 mutex_unlock(&crypt_stat->cs_mutex); 194 mutex_unlock(&crypt_stat->cs_mutex);
195 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
196 && !(file->f_flags & O_RDONLY)) {
197 rc = -EPERM;
198 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
199 "file must hence be opened RO\n", __func__);
200 goto out;
201 }
202 if (!ecryptfs_inode_to_private(inode)->lower_file) {
203 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
204 if (rc) {
205 printk(KERN_ERR "%s: Error attempting to initialize "
206 "the persistent file for the dentry with name "
207 "[%s]; rc = [%d]\n", __func__,
208 ecryptfs_dentry->d_name.name, rc);
209 goto out;
210 }
211 }
195 ecryptfs_set_file_lower( 212 ecryptfs_set_file_lower(
196 file, ecryptfs_inode_to_private(inode)->lower_file); 213 file, ecryptfs_inode_to_private(inode)->lower_file);
197 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 214 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aae..d755455e3bff 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/mount.h> 31#include <linux/mount.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <asm/unaligned.h>
34#include "ecryptfs_kernel.h" 35#include "ecryptfs_kernel.h"
35 36
36static struct dentry *lock_parent(struct dentry *dentry) 37static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
188 "context; rc = [%d]\n", rc); 189 "context; rc = [%d]\n", rc);
189 goto out; 190 goto out;
190 } 191 }
192 if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
193 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
194 if (rc) {
195 printk(KERN_ERR "%s: Error attempting to initialize "
196 "the persistent file for the dentry with name "
197 "[%s]; rc = [%d]\n", __func__,
198 ecryptfs_dentry->d_name.name, rc);
199 goto out;
200 }
201 }
191 rc = ecryptfs_write_metadata(ecryptfs_dentry); 202 rc = ecryptfs_write_metadata(ecryptfs_dentry);
192 if (rc) { 203 if (rc) {
193 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 204 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
307 d_add(dentry, NULL); 318 d_add(dentry, NULL);
308 goto out; 319 goto out;
309 } 320 }
310 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); 321 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
322 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
311 if (rc) { 323 if (rc) {
312 ecryptfs_printk(KERN_ERR, "Error interposing\n"); 324 ecryptfs_printk(KERN_ERR, "Error interposing\n");
313 goto out_dput; 325 goto out;
314 } 326 }
315 if (S_ISDIR(lower_inode->i_mode)) { 327 if (S_ISDIR(lower_inode->i_mode)) {
316 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); 328 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
336 rc = -ENOMEM; 348 rc = -ENOMEM;
337 ecryptfs_printk(KERN_ERR, 349 ecryptfs_printk(KERN_ERR,
338 "Cannot ecryptfs_kmalloc a page\n"); 350 "Cannot ecryptfs_kmalloc a page\n");
339 goto out_dput; 351 goto out;
340 } 352 }
341 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 353 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
342 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 354 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
343 ecryptfs_set_default_sizes(crypt_stat); 355 ecryptfs_set_default_sizes(crypt_stat);
356 if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
357 rc = ecryptfs_init_persistent_file(dentry);
358 if (rc) {
359 printk(KERN_ERR "%s: Error attempting to initialize "
360 "the persistent file for the dentry with name "
361 "[%s]; rc = [%d]\n", __func__,
362 dentry->d_name.name, rc);
363 goto out;
364 }
365 }
344 rc = ecryptfs_read_and_validate_header_region(page_virt, 366 rc = ecryptfs_read_and_validate_header_region(page_virt,
345 dentry->d_inode); 367 dentry->d_inode);
346 if (rc) { 368 if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
364 else 386 else
365 file_size = i_size_read(lower_dentry->d_inode); 387 file_size = i_size_read(lower_dentry->d_inode);
366 } else { 388 } else {
367 memcpy(&file_size, page_virt, sizeof(file_size)); 389 file_size = get_unaligned_be64(page_virt);
368 file_size = be64_to_cpu(file_size);
369 } 390 }
370 i_size_write(dentry->d_inode, (loff_t)file_size); 391 i_size_write(dentry->d_inode, (loff_t)file_size);
371 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 392 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180be..f5b76a331b9c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
44 int rc = 0; 44 int rc = 0;
45 45
46 switch (err_code) { 46 switch (err_code) {
47 case ENOKEY: 47 case -ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n"); 48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT; 49 rc = -ENOENT;
50 break; 50 break;
51 case EKEYEXPIRED: 51 case -EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n"); 52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME; 53 rc = -ETIME;
54 break; 54 break;
55 case EKEYREVOKED: 55 case -EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n"); 56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL; 57 rc = -EINVAL;
58 break; 58 break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { 963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
964 printk(KERN_ERR "Could not find key with description: [%s]\n", 964 printk(KERN_ERR "Could not find key with description: [%s]\n",
965 sig); 965 sig);
966 process_request_key_err(PTR_ERR(*auth_tok_key)); 966 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
967 rc = -EINVAL;
968 goto out; 967 goto out;
969 } 968 }
970 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 969 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 000000000000..c440c6b58b2d
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <linux/kthread.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
26#include <linux/mount.h>
27#include "ecryptfs_kernel.h"
28
29struct kmem_cache *ecryptfs_open_req_cache;
30
31static struct ecryptfs_kthread_ctl {
32#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
33 u32 flags;
34 struct mutex mux;
35 struct list_head req_list;
36 wait_queue_head_t wait;
37} ecryptfs_kthread_ctl;
38
39static struct task_struct *ecryptfs_kthread;
40
41/**
42 * ecryptfs_threadfn
43 * @ignored: ignored
44 *
45 * The eCryptfs kernel thread that has the responsibility of getting
46 * the lower persistent file with RW permissions.
47 *
48 * Returns zero on success; non-zero otherwise
49 */
50static int ecryptfs_threadfn(void *ignored)
51{
52 set_freezable();
53 while (1) {
54 struct ecryptfs_open_req *req;
55
56 wait_event_freezable(
57 ecryptfs_kthread_ctl.wait,
58 (!list_empty(&ecryptfs_kthread_ctl.req_list)
59 || kthread_should_stop()));
60 mutex_lock(&ecryptfs_kthread_ctl.mux);
61 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
62 mutex_unlock(&ecryptfs_kthread_ctl.mux);
63 goto out;
64 }
65 while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
66 req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
67 struct ecryptfs_open_req,
68 kthread_ctl_list);
69 mutex_lock(&req->mux);
70 list_del(&req->kthread_ctl_list);
71 if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
72 dget(req->lower_dentry);
73 mntget(req->lower_mnt);
74 (*req->lower_file) = dentry_open(
75 req->lower_dentry, req->lower_mnt,
76 (O_RDWR | O_LARGEFILE));
77 req->flags |= ECRYPTFS_REQ_PROCESSED;
78 }
79 wake_up(&req->wait);
80 mutex_unlock(&req->mux);
81 }
82 mutex_unlock(&ecryptfs_kthread_ctl.mux);
83 }
84out:
85 return 0;
86}
87
88int ecryptfs_init_kthread(void)
89{
90 int rc = 0;
91
92 mutex_init(&ecryptfs_kthread_ctl.mux);
93 init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
94 INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
95 ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
96 "ecryptfs-kthread");
97 if (IS_ERR(ecryptfs_kthread)) {
98 rc = PTR_ERR(ecryptfs_kthread);
99 printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
100 "\n", __func__, rc);
101 }
102 return rc;
103}
104
105void ecryptfs_destroy_kthread(void)
106{
107 struct ecryptfs_open_req *req;
108
109 mutex_lock(&ecryptfs_kthread_ctl.mux);
110 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
111 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
112 kthread_ctl_list) {
113 mutex_lock(&req->mux);
114 req->flags |= ECRYPTFS_REQ_ZOMBIE;
115 wake_up(&req->wait);
116 mutex_unlock(&req->mux);
117 }
118 mutex_unlock(&ecryptfs_kthread_ctl.mux);
119 kthread_stop(ecryptfs_kthread);
120 wake_up(&ecryptfs_kthread_ctl.wait);
121}
122
123/**
124 * ecryptfs_privileged_open
125 * @lower_file: Result of dentry_open by root on lower dentry
126 * @lower_dentry: Lower dentry for file to open
127 * @lower_mnt: Lower vfsmount for file to open
128 *
129 * This function gets a r/w file opened againt the lower dentry.
130 *
131 * Returns zero on success; non-zero otherwise
132 */
133int ecryptfs_privileged_open(struct file **lower_file,
134 struct dentry *lower_dentry,
135 struct vfsmount *lower_mnt)
136{
137 struct ecryptfs_open_req *req;
138 int rc = 0;
139
140 /* Corresponding dput() and mntput() are done when the
141 * persistent file is fput() when the eCryptfs inode is
142 * destroyed. */
143 dget(lower_dentry);
144 mntget(lower_mnt);
145 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
146 (O_RDWR | O_LARGEFILE));
147 if (!IS_ERR(*lower_file))
148 goto out;
149 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
150 if (!req) {
151 rc = -ENOMEM;
152 goto out;
153 }
154 mutex_init(&req->mux);
155 req->lower_file = lower_file;
156 req->lower_dentry = lower_dentry;
157 req->lower_mnt = lower_mnt;
158 init_waitqueue_head(&req->wait);
159 req->flags = 0;
160 mutex_lock(&ecryptfs_kthread_ctl.mux);
161 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
162 rc = -EIO;
163 mutex_unlock(&ecryptfs_kthread_ctl.mux);
164 printk(KERN_ERR "%s: We are in the middle of shutting down; "
165 "aborting privileged request to open lower file\n",
166 __func__);
167 goto out_free;
168 }
169 list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
170 mutex_unlock(&ecryptfs_kthread_ctl.mux);
171 wake_up(&ecryptfs_kthread_ctl.wait);
172 wait_event(req->wait, (req->flags != 0));
173 mutex_lock(&req->mux);
174 BUG_ON(req->flags == 0);
175 if (req->flags & ECRYPTFS_REQ_DROPPED
176 || req->flags & ECRYPTFS_REQ_ZOMBIE) {
177 rc = -EIO;
178 printk(KERN_WARNING "%s: Privileged open request dropped\n",
179 __func__);
180 goto out_unlock;
181 }
182 if (IS_ERR(*req->lower_file)) {
183 rc = PTR_ERR(*req->lower_file);
184 dget(lower_dentry);
185 mntget(lower_mnt);
186 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
187 (O_RDONLY | O_LARGEFILE));
188 if (IS_ERR(*lower_file)) {
189 rc = PTR_ERR(*req->lower_file);
190 (*lower_file) = NULL;
191 printk(KERN_WARNING "%s: Error attempting privileged "
192 "open of lower file with either RW or RO "
193 "perms; rc = [%d]. Giving up.\n",
194 __func__, rc);
195 }
196 }
197out_unlock:
198 mutex_unlock(&req->mux);
199out_free:
200 kmem_cache_free(ecryptfs_open_req_cache, req);
201out:
202 return rc;
203}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601eb..6f403cfba14f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
117 * 117 *
118 * Returns zero on success; non-zero otherwise 118 * Returns zero on success; non-zero otherwise
119 */ 119 */
120static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 120int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
121{ 121{
122 struct ecryptfs_inode_info *inode_info = 122 struct ecryptfs_inode_info *inode_info =
123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
131 131
132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
133 /* Corresponding dput() and mntput() are done when the 133 rc = ecryptfs_privileged_open(&inode_info->lower_file,
134 * persistent file is fput() when the eCryptfs inode 134 lower_dentry, lower_mnt);
135 * is destroyed. */ 135 if (rc || IS_ERR(inode_info->lower_file)) {
136 dget(lower_dentry);
137 mntget(lower_mnt);
138 inode_info->lower_file = dentry_open(lower_dentry,
139 lower_mnt,
140 (O_RDWR | O_LARGEFILE));
141 if (IS_ERR(inode_info->lower_file)) {
142 dget(lower_dentry);
143 mntget(lower_mnt);
144 inode_info->lower_file = dentry_open(lower_dentry,
145 lower_mnt,
146 (O_RDONLY
147 | O_LARGEFILE));
148 }
149 if (IS_ERR(inode_info->lower_file)) {
150 printk(KERN_ERR "Error opening lower persistent file " 136 printk(KERN_ERR "Error opening lower persistent file "
151 "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", 137 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
152 lower_dentry, lower_mnt); 138 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
153 rc = PTR_ERR(inode_info->lower_file); 139 rc = PTR_ERR(inode_info->lower_file);
154 inode_info->lower_file = NULL; 140 inode_info->lower_file = NULL;
155 } 141 }
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
163 * @lower_dentry: Existing dentry in the lower filesystem 149 * @lower_dentry: Existing dentry in the lower filesystem
164 * @dentry: ecryptfs' dentry 150 * @dentry: ecryptfs' dentry
165 * @sb: ecryptfs's super_block 151 * @sb: ecryptfs's super_block
166 * @flag: If set to true, then d_add is called, else d_instantiate is called 152 * @flags: flags to govern behavior of interpose procedure
167 * 153 *
168 * Interposes upper and lower dentries. 154 * Interposes upper and lower dentries.
169 * 155 *
170 * Returns zero on success; non-zero otherwise 156 * Returns zero on success; non-zero otherwise
171 */ 157 */
172int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, 158int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
173 struct super_block *sb, int flag) 159 struct super_block *sb, u32 flags)
174{ 160{
175 struct inode *lower_inode; 161 struct inode *lower_inode;
176 struct inode *inode; 162 struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
207 init_special_inode(inode, lower_inode->i_mode, 193 init_special_inode(inode, lower_inode->i_mode,
208 lower_inode->i_rdev); 194 lower_inode->i_rdev);
209 dentry->d_op = &ecryptfs_dops; 195 dentry->d_op = &ecryptfs_dops;
210 if (flag) 196 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
211 d_add(dentry, inode); 197 d_add(dentry, inode);
212 else 198 else
213 d_instantiate(dentry, inode); 199 d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
215 /* This size will be overwritten for real files w/ headers and 201 /* This size will be overwritten for real files w/ headers and
216 * other metadata */ 202 * other metadata */
217 fsstack_copy_inode_size(inode, lower_inode); 203 fsstack_copy_inode_size(inode, lower_inode);
218 rc = ecryptfs_init_persistent_file(dentry);
219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out;
224 }
225out: 204out:
226 return rc; 205 return rc;
227} 206}
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
262 "session keyring for sig specified in mount " 241 "session keyring for sig specified in mount "
263 "option: [%s]\n", global_auth_tok->sig); 242 "option: [%s]\n", global_auth_tok->sig);
264 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 243 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
265 rc = 0; 244 goto out;
266 } else 245 } else
267 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 246 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
268 } 247 }
248out:
269 return rc; 249 return rc;
270} 250}
271 251
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
314 char *cipher_name_dst; 294 char *cipher_name_dst;
315 char *cipher_name_src; 295 char *cipher_name_src;
316 char *cipher_key_bytes_src; 296 char *cipher_key_bytes_src;
317 int cipher_name_len;
318 297
319 if (!options) { 298 if (!options) {
320 rc = -EINVAL; 299 rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
395 goto out; 374 goto out;
396 } 375 }
397 if (!cipher_name_set) { 376 if (!cipher_name_set) {
398 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 377 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
399 if (unlikely(cipher_name_len 378
400 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { 379 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
401 rc = -EINVAL; 380
402 BUG(); 381 strcpy(mount_crypt_stat->global_default_cipher_name,
403 goto out; 382 ECRYPTFS_DEFAULT_CIPHER);
404 }
405 memcpy(mount_crypt_stat->global_default_cipher_name,
406 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
407 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
408 = '\0';
409 } 383 }
410 if (!cipher_key_bytes_set) { 384 if (!cipher_key_bytes_set) {
411 mount_crypt_stat->global_default_cipher_key_size = 0; 385 mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
430 printk(KERN_WARNING "One or more global auth toks could not " 404 printk(KERN_WARNING "One or more global auth toks could not "
431 "properly register; rc = [%d]\n", rc); 405 "properly register; rc = [%d]\n", rc);
432 } 406 }
433 rc = 0;
434out: 407out:
435 return rc; 408 return rc;
436} 409}
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
679 .name = "ecryptfs_key_tfm_cache", 652 .name = "ecryptfs_key_tfm_cache",
680 .size = sizeof(struct ecryptfs_key_tfm), 653 .size = sizeof(struct ecryptfs_key_tfm),
681 }, 654 },
655 {
656 .cache = &ecryptfs_open_req_cache,
657 .name = "ecryptfs_open_req_cache",
658 .size = sizeof(struct ecryptfs_open_req),
659 },
682}; 660};
683 661
684static void ecryptfs_free_kmem_caches(void) 662static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
795 printk(KERN_ERR "sysfs registration failed\n"); 773 printk(KERN_ERR "sysfs registration failed\n");
796 goto out_unregister_filesystem; 774 goto out_unregister_filesystem;
797 } 775 }
776 rc = ecryptfs_init_kthread();
777 if (rc) {
778 printk(KERN_ERR "%s: kthread initialization failed; "
779 "rc = [%d]\n", __func__, rc);
780 goto out_do_sysfs_unregistration;
781 }
798 rc = ecryptfs_init_messaging(ecryptfs_transport); 782 rc = ecryptfs_init_messaging(ecryptfs_transport);
799 if (rc) { 783 if (rc) {
800 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " 784 printk(KERN_ERR "Failure occured while attempting to "
801 "initialize the eCryptfs netlink socket\n"); 785 "initialize the eCryptfs netlink socket\n");
802 goto out_do_sysfs_unregistration; 786 goto out_destroy_kthread;
803 } 787 }
804 rc = ecryptfs_init_crypto(); 788 rc = ecryptfs_init_crypto();
805 if (rc) { 789 if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
814 goto out; 798 goto out;
815out_release_messaging: 799out_release_messaging:
816 ecryptfs_release_messaging(ecryptfs_transport); 800 ecryptfs_release_messaging(ecryptfs_transport);
801out_destroy_kthread:
802 ecryptfs_destroy_kthread();
817out_do_sysfs_unregistration: 803out_do_sysfs_unregistration:
818 do_sysfs_unregistration(); 804 do_sysfs_unregistration();
819out_unregister_filesystem: 805out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
833 printk(KERN_ERR "Failure whilst attempting to destroy crypto; " 819 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
834 "rc = [%d]\n", rc); 820 "rc = [%d]\n", rc);
835 ecryptfs_release_messaging(ecryptfs_transport); 821 ecryptfs_release_messaging(ecryptfs_transport);
822 ecryptfs_destroy_kthread();
836 do_sysfs_unregistration(); 823 do_sysfs_unregistration();
837 unregister_filesystem(&ecryptfs_fs_type); 824 unregister_filesystem(&ecryptfs_fs_type);
838 ecryptfs_free_kmem_caches(); 825 ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e6..b484792a0996 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
358} 358}
359 359
360/** 360/**
361 * ecryptfs_miscdev_helo
362 * @euid: effective user id of miscdevess sending helo packet
363 * @user_ns: The namespace in which @euid applies
364 * @pid: miscdevess id of miscdevess sending helo packet
365 *
366 * Returns zero on success; non-zero otherwise
367 */
368static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
369 struct pid *pid)
370{
371 int rc;
372
373 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
374 pid);
375 if (rc)
376 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
377 return rc;
378}
379
380/**
381 * ecryptfs_miscdev_quit
382 * @euid: effective user id of miscdevess sending quit packet
383 * @user_ns: The namespace in which @euid applies
384 * @pid: miscdevess id of miscdevess sending quit packet
385 *
386 * Returns zero on success; non-zero otherwise
387 */
388static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
389 struct pid *pid)
390{
391 int rc;
392
393 rc = ecryptfs_process_quit(euid, user_ns, pid);
394 if (rc)
395 printk(KERN_WARNING
396 "Error processing QUIT message; rc = [%d]\n", rc);
397 return rc;
398}
399
400/**
401 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon 361 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
402 * @data: Bytes comprising struct ecryptfs_message 362 * @data: Bytes comprising struct ecryptfs_message
403 * @data_size: sizeof(struct ecryptfs_message) + data len 363 * @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
512 __func__, rc); 472 __func__, rc);
513 break; 473 break;
514 case ECRYPTFS_MSG_HELO: 474 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT: 475 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break; 476 break;
536 default: 477 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev " 478 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8ba..245c2dc02d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
32#include <linux/file.h> 32#include <linux/file.h>
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/scatterlist.h> 34#include <linux/scatterlist.h>
35#include <asm/unaligned.h>
35#include "ecryptfs_kernel.h" 36#include "ecryptfs_kernel.h"
36 37
37/** 38/**
@@ -372,7 +373,6 @@ out:
372 */ 373 */
373static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) 374static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
374{ 375{
375 u64 file_size;
376 char *file_size_virt; 376 char *file_size_virt;
377 int rc; 377 int rc;
378 378
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
381 rc = -ENOMEM; 381 rc = -ENOMEM;
382 goto out; 382 goto out;
383 } 383 }
384 file_size = (u64)i_size_read(ecryptfs_inode); 384 put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
385 file_size = cpu_to_be64(file_size);
386 memcpy(file_size_virt, &file_size, sizeof(u64));
387 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, 385 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
388 sizeof(u64)); 386 sizeof(u64));
389 kfree(file_size_virt); 387 kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
403 struct dentry *lower_dentry = 401 struct dentry *lower_dentry =
404 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; 402 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
405 struct inode *lower_inode = lower_dentry->d_inode; 403 struct inode *lower_inode = lower_dentry->d_inode;
406 u64 file_size;
407 int rc; 404 int rc;
408 405
409 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { 406 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
424 xattr_virt, PAGE_CACHE_SIZE); 421 xattr_virt, PAGE_CACHE_SIZE);
425 if (size < 0) 422 if (size < 0)
426 size = 8; 423 size = 8;
427 file_size = (u64)i_size_read(ecryptfs_inode); 424 put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
428 file_size = cpu_to_be64(file_size);
429 memcpy(xattr_virt, &file_size, sizeof(u64));
430 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, 425 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
431 xattr_virt, size, 0); 426 xattr_virt, size, 0);
432 mutex_unlock(&lower_inode->i_mutex); 427 mutex_unlock(&lower_inode->i_mutex);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..08bf558d0408 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
198 return file; 198 return file;
199} 199}
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd2(unsigned int count, int flags)
202{ 202{
203 int fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 205
206 /* Check the EFD_* constants for consistency. */
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
211 return -EINVAL;
212
206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
207 if (!ctx) 214 if (!ctx)
208 return -ENOMEM; 215 return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
214 * When we call this, the initialization must be complete, since 221 * When we call this, the initialization must be complete, since
215 * anon_inode_getfd() will install the fd. 222 * anon_inode_getfd() will install the fd.
216 */ 223 */
217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); 224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK));
218 if (fd < 0) 226 if (fd < 0)
219 kfree(ctx); 227 kfree(ctx);
220 return fd; 228 return fd;
221} 229}
222 230
231asmlinkage long sys_eventfd(unsigned int count)
232{
233 return sys_eventfd2(count, 0);
234}
235
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..0c87474f7917 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,20 +1046,25 @@ retry:
1046 * RB tree. With the current implementation, the "size" parameter is ignored 1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks). 1047 * (besides sanity checks).
1048 */ 1048 */
1049asmlinkage long sys_epoll_create(int size) 1049asmlinkage long sys_epoll_create1(int flags)
1050{ 1050{
1051 int error, fd = -1; 1051 int error, fd = -1;
1052 struct eventpoll *ep; 1052 struct eventpoll *ep;
1053 1053
1054 /* Check the EPOLL_* constant for consistency. */
1055 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1056
1057 if (flags & ~EPOLL_CLOEXEC)
1058 return -EINVAL;
1059
1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1060 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1055 current, size)); 1061 current, flags));
1056 1062
1057 /* 1063 /*
1058 * Sanity check on the size parameter, and create the internal data 1064 * Create the internal data structure ( "struct eventpoll" ).
1059 * structure ( "struct eventpoll" ).
1060 */ 1065 */
1061 error = -EINVAL; 1066 error = ep_alloc(&ep);
1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) { 1067 if (error < 0) {
1063 fd = error; 1068 fd = error;
1064 goto error_return; 1069 goto error_return;
1065 } 1070 }
@@ -1068,17 +1073,26 @@ asmlinkage long sys_epoll_create(int size)
1068 * Creates all the items needed to setup an eventpoll file. That is, 1073 * Creates all the items needed to setup an eventpoll file. That is,
1069 * a file structure and a free file descriptor. 1074 * a file structure and a free file descriptor.
1070 */ 1075 */
1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); 1076 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1077 flags & O_CLOEXEC);
1072 if (fd < 0) 1078 if (fd < 0)
1073 ep_free(ep); 1079 ep_free(ep);
1074 1080
1075error_return: 1081error_return:
1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1082 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1077 current, size, fd)); 1083 current, flags, fd));
1078 1084
1079 return fd; 1085 return fd;
1080} 1086}
1081 1087
1088asmlinkage long sys_epoll_create(int size)
1089{
1090 if (size < 0)
1091 return -EINVAL;
1092
1093 return sys_epoll_create1(0);
1094}
1095
1082/* 1096/*
1083 * The following function implements the controller interface for 1097 * The following function implements the controller interface for
1084 * the eventpoll file that enables the insertion/removal/change of 1098 * the eventpoll file that enables the insertion/removal/change of
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..5e559013e303 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/swap.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/init.h> 34#include <linux/init.h>
34#include <linux/pagemap.h>
35#include <linux/highmem.h> 35#include <linux/highmem.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/key.h> 37#include <linux/key.h>
38#include <linux/personality.h> 38#include <linux/personality.h>
39#include <linux/binfmts.h> 39#include <linux/binfmts.h>
40#include <linux/swap.h>
41#include <linux/utsname.h> 40#include <linux/utsname.h>
42#include <linux/pid_namespace.h> 41#include <linux/pid_namespace.h>
43#include <linux/module.h> 42#include <linux/module.h>
@@ -47,7 +46,6 @@
47#include <linux/mount.h> 46#include <linux/mount.h>
48#include <linux/security.h> 47#include <linux/security.h>
49#include <linux/syscalls.h> 48#include <linux/syscalls.h>
50#include <linux/rmap.h>
51#include <linux/tsacct_kern.h> 49#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
53#include <linux/audit.h> 51#include <linux/audit.h>
@@ -541,7 +539,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
541 /* 539 /*
542 * when the old and new regions overlap clear from new_end. 540 * when the old and new regions overlap clear from new_end.
543 */ 541 */
544 free_pgd_range(&tlb, new_end, old_end, new_end, 542 free_pgd_range(tlb, new_end, old_end, new_end,
545 vma->vm_next ? vma->vm_next->vm_start : 0); 543 vma->vm_next ? vma->vm_next->vm_start : 0);
546 } else { 544 } else {
547 /* 545 /*
@@ -550,7 +548,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
550 * have constraints on va-space that make this illegal (IA64) - 548 * have constraints on va-space that make this illegal (IA64) -
551 * for the others its just a little faster. 549 * for the others its just a little faster.
552 */ 550 */
553 free_pgd_range(&tlb, old_start, old_end, new_end, 551 free_pgd_range(tlb, old_start, old_end, new_end,
554 vma->vm_next ? vma->vm_next->vm_start : 0); 552 vma->vm_next ? vma->vm_next->vm_start : 0);
555 } 553 }
556 tlb_finish_mmu(tlb, new_end, old_end); 554 tlb_finish_mmu(tlb, new_end, old_end);
@@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
724 * Make sure that if there is a core dump in progress 722 * Make sure that if there is a core dump in progress
725 * for the old mm, we get out and die instead of going 723 * for the old mm, we get out and die instead of going
726 * through with the exec. We must hold mmap_sem around 724 * through with the exec. We must hold mmap_sem around
727 * checking core_waiters and changing tsk->mm. The 725 * checking core_state and changing tsk->mm.
728 * core-inducing thread will increment core_waiters for
729 * each thread whose ->mm == old_mm.
730 */ 726 */
731 down_read(&old_mm->mmap_sem); 727 down_read(&old_mm->mmap_sem);
732 if (unlikely(old_mm->core_waiters)) { 728 if (unlikely(old_mm->core_state)) {
733 up_read(&old_mm->mmap_sem); 729 up_read(&old_mm->mmap_sem);
734 return -EINTR; 730 return -EINTR;
735 } 731 }
@@ -1328,6 +1324,7 @@ int do_execve(char * filename,
1328 if (retval < 0) 1324 if (retval < 0)
1329 goto out; 1325 goto out;
1330 1326
1327 current->flags &= ~PF_KTHREAD;
1331 retval = search_binary_handler(bprm,regs); 1328 retval = search_binary_handler(bprm,regs);
1332 if (retval >= 0) { 1329 if (retval >= 0) {
1333 /* execve success */ 1330 /* execve success */
@@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt);
1382 * name into corename, which must have space for at least 1379 * name into corename, which must have space for at least
1383 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1380 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1384 */ 1381 */
1385static int format_corename(char *corename, const char *pattern, long signr) 1382static int format_corename(char *corename, int nr_threads, long signr)
1386{ 1383{
1387 const char *pat_ptr = pattern; 1384 const char *pat_ptr = core_pattern;
1385 int ispipe = (*pat_ptr == '|');
1388 char *out_ptr = corename; 1386 char *out_ptr = corename;
1389 char *const out_end = corename + CORENAME_MAX_SIZE; 1387 char *const out_end = corename + CORENAME_MAX_SIZE;
1390 int rc; 1388 int rc;
1391 int pid_in_pattern = 0; 1389 int pid_in_pattern = 0;
1392 int ispipe = 0;
1393
1394 if (*pattern == '|')
1395 ispipe = 1;
1396 1390
1397 /* Repeat as long as we have more pattern to process and more output 1391 /* Repeat as long as we have more pattern to process and more output
1398 space */ 1392 space */
@@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1493 * and core_uses_pid is set, then .%pid will be appended to 1487 * and core_uses_pid is set, then .%pid will be appended to
1494 * the filename. Do not do this for piped commands. */ 1488 * the filename. Do not do this for piped commands. */
1495 if (!ispipe && !pid_in_pattern 1489 if (!ispipe && !pid_in_pattern
1496 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1490 && (core_uses_pid || nr_threads)) {
1497 rc = snprintf(out_ptr, out_end - out_ptr, 1491 rc = snprintf(out_ptr, out_end - out_ptr,
1498 ".%d", task_tgid_vnr(current)); 1492 ".%d", task_tgid_vnr(current));
1499 if (rc > out_end - out_ptr) 1493 if (rc > out_end - out_ptr)
@@ -1505,9 +1499,10 @@ out:
1505 return ispipe; 1499 return ispipe;
1506} 1500}
1507 1501
1508static void zap_process(struct task_struct *start) 1502static int zap_process(struct task_struct *start)
1509{ 1503{
1510 struct task_struct *t; 1504 struct task_struct *t;
1505 int nr = 0;
1511 1506
1512 start->signal->flags = SIGNAL_GROUP_EXIT; 1507 start->signal->flags = SIGNAL_GROUP_EXIT;
1513 start->signal->group_stop_count = 0; 1508 start->signal->group_stop_count = 0;
@@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start)
1515 t = start; 1510 t = start;
1516 do { 1511 do {
1517 if (t != current && t->mm) { 1512 if (t != current && t->mm) {
1518 t->mm->core_waiters++;
1519 sigaddset(&t->pending.signal, SIGKILL); 1513 sigaddset(&t->pending.signal, SIGKILL);
1520 signal_wake_up(t, 1); 1514 signal_wake_up(t, 1);
1515 nr++;
1521 } 1516 }
1522 } while ((t = next_thread(t)) != start); 1517 } while_each_thread(start, t);
1518
1519 return nr;
1523} 1520}
1524 1521
1525static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1522static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1526 int exit_code) 1523 struct core_state *core_state, int exit_code)
1527{ 1524{
1528 struct task_struct *g, *p; 1525 struct task_struct *g, *p;
1529 unsigned long flags; 1526 unsigned long flags;
1530 int err = -EAGAIN; 1527 int nr = -EAGAIN;
1531 1528
1532 spin_lock_irq(&tsk->sighand->siglock); 1529 spin_lock_irq(&tsk->sighand->siglock);
1533 if (!signal_group_exit(tsk->signal)) { 1530 if (!signal_group_exit(tsk->signal)) {
1531 mm->core_state = core_state;
1534 tsk->signal->group_exit_code = exit_code; 1532 tsk->signal->group_exit_code = exit_code;
1535 zap_process(tsk); 1533 nr = zap_process(tsk);
1536 err = 0;
1537 } 1534 }
1538 spin_unlock_irq(&tsk->sighand->siglock); 1535 spin_unlock_irq(&tsk->sighand->siglock);
1539 if (err) 1536 if (unlikely(nr < 0))
1540 return err; 1537 return nr;
1541 1538
1542 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) 1539 if (atomic_read(&mm->mm_users) == nr + 1)
1543 goto done; 1540 goto done;
1544 1541 /*
1542 * We should find and kill all tasks which use this mm, and we should
1543 * count them correctly into ->nr_threads. We don't take tasklist
1544 * lock, but this is safe wrt:
1545 *
1546 * fork:
1547 * None of sub-threads can fork after zap_process(leader). All
1548 * processes which were created before this point should be
1549 * visible to zap_threads() because copy_process() adds the new
1550 * process to the tail of init_task.tasks list, and lock/unlock
1551 * of ->siglock provides a memory barrier.
1552 *
1553 * do_exit:
1554 * The caller holds mm->mmap_sem. This means that the task which
1555 * uses this mm can't pass exit_mm(), so it can't exit or clear
1556 * its ->mm.
1557 *
1558 * de_thread:
1559 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1560 * we must see either old or new leader, this does not matter.
1561 * However, it can change p->sighand, so lock_task_sighand(p)
1562 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1563 * it can't fail.
1564 *
1565 * Note also that "g" can be the old leader with ->mm == NULL
1566 * and already unhashed and thus removed from ->thread_group.
1567 * This is OK, __unhash_process()->list_del_rcu() does not
1568 * clear the ->next pointer, we will find the new leader via
1569 * next_thread().
1570 */
1545 rcu_read_lock(); 1571 rcu_read_lock();
1546 for_each_process(g) { 1572 for_each_process(g) {
1547 if (g == tsk->group_leader) 1573 if (g == tsk->group_leader)
1548 continue; 1574 continue;
1549 1575 if (g->flags & PF_KTHREAD)
1576 continue;
1550 p = g; 1577 p = g;
1551 do { 1578 do {
1552 if (p->mm) { 1579 if (p->mm) {
1553 if (p->mm == mm) { 1580 if (unlikely(p->mm == mm)) {
1554 /*
1555 * p->sighand can't disappear, but
1556 * may be changed by de_thread()
1557 */
1558 lock_task_sighand(p, &flags); 1581 lock_task_sighand(p, &flags);
1559 zap_process(p); 1582 nr += zap_process(p);
1560 unlock_task_sighand(p, &flags); 1583 unlock_task_sighand(p, &flags);
1561 } 1584 }
1562 break; 1585 break;
1563 } 1586 }
1564 } while ((p = next_thread(p)) != g); 1587 } while_each_thread(g, p);
1565 } 1588 }
1566 rcu_read_unlock(); 1589 rcu_read_unlock();
1567done: 1590done:
1568 return mm->core_waiters; 1591 atomic_set(&core_state->nr_threads, nr);
1592 return nr;
1569} 1593}
1570 1594
1571static int coredump_wait(int exit_code) 1595static int coredump_wait(int exit_code, struct core_state *core_state)
1572{ 1596{
1573 struct task_struct *tsk = current; 1597 struct task_struct *tsk = current;
1574 struct mm_struct *mm = tsk->mm; 1598 struct mm_struct *mm = tsk->mm;
1575 struct completion startup_done;
1576 struct completion *vfork_done; 1599 struct completion *vfork_done;
1577 int core_waiters; 1600 int core_waiters;
1578 1601
1579 init_completion(&mm->core_done); 1602 init_completion(&core_state->startup);
1580 init_completion(&startup_done); 1603 core_state->dumper.task = tsk;
1581 mm->core_startup_done = &startup_done; 1604 core_state->dumper.next = NULL;
1582 1605 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1583 core_waiters = zap_threads(tsk, mm, exit_code);
1584 up_write(&mm->mmap_sem); 1606 up_write(&mm->mmap_sem);
1585 1607
1586 if (unlikely(core_waiters < 0)) 1608 if (unlikely(core_waiters < 0))
@@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code)
1597 } 1619 }
1598 1620
1599 if (core_waiters) 1621 if (core_waiters)
1600 wait_for_completion(&startup_done); 1622 wait_for_completion(&core_state->startup);
1601fail: 1623fail:
1602 BUG_ON(mm->core_waiters);
1603 return core_waiters; 1624 return core_waiters;
1604} 1625}
1605 1626
1627static void coredump_finish(struct mm_struct *mm)
1628{
1629 struct core_thread *curr, *next;
1630 struct task_struct *task;
1631
1632 next = mm->core_state->dumper.next;
1633 while ((curr = next) != NULL) {
1634 next = curr->next;
1635 task = curr->task;
1636 /*
1637 * see exit_mm(), curr->task must not see
1638 * ->task == NULL before we read ->next.
1639 */
1640 smp_mb();
1641 curr->task = NULL;
1642 wake_up_process(task);
1643 }
1644
1645 mm->core_state = NULL;
1646}
1647
1606/* 1648/*
1607 * set_dumpable converts traditional three-value dumpable to two flags and 1649 * set_dumpable converts traditional three-value dumpable to two flags and
1608 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1650 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm)
1654 1696
1655int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1697int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1656{ 1698{
1699 struct core_state core_state;
1657 char corename[CORENAME_MAX_SIZE + 1]; 1700 char corename[CORENAME_MAX_SIZE + 1];
1658 struct mm_struct *mm = current->mm; 1701 struct mm_struct *mm = current->mm;
1659 struct linux_binfmt * binfmt; 1702 struct linux_binfmt * binfmt;
@@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1677 /* 1720 /*
1678 * If another thread got here first, or we are not dumpable, bail out. 1721 * If another thread got here first, or we are not dumpable, bail out.
1679 */ 1722 */
1680 if (mm->core_waiters || !get_dumpable(mm)) { 1723 if (mm->core_state || !get_dumpable(mm)) {
1681 up_write(&mm->mmap_sem); 1724 up_write(&mm->mmap_sem);
1682 goto fail; 1725 goto fail;
1683 } 1726 }
@@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1692 current->fsuid = 0; /* Dump root private */ 1735 current->fsuid = 0; /* Dump root private */
1693 } 1736 }
1694 1737
1695 retval = coredump_wait(exit_code); 1738 retval = coredump_wait(exit_code, &core_state);
1696 if (retval < 0) 1739 if (retval < 0)
1697 goto fail; 1740 goto fail;
1698 1741
@@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1707 * uses lock_kernel() 1750 * uses lock_kernel()
1708 */ 1751 */
1709 lock_kernel(); 1752 lock_kernel();
1710 ispipe = format_corename(corename, core_pattern, signr); 1753 ispipe = format_corename(corename, retval, signr);
1711 unlock_kernel(); 1754 unlock_kernel();
1712 /* 1755 /*
1713 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1756 * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1829,7 @@ fail_unlock:
1786 argv_free(helper_argv); 1829 argv_free(helper_argv);
1787 1830
1788 current->fsuid = fsuid; 1831 current->fsuid = fsuid;
1789 complete_all(&mm->core_done); 1832 coredump_finish(mm);
1790fail: 1833fail:
1791 return retval; 1834 return retval;
1792} 1835}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..31308a3b0b8b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/mount.h> 32#include <linux/mount.h>
33#include <linux/log2.h> 33#include <linux/log2.h>
34#include <linux/quotaops.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "xattr.h" 37#include "xattr.h"
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d5213..70c0dbdcdcb7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len)
16{ 16{
17 const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
19 19
20 if (list && total_len <= list_size) { 20 if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353d..e8219f8eae9f 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
12#include <linux/ext2_fs.h> 12#include <linux/ext2_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_TRUSTED_PREFIX "trusted."
16
17static size_t 15static size_t
18ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!capable(CAP_SYS_ADMIN)) 22 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b5..92495d28c62f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
11#include "ext2.h" 11#include "ext2.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14#define XATTR_USER_PREFIX "user."
15
16static size_t 14static size_t
17ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
18 const char *name, size_t name_len) 16 const char *name, size_t name_len)
19{ 17{
20 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
22 20
23 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd72427..2eea96ec78ed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
272 272
273 while (n) { 273 while (n) {
274 /* Do the node's children first */ 274 /* Do the node's children first */
275 if ((n)->rb_left) { 275 if (n->rb_left) {
276 n = n->rb_left; 276 n = n->rb_left;
277 continue; 277 continue;
278 } 278 }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
301 parent->rb_right = NULL; 301 parent->rb_right = NULL;
302 n = parent; 302 n = parent;
303 } 303 }
304 root->rb_node = NULL;
305} 304}
306 305
307 306
308static struct dir_private_info *create_dir_info(loff_t pos) 307static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
309{ 308{
310 struct dir_private_info *p; 309 struct dir_private_info *p;
311 310
312 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); 311 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
313 if (!p) 312 if (!p)
314 return NULL; 313 return NULL;
315 p->root.rb_node = NULL;
316 p->curr_node = NULL;
317 p->extra_fname = NULL;
318 p->last_pos = 0;
319 p->curr_hash = pos2maj_hash(pos); 314 p->curr_hash = pos2maj_hash(pos);
320 p->curr_minor_hash = pos2min_hash(pos); 315 p->curr_minor_hash = pos2min_hash(pos);
321 p->next_hash = 0;
322 return p; 316 return p;
323} 317}
324 318
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
433 int ret; 427 int ret;
434 428
435 if (!info) { 429 if (!info) {
436 info = create_dir_info(filp->f_pos); 430 info = ext3_htree_create_dir_info(filp->f_pos);
437 if (!info) 431 if (!info)
438 return -ENOMEM; 432 return -ENOMEM;
439 filp->private_data = info; 433 filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
669 if (IS_ERR(inode)) 669 if (IS_ERR(inode))
670 goto iget_failed; 670 goto iget_failed;
671 671
672 /*
673 * If the orphans has i_nlinks > 0 then it should be able to be
674 * truncated, otherwise it won't be removed from the orphan list
675 * during processing and an infinite loop will result.
676 */
677 if (inode->i_nlink && !ext3_can_truncate(inode))
678 goto bad_orphan;
679
672 if (NEXT_ORPHAN(inode) > max_ino) 680 if (NEXT_ORPHAN(inode) > max_ino)
673 goto bad_orphan; 681 goto bad_orphan;
674 brelse(bitmap_bh); 682 brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
690 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 698 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
691 NEXT_ORPHAN(inode)); 699 NEXT_ORPHAN(inode));
692 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 700 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
701 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
693 /* Avoid freeing blocks if we got a bad deleted inode */ 702 /* Avoid freeing blocks if we got a bad deleted inode */
694 if (inode->i_nlink == 0) 703 if (inode->i_nlink == 0)
695 inode->i_blocks = 0; 704 inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..3bf07d70b914 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2127 2127
2128 if (this_bh) { 2128 if (this_bh) {
2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); 2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
2130 ext3_journal_dirty_metadata(handle, this_bh); 2130
2131 /*
2132 * The buffer head should have an attached journal head at this
2133 * point. However, if the data is corrupted and an indirect
2134 * block pointed to itself, it would have been detached when
2135 * the block was cleared. Check for this instead of OOPSing.
2136 */
2137 if (bh2jh(this_bh))
2138 ext3_journal_dirty_metadata(handle, this_bh);
2139 else
2140 ext3_error(inode->i_sb, "ext3_free_data",
2141 "circular indirect block detected, "
2142 "inode=%lu, block=%llu",
2143 inode->i_ino,
2144 (unsigned long long)this_bh->b_blocknr);
2131 } 2145 }
2132} 2146}
2133 2147
@@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2253 } 2267 }
2254} 2268}
2255 2269
2270int ext3_can_truncate(struct inode *inode)
2271{
2272 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2273 return 0;
2274 if (S_ISREG(inode->i_mode))
2275 return 1;
2276 if (S_ISDIR(inode->i_mode))
2277 return 1;
2278 if (S_ISLNK(inode->i_mode))
2279 return !ext3_inode_is_fast_symlink(inode);
2280 return 0;
2281}
2282
2256/* 2283/*
2257 * ext3_truncate() 2284 * ext3_truncate()
2258 * 2285 *
@@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode)
2297 unsigned blocksize = inode->i_sb->s_blocksize; 2324 unsigned blocksize = inode->i_sb->s_blocksize;
2298 struct page *page; 2325 struct page *page;
2299 2326
2300 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2327 if (!ext3_can_truncate(inode))
2301 S_ISLNK(inode->i_mode)))
2302 return;
2303 if (ext3_inode_is_fast_symlink(inode))
2304 return;
2305 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2306 return; 2328 return;
2307 2329
2308 /* 2330 /*
@@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
2513 } 2535 }
2514 if (!buffer_uptodate(bh)) { 2536 if (!buffer_uptodate(bh)) {
2515 lock_buffer(bh); 2537 lock_buffer(bh);
2538
2539 /*
2540 * If the buffer has the write error flag, we have failed
2541 * to write out another inode in the same block. In this
2542 * case, we don't have to read the block because we may
2543 * read the old inode data successfully.
2544 */
2545 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
2546 set_buffer_uptodate(bh);
2547
2516 if (buffer_uptodate(bh)) { 2548 if (buffer_uptodate(bh)) {
2517 /* someone brought it uptodate while we waited */ 2549 /* someone brought it uptodate while we waited */
2518 unlock_buffer(bh); 2550 unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f1..de13e919cd81 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
240{ 240{
241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - 241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
242 EXT3_DIR_REC_LEN(2) - infosize; 242 EXT3_DIR_REC_LEN(2) - infosize;
243 return 0? 20: entry_space / sizeof(struct dx_entry); 243 return entry_space / sizeof(struct dx_entry);
244} 244}
245 245
246static inline unsigned dx_node_limit (struct inode *dir) 246static inline unsigned dx_node_limit (struct inode *dir)
247{ 247{
248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
249 return 0? 22: entry_space / sizeof(struct dx_entry); 249 return entry_space / sizeof(struct dx_entry);
250} 250}
251 251
252/* 252/*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
991 de = (struct ext3_dir_entry_2 *) bh->b_data; 991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - 992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0)); 993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) 994 for (; de < top; de = ext3_next_entry(de)) {
995 if (ext3_match (namelen, name, de)) { 995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 if (!ext3_check_dir_entry("ext3_find_entry", 996 + ((char *) de - bh->b_data);
997 dir, de, bh, 997
998 (block<<EXT3_BLOCK_SIZE_BITS(sb)) 998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 +((char *)de - bh->b_data))) { 999 brelse(bh);
1000 brelse (bh);
1001 *err = ERR_BAD_DX_DIR; 1000 *err = ERR_BAD_DX_DIR;
1002 goto errout; 1001 goto errout;
1003 } 1002 }
1004 *res_dir = de; 1003
1005 dx_release (frames); 1004 if (ext3_match(namelen, name, de)) {
1006 return bh; 1005 *res_dir = de;
1006 dx_release(frames);
1007 return bh;
1008 }
1007 } 1009 }
1008 brelse (bh); 1010 brelse (bh);
1009 /* Check to see if we should continue to search */ 1011 /* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e8..615788c6843a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
842 int data_opt = 0; 842 int data_opt = 0;
843 int option; 843 int option;
844#ifdef CONFIG_QUOTA 844#ifdef CONFIG_QUOTA
845 int qtype; 845 int qtype, qfmt;
846 char *qname; 846 char *qname;
847#endif 847#endif
848 848
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
1018 case Opt_grpjquota: 1018 case Opt_grpjquota:
1019 qtype = GRPQUOTA; 1019 qtype = GRPQUOTA;
1020set_qf_name: 1020set_qf_name:
1021 if (sb_any_quota_enabled(sb)) { 1021 if ((sb_any_quota_enabled(sb) ||
1022 sb_any_quota_suspended(sb)) &&
1023 !sbi->s_qf_names[qtype]) {
1022 printk(KERN_ERR 1024 printk(KERN_ERR
1023 "EXT3-fs: Cannot change journalled " 1025 "EXT3-fs: Cannot change journaled "
1024 "quota options when quota turned on.\n"); 1026 "quota options when quota turned on.\n");
1025 return 0; 1027 return 0;
1026 } 1028 }
@@ -1056,9 +1058,11 @@ set_qf_name:
1056 case Opt_offgrpjquota: 1058 case Opt_offgrpjquota:
1057 qtype = GRPQUOTA; 1059 qtype = GRPQUOTA;
1058clear_qf_name: 1060clear_qf_name:
1059 if (sb_any_quota_enabled(sb)) { 1061 if ((sb_any_quota_enabled(sb) ||
1062 sb_any_quota_suspended(sb)) &&
1063 sbi->s_qf_names[qtype]) {
1060 printk(KERN_ERR "EXT3-fs: Cannot change " 1064 printk(KERN_ERR "EXT3-fs: Cannot change "
1061 "journalled quota options when " 1065 "journaled quota options when "
1062 "quota turned on.\n"); 1066 "quota turned on.\n");
1063 return 0; 1067 return 0;
1064 } 1068 }
@@ -1069,10 +1073,20 @@ clear_qf_name:
1069 sbi->s_qf_names[qtype] = NULL; 1073 sbi->s_qf_names[qtype] = NULL;
1070 break; 1074 break;
1071 case Opt_jqfmt_vfsold: 1075 case Opt_jqfmt_vfsold:
1072 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1076 qfmt = QFMT_VFS_OLD;
1073 break; 1077 goto set_qf_format;
1074 case Opt_jqfmt_vfsv0: 1078 case Opt_jqfmt_vfsv0:
1075 sbi->s_jquota_fmt = QFMT_VFS_V0; 1079 qfmt = QFMT_VFS_V0;
1080set_qf_format:
1081 if ((sb_any_quota_enabled(sb) ||
1082 sb_any_quota_suspended(sb)) &&
1083 sbi->s_jquota_fmt != qfmt) {
1084 printk(KERN_ERR "EXT3-fs: Cannot change "
1085 "journaled quota options when "
1086 "quota turned on.\n");
1087 return 0;
1088 }
1089 sbi->s_jquota_fmt = qfmt;
1076 break; 1090 break;
1077 case Opt_quota: 1091 case Opt_quota:
1078 case Opt_usrquota: 1092 case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
1084 set_opt(sbi->s_mount_opt, GRPQUOTA); 1098 set_opt(sbi->s_mount_opt, GRPQUOTA);
1085 break; 1099 break;
1086 case Opt_noquota: 1100 case Opt_noquota:
1087 if (sb_any_quota_enabled(sb)) { 1101 if (sb_any_quota_enabled(sb) ||
1102 sb_any_quota_suspended(sb)) {
1088 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1103 printk(KERN_ERR "EXT3-fs: Cannot change quota "
1089 "options when quota turned on.\n"); 1104 "options when quota turned on.\n");
1090 return 0; 1105 return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
1169 } 1184 }
1170 1185
1171 if (!sbi->s_jquota_fmt) { 1186 if (!sbi->s_jquota_fmt) {
1172 printk(KERN_ERR "EXT3-fs: journalled quota format " 1187 printk(KERN_ERR "EXT3-fs: journaled quota format "
1173 "not specified.\n"); 1188 "not specified.\n");
1174 return 0; 1189 return 0;
1175 } 1190 }
1176 } else { 1191 } else {
1177 if (sbi->s_jquota_fmt) { 1192 if (sbi->s_jquota_fmt) {
1178 printk(KERN_ERR "EXT3-fs: journalled quota format " 1193 printk(KERN_ERR "EXT3-fs: journaled quota format "
1179 "specified with no journalling " 1194 "specified with no journaling "
1180 "enabled.\n"); 1195 "enabled.\n");
1181 return 0; 1196 return 0;
1182 } 1197 }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1370 int ret = ext3_quota_on_mount(sb, i); 1385 int ret = ext3_quota_on_mount(sb, i);
1371 if (ret < 0) 1386 if (ret < 0)
1372 printk(KERN_ERR 1387 printk(KERN_ERR
1373 "EXT3-fs: Cannot turn on journalled " 1388 "EXT3-fs: Cannot turn on journaled "
1374 "quota: error %d\n", ret); 1389 "quota: error %d\n", ret);
1375 } 1390 }
1376 } 1391 }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2712 2727
2713static int ext3_mark_dquot_dirty(struct dquot *dquot) 2728static int ext3_mark_dquot_dirty(struct dquot *dquot)
2714{ 2729{
2715 /* Are we journalling quotas? */ 2730 /* Are we journaling quotas? */
2716 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2731 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2717 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2732 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2718 dquot_mark_dquot_dirty(dquot); 2733 dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2759 2774
2760 if (!test_opt(sb, QUOTA)) 2775 if (!test_opt(sb, QUOTA))
2761 return -EINVAL; 2776 return -EINVAL;
2762 /* Not journalling quota or remount? */ 2777 /* When remounting, no checks are needed and in fact, path is NULL */
2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2778 if (remount)
2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2765 return vfs_quota_on(sb, type, format_id, path, remount); 2779 return vfs_quota_on(sb, type, format_id, path, remount);
2780
2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2781 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2767 if (err) 2782 if (err)
2768 return err; 2783 return err;
2784
2769 /* Quotafile not on the same filesystem? */ 2785 /* Quotafile not on the same filesystem? */
2770 if (nd.path.mnt->mnt_sb != sb) { 2786 if (nd.path.mnt->mnt_sb != sb) {
2771 path_put(&nd.path); 2787 path_put(&nd.path);
2772 return -EXDEV; 2788 return -EXDEV;
2773 } 2789 }
2774 /* Quotafile not in fs root? */ 2790 /* Journaling quota? */
2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2791 if (EXT3_SB(sb)->s_qf_names[type]) {
2776 printk(KERN_WARNING 2792 /* Quotafile not of fs root? */
2777 "EXT3-fs: Quota file not on filesystem root. " 2793 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2778 "Journalled quota will not work.\n"); 2794 printk(KERN_WARNING
2795 "EXT3-fs: Quota file not on filesystem root. "
2796 "Journaled quota will not work.\n");
2797 }
2798
2799 /*
2800 * When we journal data on quota file, we have to flush journal to see
2801 * all updates to the file when we bypass pagecache...
2802 */
2803 if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
2804 /*
2805 * We don't need to lock updates but journal_flush() could
2806 * otherwise be livelocked...
2807 */
2808 journal_lock_updates(EXT3_SB(sb)->s_journal);
2809 journal_flush(EXT3_SB(sb)->s_journal);
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 }
2812
2779 path_put(&nd.path); 2813 path_put(&nd.path);
2780 return vfs_quota_on(sb, type, format_id, path, remount); 2814 return vfs_quota_on(sb, type, format_id, path, remount);
2781} 2815}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94e..37b81097bdf2 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 21
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55ce..c7c41a410c4b 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted."
17
18static size_t 16static size_t
19ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
20 const char *name, size_t name_len) 18 const char *name, size_t name_len)
21{ 19{
22 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
23 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
24 22
25 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c440..430fe63b31b3 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
12#include <linux/ext3_fs.h> 12#include <linux/ext3_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user."
16
17static size_t 15static size_t
18ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e626..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/dirent.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
23#include <linux/compat.h> 22#include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 123 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 124 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 125 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, 126static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
128 int uni_xlate, struct nls_table *nls) 127 int uni_xlate, struct nls_table *nls)
129{ 128{
130 wchar_t *ip, ec; 129 const wchar_t *ip;
130 wchar_t ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
132 int charlen; 132 int charlen;
133 int k; 133 int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
167 return (op - ascii); 167 return (op - ascii);
168} 168}
169 169
170static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
171 unsigned char *buf, int size)
172{
173 if (sbi->options.utf8)
174 return utf8_wcstombs(buf, uni, size);
175 else
176 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
177 sbi->nls_io);
178}
179
170static inline int 180static inline int
171fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) 181fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
172{ 182{
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
227 return len; 237 return len;
228} 238}
229 239
240static inline int fat_name_match(struct msdos_sb_info *sbi,
241 const unsigned char *a, int a_len,
242 const unsigned char *b, int b_len)
243{
244 if (a_len != b_len)
245 return 0;
246
247 if (sbi->options.name_check != 's')
248 return !nls_strnicmp(sbi->nls_io, a, b, a_len);
249 else
250 return !memcmp(a, b, a_len);
251}
252
230enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; 253enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
231 254
232/** 255/**
@@ -302,6 +325,19 @@ parse_long:
302} 325}
303 326
304/* 327/*
328 * Maximum buffer size of short name.
329 * [(MSDOS_NAME + '.') * max one char + nul]
330 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
331 */
332#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
333/*
334 * Maximum buffer size of unicode chars from slots.
335 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
336 */
337#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
338#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
339
340/*
305 * Return values: negative -> error, 0 -> not found, positive -> found, 341 * Return values: negative -> error, 0 -> not found, positive -> found,
306 * value is the total amount of slots, including the shortname entry. 342 * value is the total amount of slots, including the shortname entry.
307 */ 343 */
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
312 struct msdos_sb_info *sbi = MSDOS_SB(sb); 348 struct msdos_sb_info *sbi = MSDOS_SB(sb);
313 struct buffer_head *bh = NULL; 349 struct buffer_head *bh = NULL;
314 struct msdos_dir_entry *de; 350 struct msdos_dir_entry *de;
315 struct nls_table *nls_io = sbi->nls_io;
316 struct nls_table *nls_disk = sbi->nls_disk; 351 struct nls_table *nls_disk = sbi->nls_disk;
317 wchar_t bufuname[14];
318 unsigned char nr_slots; 352 unsigned char nr_slots;
319 int xlate_len; 353 wchar_t bufuname[14];
320 wchar_t *unicode = NULL; 354 wchar_t *unicode = NULL;
321 unsigned char work[MSDOS_NAME]; 355 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL; 356 unsigned char bufname[FAT_MAX_SHORT_SIZE];
323 int uni_xlate = sbi->options.unicode_xlate;
324 int utf8 = sbi->options.utf8;
325 int anycase = (sbi->options.name_check != 's');
326 unsigned short opt_shortname = sbi->options.shortname; 357 unsigned short opt_shortname = sbi->options.shortname;
327 loff_t cpos = 0; 358 loff_t cpos = 0;
328 int chl, i, j, last_u, err; 359 int chl, i, j, last_u, err, len;
329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333 360
334 err = -ENOENT; 361 err = -ENOENT;
335 while(1) { 362 while (1) {
336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 363 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
337 goto EODir; 364 goto end_of_dir;
338parse_record: 365parse_record:
339 nr_slots = 0; 366 nr_slots = 0;
340 if (de->name[0] == DELETED_FLAG) 367 if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
353 else if (status == PARSE_NOT_LONGNAME) 380 else if (status == PARSE_NOT_LONGNAME)
354 goto parse_record; 381 goto parse_record;
355 else if (status == PARSE_EOF) 382 else if (status == PARSE_EOF)
356 goto EODir; 383 goto end_of_dir;
357 } 384 }
358 385
359 memcpy(work, de->name, sizeof(de->name)); 386 memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
394 if (!last_u) 421 if (!last_u)
395 continue; 422 continue;
396 423
424 /* Compare shortname */
397 bufuname[last_u] = 0x0000; 425 bufuname[last_u] = 0x0000;
398 xlate_len = utf8 426 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX) 427 if (fat_name_match(sbi, name, name_len, bufname, len))
400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); 428 goto found;
401 if (xlate_len == name_len)
402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
403 (anycase && !nls_strnicmp(nls_io, name, bufname,
404 xlate_len)))
405 goto Found;
406 429
407 if (nr_slots) { 430 if (nr_slots) {
408 xlate_len = utf8 431 void *longname = unicode + FAT_MAX_UNI_CHARS;
409 ?utf8_wcstombs(bufname, unicode, PATH_MAX) 432 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); 433
411 if (xlate_len != name_len) 434 /* Compare longname */
412 continue; 435 len = fat_uni_to_x8(sbi, unicode, longname, size);
413 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 436 if (fat_name_match(sbi, name, name_len, longname, len))
414 (anycase && !nls_strnicmp(nls_io, name, bufname, 437 goto found;
415 xlate_len)))
416 goto Found;
417 } 438 }
418 } 439 }
419 440
420Found: 441found:
421 nr_slots++; /* include the de */ 442 nr_slots++; /* include the de */
422 sinfo->slot_off = cpos - nr_slots * sizeof(*de); 443 sinfo->slot_off = cpos - nr_slots * sizeof(*de);
423 sinfo->nr_slots = nr_slots; 444 sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
425 sinfo->bh = bh; 446 sinfo->bh = bh;
426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 447 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
427 err = 0; 448 err = 0;
428EODir: 449end_of_dir:
429 if (bufname)
430 __putname(bufname);
431 if (unicode) 450 if (unicode)
432 __putname(unicode); 451 __putname(unicode);
433 452
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
453 struct msdos_sb_info *sbi = MSDOS_SB(sb); 472 struct msdos_sb_info *sbi = MSDOS_SB(sb);
454 struct buffer_head *bh; 473 struct buffer_head *bh;
455 struct msdos_dir_entry *de; 474 struct msdos_dir_entry *de;
456 struct nls_table *nls_io = sbi->nls_io;
457 struct nls_table *nls_disk = sbi->nls_disk; 475 struct nls_table *nls_disk = sbi->nls_disk;
458 unsigned char long_slots; 476 unsigned char nr_slots;
459 const char *fill_name;
460 int fill_len;
461 wchar_t bufuname[14]; 477 wchar_t bufuname[14];
462 wchar_t *unicode = NULL; 478 wchar_t *unicode = NULL;
463 unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; 479 unsigned char c, work[MSDOS_NAME];
464 unsigned long lpos, dummy, *furrfu = &lpos; 480 unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
465 int uni_xlate = sbi->options.unicode_xlate; 481 unsigned short opt_shortname = sbi->options.shortname;
466 int isvfat = sbi->options.isvfat; 482 int isvfat = sbi->options.isvfat;
467 int utf8 = sbi->options.utf8;
468 int nocase = sbi->options.nocase; 483 int nocase = sbi->options.nocase;
469 unsigned short opt_shortname = sbi->options.shortname; 484 const char *fill_name = NULL;
470 unsigned long inum; 485 unsigned long inum;
471 int chi, chl, i, i2, j, last, last_u, dotoffset = 0; 486 unsigned long lpos, dummy, *furrfu = &lpos;
472 loff_t cpos; 487 loff_t cpos;
488 int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
473 int ret = 0; 489 int ret = 0;
474 490
475 lock_super(sb); 491 lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
489 cpos = 0; 505 cpos = 0;
490 } 506 }
491 } 507 }
492 if (cpos & (sizeof(struct msdos_dir_entry)-1)) { 508 if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
493 ret = -ENOENT; 509 ret = -ENOENT;
494 goto out; 510 goto out;
495 } 511 }
496 512
497 bh = NULL; 513 bh = NULL;
498GetNew: 514get_new:
499 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 515 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
500 goto EODir; 516 goto end_of_dir;
501parse_record: 517parse_record:
502 long_slots = 0; 518 nr_slots = 0;
503 /* Check for long filename entry */ 519 /*
504 if (isvfat) { 520 * Check for long filename entry, but if short_only, we don't
521 * need to parse long filename.
522 */
523 if (isvfat && !short_only) {
505 if (de->name[0] == DELETED_FLAG) 524 if (de->name[0] == DELETED_FLAG)
506 goto RecEnd; 525 goto record_end;
507 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) 526 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
508 goto RecEnd; 527 goto record_end;
509 if (de->attr != ATTR_EXT && IS_FREE(de->name)) 528 if (de->attr != ATTR_EXT && IS_FREE(de->name))
510 goto RecEnd; 529 goto record_end;
511 } else { 530 } else {
512 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) 531 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
513 goto RecEnd; 532 goto record_end;
514 } 533 }
515 534
516 if (isvfat && de->attr == ATTR_EXT) { 535 if (isvfat && de->attr == ATTR_EXT) {
517 int status = fat_parse_long(inode, &cpos, &bh, &de, 536 int status = fat_parse_long(inode, &cpos, &bh, &de,
518 &unicode, &long_slots); 537 &unicode, &nr_slots);
519 if (status < 0) { 538 if (status < 0) {
520 filp->f_pos = cpos; 539 filp->f_pos = cpos;
521 ret = status; 540 ret = status;
522 goto out; 541 goto out;
523 } else if (status == PARSE_INVALID) 542 } else if (status == PARSE_INVALID)
524 goto RecEnd; 543 goto record_end;
525 else if (status == PARSE_NOT_LONGNAME) 544 else if (status == PARSE_NOT_LONGNAME)
526 goto parse_record; 545 goto parse_record;
527 else if (status == PARSE_EOF) 546 else if (status == PARSE_EOF)
528 goto EODir; 547 goto end_of_dir;
548
549 if (nr_slots) {
550 void *longname = unicode + FAT_MAX_UNI_CHARS;
551 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
552 int len = fat_uni_to_x8(sbi, unicode, longname, size);
553
554 fill_name = longname;
555 fill_len = len;
556 /* !both && !short_only, so we don't need shortname. */
557 if (!both)
558 goto start_filldir;
559 }
529 } 560 }
530 561
531 if (sbi->options.dotsOK) { 562 if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
587 } 618 }
588 } 619 }
589 if (!last) 620 if (!last)
590 goto RecEnd; 621 goto record_end;
591 622
592 i = last + dotoffset; 623 i = last + dotoffset;
593 j = last_u; 624 j = last_u;
594 625
595 lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); 626 if (isvfat) {
627 bufuname[j] = 0x0000;
628 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
629 }
630 if (nr_slots) {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = fill_name;
635 p->long_len = fill_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 } else {
641 fill_name = bufname;
642 fill_len = i;
643 }
644
645start_filldir:
646 lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
596 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) 647 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
597 inum = inode->i_ino; 648 inum = inode->i_ino;
598 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { 649 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
607 inum = iunique(sb, MSDOS_ROOT_INO); 658 inum = iunique(sb, MSDOS_ROOT_INO);
608 } 659 }
609 660
610 if (isvfat) {
611 bufuname[j] = 0x0000;
612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
614 }
615
616 fill_name = bufname;
617 fill_len = i;
618 if (!short_only && long_slots) {
619 /* convert the unicode long name. 261 is maximum size
620 * of unicode buffer. (13 * slots + nul) */
621 void *longname = unicode + 261;
622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
623 int long_len = utf8
624 ? utf8_wcstombs(longname, unicode, buf_size)
625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
626
627 if (!both) {
628 fill_name = longname;
629 fill_len = long_len;
630 } else {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = longname;
635 p->long_len = long_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 }
641 }
642 if (filldir(dirent, fill_name, fill_len, *furrfu, inum, 661 if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
643 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) 662 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
644 goto FillFailed; 663 goto fill_failed;
645 664
646RecEnd: 665record_end:
647 furrfu = &lpos; 666 furrfu = &lpos;
648 filp->f_pos = cpos; 667 filp->f_pos = cpos;
649 goto GetNew; 668 goto get_new;
650EODir: 669end_of_dir:
651 filp->f_pos = cpos; 670 filp->f_pos = cpos;
652FillFailed: 671fill_failed:
653 brelse(bh); 672 brelse(bh);
654 if (unicode) 673 if (unicode)
655 __putname(unicode); 674 __putname(unicode);
@@ -715,7 +734,7 @@ efault: \
715 return -EFAULT; \ 734 return -EFAULT; \
716} 735}
717 736
718FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) 737FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
719 738
720static int fat_ioctl_readdir(struct inode *inode, struct file *filp, 739static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
721 void __user *dirent, filldir_t filldir, 740 void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
741static int fat_dir_ioctl(struct inode *inode, struct file *filp, 760static int fat_dir_ioctl(struct inode *inode, struct file *filp,
742 unsigned int cmd, unsigned long arg) 761 unsigned int cmd, unsigned long arg)
743{ 762{
744 struct dirent __user *d1 = (struct dirent __user *)arg; 763 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
745 int short_only, both; 764 int short_only, both;
746 765
747 switch (cmd) { 766 switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
757 return fat_generic_ioctl(inode, filp, cmd, arg); 776 return fat_generic_ioctl(inode, filp, cmd, arg);
758 } 777 }
759 778
760 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) 779 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
761 return -EFAULT; 780 return -EFAULT;
762 /* 781 /*
763 * Yes, we don't need this put_user() absolutely. However old 782 * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1082 goto error_free; 1101 goto error_free;
1083 } 1102 }
1084 1103
1085 fat_date_unix2dos(ts->tv_sec, &time, &date); 1104 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
1086 1105
1087 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1106 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1088 /* filling the new directory slots ("." and ".." entries) */ 1107 /* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2e..23676f9d79ce 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 383 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 384 inode->i_mtime.tv_sec =
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); 385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
386 sbi->options.tz_utc);
386 inode->i_mtime.tv_nsec = 0; 387 inode->i_mtime.tv_nsec = 0;
387 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
388 int secs = de->ctime_cs / 100; 389 int secs = de->ctime_cs / 100;
389 int csecs = de->ctime_cs % 100; 390 int csecs = de->ctime_cs % 100;
390 inode->i_ctime.tv_sec = 391 inode->i_ctime.tv_sec =
391 date_dos2unix(le16_to_cpu(de->ctime), 392 date_dos2unix(le16_to_cpu(de->ctime),
392 le16_to_cpu(de->cdate)) + secs; 393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
393 inode->i_ctime.tv_nsec = csecs * 10000000; 395 inode->i_ctime.tv_nsec = csecs * 10000000;
394 inode->i_atime.tv_sec = 396 inode->i_atime.tv_sec =
395 date_dos2unix(0, le16_to_cpu(de->adate)); 397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
396 inode->i_atime.tv_nsec = 0; 399 inode->i_atime.tv_nsec = 0;
397 } else 400 } else
398 inode->i_ctime = inode->i_atime = inode->i_mtime; 401 inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@ retry:
591 raw_entry->attr = fat_attr(inode); 594 raw_entry->attr = fat_attr(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 595 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 596 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); 597 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
598 &raw_entry->date, sbi->options.tz_utc);
595 if (sbi->options.isvfat) { 599 if (sbi->options.isvfat) {
596 __le16 atime; 600 __le16 atime;
597 fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); 601 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
598 fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); 602 &raw_entry->cdate, sbi->options.tz_utc);
603 fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
604 &raw_entry->adate, sbi->options.tz_utc);
599 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + 605 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
600 inode->i_ctime.tv_nsec / 10000000; 606 inode->i_ctime.tv_nsec / 10000000;
601 } 607 }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
836 } 842 }
837 if (sbi->options.flush) 843 if (sbi->options.flush)
838 seq_puts(m, ",flush"); 844 seq_puts(m, ",flush");
845 if (opts->tz_utc)
846 seq_puts(m, ",tz=UTC");
839 847
840 return 0; 848 return 0;
841} 849}
@@ -848,7 +856,7 @@ enum {
848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 856 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 857 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 858 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
851 Opt_obsolate, Opt_flush, Opt_err, 859 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
852}; 860};
853 861
854static match_table_t fat_tokens = { 862static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
883 {Opt_obsolate, "cvf_options=%100s"}, 891 {Opt_obsolate, "cvf_options=%100s"},
884 {Opt_obsolate, "posix"}, 892 {Opt_obsolate, "posix"},
885 {Opt_flush, "flush"}, 893 {Opt_flush, "flush"},
894 {Opt_tz_utc, "tz=UTC"},
886 {Opt_err, NULL}, 895 {Opt_err, NULL},
887}; 896};
888static match_table_t msdos_tokens = { 897static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
947 opts->utf8 = opts->unicode_xlate = 0; 956 opts->utf8 = opts->unicode_xlate = 0;
948 opts->numtail = 1; 957 opts->numtail = 1;
949 opts->usefree = opts->nocase = 0; 958 opts->usefree = opts->nocase = 0;
959 opts->tz_utc = 0;
950 *debug = 0; 960 *debug = 0;
951 961
952 if (!options) 962 if (!options)
953 return 0; 963 goto out;
954 964
955 while ((p = strsep(&options, ",")) != NULL) { 965 while ((p = strsep(&options, ",")) != NULL) {
956 int token; 966 int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1036 case Opt_flush: 1046 case Opt_flush:
1037 opts->flush = 1; 1047 opts->flush = 1;
1038 break; 1048 break;
1049 case Opt_tz_utc:
1050 opts->tz_utc = 1;
1051 break;
1039 1052
1040 /* msdos specific */ 1053 /* msdos specific */
1041 case Opt_dots: 1054 case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1104 return -EINVAL; 1117 return -EINVAL;
1105 } 1118 }
1106 } 1119 }
1120
1121out:
1107 /* UTF-8 doesn't provide FAT semantics */ 1122 /* UTF-8 doesn't provide FAT semantics */
1108 if (!strcmp(opts->iocharset, "utf8")) { 1123 if (!strcmp(opts->iocharset, "utf8")) {
1109 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1124 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
1110 " for FAT filesystems, filesystem will be case sensitive!\n"); 1125 " for FAT filesystems, filesystem will be "
1126 "case sensitive!\n");
1111 } 1127 }
1112 1128
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */ 1129 /* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
142}; 142};
143 143
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date) 145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
146{ 146{
147 int month, year, secs; 147 int month, year, secs;
148 148
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
157 month < 2 ? 1 : 0)+3653); 157 month < 2 ? 1 : 0)+3653);
158 /* days since 1.1.70 plus 80's leap day */ 158 /* days since 1.1.70 plus 80's leap day */
159 secs += sys_tz.tz_minuteswest*60; 159 if (!tz_utc)
160 secs += sys_tz.tz_minuteswest*60;
160 return secs; 161 return secs;
161} 162}
162 163
163/* Convert linear UNIX date to a MS-DOS time/date pair. */ 164/* Convert linear UNIX date to a MS-DOS time/date pair. */
164void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) 165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
165{ 166{
166 int day, year, nl_day, month; 167 int day, year, nl_day, month;
167 168
168 unix_date -= sys_tz.tz_minuteswest*60; 169 if (!tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60;
169 171
170 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
171 if (unix_date < 315532800) 173 if (unix_date < 315532800)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..9679fcbdeaa0 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
125 return fd; 125 return fd;
126} 126}
127 127
128asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 128asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
129{ 129{
130 int err = -EBADF; 130 int err = -EBADF;
131 struct file * file, *tofree; 131 struct file * file, *tofree;
132 struct files_struct * files = current->files; 132 struct files_struct * files = current->files;
133 struct fdtable *fdt; 133 struct fdtable *fdt;
134 134
135 if ((flags & ~O_CLOEXEC) != 0)
136 return -EINVAL;
137
135 spin_lock(&files->file_lock); 138 spin_lock(&files->file_lock);
136 if (!(file = fcheck(oldfd))) 139 if (!(file = fcheck(oldfd)))
137 goto out_unlock; 140 goto out_unlock;
@@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
163 166
164 rcu_assign_pointer(fdt->fd[newfd], file); 167 rcu_assign_pointer(fdt->fd[newfd], file);
165 FD_SET(newfd, fdt->open_fds); 168 FD_SET(newfd, fdt->open_fds);
166 FD_CLR(newfd, fdt->close_on_exec); 169 if (flags & O_CLOEXEC)
170 FD_SET(newfd, fdt->close_on_exec);
171 else
172 FD_CLR(newfd, fdt->close_on_exec);
167 spin_unlock(&files->file_lock); 173 spin_unlock(&files->file_lock);
168 174
169 if (tofree) 175 if (tofree)
@@ -181,6 +187,11 @@ out_fput:
181 goto out; 187 goto out;
182} 188}
183 189
190asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
191{
192 return sys_dup3(oldfd, newfd, 0);
193}
194
184asmlinkage long sys_dup(unsigned int fildes) 195asmlinkage long sys_dup(unsigned int fildes)
185{ 196{
186 int ret = -EBADF; 197 int ret = -EBADF;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b906..51d0035ff07e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
97 * timeout is unknown (unlink, rmdir, rename and in some cases 97 * timeout is unknown (unlink, rmdir, rename and in some cases
98 * lookup) 98 * lookup)
99 */ 99 */
100static void fuse_invalidate_entry_cache(struct dentry *entry) 100void fuse_invalidate_entry_cache(struct dentry *entry)
101{ 101{
102 fuse_dentry_settime(entry, 0); 102 fuse_dentry_settime(entry, 0);
103} 103}
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
112 fuse_invalidate_entry_cache(entry); 112 fuse_invalidate_entry_cache(entry);
113} 113}
114 114
115static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, 115static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
116 struct dentry *entry, 116 u64 nodeid, struct qstr *name,
117 struct fuse_entry_out *outarg) 117 struct fuse_entry_out *outarg)
118{ 118{
119 struct fuse_conn *fc = get_fuse_conn(dir);
120
121 memset(outarg, 0, sizeof(struct fuse_entry_out)); 119 memset(outarg, 0, sizeof(struct fuse_entry_out));
122 req->in.h.opcode = FUSE_LOOKUP; 120 req->in.h.opcode = FUSE_LOOKUP;
123 req->in.h.nodeid = get_node_id(dir); 121 req->in.h.nodeid = nodeid;
124 req->in.numargs = 1; 122 req->in.numargs = 1;
125 req->in.args[0].size = entry->d_name.len + 1; 123 req->in.args[0].size = name->len + 1;
126 req->in.args[0].value = entry->d_name.name; 124 req->in.args[0].value = name->name;
127 req->out.numargs = 1; 125 req->out.numargs = 1;
128 if (fc->minor < 9) 126 if (fc->minor < 9)
129 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 127 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
189 attr_version = fuse_get_attr_version(fc); 187 attr_version = fuse_get_attr_version(fc);
190 188
191 parent = dget_parent(entry); 189 parent = dget_parent(entry);
192 fuse_lookup_init(req, parent->d_inode, entry, &outarg); 190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
191 &entry->d_name, &outarg);
193 request_send(fc, req); 192 request_send(fc, req);
194 dput(parent); 193 dput(parent);
195 err = req->out.h.error; 194 err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
225 return !nodeid || nodeid == FUSE_ROOT_ID; 224 return !nodeid || nodeid == FUSE_ROOT_ID;
226} 225}
227 226
228static struct dentry_operations fuse_dentry_operations = { 227struct dentry_operations fuse_dentry_operations = {
229 .d_revalidate = fuse_dentry_revalidate, 228 .d_revalidate = fuse_dentry_revalidate,
230}; 229};
231 230
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
239 * Add a directory inode to a dentry, ensuring that no other dentry 238 * Add a directory inode to a dentry, ensuring that no other dentry
240 * refers to this inode. Called with fc->inst_mutex. 239 * refers to this inode. Called with fc->inst_mutex.
241 */ 240 */
242static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) 241static struct dentry *fuse_d_add_directory(struct dentry *entry,
242 struct inode *inode)
243{ 243{
244 struct dentry *alias = d_find_alias(inode); 244 struct dentry *alias = d_find_alias(inode);
245 if (alias) { 245 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
246 /* This tries to shrink the subtree below alias */ 246 /* This tries to shrink the subtree below alias */
247 fuse_invalidate_entry(alias); 247 fuse_invalidate_entry(alias);
248 dput(alias); 248 dput(alias);
249 if (!list_empty(&inode->i_dentry)) 249 if (!list_empty(&inode->i_dentry))
250 return -EBUSY; 250 return ERR_PTR(-EBUSY);
251 } else {
252 dput(alias);
251 } 253 }
252 d_add(entry, inode); 254 return d_splice_alias(inode, entry);
253 return 0;
254} 255}
255 256
256static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 257int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
257 struct nameidata *nd) 258 struct fuse_entry_out *outarg, struct inode **inode)
258{ 259{
259 int err; 260 struct fuse_conn *fc = get_fuse_conn_super(sb);
260 struct fuse_entry_out outarg;
261 struct inode *inode = NULL;
262 struct fuse_conn *fc = get_fuse_conn(dir);
263 struct fuse_req *req; 261 struct fuse_req *req;
264 struct fuse_req *forget_req; 262 struct fuse_req *forget_req;
265 u64 attr_version; 263 u64 attr_version;
264 int err;
266 265
267 if (entry->d_name.len > FUSE_NAME_MAX) 266 *inode = NULL;
268 return ERR_PTR(-ENAMETOOLONG); 267 err = -ENAMETOOLONG;
268 if (name->len > FUSE_NAME_MAX)
269 goto out;
269 270
270 req = fuse_get_req(fc); 271 req = fuse_get_req(fc);
272 err = PTR_ERR(req);
271 if (IS_ERR(req)) 273 if (IS_ERR(req))
272 return ERR_CAST(req); 274 goto out;
273 275
274 forget_req = fuse_get_req(fc); 276 forget_req = fuse_get_req(fc);
277 err = PTR_ERR(forget_req);
275 if (IS_ERR(forget_req)) { 278 if (IS_ERR(forget_req)) {
276 fuse_put_request(fc, req); 279 fuse_put_request(fc, req);
277 return ERR_CAST(forget_req); 280 goto out;
278 } 281 }
279 282
280 attr_version = fuse_get_attr_version(fc); 283 attr_version = fuse_get_attr_version(fc);
281 284
282 fuse_lookup_init(req, dir, entry, &outarg); 285 fuse_lookup_init(fc, req, nodeid, name, outarg);
283 request_send(fc, req); 286 request_send(fc, req);
284 err = req->out.h.error; 287 err = req->out.h.error;
285 fuse_put_request(fc, req); 288 fuse_put_request(fc, req);
286 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 289 /* Zero nodeid is same as -ENOENT, but with valid timeout */
287 if (!err && outarg.nodeid && 290 if (err || !outarg->nodeid)
288 (invalid_nodeid(outarg.nodeid) || 291 goto out_put_forget;
289 !fuse_valid_type(outarg.attr.mode))) 292
290 err = -EIO; 293 err = -EIO;
291 if (!err && outarg.nodeid) { 294 if (!outarg->nodeid)
292 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 295 goto out_put_forget;
293 &outarg.attr, entry_attr_timeout(&outarg), 296 if (!fuse_valid_type(outarg->attr.mode))
294 attr_version); 297 goto out_put_forget;
295 if (!inode) { 298
296 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 299 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
297 return ERR_PTR(-ENOMEM); 300 &outarg->attr, entry_attr_timeout(outarg),
298 } 301 attr_version);
302 err = -ENOMEM;
303 if (!*inode) {
304 fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
305 goto out;
299 } 306 }
307 err = 0;
308
309 out_put_forget:
300 fuse_put_request(fc, forget_req); 310 fuse_put_request(fc, forget_req);
301 if (err && err != -ENOENT) 311 out:
302 return ERR_PTR(err); 312 return err;
313}
314
315static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
316 struct nameidata *nd)
317{
318 int err;
319 struct fuse_entry_out outarg;
320 struct inode *inode;
321 struct dentry *newent;
322 struct fuse_conn *fc = get_fuse_conn(dir);
323 bool outarg_valid = true;
324
325 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
326 &outarg, &inode);
327 if (err == -ENOENT) {
328 outarg_valid = false;
329 err = 0;
330 }
331 if (err)
332 goto out_err;
333
334 err = -EIO;
335 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
336 goto out_iput;
303 337
304 if (inode && S_ISDIR(inode->i_mode)) { 338 if (inode && S_ISDIR(inode->i_mode)) {
305 mutex_lock(&fc->inst_mutex); 339 mutex_lock(&fc->inst_mutex);
306 err = fuse_d_add_directory(entry, inode); 340 newent = fuse_d_add_directory(entry, inode);
307 mutex_unlock(&fc->inst_mutex); 341 mutex_unlock(&fc->inst_mutex);
308 if (err) { 342 err = PTR_ERR(newent);
309 iput(inode); 343 if (IS_ERR(newent))
310 return ERR_PTR(err); 344 goto out_iput;
311 } 345 } else {
312 } else 346 newent = d_splice_alias(inode, entry);
313 d_add(entry, inode); 347 }
314 348
349 entry = newent ? newent : entry;
315 entry->d_op = &fuse_dentry_operations; 350 entry->d_op = &fuse_dentry_operations;
316 if (!err) 351 if (outarg_valid)
317 fuse_change_entry_timeout(entry, &outarg); 352 fuse_change_entry_timeout(entry, &outarg);
318 else 353 else
319 fuse_invalidate_entry_cache(entry); 354 fuse_invalidate_entry_cache(entry);
320 return NULL; 355
356 return newent;
357
358 out_iput:
359 iput(inode);
360 out_err:
361 return ERR_PTR(err);
321} 362}
322 363
323/* 364/*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1f..67ff2c6a8f63 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
1342 int err; 1342 int err;
1343 1343
1344 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
1345 /* NLM needs asynchronous locks, which we don't support yet */
1346 return -ENOLCK;
1347 }
1348
1344 /* Unlock on close is handled by the flush method */ 1349 /* Unlock on close is handled by the flush method */
1345 if (fl->fl_flags & FL_CLOSE) 1350 if (fl->fl_flags & FL_CLOSE)
1346 return 0; 1351 return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1365 struct fuse_conn *fc = get_fuse_conn(inode); 1370 struct fuse_conn *fc = get_fuse_conn(inode);
1366 int err; 1371 int err;
1367 1372
1368 if (cmd == F_GETLK) { 1373 if (cmd == F_CANCELLK) {
1374 err = 0;
1375 } else if (cmd == F_GETLK) {
1369 if (fc->no_lock) { 1376 if (fc->no_lock) {
1370 posix_test_lock(file, fl); 1377 posix_test_lock(file, fl);
1371 err = 0; 1378 err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1373 err = fuse_getlk(file, fl); 1380 err = fuse_getlk(file, fl);
1374 } else { 1381 } else {
1375 if (fc->no_lock) 1382 if (fc->no_lock)
1376 err = posix_lock_file_wait(file, fl); 1383 err = posix_lock_file(file, fl, NULL);
1377 else 1384 else
1378 err = fuse_setlk(file, fl, 0); 1385 err = fuse_setlk(file, fl, 0);
1379 } 1386 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4f..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
363 /** Do not send separate SETATTR request before open(O_TRUNC) */ 363 /** Do not send separate SETATTR request before open(O_TRUNC) */
364 unsigned atomic_o_trunc : 1; 364 unsigned atomic_o_trunc : 1;
365 365
366 /** Filesystem supports NFS exporting. Only set in INIT */
367 unsigned export_support : 1;
368
366 /* 369 /*
367 * The following bitfields are only for optimization purposes 370 * The following bitfields are only for optimization purposes
368 * and hence races in setting them will not cause malfunction 371 * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
464/** Device operations */ 467/** Device operations */
465extern const struct file_operations fuse_dev_operations; 468extern const struct file_operations fuse_dev_operations;
466 469
470extern struct dentry_operations fuse_dentry_operations;
471
467/** 472/**
468 * Get a filled in inode 473 * Get a filled in inode
469 */ 474 */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
471 int generation, struct fuse_attr *attr, 476 int generation, struct fuse_attr *attr,
472 u64 attr_valid, u64 attr_version); 477 u64 attr_valid, u64 attr_version);
473 478
479int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
480 struct fuse_entry_out *outarg, struct inode **inode);
481
474/** 482/**
475 * Send FORGET command 483 * Send FORGET command
476 */ 484 */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
604 */ 612 */
605void fuse_invalidate_attr(struct inode *inode); 613void fuse_invalidate_attr(struct inode *inode);
606 614
615void fuse_invalidate_entry_cache(struct dentry *entry);
616
607/** 617/**
608 * Acquire reference to fuse_conn 618 * Acquire reference to fuse_conn
609 */ 619 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c8..7d2f7d6e22e2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
18#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h>
21 22
22MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
23MODULE_DESCRIPTION("Filesystem in Userspace"); 24MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
552 return fuse_iget(sb, 1, 0, &attr, 0, 0); 553 return fuse_iget(sb, 1, 0, &attr, 0, 0);
553} 554}
554 555
556struct fuse_inode_handle
557{
558 u64 nodeid;
559 u32 generation;
560};
561
562static struct dentry *fuse_get_dentry(struct super_block *sb,
563 struct fuse_inode_handle *handle)
564{
565 struct fuse_conn *fc = get_fuse_conn_super(sb);
566 struct inode *inode;
567 struct dentry *entry;
568 int err = -ESTALE;
569
570 if (handle->nodeid == 0)
571 goto out_err;
572
573 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
574 if (!inode) {
575 struct fuse_entry_out outarg;
576 struct qstr name;
577
578 if (!fc->export_support)
579 goto out_err;
580
581 name.len = 1;
582 name.name = ".";
583 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
584 &inode);
585 if (err && err != -ENOENT)
586 goto out_err;
587 if (err || !inode) {
588 err = -ESTALE;
589 goto out_err;
590 }
591 err = -EIO;
592 if (get_node_id(inode) != handle->nodeid)
593 goto out_iput;
594 }
595 err = -ESTALE;
596 if (inode->i_generation != handle->generation)
597 goto out_iput;
598
599 entry = d_alloc_anon(inode);
600 err = -ENOMEM;
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry);
607 }
608
609 return entry;
610
611 out_iput:
612 iput(inode);
613 out_err:
614 return ERR_PTR(err);
615}
616
617static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
618 int connectable)
619{
620 struct inode *inode = dentry->d_inode;
621 bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
622 int len = encode_parent ? 6 : 3;
623 u64 nodeid;
624 u32 generation;
625
626 if (*max_len < len)
627 return 255;
628
629 nodeid = get_fuse_inode(inode)->nodeid;
630 generation = inode->i_generation;
631
632 fh[0] = (u32)(nodeid >> 32);
633 fh[1] = (u32)(nodeid & 0xffffffff);
634 fh[2] = generation;
635
636 if (encode_parent) {
637 struct inode *parent;
638
639 spin_lock(&dentry->d_lock);
640 parent = dentry->d_parent->d_inode;
641 nodeid = get_fuse_inode(parent)->nodeid;
642 generation = parent->i_generation;
643 spin_unlock(&dentry->d_lock);
644
645 fh[3] = (u32)(nodeid >> 32);
646 fh[4] = (u32)(nodeid & 0xffffffff);
647 fh[5] = generation;
648 }
649
650 *max_len = len;
651 return encode_parent ? 0x82 : 0x81;
652}
653
654static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
655 struct fid *fid, int fh_len, int fh_type)
656{
657 struct fuse_inode_handle handle;
658
659 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
660 return NULL;
661
662 handle.nodeid = (u64) fid->raw[0] << 32;
663 handle.nodeid |= (u64) fid->raw[1];
664 handle.generation = fid->raw[2];
665 return fuse_get_dentry(sb, &handle);
666}
667
668static struct dentry *fuse_fh_to_parent(struct super_block *sb,
669 struct fid *fid, int fh_len, int fh_type)
670{
671 struct fuse_inode_handle parent;
672
673 if (fh_type != 0x82 || fh_len < 6)
674 return NULL;
675
676 parent.nodeid = (u64) fid->raw[3] << 32;
677 parent.nodeid |= (u64) fid->raw[4];
678 parent.generation = fid->raw[5];
679 return fuse_get_dentry(sb, &parent);
680}
681
682static struct dentry *fuse_get_parent(struct dentry *child)
683{
684 struct inode *child_inode = child->d_inode;
685 struct fuse_conn *fc = get_fuse_conn(child_inode);
686 struct inode *inode;
687 struct dentry *parent;
688 struct fuse_entry_out outarg;
689 struct qstr name;
690 int err;
691
692 if (!fc->export_support)
693 return ERR_PTR(-ESTALE);
694
695 name.len = 2;
696 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode);
699 if (err && err != -ENOENT)
700 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent);
712 }
713
714 return parent;
715}
716
717static const struct export_operations fuse_export_operations = {
718 .fh_to_dentry = fuse_fh_to_dentry,
719 .fh_to_parent = fuse_fh_to_parent,
720 .encode_fh = fuse_encode_fh,
721 .get_parent = fuse_get_parent,
722};
723
555static const struct super_operations fuse_super_operations = { 724static const struct super_operations fuse_super_operations = {
556 .alloc_inode = fuse_alloc_inode, 725 .alloc_inode = fuse_alloc_inode,
557 .destroy_inode = fuse_destroy_inode, 726 .destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
581 fc->no_lock = 1; 750 fc->no_lock = 1;
582 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 751 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
583 fc->atomic_o_trunc = 1; 752 fc->atomic_o_trunc = 1;
753 if (arg->minor >= 9) {
754 /* LOOKUP has dependency on proto version */
755 if (arg->flags & FUSE_EXPORT_SUPPORT)
756 fc->export_support = 1;
757 }
584 if (arg->flags & FUSE_BIG_WRITES) 758 if (arg->flags & FUSE_BIG_WRITES)
585 fc->big_writes = 1; 759 fc->big_writes = 1;
586 } else { 760 } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
607 arg->minor = FUSE_KERNEL_MINOR_VERSION; 781 arg->minor = FUSE_KERNEL_MINOR_VERSION;
608 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 782 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
609 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 783 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
610 FUSE_BIG_WRITES; 784 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
611 req->in.h.opcode = FUSE_INIT; 785 req->in.h.opcode = FUSE_INIT;
612 req->in.numargs = 1; 786 req->in.numargs = 1;
613 req->in.args[0].size = sizeof(*arg); 787 req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
652 sb->s_magic = FUSE_SUPER_MAGIC; 826 sb->s_magic = FUSE_SUPER_MAGIC;
653 sb->s_op = &fuse_super_operations; 827 sb->s_op = &fuse_super_operations;
654 sb->s_maxbytes = MAX_LFS_FILESIZE; 828 sb->s_maxbytes = MAX_LFS_FILESIZE;
829 sb->s_export_op = &fuse_export_operations;
655 830
656 file = fget(d.fd); 831 file = fget(d.fd);
657 if (!file) 832 if (!file)
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf0..c6e97366e8ac 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
145 if (!*num_bits) 145 if (!*num_bits)
146 return 0; 146 return 0;
147 147
148 down(&HFS_SB(sb)->bitmap_lock); 148 mutex_lock(&HFS_SB(sb)->bitmap_lock);
149 bitmap = HFS_SB(sb)->bitmap; 149 bitmap = HFS_SB(sb)->bitmap;
150 150
151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); 151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
162 HFS_SB(sb)->free_ablocks -= *num_bits; 162 HFS_SB(sb)->free_ablocks -= *num_bits;
163 hfs_bitmap_dirty(sb); 163 hfs_bitmap_dirty(sb);
164out: 164out:
165 up(&HFS_SB(sb)->bitmap_lock); 165 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
166 return pos; 166 return pos;
167} 167}
168 168
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
205 if ((start + count) > HFS_SB(sb)->fs_ablocks) 205 if ((start + count) > HFS_SB(sb)->fs_ablocks)
206 return -2; 206 return -2;
207 207
208 down(&HFS_SB(sb)->bitmap_lock); 208 mutex_lock(&HFS_SB(sb)->bitmap_lock);
209 /* bitmap is always on a 32-bit boundary */ 209 /* bitmap is always on a 32-bit boundary */
210 curr = HFS_SB(sb)->bitmap + (start / 32); 210 curr = HFS_SB(sb)->bitmap + (start / 32);
211 len = count; 211 len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
236 } 236 }
237out: 237out:
238 HFS_SB(sb)->free_ablocks += len; 238 HFS_SB(sb)->free_ablocks += len;
239 up(&HFS_SB(sb)->bitmap_lock); 239 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
240 hfs_bitmap_dirty(sb); 240 hfs_bitmap_dirty(sb);
241 241
242 return 0; 242 return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a785202..9b9d6395bad3 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
40 { 40 {
41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb; 41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
42 HFS_I(tree->inode)->flags = 0; 42 HFS_I(tree->inode)->flags = 0;
43 init_MUTEX(&HFS_I(tree->inode)->extents_lock); 43 mutex_init(&HFS_I(tree->inode)->extents_lock);
44 switch (id) { 44 switch (id) {
45 case HFS_EXT_CNID: 45 case HFS_EXT_CNID:
46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a5..2c16316d2917 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
343 goto done; 343 goto done;
344 } 344 }
345 345
346 down(&HFS_I(inode)->extents_lock); 346 mutex_lock(&HFS_I(inode)->extents_lock);
347 res = hfs_ext_read_extent(inode, ablock); 347 res = hfs_ext_read_extent(inode, ablock);
348 if (!res) 348 if (!res)
349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, 349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
350 ablock - HFS_I(inode)->cached_start); 350 ablock - HFS_I(inode)->cached_start);
351 else { 351 else {
352 up(&HFS_I(inode)->extents_lock); 352 mutex_unlock(&HFS_I(inode)->extents_lock);
353 return -EIO; 353 return -EIO;
354 } 354 }
355 up(&HFS_I(inode)->extents_lock); 355 mutex_unlock(&HFS_I(inode)->extents_lock);
356 356
357done: 357done:
358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start + 358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
375 u32 start, len, goal; 375 u32 start, len, goal;
376 int res; 376 int res;
377 377
378 down(&HFS_I(inode)->extents_lock); 378 mutex_lock(&HFS_I(inode)->extents_lock);
379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) 379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); 380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
381 else { 381 else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
425 goto insert_extent; 425 goto insert_extent;
426 } 426 }
427out: 427out:
428 up(&HFS_I(inode)->extents_lock); 428 mutex_unlock(&HFS_I(inode)->extents_lock);
429 if (!res) { 429 if (!res) {
430 HFS_I(inode)->alloc_blocks += len; 430 HFS_I(inode)->alloc_blocks += len;
431 mark_inode_dirty(inode); 431 mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
487 if (blk_cnt == alloc_cnt) 487 if (blk_cnt == alloc_cnt)
488 goto out; 488 goto out;
489 489
490 down(&HFS_I(inode)->extents_lock); 490 mutex_lock(&HFS_I(inode)->extents_lock);
491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd); 491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
492 while (1) { 492 while (1) {
493 if (alloc_cnt == HFS_I(inode)->first_blocks) { 493 if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
514 hfs_brec_remove(&fd); 514 hfs_brec_remove(&fd);
515 } 515 }
516 hfs_find_exit(&fd); 516 hfs_find_exit(&fd);
517 up(&HFS_I(inode)->extents_lock); 517 mutex_unlock(&HFS_I(inode)->extents_lock);
518 518
519 HFS_I(inode)->alloc_blocks = blk_cnt; 519 HFS_I(inode)->alloc_blocks = blk_cnt;
520out: 520out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f675..9955232fdf8c 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
16 17
@@ -53,7 +54,7 @@ struct hfs_inode_info {
53 struct list_head open_dir_list; 54 struct list_head open_dir_list;
54 struct inode *rsrc_inode; 55 struct inode *rsrc_inode;
55 56
56 struct semaphore extents_lock; 57 struct mutex extents_lock;
57 58
58 u16 alloc_blocks, clump_blocks; 59 u16 alloc_blocks, clump_blocks;
59 sector_t fs_blocks; 60 sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
139 140
140 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
141 142
142 struct semaphore bitmap_lock; 143 struct mutex bitmap_lock;
143 144
144 unsigned long flags; 145 unsigned long flags;
145 146
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff4..dc4ec640e875 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
150 if (!inode) 150 if (!inode)
151 return NULL; 151 return NULL;
152 152
153 init_MUTEX(&HFS_I(inode)->extents_lock); 153 mutex_init(&HFS_I(inode)->extents_lock);
154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
156 inode->i_ino = HFS_SB(sb)->next_id++; 156 inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
281 281
282 HFS_I(inode)->flags = 0; 282 HFS_I(inode)->flags = 0;
283 HFS_I(inode)->rsrc_inode = NULL; 283 HFS_I(inode)->rsrc_inode = NULL;
284 init_MUTEX(&HFS_I(inode)->extents_lock); 284 mutex_init(&HFS_I(inode)->extents_lock);
285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
286 286
287 /* Initialize the inode */ 287 /* Initialize the inode */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf6..ac2ec5ef66e4 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
372 372
373 sb->s_op = &hfs_super_operations; 373 sb->s_op = &hfs_super_operations;
374 sb->s_flags |= MS_NODIRATIME; 374 sb->s_flags |= MS_NODIRATIME;
375 init_MUTEX(&sbi->bitmap_lock); 375 mutex_init(&sbi->bitmap_lock);
376 376
377 res = hfs_mdb_get(sb); 377 res = hfs_mdb_get(sb);
378 if (res) { 378 if (res) {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd7886..fec8f61227ff 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 down(&HFSPLUS_I(inode).extents_lock); 202 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 203 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 204 if (!res) {
205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - 205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
206 HFSPLUS_I(inode).cached_start); 206 HFSPLUS_I(inode).cached_start);
207 } else { 207 } else {
208 up(&HFSPLUS_I(inode).extents_lock); 208 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
209 return -EIO; 209 return -EIO;
210 } 210 }
211 up(&HFSPLUS_I(inode).extents_lock); 211 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
212 212
213done: 213done:
214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); 214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
355 return -ENOSPC; 355 return -ENOSPC;
356 } 356 }
357 357
358 down(&HFSPLUS_I(inode).extents_lock); 358 mutex_lock(&HFSPLUS_I(inode).extents_lock);
359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) 359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); 360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
361 else { 361 else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
408 goto insert_extent; 408 goto insert_extent;
409 } 409 }
410out: 410out:
411 up(&HFSPLUS_I(inode).extents_lock); 411 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
412 if (!res) { 412 if (!res) {
413 HFSPLUS_I(inode).alloc_blocks += len; 413 HFSPLUS_I(inode).alloc_blocks += len;
414 mark_inode_dirty(inode); 414 mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
465 if (blk_cnt == alloc_cnt) 465 if (blk_cnt == alloc_cnt)
466 goto out; 466 goto out;
467 467
468 down(&HFSPLUS_I(inode).extents_lock); 468 mutex_lock(&HFSPLUS_I(inode).extents_lock);
469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); 469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
470 while (1) { 470 while (1) {
471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { 471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
492 hfs_brec_remove(&fd); 492 hfs_brec_remove(&fd);
493 } 493 }
494 hfs_find_exit(&fd); 494 hfs_find_exit(&fd);
495 up(&HFSPLUS_I(inode).extents_lock); 495 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
496 496
497 HFSPLUS_I(inode).alloc_blocks = blk_cnt; 497 HFSPLUS_I(inode).alloc_blocks = blk_cnt;
498out: 498out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d5..f027a905225f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
11#define _LINUX_HFSPLUS_FS_H 11#define _LINUX_HFSPLUS_FS_H
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
16 17
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
154 155
155 156
156struct hfsplus_inode_info { 157struct hfsplus_inode_info {
157 struct semaphore extents_lock; 158 struct mutex extents_lock;
158 u32 clump_blocks, alloc_blocks; 159 u32 clump_blocks, alloc_blocks;
159 sector_t fs_blocks; 160 sector_t fs_blocks;
160 /* Allocation extents from catalog record or volume header */ 161 /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c4..cc3b5e24339b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
163 163
164 inode->i_ino = dir->i_ino; 164 inode->i_ino = dir->i_ino;
165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
166 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 166 mutex_init(&HFSPLUS_I(inode).extents_lock);
167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; 167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
168 168
169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); 169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
316 inode->i_nlink = 1; 316 inode->i_nlink = 1;
317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
319 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 319 mutex_init(&HFSPLUS_I(inode).extents_lock);
320 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 320 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
321 HFSPLUS_I(inode).flags = 0; 321 HFSPLUS_I(inode).flags = 0;
322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); 322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d8..3859118531c7 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
34 return inode; 34 return inode;
35 35
36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
37 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 37 mutex_init(&HFSPLUS_I(inode).extents_lock);
38 HFSPLUS_I(inode).flags = 0; 38 HFSPLUS_I(inode).flags = 0;
39 HFSPLUS_I(inode).rsrc_inode = NULL; 39 HFSPLUS_I(inode).rsrc_inode = NULL;
40 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 40 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..dbd01d262ca4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
53enum { 53enum {
54 Opt_size, Opt_nr_inodes, 54 Opt_size, Opt_nr_inodes,
55 Opt_mode, Opt_uid, Opt_gid, 55 Opt_mode, Opt_uid, Opt_gid,
56 Opt_pagesize,
56 Opt_err, 57 Opt_err,
57}; 58};
58 59
@@ -62,6 +63,7 @@ static match_table_t tokens = {
62 {Opt_mode, "mode=%o"}, 63 {Opt_mode, "mode=%o"},
63 {Opt_uid, "uid=%u"}, 64 {Opt_uid, "uid=%u"},
64 {Opt_gid, "gid=%u"}, 65 {Opt_gid, "gid=%u"},
66 {Opt_pagesize, "pagesize=%s"},
65 {Opt_err, NULL}, 67 {Opt_err, NULL},
66}; 68};
67 69
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
80 struct inode *inode = file->f_path.dentry->d_inode; 82 struct inode *inode = file->f_path.dentry->d_inode;
81 loff_t len, vma_len; 83 loff_t len, vma_len;
82 int ret; 84 int ret;
85 struct hstate *h = hstate_file(file);
83 86
84 /* 87 /*
85 * vma address alignment (but not the pgoff alignment) has 88 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 95 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
93 vma->vm_ops = &hugetlb_vm_ops; 96 vma->vm_ops = &hugetlb_vm_ops;
94 97
95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 98 if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
96 return -EINVAL; 99 return -EINVAL;
97 100
98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 101 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
103 ret = -ENOMEM; 106 ret = -ENOMEM;
104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 107 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
105 108
106 if (vma->vm_flags & VM_MAYSHARE && 109 if (hugetlb_reserve_pages(inode,
107 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 110 vma->vm_pgoff >> huge_page_order(h),
108 len >> HPAGE_SHIFT)) 111 len >> huge_page_shift(h), vma))
109 goto out; 112 goto out;
110 113
111 ret = 0; 114 ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
130 struct mm_struct *mm = current->mm; 133 struct mm_struct *mm = current->mm;
131 struct vm_area_struct *vma; 134 struct vm_area_struct *vma;
132 unsigned long start_addr; 135 unsigned long start_addr;
136 struct hstate *h = hstate_file(file);
133 137
134 if (len & ~HPAGE_MASK) 138 if (len & ~huge_page_mask(h))
135 return -EINVAL; 139 return -EINVAL;
136 if (len > TASK_SIZE) 140 if (len > TASK_SIZE)
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 if (flags & MAP_FIXED) { 143 if (flags & MAP_FIXED) {
140 if (prepare_hugepage_range(addr, len)) 144 if (prepare_hugepage_range(file, addr, len))
141 return -EINVAL; 145 return -EINVAL;
142 return addr; 146 return addr;
143 } 147 }
144 148
145 if (addr) { 149 if (addr) {
146 addr = ALIGN(addr, HPAGE_SIZE); 150 addr = ALIGN(addr, huge_page_size(h));
147 vma = find_vma(mm, addr); 151 vma = find_vma(mm, addr);
148 if (TASK_SIZE - len >= addr && 152 if (TASK_SIZE - len >= addr &&
149 (!vma || addr + len <= vma->vm_start)) 153 (!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
156 start_addr = TASK_UNMAPPED_BASE; 160 start_addr = TASK_UNMAPPED_BASE;
157 161
158full_search: 162full_search:
159 addr = ALIGN(start_addr, HPAGE_SIZE); 163 addr = ALIGN(start_addr, huge_page_size(h));
160 164
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 165 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */ 166 /* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
174 178
175 if (!vma || addr + len <= vma->vm_start) 179 if (!vma || addr + len <= vma->vm_start)
176 return addr; 180 return addr;
177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 181 addr = ALIGN(vma->vm_end, huge_page_size(h));
178 } 182 }
179} 183}
180#endif 184#endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 229static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos) 230 size_t len, loff_t *ppos)
227{ 231{
232 struct hstate *h = hstate_file(filp);
228 struct address_space *mapping = filp->f_mapping; 233 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host; 234 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT; 235 unsigned long index = *ppos >> huge_page_shift(h);
231 unsigned long offset = *ppos & ~HPAGE_MASK; 236 unsigned long offset = *ppos & ~huge_page_mask(h);
232 unsigned long end_index; 237 unsigned long end_index;
233 loff_t isize; 238 loff_t isize;
234 ssize_t retval = 0; 239 ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
243 if (!isize) 248 if (!isize)
244 goto out; 249 goto out;
245 250
246 end_index = (isize - 1) >> HPAGE_SHIFT; 251 end_index = (isize - 1) >> huge_page_shift(h);
247 for (;;) { 252 for (;;) {
248 struct page *page; 253 struct page *page;
249 int nr, ret; 254 unsigned long nr, ret;
250 255
251 /* nr is the maximum number of bytes to copy from this page */ 256 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE; 257 nr = huge_page_size(h);
253 if (index >= end_index) { 258 if (index >= end_index) {
254 if (index > end_index) 259 if (index > end_index)
255 goto out; 260 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1; 261 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
257 if (nr <= offset) { 262 if (nr <= offset) {
258 goto out; 263 goto out;
259 } 264 }
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
287 offset += ret; 292 offset += ret;
288 retval += ret; 293 retval += ret;
289 len -= ret; 294 len -= ret;
290 index += offset >> HPAGE_SHIFT; 295 index += offset >> huge_page_shift(h);
291 offset &= ~HPAGE_MASK; 296 offset &= ~huge_page_mask(h);
292 297
293 if (page) 298 if (page)
294 page_cache_release(page); 299 page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
298 break; 303 break;
299 } 304 }
300out: 305out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; 306 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
302 mutex_unlock(&inode->i_mutex); 307 mutex_unlock(&inode->i_mutex);
303 return retval; 308 return retval;
304} 309}
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
339 344
340static void truncate_hugepages(struct inode *inode, loff_t lstart) 345static void truncate_hugepages(struct inode *inode, loff_t lstart)
341{ 346{
347 struct hstate *h = hstate_inode(inode);
342 struct address_space *mapping = &inode->i_data; 348 struct address_space *mapping = &inode->i_data;
343 const pgoff_t start = lstart >> HPAGE_SHIFT; 349 const pgoff_t start = lstart >> huge_page_shift(h);
344 struct pagevec pvec; 350 struct pagevec pvec;
345 pgoff_t next; 351 pgoff_t next;
346 int i, freed = 0; 352 int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
441 v_offset = 0; 447 v_offset = 0;
442 448
443 __unmap_hugepage_range(vma, 449 __unmap_hugepage_range(vma,
444 vma->vm_start + v_offset, vma->vm_end); 450 vma->vm_start + v_offset, vma->vm_end, NULL);
445 } 451 }
446} 452}
447 453
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
449{ 455{
450 pgoff_t pgoff; 456 pgoff_t pgoff;
451 struct address_space *mapping = inode->i_mapping; 457 struct address_space *mapping = inode->i_mapping;
458 struct hstate *h = hstate_inode(inode);
452 459
453 BUG_ON(offset & ~HPAGE_MASK); 460 BUG_ON(offset & ~huge_page_mask(h));
454 pgoff = offset >> PAGE_SHIFT; 461 pgoff = offset >> PAGE_SHIFT;
455 462
456 i_size_write(inode, offset); 463 i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
465static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 472static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 473{
467 struct inode *inode = dentry->d_inode; 474 struct inode *inode = dentry->d_inode;
475 struct hstate *h = hstate_inode(inode);
468 int error; 476 int error;
469 unsigned int ia_valid = attr->ia_valid; 477 unsigned int ia_valid = attr->ia_valid;
470 478
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
476 484
477 if (ia_valid & ATTR_SIZE) { 485 if (ia_valid & ATTR_SIZE) {
478 error = -EINVAL; 486 error = -EINVAL;
479 if (!(attr->ia_size & ~HPAGE_MASK)) 487 if (!(attr->ia_size & ~huge_page_mask(h)))
480 error = hugetlb_vmtruncate(inode, attr->ia_size); 488 error = hugetlb_vmtruncate(inode, attr->ia_size);
481 if (error) 489 if (error)
482 goto out; 490 goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
610static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 618static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
611{ 619{
612 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 620 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
621 struct hstate *h = hstate_inode(dentry->d_inode);
613 622
614 buf->f_type = HUGETLBFS_MAGIC; 623 buf->f_type = HUGETLBFS_MAGIC;
615 buf->f_bsize = HPAGE_SIZE; 624 buf->f_bsize = huge_page_size(h);
616 if (sbinfo) { 625 if (sbinfo) {
617 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
618 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
743 char *p, *rest; 752 char *p, *rest;
744 substring_t args[MAX_OPT_ARGS]; 753 substring_t args[MAX_OPT_ARGS];
745 int option; 754 int option;
755 unsigned long long size = 0;
756 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
746 757
747 if (!options) 758 if (!options)
748 return 0; 759 return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
773 break; 784 break;
774 785
775 case Opt_size: { 786 case Opt_size: {
776 unsigned long long size;
777 /* memparse() will accept a K/M/G without a digit */ 787 /* memparse() will accept a K/M/G without a digit */
778 if (!isdigit(*args[0].from)) 788 if (!isdigit(*args[0].from))
779 goto bad_val; 789 goto bad_val;
780 size = memparse(args[0].from, &rest); 790 size = memparse(args[0].from, &rest);
781 if (*rest == '%') { 791 setsize = SIZE_STD;
782 size <<= HPAGE_SHIFT; 792 if (*rest == '%')
783 size *= max_huge_pages; 793 setsize = SIZE_PERCENT;
784 do_div(size, 100);
785 }
786 pconfig->nr_blocks = (size >> HPAGE_SHIFT);
787 break; 794 break;
788 } 795 }
789 796
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
794 pconfig->nr_inodes = memparse(args[0].from, &rest); 801 pconfig->nr_inodes = memparse(args[0].from, &rest);
795 break; 802 break;
796 803
804 case Opt_pagesize: {
805 unsigned long ps;
806 ps = memparse(args[0].from, &rest);
807 pconfig->hstate = size_to_hstate(ps);
808 if (!pconfig->hstate) {
809 printk(KERN_ERR
810 "hugetlbfs: Unsupported page size %lu MB\n",
811 ps >> 20);
812 return -EINVAL;
813 }
814 break;
815 }
816
797 default: 817 default:
798 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 818 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
799 p); 819 p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
801 break; 821 break;
802 } 822 }
803 } 823 }
824
825 /* Do size after hstate is set up */
826 if (setsize > NO_SIZE) {
827 struct hstate *h = pconfig->hstate;
828 if (setsize == SIZE_PERCENT) {
829 size <<= huge_page_shift(h);
830 size *= h->max_huge_pages;
831 do_div(size, 100);
832 }
833 pconfig->nr_blocks = (size >> huge_page_shift(h));
834 }
835
804 return 0; 836 return 0;
805 837
806bad_val: 838bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
825 config.uid = current->fsuid; 857 config.uid = current->fsuid;
826 config.gid = current->fsgid; 858 config.gid = current->fsgid;
827 config.mode = 0755; 859 config.mode = 0755;
860 config.hstate = &default_hstate;
828 ret = hugetlbfs_parse_options(data, &config); 861 ret = hugetlbfs_parse_options(data, &config);
829 if (ret) 862 if (ret)
830 return ret; 863 return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833 if (!sbinfo) 866 if (!sbinfo)
834 return -ENOMEM; 867 return -ENOMEM;
835 sb->s_fs_info = sbinfo; 868 sb->s_fs_info = sbinfo;
869 sbinfo->hstate = config.hstate;
836 spin_lock_init(&sbinfo->stat_lock); 870 spin_lock_init(&sbinfo->stat_lock);
837 sbinfo->max_blocks = config.nr_blocks; 871 sbinfo->max_blocks = config.nr_blocks;
838 sbinfo->free_blocks = config.nr_blocks; 872 sbinfo->free_blocks = config.nr_blocks;
839 sbinfo->max_inodes = config.nr_inodes; 873 sbinfo->max_inodes = config.nr_inodes;
840 sbinfo->free_inodes = config.nr_inodes; 874 sbinfo->free_inodes = config.nr_inodes;
841 sb->s_maxbytes = MAX_LFS_FILESIZE; 875 sb->s_maxbytes = MAX_LFS_FILESIZE;
842 sb->s_blocksize = HPAGE_SIZE; 876 sb->s_blocksize = huge_page_size(config.hstate);
843 sb->s_blocksize_bits = HPAGE_SHIFT; 877 sb->s_blocksize_bits = huge_page_shift(config.hstate);
844 sb->s_magic = HUGETLBFS_MAGIC; 878 sb->s_magic = HUGETLBFS_MAGIC;
845 sb->s_op = &hugetlbfs_ops; 879 sb->s_op = &hugetlbfs_ops;
846 sb->s_time_gran = 1; 880 sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
942 goto out_dentry; 976 goto out_dentry;
943 977
944 error = -ENOMEM; 978 error = -ENOMEM;
945 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 979 if (hugetlb_reserve_pages(inode, 0,
980 size >> huge_page_shift(hstate_inode(inode)), NULL))
946 goto out_inode; 981 goto out_inode;
947 982
948 d_instantiate(dentry, inode); 983 d_instantiate(dentry, inode);
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..fe79c25d95dc 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
566 .destroy_watch = free_inotify_user_watch, 566 .destroy_watch = free_inotify_user_watch,
567}; 567};
568 568
569asmlinkage long sys_inotify_init(void) 569asmlinkage long sys_inotify_init1(int flags)
570{ 570{
571 struct inotify_device *dev; 571 struct inotify_device *dev;
572 struct inotify_handle *ih; 572 struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
574 struct file *filp; 574 struct file *filp;
575 int fd, ret; 575 int fd, ret;
576 576
577 fd = get_unused_fd(); 577 /* Check the IN_* constants for consistency. */
578 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
579 BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
580
581 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
582 return -EINVAL;
583
584 fd = get_unused_fd_flags(flags & O_CLOEXEC);
578 if (fd < 0) 585 if (fd < 0)
579 return fd; 586 return fd;
580 587
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
610 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 617 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
611 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 618 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
612 filp->f_mode = FMODE_READ; 619 filp->f_mode = FMODE_READ;
613 filp->f_flags = O_RDONLY; 620 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
614 filp->private_data = dev; 621 filp->private_data = dev;
615 622
616 INIT_LIST_HEAD(&dev->events); 623 INIT_LIST_HEAD(&dev->events);
@@ -638,6 +645,11 @@ out_put_fd:
638 return ret; 645 return ret;
639} 646}
640 647
648asmlinkage long sys_inotify_init(void)
649{
650 return sys_inotify_init1(0);
651}
652
641asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 653asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
642{ 654{
643 struct inode *inode; 655 struct inode *inode;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a7047..c2fb2dd0131f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
209 209
210 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 210 while (rs.len > 2) { /* There may be one byte for padding somewhere */
211 rr = (struct rock_ridge *)rs.chr; 211 rr = (struct rock_ridge *)rs.chr;
212 /*
213 * Ignore rock ridge info if rr->len is out of range, but
214 * don't return -EIO because that would make the file
215 * invisible.
216 */
212 if (rr->len < 3) 217 if (rr->len < 3)
213 goto out; /* Something got screwed up here */ 218 goto out; /* Something got screwed up here */
214 sig = isonum_721(rs.chr); 219 sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
216 goto eio; 221 goto eio;
217 rs.chr += rr->len; 222 rs.chr += rr->len;
218 rs.len -= rr->len; 223 rs.len -= rr->len;
224 /*
225 * As above, just ignore the rock ridge info if rr->len
226 * is bogus.
227 */
219 if (rs.len < 0) 228 if (rs.len < 0)
220 goto eio; /* corrupted isofs */ 229 goto out; /* Something got screwed up here */
221 230
222 switch (sig) { 231 switch (sig) {
223 case SIG('R', 'R'): 232 case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
307repeat: 316repeat:
308 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 317 while (rs.len > 2) { /* There may be one byte for padding somewhere */
309 rr = (struct rock_ridge *)rs.chr; 318 rr = (struct rock_ridge *)rs.chr;
319 /*
320 * Ignore rock ridge info if rr->len is out of range, but
321 * don't return -EIO because that would make the file
322 * invisible.
323 */
310 if (rr->len < 3) 324 if (rr->len < 3)
311 goto out; /* Something got screwed up here */ 325 goto out; /* Something got screwed up here */
312 sig = isonum_721(rs.chr); 326 sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
314 goto eio; 328 goto eio;
315 rs.chr += rr->len; 329 rs.chr += rr->len;
316 rs.len -= rr->len; 330 rs.len -= rr->len;
331 /*
332 * As above, just ignore the rock ridge info if rr->len
333 * is bogus.
334 */
317 if (rs.len < 0) 335 if (rs.len < 0)
318 goto eio; /* corrupted isofs */ 336 goto out; /* Something got screwed up here */
319 337
320 switch (sig) { 338 switch (sig) {
321#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ 339#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..2eccbfaa1d48 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
36 36
37/* 37/*
38 * When an ext3-ordered file is truncated, it is possible that many pages are 38 * When an ext3-ordered file is truncated, it is possible that many pages are
39 * not sucessfully freed, because they are attached to a committing transaction. 39 * not successfully freed, because they are attached to a committing transaction.
40 * After the transaction commits, these pages are left on the LRU, with no 40 * After the transaction commits, these pages are left on the LRU, with no
41 * ->mapping, and with attached buffers. These pages are trivially reclaimable 41 * ->mapping, and with attached buffers. These pages are trivially reclaimable
42 * by the VM, but their apparent absence upsets the VM accounting, and it makes 42 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
45 * So here, we have a buffer which has just come off the forget list. Look to 45 * So here, we have a buffer which has just come off the forget list. Look to
46 * see if we can strip all buffers from the backing page. 46 * see if we can strip all buffers from the backing page.
47 * 47 *
48 * Called under lock_journal(), and possibly under journal_datalist_lock. The 48 * Called under journal->j_list_lock. The caller provided us with a ref
49 * caller provided us with a ref against the buffer, and we drop that here. 49 * against the buffer, and we drop that here.
50 */ 50 */
51static void release_buffer_page(struct buffer_head *bh) 51static void release_buffer_page(struct buffer_head *bh)
52{ 52{
@@ -78,6 +78,19 @@ nope:
78} 78}
79 79
80/* 80/*
81 * Decrement reference counter for data buffer. If it has been marked
82 * 'BH_Freed', release it and the page to which it belongs if possible.
83 */
84static void release_data_buffer(struct buffer_head *bh)
85{
86 if (buffer_freed(bh)) {
87 clear_buffer_freed(bh);
88 release_buffer_page(bh);
89 } else
90 put_bh(bh);
91}
92
93/*
81 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is 94 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
82 * held. For ranking reasons we must trylock. If we lose, schedule away and 95 * held. For ranking reasons we must trylock. If we lose, schedule away and
83 * return 0. j_list_lock is dropped in this case. 96 * return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
172/* 185/*
173 * Submit all the data buffers to disk 186 * Submit all the data buffers to disk
174 */ 187 */
175static void journal_submit_data_buffers(journal_t *journal, 188static int journal_submit_data_buffers(journal_t *journal,
176 transaction_t *commit_transaction) 189 transaction_t *commit_transaction)
177{ 190{
178 struct journal_head *jh; 191 struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
180 int locked; 193 int locked;
181 int bufs = 0; 194 int bufs = 0;
182 struct buffer_head **wbuf = journal->j_wbuf; 195 struct buffer_head **wbuf = journal->j_wbuf;
196 int err = 0;
183 197
184 /* 198 /*
185 * Whenever we unlock the journal and sleep, things can get added 199 * Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
231 if (locked) 245 if (locked)
232 unlock_buffer(bh); 246 unlock_buffer(bh);
233 BUFFER_TRACE(bh, "already cleaned up"); 247 BUFFER_TRACE(bh, "already cleaned up");
234 put_bh(bh); 248 release_data_buffer(bh);
235 continue; 249 continue;
236 } 250 }
237 if (locked && test_clear_buffer_dirty(bh)) { 251 if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
253 put_bh(bh); 267 put_bh(bh);
254 } else { 268 } else {
255 BUFFER_TRACE(bh, "writeout complete: unfile"); 269 BUFFER_TRACE(bh, "writeout complete: unfile");
270 if (unlikely(!buffer_uptodate(bh)))
271 err = -EIO;
256 __journal_unfile_buffer(jh); 272 __journal_unfile_buffer(jh);
257 jbd_unlock_bh_state(bh); 273 jbd_unlock_bh_state(bh);
258 if (locked) 274 if (locked)
259 unlock_buffer(bh); 275 unlock_buffer(bh);
260 journal_remove_journal_head(bh); 276 journal_remove_journal_head(bh);
261 /* Once for our safety reference, once for 277 /* One for our safety reference, other for
262 * journal_remove_journal_head() */ 278 * journal_remove_journal_head() */
263 put_bh(bh); 279 put_bh(bh);
264 put_bh(bh); 280 release_data_buffer(bh);
265 } 281 }
266 282
267 if (need_resched() || spin_needbreak(&journal->j_list_lock)) { 283 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
271 } 287 }
272 spin_unlock(&journal->j_list_lock); 288 spin_unlock(&journal->j_list_lock);
273 journal_do_submit_data(wbuf, bufs); 289 journal_do_submit_data(wbuf, bufs);
290
291 return err;
274} 292}
275 293
276/* 294/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
410 * Now start flushing things to disk, in the order they appear 428 * Now start flushing things to disk, in the order they appear
411 * on the transaction lists. Data blocks go first. 429 * on the transaction lists. Data blocks go first.
412 */ 430 */
413 err = 0; 431 err = journal_submit_data_buffers(journal, commit_transaction);
414 journal_submit_data_buffers(journal, commit_transaction);
415 432
416 /* 433 /*
417 * Wait for all previously submitted IO to complete. 434 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
426 if (buffer_locked(bh)) { 443 if (buffer_locked(bh)) {
427 spin_unlock(&journal->j_list_lock); 444 spin_unlock(&journal->j_list_lock);
428 wait_on_buffer(bh); 445 wait_on_buffer(bh);
429 if (unlikely(!buffer_uptodate(bh)))
430 err = -EIO;
431 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
432 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) {
449 if (TestSetPageLocked(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock);
453 }
454 if (bh->b_page->mapping)
455 set_bit(AS_EIO, &bh->b_page->mapping->flags);
456
457 unlock_page(bh->b_page);
458 SetPageError(bh->b_page);
459 err = -EIO;
460 }
433 if (!inverted_lock(journal, bh)) { 461 if (!inverted_lock(journal, bh)) {
434 put_bh(bh); 462 put_bh(bh);
435 spin_lock(&journal->j_list_lock); 463 spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
443 } else { 471 } else {
444 jbd_unlock_bh_state(bh); 472 jbd_unlock_bh_state(bh);
445 } 473 }
446 put_bh(bh); 474 release_data_buffer(bh);
447 cond_resched_lock(&journal->j_list_lock); 475 cond_resched_lock(&journal->j_list_lock);
448 } 476 }
449 spin_unlock(&journal->j_list_lock); 477 spin_unlock(&journal->j_list_lock);
450 478
451 if (err) 479 if (err) {
452 journal_abort(journal, err); 480 char b[BDEVNAME_SIZE];
453 481
454 journal_write_revoke_records(journal, commit_transaction); 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 err = 0;
486 }
455 487
456 jbd_debug(3, "JBD: commit phase 2\n"); 488 journal_write_revoke_records(journal, commit_transaction);
457 489
458 /* 490 /*
459 * If we found any dirty or locked buffers, then we should have 491 * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
68EXPORT_SYMBOL(journal_create); 68EXPORT_SYMBOL(journal_create);
69EXPORT_SYMBOL(journal_load); 69EXPORT_SYMBOL(journal_load);
70EXPORT_SYMBOL(journal_destroy); 70EXPORT_SYMBOL(journal_destroy);
71EXPORT_SYMBOL(journal_update_superblock);
72EXPORT_SYMBOL(journal_abort); 71EXPORT_SYMBOL(journal_abort);
73EXPORT_SYMBOL(journal_errno); 72EXPORT_SYMBOL(journal_errno);
74EXPORT_SYMBOL(journal_ack_err); 73EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
1636 1635
1637static void journal_destroy_journal_head_cache(void) 1636static void journal_destroy_journal_head_cache(void)
1638{ 1637{
1639 J_ASSERT(journal_head_cache != NULL); 1638 if (journal_head_cache) {
1640 kmem_cache_destroy(journal_head_cache); 1639 kmem_cache_destroy(journal_head_cache);
1641 journal_head_cache = NULL; 1640 journal_head_cache = NULL;
1641 }
1642} 1642}
1643 1643
1644/* 1644/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
166 return NULL; 166 return NULL;
167} 167}
168 168
169void journal_destroy_revoke_caches(void)
170{
171 if (revoke_record_cache) {
172 kmem_cache_destroy(revoke_record_cache);
173 revoke_record_cache = NULL;
174 }
175 if (revoke_table_cache) {
176 kmem_cache_destroy(revoke_table_cache);
177 revoke_table_cache = NULL;
178 }
179}
180
169int __init journal_init_revoke_caches(void) 181int __init journal_init_revoke_caches(void)
170{ 182{
183 J_ASSERT(!revoke_record_cache);
184 J_ASSERT(!revoke_table_cache);
185
171 revoke_record_cache = kmem_cache_create("revoke_record", 186 revoke_record_cache = kmem_cache_create("revoke_record",
172 sizeof(struct jbd_revoke_record_s), 187 sizeof(struct jbd_revoke_record_s),
173 0, 188 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 189 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL); 190 NULL);
176 if (!revoke_record_cache) 191 if (!revoke_record_cache)
177 return -ENOMEM; 192 goto record_cache_failure;
178 193
179 revoke_table_cache = kmem_cache_create("revoke_table", 194 revoke_table_cache = kmem_cache_create("revoke_table",
180 sizeof(struct jbd_revoke_table_s), 195 sizeof(struct jbd_revoke_table_s),
181 0, SLAB_TEMPORARY, NULL); 196 0, SLAB_TEMPORARY, NULL);
182 if (!revoke_table_cache) { 197 if (!revoke_table_cache)
183 kmem_cache_destroy(revoke_record_cache); 198 goto table_cache_failure;
184 revoke_record_cache = NULL; 199
185 return -ENOMEM;
186 }
187 return 0; 200 return 0;
188}
189 201
190void journal_destroy_revoke_caches(void) 202table_cache_failure:
191{ 203 journal_destroy_revoke_caches();
192 kmem_cache_destroy(revoke_record_cache); 204record_cache_failure:
193 revoke_record_cache = NULL; 205 return -ENOMEM;
194 kmem_cache_destroy(revoke_table_cache);
195 revoke_table_cache = NULL;
196} 206}
197 207
198/* Initialise the revoke table for a given journal to a given size. */ 208static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
199
200int journal_init_revoke(journal_t *journal, int hash_size)
201{ 209{
202 int shift, tmp; 210 int shift = 0;
211 int tmp = hash_size;
212 struct jbd_revoke_table_s *table;
203 213
204 J_ASSERT (journal->j_revoke_table[0] == NULL); 214 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
215 if (!table)
216 goto out;
205 217
206 shift = 0;
207 tmp = hash_size;
208 while((tmp >>= 1UL) != 0UL) 218 while((tmp >>= 1UL) != 0UL)
209 shift++; 219 shift++;
210 220
211 journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 221 table->hash_size = hash_size;
212 if (!journal->j_revoke_table[0]) 222 table->hash_shift = shift;
213 return -ENOMEM; 223 table->hash_table =
214 journal->j_revoke = journal->j_revoke_table[0];
215
216 /* Check that the hash_size is a power of two */
217 J_ASSERT(is_power_of_2(hash_size));
218
219 journal->j_revoke->hash_size = hash_size;
220
221 journal->j_revoke->hash_shift = shift;
222
223 journal->j_revoke->hash_table =
224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
225 if (!journal->j_revoke->hash_table) { 225 if (!table->hash_table) {
226 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 226 kmem_cache_free(revoke_table_cache, table);
227 journal->j_revoke = NULL; 227 table = NULL;
228 return -ENOMEM; 228 goto out;
229 } 229 }
230 230
231 for (tmp = 0; tmp < hash_size; tmp++) 231 for (tmp = 0; tmp < hash_size; tmp++)
232 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 232 INIT_LIST_HEAD(&table->hash_table[tmp]);
233 233
234 journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 234out:
235 if (!journal->j_revoke_table[1]) { 235 return table;
236 kfree(journal->j_revoke_table[0]->hash_table); 236}
237 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 237
238 return -ENOMEM; 238static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
239{
240 int i;
241 struct list_head *hash_list;
242
243 for (i = 0; i < table->hash_size; i++) {
244 hash_list = &table->hash_table[i];
245 J_ASSERT(list_empty(hash_list));
239 } 246 }
240 247
241 journal->j_revoke = journal->j_revoke_table[1]; 248 kfree(table->hash_table);
249 kmem_cache_free(revoke_table_cache, table);
250}
242 251
243 /* Check that the hash_size is a power of two */ 252/* Initialise the revoke table for a given journal to a given size. */
253int journal_init_revoke(journal_t *journal, int hash_size)
254{
255 J_ASSERT(journal->j_revoke_table[0] == NULL);
244 J_ASSERT(is_power_of_2(hash_size)); 256 J_ASSERT(is_power_of_2(hash_size));
245 257
246 journal->j_revoke->hash_size = hash_size; 258 journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
259 if (!journal->j_revoke_table[0])
260 goto fail0;
247 261
248 journal->j_revoke->hash_shift = shift; 262 journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
263 if (!journal->j_revoke_table[1])
264 goto fail1;
249 265
250 journal->j_revoke->hash_table = 266 journal->j_revoke = journal->j_revoke_table[1];
251 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
252 if (!journal->j_revoke->hash_table) {
253 kfree(journal->j_revoke_table[0]->hash_table);
254 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
255 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
256 journal->j_revoke = NULL;
257 return -ENOMEM;
258 }
259
260 for (tmp = 0; tmp < hash_size; tmp++)
261 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
262 267
263 spin_lock_init(&journal->j_revoke_lock); 268 spin_lock_init(&journal->j_revoke_lock);
264 269
265 return 0; 270 return 0;
266}
267 271
268/* Destoy a journal's revoke table. The table must already be empty! */ 272fail1:
273 journal_destroy_revoke_table(journal->j_revoke_table[0]);
274fail0:
275 return -ENOMEM;
276}
269 277
278/* Destroy a journal's revoke table. The table must already be empty! */
270void journal_destroy_revoke(journal_t *journal) 279void journal_destroy_revoke(journal_t *journal)
271{ 280{
272 struct jbd_revoke_table_s *table;
273 struct list_head *hash_list;
274 int i;
275
276 table = journal->j_revoke_table[0];
277 if (!table)
278 return;
279
280 for (i=0; i<table->hash_size; i++) {
281 hash_list = &table->hash_table[i];
282 J_ASSERT (list_empty(hash_list));
283 }
284
285 kfree(table->hash_table);
286 kmem_cache_free(revoke_table_cache, table);
287 journal->j_revoke = NULL;
288
289 table = journal->j_revoke_table[1];
290 if (!table)
291 return;
292
293 for (i=0; i<table->hash_size; i++) {
294 hash_list = &table->hash_table[i];
295 J_ASSERT (list_empty(hash_list));
296 }
297
298 kfree(table->hash_table);
299 kmem_cache_free(revoke_table_cache, table);
300 journal->j_revoke = NULL; 281 journal->j_revoke = NULL;
282 if (journal->j_revoke_table[0])
283 journal_destroy_revoke_table(journal->j_revoke_table[0]);
284 if (journal->j_revoke_table[1])
285 journal_destroy_revoke_table(journal->j_revoke_table[1]);
301} 286}
302 287
303 288
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..8dee32007500 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
1648 return; 1648 return;
1649} 1649}
1650 1650
1651/*
1652 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1653 * The latter might still hold a count on buffers when inspecting
1654 * them on t_syncdata_list or t_locked_list.
1655 *
1656 * journal_try_to_free_buffers() will call this function to
1657 * wait for the current transaction to finish syncing data buffers, before
1658 * trying to free that buffer.
1659 *
1660 * Takes and releases journal->j_state_lock itself, so must be called without it held.
1661 */
1662static void journal_wait_for_transaction_sync_data(journal_t *journal)
1663{
1664 transaction_t *transaction = NULL;
1665 tid_t tid;
1666
1667 spin_lock(&journal->j_state_lock);
1668 transaction = journal->j_committing_transaction;
1669
1670 if (!transaction) {
1671 spin_unlock(&journal->j_state_lock);
1672 return;
1673 }
1674
1675 tid = transaction->t_tid;
1676 spin_unlock(&journal->j_state_lock);
1677 log_wait_commit(journal, tid);
1678}
1651 1679
1652/** 1680/**
1653 * int journal_try_to_free_buffers() - try to free page buffers. 1681 * int journal_try_to_free_buffers() - try to free page buffers.
1654 * @journal: journal for operation 1682 * @journal: journal for operation
1655 * @page: to try and free 1683 * @page: to try and free
1656 * @unused_gfp_mask: unused 1684 * @gfp_mask: we use the mask to detect how hard should we try to release
1685 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
1686 * release the buffers.
1657 * 1687 *
1658 * 1688 *
1659 * For all the buffers on this page, 1689 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
1682 * journal_try_to_free_buffer() is changing its state. But that 1712 * journal_try_to_free_buffer() is changing its state. But that
1683 * cannot happen because we never reallocate freed data as metadata 1713 * cannot happen because we never reallocate freed data as metadata
1684 * while the data is part of a transaction. Yes? 1714 * while the data is part of a transaction. Yes?
1715 *
1716 * Return 0 on failure, 1 on success
1685 */ 1717 */
1686int journal_try_to_free_buffers(journal_t *journal, 1718int journal_try_to_free_buffers(journal_t *journal,
1687 struct page *page, gfp_t unused_gfp_mask) 1719 struct page *page, gfp_t gfp_mask)
1688{ 1720{
1689 struct buffer_head *head; 1721 struct buffer_head *head;
1690 struct buffer_head *bh; 1722 struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
1713 if (buffer_jbd(bh)) 1745 if (buffer_jbd(bh))
1714 goto busy; 1746 goto busy;
1715 } while ((bh = bh->b_this_page) != head); 1747 } while ((bh = bh->b_this_page) != head);
1748
1716 ret = try_to_free_buffers(page); 1749 ret = try_to_free_buffers(page);
1750
1751 /*
1752 * There are a number of places where journal_try_to_free_buffers()
1753 * could race with journal_commit_transaction(), the later still
1754 * holds the reference to the buffers to free while processing them.
1755 * try_to_free_buffers() failed to free those buffers. Some of the
1756 * caller of releasepage() request page buffers to be dropped, otherwise
1757 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1758 *
1759 * So, if the caller of try_to_release_page() wants the synchronous
1760 * behaviour(i.e make sure buffers are dropped upon return),
1761 * let's wait for the current transaction to finish flush of
1762 * dirty data buffers, then try to free those buffers again,
1763 * with the journal locked.
1764 */
1765 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1766 journal_wait_for_transaction_sync_data(journal);
1767 ret = try_to_free_buffers(page);
1768 }
1769
1717busy: 1770busy:
1718 return ret; 1771 return ret;
1719} 1772}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..359c091d8965 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/quotaops.h>
25#include <linux/mount.h> 26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/kthread.h> 28#include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505c..31668b690e03 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
582 } 582 }
583 if (status < 0) 583 if (status < 0)
584 goto out_unlock; 584 goto out_unlock;
585 status = nlm_stat_to_errno(resp->status); 585 /*
586 * EAGAIN doesn't make sense for sleeping locks, and in some
587 * cases NLM_LCK_DENIED is returned for a permanent error. So
588 * turn it into an ENOLCK.
589 */
590 if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
591 status = -ENOLCK;
592 else
593 status = nlm_stat_to_errno(resp->status);
586out_unblock: 594out_unblock:
587 nlmclnt_finish_block(block); 595 nlmclnt_finish_block(block);
588out: 596out:
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb66..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
418 goto out; 418 goto out;
419 case -EAGAIN: 419 case -EAGAIN:
420 ret = nlm_lck_denied; 420 ret = nlm_lck_denied;
421 break; 421 goto out;
422 case -EINPROGRESS: 422 case FILE_LOCK_DEFERRED:
423 if (wait) 423 if (wait)
424 break; 424 break;
425 /* Filesystem lock operation is in progress 425 /* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
434 goto out; 434 goto out;
435 } 435 }
436 436
437 ret = nlm_lck_denied;
438 if (!wait)
439 goto out;
440
441 ret = nlm_lck_blocked; 437 ret = nlm_lck_blocked;
442 438
443 /* Append to list of blocked */ 439 /* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
507 } 503 }
508 504
509 error = vfs_test_lock(file->f_file, &lock->fl); 505 error = vfs_test_lock(file->f_file, &lock->fl);
510 if (error == -EINPROGRESS) { 506 if (error == FILE_LOCK_DEFERRED) {
511 ret = nlmsvc_defer_lock_rqst(rqstp, block); 507 ret = nlmsvc_defer_lock_rqst(rqstp, block);
512 goto out; 508 goto out;
513 } 509 }
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
731 switch (error) { 727 switch (error) {
732 case 0: 728 case 0:
733 break; 729 break;
734 case -EAGAIN: 730 case FILE_LOCK_DEFERRED:
735 case -EINPROGRESS:
736 dprintk("lockd: lock still blocked error %d\n", error); 731 dprintk("lockd: lock still blocked error %d\n", error);
737 nlmsvc_insert_block(block, NLM_NEVER); 732 nlmsvc_insert_block(block, NLM_NEVER);
738 nlmsvc_release_block(block); 733 nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..01490300f7cb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@ find_conflict:
779 if (!flock_locks_conflict(request, fl)) 779 if (!flock_locks_conflict(request, fl))
780 continue; 780 continue;
781 error = -EAGAIN; 781 error = -EAGAIN;
782 if (request->fl_flags & FL_SLEEP) 782 if (!(request->fl_flags & FL_SLEEP))
783 locks_insert_block(fl, request); 783 goto out;
784 error = FILE_LOCK_DEFERRED;
785 locks_insert_block(fl, request);
784 goto out; 786 goto out;
785 } 787 }
786 if (request->fl_flags & FL_ACCESS) 788 if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
836 error = -EDEADLK; 838 error = -EDEADLK;
837 if (posix_locks_deadlock(request, fl)) 839 if (posix_locks_deadlock(request, fl))
838 goto out; 840 goto out;
839 error = -EAGAIN; 841 error = FILE_LOCK_DEFERRED;
840 locks_insert_block(fl, request); 842 locks_insert_block(fl, request);
841 goto out; 843 goto out;
842 } 844 }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1035 might_sleep (); 1037 might_sleep ();
1036 for (;;) { 1038 for (;;) {
1037 error = posix_lock_file(filp, fl, NULL); 1039 error = posix_lock_file(filp, fl, NULL);
1038 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1040 if (error != FILE_LOCK_DEFERRED)
1039 break; 1041 break;
1040 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1042 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1041 if (!error) 1043 if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1107 1109
1108 for (;;) { 1110 for (;;) {
1109 error = __posix_lock_file(inode, &fl, NULL); 1111 error = __posix_lock_file(inode, &fl, NULL);
1110 if (error != -EAGAIN) 1112 if (error != FILE_LOCK_DEFERRED)
1111 break;
1112 if (!(fl.fl_flags & FL_SLEEP))
1113 break; 1113 break;
1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1115 if (!error) { 1115 if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1531 might_sleep(); 1531 might_sleep();
1532 for (;;) { 1532 for (;;) {
1533 error = flock_lock_file(filp, fl); 1533 error = flock_lock_file(filp, fl);
1534 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1534 if (error != FILE_LOCK_DEFERRED)
1535 break; 1535 break;
1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1537 if (!error) 1537 if (!error)
@@ -1716,17 +1716,17 @@ out:
1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously 1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1718 * the request is for a blocking lock. When ->lock() does return asynchronously, 1718 * the request is for a blocking lock. When ->lock() does return asynchronously,
1719 * it must return -EINPROGRESS, and call ->fl_grant() when the lock 1719 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
1720 * request completes. 1720 * request completes.
1721 * If the request is for non-blocking lock the file system should return 1721 * If the request is for non-blocking lock the file system should return
1722 * -EINPROGRESS then try to get the lock and call the callback routine with 1722 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
1723 * the result. If the request timed out the callback routine will return a 1723 * with the result. If the request timed out the callback routine will return a
1724 * nonzero return code and the file system should release the lock. The file 1724 * nonzero return code and the file system should release the lock. The file
1725 * system is also responsible to keep a corresponding posix lock when it 1725 * system is also responsible to keep a corresponding posix lock when it
1726 * grants a lock so the VFS can find out which locks are locally held and do 1726 * grants a lock so the VFS can find out which locks are locally held and do
1727 * the correct lock cleanup when required. 1727 * the correct lock cleanup when required.
1728 * The underlying filesystem must not drop the kernel lock or call 1728 * The underlying filesystem must not drop the kernel lock or call
1729 * ->fl_grant() before returning to the caller with a -EINPROGRESS 1729 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
1730 * return code. 1730 * return code.
1731 */ 1731 */
1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
1738} 1738}
1739EXPORT_SYMBOL_GPL(vfs_lock_file); 1739EXPORT_SYMBOL_GPL(vfs_lock_file);
1740 1740
1741static int do_lock_file_wait(struct file *filp, unsigned int cmd,
1742 struct file_lock *fl)
1743{
1744 int error;
1745
1746 error = security_file_lock(filp, fl->fl_type);
1747 if (error)
1748 return error;
1749
1750 for (;;) {
1751 error = vfs_lock_file(filp, cmd, fl, NULL);
1752 if (error != FILE_LOCK_DEFERRED)
1753 break;
1754 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1755 if (!error)
1756 continue;
1757
1758 locks_delete_block(fl);
1759 break;
1760 }
1761
1762 return error;
1763}
1764
1741/* Apply the lock described by l to an open file descriptor. 1765/* Apply the lock described by l to an open file descriptor.
1742 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1766 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1743 */ 1767 */
@@ -1795,26 +1819,7 @@ again:
1795 goto out; 1819 goto out;
1796 } 1820 }
1797 1821
1798 error = security_file_lock(filp, file_lock->fl_type); 1822 error = do_lock_file_wait(filp, cmd, file_lock);
1799 if (error)
1800 goto out;
1801
1802 if (filp->f_op && filp->f_op->lock != NULL)
1803 error = filp->f_op->lock(filp, cmd, file_lock);
1804 else {
1805 for (;;) {
1806 error = posix_lock_file(filp, file_lock, NULL);
1807 if (error != -EAGAIN || cmd == F_SETLK)
1808 break;
1809 error = wait_event_interruptible(file_lock->fl_wait,
1810 !file_lock->fl_next);
1811 if (!error)
1812 continue;
1813
1814 locks_delete_block(file_lock);
1815 break;
1816 }
1817 }
1818 1823
1819 /* 1824 /*
1820 * Attempt to detect a close/fcntl race and recover by 1825 * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
1932 goto out; 1937 goto out;
1933 } 1938 }
1934 1939
1935 error = security_file_lock(filp, file_lock->fl_type); 1940 error = do_lock_file_wait(filp, cmd, file_lock);
1936 if (error)
1937 goto out;
1938
1939 if (filp->f_op && filp->f_op->lock != NULL)
1940 error = filp->f_op->lock(filp, cmd, file_lock);
1941 else {
1942 for (;;) {
1943 error = posix_lock_file(filp, file_lock, NULL);
1944 if (error != -EAGAIN || cmd == F_SETLK64)
1945 break;
1946 error = wait_event_interruptible(file_lock->fl_wait,
1947 !file_lock->fl_next);
1948 if (!error)
1949 continue;
1950
1951 locks_delete_block(file_lock);
1952 break;
1953 }
1954 }
1955 1941
1956 /* 1942 /*
1957 * Attempt to detect a close/fcntl race and recover by 1943 * Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6fc..523d73713418 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
256 if (!s->s_root) 256 if (!s->s_root)
257 goto out_iput; 257 goto out_iput;
258 258
259 if (!NO_TRUNCATE)
260 s->s_root->d_op = &minix_dentry_operations;
261
262 if (!(s->s_flags & MS_RDONLY)) { 259 if (!(s->s_flags & MS_RDONLY)) {
263 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 260 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
264 ms->s_state &= ~MINIX_VALID_FS; 261 ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe96108..e6a0b193bea4 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
2#include <linux/pagemap.h> 2#include <linux/pagemap.h>
3#include <linux/minix_fs.h> 3#include <linux/minix_fs.h>
4 4
5/*
6 * change the define below to 0 if you want names > info->s_namelen chars to be
7 * truncated. Else they will be disallowed (ENAMETOOLONG).
8 */
9#define NO_TRUNCATE 1
10#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version 5#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
11#define MINIX_V1 0x0001 /* original minix fs */ 6#define MINIX_V1 0x0001 /* original minix fs */
12#define MINIX_V2 0x0002 /* minix V2 fs */ 7#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
83extern const struct inode_operations minix_dir_inode_operations; 78extern const struct inode_operations minix_dir_inode_operations;
84extern const struct file_operations minix_file_operations; 79extern const struct file_operations minix_file_operations;
85extern const struct file_operations minix_dir_operations; 80extern const struct file_operations minix_dir_operations;
86extern struct dentry_operations minix_dentry_operations;
87 81
88static inline struct minix_sb_info *minix_sb(struct super_block *sb) 82static inline struct minix_sb_info *minix_sb(struct super_block *sb)
89{ 83{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c79..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
18 return err; 18 return err;
19} 19}
20 20
21static int minix_hash(struct dentry *dentry, struct qstr *qstr)
22{
23 unsigned long hash;
24 int i;
25 const unsigned char *name;
26
27 i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
28 if (i >= qstr->len)
29 return 0;
30 /* Truncate the name in place, avoids having to define a compare
31 function. */
32 qstr->len = i;
33 name = qstr->name;
34 hash = init_name_hash();
35 while (i--)
36 hash = partial_name_hash(*name++, hash);
37 qstr->hash = end_name_hash(hash);
38 return 0;
39}
40
41struct dentry_operations minix_dentry_operations = {
42 .d_hash = minix_hash,
43};
44
45static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
46{ 22{
47 struct inode * inode = NULL; 23 struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
17static unsigned char bad_if_strict_pc[] = "+=,; "; 17static unsigned char bad_if_strict[] = "+=,; ";
18/* GEMDOS is less restrictive */
19static unsigned char bad_if_strict_atari[] = " ";
20
21#define bad_if_strict(opts) \
22 ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
23 18
24/***** Formats an MS-DOS file name. Rejects invalid names. */ 19/***** Formats an MS-DOS file name. Rejects invalid names. */
25static int msdos_format_name(const unsigned char *name, int len, 20static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
40 /* Get rid of dot - test for it elsewhere */ 35 /* Get rid of dot - test for it elsewhere */
41 name++; 36 name++;
42 len--; 37 len--;
43 } else if (!opts->atari) 38 } else
44 return -EINVAL; 39 return -EINVAL;
45 } 40 }
46 /* 41 /*
47 * disallow names that _really_ start with a dot for MS-DOS, 42 * disallow names that _really_ start with a dot
48 * GEMDOS does not care
49 */ 43 */
50 space = !opts->atari; 44 space = 1;
51 c = 0; 45 c = 0;
52 for (walk = res; len && walk - res < 8; walk++) { 46 for (walk = res; len && walk - res < 8; walk++) {
53 c = *name++; 47 c = *name++;
54 len--; 48 len--;
55 if (opts->name_check != 'r' && strchr(bad_chars, c)) 49 if (opts->name_check != 'r' && strchr(bad_chars, c))
56 return -EINVAL; 50 return -EINVAL;
57 if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) 51 if (opts->name_check == 's' && strchr(bad_if_strict, c))
58 return -EINVAL; 52 return -EINVAL;
59 if (c >= 'A' && c <= 'Z' && opts->name_check == 's') 53 if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
60 return -EINVAL; 54 return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
94 if (opts->name_check != 'r' && strchr(bad_chars, c)) 88 if (opts->name_check != 'r' && strchr(bad_chars, c))
95 return -EINVAL; 89 return -EINVAL;
96 if (opts->name_check == 's' && 90 if (opts->name_check == 's' &&
97 strchr(bad_if_strict(opts), c)) 91 strchr(bad_if_strict, c))
98 return -EINVAL; 92 return -EINVAL;
99 if (c < ' ' || c == ':' || c == '\\') 93 if (c < ' ' || c == ':' || c == '\\')
100 return -EINVAL; 94 return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
243 int is_dir, int is_hid, int cluster, 237 int is_dir, int is_hid, int cluster,
244 struct timespec *ts, struct fat_slot_info *sinfo) 238 struct timespec *ts, struct fat_slot_info *sinfo)
245{ 239{
240 struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
246 struct msdos_dir_entry de; 241 struct msdos_dir_entry de;
247 __le16 time, date; 242 __le16 time, date;
248 int err; 243 int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
252 if (is_hid) 247 if (is_hid)
253 de.attr |= ATTR_HIDDEN; 248 de.attr |= ATTR_HIDDEN;
254 de.lcase = 0; 249 de.lcase = 0;
255 fat_date_unix2dos(ts->tv_sec, &time, &date); 250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
256 de.cdate = de.adate = 0; 251 de.cdate = de.adate = 0;
257 de.ctime = 0; 252 de.ctime = 0;
258 de.ctime_cs = 0; 253 de.ctime_cs = 0;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd397..8478fc25daee 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
127 Opt_err 127 Opt_err
128}; 128};
129 129
130static match_table_t __initdata tokens = { 130static match_table_t __initconst tokens = {
131 {Opt_port, "port=%u"}, 131 {Opt_port, "port=%u"},
132 {Opt_rsize, "rsize=%u"}, 132 {Opt_rsize, "rsize=%u"},
133 {Opt_wsize, "wsize=%u"}, 133 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225ac4926..15c6faeec77c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
19 19
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 20#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 21
22#ifdef CONFIG_LOCKD_V4
23#define nlm_stale_fh nlm4_stale_fh
24#define nlm_failed nlm4_failed
25#else
26#define nlm_stale_fh nlm_lck_denied_nolocks
27#define nlm_failed nlm_lck_denied_nolocks
28#endif
22/* 29/*
23 * Note: we hold the dentry use count while the file is open. 30 * Note: we hold the dentry use count while the file is open.
24 */ 31 */
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
47 return 0; 54 return 0;
48 case nfserr_dropit: 55 case nfserr_dropit:
49 return nlm_drop_reply; 56 return nlm_drop_reply;
50#ifdef CONFIG_LOCKD_V4
51 case nfserr_stale: 57 case nfserr_stale:
52 return nlm4_stale_fh; 58 return nlm_stale_fh;
53#endif
54 default: 59 default:
55 return nlm_lck_denied; 60 return nlm_failed;
56 } 61 }
57} 62}
58 63
diff --git a/fs/open.c b/fs/open.c
index a99ad09c3197..bb98d2fe809f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
64 memcpy(buf, &st, sizeof(st)); 64 memcpy(buf, &st, sizeof(st));
65 else { 65 else {
66 if (sizeof buf->f_blocks == 4) { 66 if (sizeof buf->f_blocks == 4) {
67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 67 if ((st.f_blocks | st.f_bfree | st.f_bavail |
68 st.f_bsize | st.f_frsize) &
68 0xffffffff00000000ULL) 69 0xffffffff00000000ULL)
69 return -EOVERFLOW; 70 return -EOVERFLOW;
70 /* 71 /*
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index efef715135d3..7d6b34e201db 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 345 whole_disk_show, NULL);
346 346
347void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
348{ 348{
349 struct hd_struct *p; 349 struct hd_struct *p;
350 int err; 350 int err;
351 351
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 352 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 353 if (!p)
354 return; 354 return -ENOMEM;
355 355
356 if (!init_part_stats(p)) { 356 if (!init_part_stats(p)) {
357 kfree(p); 357 err = -ENOMEM;
358 return; 358 goto out0;
359 } 359 }
360 p->start_sect = start; 360 p->start_sect = start;
361 p->nr_sects = len; 361 p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
378 378
379 /* delay uevent until 'holders' subdir is created */ 379 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 380 p->dev.uevent_suppress = 1;
381 device_add(&p->dev); 381 err = device_add(&p->dev);
382 if (err)
383 goto out1;
382 partition_sysfs_add_subdir(p); 384 partition_sysfs_add_subdir(p);
383 p->dev.uevent_suppress = 0; 385 p->dev.uevent_suppress = 0;
384 if (flags & ADDPART_FLAG_WHOLEDISK) 386 if (flags & ADDPART_FLAG_WHOLEDISK) {
385 err = device_create_file(&p->dev, &dev_attr_whole_disk); 387 err = device_create_file(&p->dev, &dev_attr_whole_disk);
388 if (err)
389 goto out2;
390 }
386 391
387 /* suppress uevent if the disk supresses it */ 392 /* suppress uevent if the disk supresses it */
388 if (!disk->dev.uevent_suppress) 393 if (!disk->dev.uevent_suppress)
389 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 394 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
395
396 return 0;
397
398out2:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p);
405 return err;
390} 406}
391 407
392/* Not exported, helper to add_disk(). */ 408/* Not exported, helper to add_disk(). */
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
483 if (!size) 499 if (!size)
484 continue; 500 continue;
485 if (from + size > get_capacity(disk)) { 501 if (from + size > get_capacity(disk)) {
486 printk(" %s: p%d exceeds device capacity\n", 502 printk(KERN_ERR " %s: p%d exceeds device capacity\n",
487 disk->disk_name, p); 503 disk->disk_name, p);
504 continue;
505 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) {
508 printk(KERN_ERR " %s: p%d could not be added: %d\n",
509 disk->disk_name, p, -res);
510 continue;
488 } 511 }
489 add_partition(disk, p, from, size, state->parts[p].flags);
490#ifdef CONFIG_BLK_DEV_MD 512#ifdef CONFIG_BLK_DEV_MD
491 if (state->parts[p].flags & ADDPART_FLAG_RAID) 513 if (state->parts[p].flags & ADDPART_FLAG_RAID)
492 md_autodetect_dev(bdev->bd_dev+p); 514 md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc41..038a6022152f 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
95#include "check.h" 95#include "check.h"
96#include "efi.h" 96#include "efi.h"
97 97
98#undef EFI_DEBUG
99#ifdef EFI_DEBUG
100#define Dprintk(x...) printk(KERN_DEBUG x)
101#else
102#define Dprintk(x...)
103#endif
104
105/* This allows a kernel command line option 'gpt' to override 98/* This allows a kernel command line option 'gpt' to override
106 * the test for invalid PMBR. Not __initdata because reloading 99 * the test for invalid PMBR. Not __initdata because reloading
107 * the partition tables happens after init too. 100 * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
305 298
306 /* Check the GUID Partition Table signature */ 299 /* Check the GUID Partition Table signature */
307 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { 300 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
308 Dprintk("GUID Partition Table Header signature is wrong:" 301 pr_debug("GUID Partition Table Header signature is wrong:"
309 "%lld != %lld\n", 302 "%lld != %lld\n",
310 (unsigned long long)le64_to_cpu((*gpt)->signature), 303 (unsigned long long)le64_to_cpu((*gpt)->signature),
311 (unsigned long long)GPT_HEADER_SIGNATURE); 304 (unsigned long long)GPT_HEADER_SIGNATURE);
312 goto fail; 305 goto fail;
313 } 306 }
314 307
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
318 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); 311 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
319 312
320 if (crc != origcrc) { 313 if (crc != origcrc) {
321 Dprintk 314 pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
322 ("GUID Partition Table Header CRC is wrong: %x != %x\n", 315 crc, origcrc);
323 crc, origcrc);
324 goto fail; 316 goto fail;
325 } 317 }
326 (*gpt)->header_crc32 = cpu_to_le32(origcrc); 318 (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
328 /* Check that the my_lba entry points to the LBA that contains 320 /* Check that the my_lba entry points to the LBA that contains
329 * the GUID Partition Table */ 321 * the GUID Partition Table */
330 if (le64_to_cpu((*gpt)->my_lba) != lba) { 322 if (le64_to_cpu((*gpt)->my_lba) != lba) {
331 Dprintk("GPT my_lba incorrect: %lld != %lld\n", 323 pr_debug("GPT my_lba incorrect: %lld != %lld\n",
332 (unsigned long long)le64_to_cpu((*gpt)->my_lba), 324 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
333 (unsigned long long)lba); 325 (unsigned long long)lba);
334 goto fail; 326 goto fail;
335 } 327 }
336 328
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
339 */ 331 */
340 lastlba = last_lba(bdev); 332 lastlba = last_lba(bdev);
341 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { 333 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
342 Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", 334 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
343 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), 335 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
344 (unsigned long long)lastlba); 336 (unsigned long long)lastlba);
345 goto fail; 337 goto fail;
346 } 338 }
347 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { 339 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
348 Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", 340 pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
349 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), 341 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
350 (unsigned long long)lastlba); 342 (unsigned long long)lastlba);
351 goto fail; 343 goto fail;
352 } 344 }
353 345
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
360 le32_to_cpu((*gpt)->sizeof_partition_entry)); 352 le32_to_cpu((*gpt)->sizeof_partition_entry));
361 353
362 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 354 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
363 Dprintk("GUID Partitition Entry Array CRC check failed.\n"); 355 pr_debug("GUID Partitition Entry Array CRC check failed.\n");
364 goto fail_ptes; 356 goto fail_ptes;
365 } 357 }
366 358
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
616 return 0; 608 return 0;
617 } 609 }
618 610
619 Dprintk("GUID Partition Table is valid! Yea!\n"); 611 pr_debug("GUID Partition Table is valid! Yea!\n");
620 612
621 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
622 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 614 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4cc..8652fb99e962 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
133 bool is_vista = false; 133 bool is_vista = false;
134 134
135 BUG_ON(!data || !ph); 135 BUG_ON(!data || !ph);
136 if (MAGIC_PRIVHEAD != BE64(data)) { 136 if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
137 ldm_error("Cannot find PRIVHEAD structure. LDM database is" 137 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
138 " corrupt. Aborting."); 138 " corrupt. Aborting.");
139 return false; 139 return false;
140 } 140 }
141 ph->ver_major = BE16(data + 0x000C); 141 ph->ver_major = get_unaligned_be16(data + 0x000C);
142 ph->ver_minor = BE16(data + 0x000E); 142 ph->ver_minor = get_unaligned_be16(data + 0x000E);
143 ph->logical_disk_start = BE64(data + 0x011B); 143 ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
144 ph->logical_disk_size = BE64(data + 0x0123); 144 ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
145 ph->config_start = BE64(data + 0x012B); 145 ph->config_start = get_unaligned_be64(data + 0x012B);
146 ph->config_size = BE64(data + 0x0133); 146 ph->config_size = get_unaligned_be64(data + 0x0133);
147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ 147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
148 if (ph->ver_major == 2 && ph->ver_minor == 12) 148 if (ph->ver_major == 2 && ph->ver_minor == 12)
149 is_vista = true; 149 is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
191{ 191{
192 BUG_ON (!data || !toc); 192 BUG_ON (!data || !toc);
193 193
194 if (MAGIC_TOCBLOCK != BE64 (data)) { 194 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); 195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
196 return false; 196 return false;
197 } 197 }
198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); 198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; 199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
200 toc->bitmap1_start = BE64 (data + 0x2E); 200 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
201 toc->bitmap1_size = BE64 (data + 0x36); 201 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
202 202
203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1, 203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
204 sizeof (toc->bitmap1_name)) != 0) { 204 sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
208 } 208 }
209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); 209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; 210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
211 toc->bitmap2_start = BE64 (data + 0x50); 211 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
212 toc->bitmap2_size = BE64 (data + 0x58); 212 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2, 213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
214 sizeof (toc->bitmap2_name)) != 0) { 214 sizeof (toc->bitmap2_name)) != 0) {
215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", 215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
237{ 237{
238 BUG_ON (!data || !vm); 238 BUG_ON (!data || !vm);
239 239
240 if (MAGIC_VMDB != BE32 (data)) { 240 if (MAGIC_VMDB != get_unaligned_be32(data)) {
241 ldm_crit ("Cannot find the VMDB, database may be corrupt."); 241 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
242 return false; 242 return false;
243 } 243 }
244 244
245 vm->ver_major = BE16 (data + 0x12); 245 vm->ver_major = get_unaligned_be16(data + 0x12);
246 vm->ver_minor = BE16 (data + 0x14); 246 vm->ver_minor = get_unaligned_be16(data + 0x14);
247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { 247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. " 248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); 249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
250 return false; 250 return false;
251 } 251 }
252 252
253 vm->vblk_size = BE32 (data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 vm->vblk_offset = BE32 (data + 0x0C); 254 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = BE32 (data + 0x04); 255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 256
257 ldm_debug ("Parsed VMDB successfully."); 257 ldm_debug ("Parsed VMDB successfully.");
258 return true; 258 return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
507 goto out; /* Already logged */ 507 goto out; /* Already logged */
508 508
509 /* Are there uncommitted transactions? */ 509 /* Are there uncommitted transactions? */
510 if (BE16(data + 0x10) != 0x01) { 510 if (get_unaligned_be16(data + 0x10) != 0x01) {
511 ldm_crit ("Database is not in a consistent state. Aborting."); 511 ldm_crit ("Database is not in a consistent state. Aborting.");
512 goto out; 512 goto out;
513 } 513 }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
802 return false; 802 return false;
803 803
804 len += VBLK_SIZE_CMP3; 804 len += VBLK_SIZE_CMP3;
805 if (len != BE32 (buffer + 0x14)) 805 if (len != get_unaligned_be32(buffer + 0x14))
806 return false; 806 return false;
807 807
808 comp = &vb->vblk.comp; 808 comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
851 return false; 851 return false;
852 852
853 len += VBLK_SIZE_DGR3; 853 len += VBLK_SIZE_DGR3;
854 if (len != BE32 (buffer + 0x14)) 854 if (len != get_unaligned_be32(buffer + 0x14))
855 return false; 855 return false;
856 856
857 dgrp = &vb->vblk.dgrp; 857 dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
895 return false; 895 return false;
896 896
897 len += VBLK_SIZE_DGR4; 897 len += VBLK_SIZE_DGR4;
898 if (len != BE32 (buffer + 0x14)) 898 if (len != get_unaligned_be32(buffer + 0x14))
899 return false; 899 return false;
900 900
901 dgrp = &vb->vblk.dgrp; 901 dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
931 return false; 931 return false;
932 932
933 len += VBLK_SIZE_DSK3; 933 len += VBLK_SIZE_DSK3;
934 if (len != BE32 (buffer + 0x14)) 934 if (len != get_unaligned_be32(buffer + 0x14))
935 return false; 935 return false;
936 936
937 disk = &vb->vblk.disk; 937 disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
968 return false; 968 return false;
969 969
970 len += VBLK_SIZE_DSK4; 970 len += VBLK_SIZE_DSK4;
971 if (len != BE32 (buffer + 0x14)) 971 if (len != get_unaligned_be32(buffer + 0x14))
972 return false; 972 return false;
973 973
974 disk = &vb->vblk.disk; 974 disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
1034 return false; 1034 return false;
1035 } 1035 }
1036 len += VBLK_SIZE_PRT3; 1036 len += VBLK_SIZE_PRT3;
1037 if (len > BE32(buffer + 0x14)) { 1037 if (len > get_unaligned_be32(buffer + 0x14)) {
1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1039 BE32(buffer + 0x14)); 1039 get_unaligned_be32(buffer + 0x14));
1040 return false; 1040 return false;
1041 } 1041 }
1042 part = &vb->vblk.part; 1042 part = &vb->vblk.part;
1043 part->start = BE64(buffer + 0x24 + r_name); 1043 part->start = get_unaligned_be64(buffer + 0x24 + r_name);
1044 part->volume_offset = BE64(buffer + 0x2C + r_name); 1044 part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name); 1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); 1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); 1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1139 return false; 1139 return false;
1140 } 1140 }
1141 len += VBLK_SIZE_VOL5; 1141 len += VBLK_SIZE_VOL5;
1142 if (len > BE32(buffer + 0x14)) { 1142 if (len > get_unaligned_be32(buffer + 0x14)) {
1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1144 BE32(buffer + 0x14)); 1144 get_unaligned_be32(buffer + 0x14));
1145 return false; 1145 return false;
1146 } 1146 }
1147 volu = &vb->vblk.volu; 1147 volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1294 1294
1295 BUG_ON (!data || !frags); 1295 BUG_ON (!data || !frags);
1296 1296
1297 group = BE32 (data + 0x08); 1297 group = get_unaligned_be32(data + 0x08);
1298 rec = BE16 (data + 0x0C); 1298 rec = get_unaligned_be16(data + 0x0C);
1299 num = BE16 (data + 0x0E); 1299 num = get_unaligned_be16(data + 0x0E);
1300 if ((num < 1) || (num > 4)) { 1300 if ((num < 1) || (num > 4)) {
1301 ldm_error ("A VBLK claims to have %d parts.", num); 1301 ldm_error ("A VBLK claims to have %d parts.", num);
1302 return false; 1302 return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1425 } 1425 }
1426 1426
1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ 1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1428 if (MAGIC_VBLK != BE32 (data)) { 1428 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1429 ldm_error ("Expected to find a VBLK."); 1429 ldm_error ("Expected to find a VBLK.");
1430 goto out; 1430 goto out;
1431 } 1431 }
1432 1432
1433 recs = BE16 (data + 0x0E); /* Number of records */ 1433 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1434 if (recs == 1) { 1434 if (recs == 1) {
1435 if (!ldm_ldmdb_add (data, size, ldb)) 1435 if (!ldm_ldmdb_add (data, size, ldb))
1436 goto out; /* Already logged */ 1436 goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9f..30e08e809c1d 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
98#define TOC_BITMAP1 "config" /* Names of the two defined */ 98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ 99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100 100
101/* Most numbers we deal with are big-endian and won't be aligned. */
102#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
103#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
104#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
105
106/* Borrowed from msdos.c */ 101/* Borrowed from msdos.c */
107#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) 102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
108 103
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..10c4e9aa5c49 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -950,7 +950,7 @@ fail_inode:
950 return NULL; 950 return NULL;
951} 951}
952 952
953struct file *create_write_pipe(void) 953struct file *create_write_pipe(int flags)
954{ 954{
955 int err; 955 int err;
956 struct inode *inode; 956 struct inode *inode;
@@ -983,7 +983,7 @@ struct file *create_write_pipe(void)
983 goto err_dentry; 983 goto err_dentry;
984 f->f_mapping = inode->i_mapping; 984 f->f_mapping = inode->i_mapping;
985 985
986 f->f_flags = O_WRONLY; 986 f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
987 f->f_version = 0; 987 f->f_version = 0;
988 988
989 return f; 989 return f;
@@ -1007,7 +1007,7 @@ void free_write_pipe(struct file *f)
1007 put_filp(f); 1007 put_filp(f);
1008} 1008}
1009 1009
1010struct file *create_read_pipe(struct file *wrf) 1010struct file *create_read_pipe(struct file *wrf, int flags)
1011{ 1011{
1012 struct file *f = get_empty_filp(); 1012 struct file *f = get_empty_filp();
1013 if (!f) 1013 if (!f)
@@ -1019,7 +1019,7 @@ struct file *create_read_pipe(struct file *wrf)
1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1020 1020
1021 f->f_pos = 0; 1021 f->f_pos = 0;
1022 f->f_flags = O_RDONLY; 1022 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1023 f->f_op = &read_pipe_fops; 1023 f->f_op = &read_pipe_fops;
1024 f->f_mode = FMODE_READ; 1024 f->f_mode = FMODE_READ;
1025 f->f_version = 0; 1025 f->f_version = 0;
@@ -1027,26 +1027,29 @@ struct file *create_read_pipe(struct file *wrf)
1027 return f; 1027 return f;
1028} 1028}
1029 1029
1030int do_pipe(int *fd) 1030int do_pipe_flags(int *fd, int flags)
1031{ 1031{
1032 struct file *fw, *fr; 1032 struct file *fw, *fr;
1033 int error; 1033 int error;
1034 int fdw, fdr; 1034 int fdw, fdr;
1035 1035
1036 fw = create_write_pipe(); 1036 if (flags & ~(O_CLOEXEC | O_NONBLOCK))
1037 return -EINVAL;
1038
1039 fw = create_write_pipe(flags);
1037 if (IS_ERR(fw)) 1040 if (IS_ERR(fw))
1038 return PTR_ERR(fw); 1041 return PTR_ERR(fw);
1039 fr = create_read_pipe(fw); 1042 fr = create_read_pipe(fw, flags);
1040 error = PTR_ERR(fr); 1043 error = PTR_ERR(fr);
1041 if (IS_ERR(fr)) 1044 if (IS_ERR(fr))
1042 goto err_write_pipe; 1045 goto err_write_pipe;
1043 1046
1044 error = get_unused_fd(); 1047 error = get_unused_fd_flags(flags);
1045 if (error < 0) 1048 if (error < 0)
1046 goto err_read_pipe; 1049 goto err_read_pipe;
1047 fdr = error; 1050 fdr = error;
1048 1051
1049 error = get_unused_fd(); 1052 error = get_unused_fd_flags(flags);
1050 if (error < 0) 1053 if (error < 0)
1051 goto err_fdr; 1054 goto err_fdr;
1052 fdw = error; 1055 fdw = error;
@@ -1074,16 +1077,21 @@ int do_pipe(int *fd)
1074 return error; 1077 return error;
1075} 1078}
1076 1079
1080int do_pipe(int *fd)
1081{
1082 return do_pipe_flags(fd, 0);
1083}
1084
1077/* 1085/*
1078 * sys_pipe() is the normal C calling standard for creating 1086 * sys_pipe() is the normal C calling standard for creating
1079 * a pipe. It's not the way Unix traditionally does this, though. 1087 * a pipe. It's not the way Unix traditionally does this, though.
1080 */ 1088 */
1081asmlinkage long __weak sys_pipe(int __user *fildes) 1089asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
1082{ 1090{
1083 int fd[2]; 1091 int fd[2];
1084 int error; 1092 int error;
1085 1093
1086 error = do_pipe(fd); 1094 error = do_pipe_flags(fd, flags);
1087 if (!error) { 1095 if (!error) {
1088 if (copy_to_user(fildes, fd, sizeof(fd))) { 1096 if (copy_to_user(fildes, fd, sizeof(fd))) {
1089 sys_close(fd[0]); 1097 sys_close(fd[0]);
@@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
1094 return error; 1102 return error;
1095} 1103}
1096 1104
1105asmlinkage long __weak sys_pipe(int __user *fildes)
1106{
1107 return sys_pipe2(fildes, 0);
1108}
1109
1097/* 1110/*
1098 * pipefs should _never_ be mounted by userland - too much of security hassle, 1111 * pipefs should _never_ be mounted by userland - too much of security hassle,
1099 * no real gain from having the whole whorehouse mounted. So we don't need 1112 * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 versions of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo displays some
17 information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of a crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a891fe4cb43b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2376}
2377 2377
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2378#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2379static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2380{
2381 u64 rchar, wchar, syscr, syscw;
2382 struct task_io_accounting ioac;
2383
2384 if (!whole) {
2385 rchar = task->rchar;
2386 wchar = task->wchar;
2387 syscr = task->syscr;
2388 syscw = task->syscw;
2389 memcpy(&ioac, &task->ioac, sizeof(ioac));
2390 } else {
2391 unsigned long flags;
2392 struct task_struct *t = task;
2393 rchar = wchar = syscr = syscw = 0;
2394 memset(&ioac, 0, sizeof(ioac));
2395
2396 rcu_read_lock();
2397 do {
2398 rchar += t->rchar;
2399 wchar += t->wchar;
2400 syscr += t->syscr;
2401 syscw += t->syscw;
2402
2403 ioac.read_bytes += t->ioac.read_bytes;
2404 ioac.write_bytes += t->ioac.write_bytes;
2405 ioac.cancelled_write_bytes +=
2406 t->ioac.cancelled_write_bytes;
2407 t = next_thread(t);
2408 } while (t != task);
2409 rcu_read_unlock();
2410
2411 if (lock_task_sighand(task, &flags)) {
2412 struct signal_struct *sig = task->signal;
2413
2414 rchar += sig->rchar;
2415 wchar += sig->wchar;
2416 syscr += sig->syscr;
2417 syscw += sig->syscw;
2418
2419 ioac.read_bytes += sig->ioac.read_bytes;
2420 ioac.write_bytes += sig->ioac.write_bytes;
2421 ioac.cancelled_write_bytes +=
2422 sig->ioac.cancelled_write_bytes;
2423
2424 unlock_task_sighand(task, &flags);
2425 }
2426 }
2427
2381 return sprintf(buffer, 2428 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2429 "rchar: %llu\n"
2384 "wchar: %llu\n" 2430 "wchar: %llu\n"
2385 "syscr: %llu\n" 2431 "syscr: %llu\n"
2386 "syscw: %llu\n" 2432 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2433 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2434 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2435 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2436 (unsigned long long)rchar,
2392 (unsigned long long)task->rchar, 2437 (unsigned long long)wchar,
2393 (unsigned long long)task->wchar, 2438 (unsigned long long)syscr,
2394 (unsigned long long)task->syscr, 2439 (unsigned long long)syscw,
2395 (unsigned long long)task->syscw, 2440 (unsigned long long)ioac.read_bytes,
2396#endif 2441 (unsigned long long)ioac.write_bytes,
2397 (unsigned long long)task->ioac.read_bytes, 2442 (unsigned long long)ioac.cancelled_write_bytes);
2398 (unsigned long long)task->ioac.write_bytes, 2443}
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2444
2445static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2446{
2447 return do_io_accounting(task, buffer, 0);
2400} 2448}
2401#endif 2449
2450static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2451{
2452 return do_io_accounting(task, buffer, 1);
2453}
2454#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2455
2403/* 2456/*
2404 * Thread groups 2457 * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2523 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2524#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2525#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2526 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2527#endif
2475}; 2528};
2476 2529
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2850#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2851 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2852#endif
2853#ifdef CONFIG_TASK_IO_ACCOUNTING
2854 INF("io", S_IRUGO, tid_io_accounting),
2855#endif
2800}; 2856};
2801 2857
2802static int proc_tid_base_readdir(struct file * filp, 2858static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..bc0a0dd2d844 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 597 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 598 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 599 ent->pde_unload_completion = NULL;
600 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 601 out:
601 return ent; 602 return ent;
602} 603}
@@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 790 spin_unlock(&de->pde_unload_lock);
790 791
791continue_removing: 792continue_removing:
793 spin_lock(&de->pde_unload_lock);
794 while (!list_empty(&de->pde_openers)) {
795 struct pde_opener *pdeo;
796
797 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
798 list_del(&pdeo->lh);
799 spin_unlock(&de->pde_unload_lock);
800 pdeo->release(pdeo->inode, pdeo->file);
801 kfree(pdeo);
802 spin_lock(&de->pde_unload_lock);
803 }
804 spin_unlock(&de->pde_unload_lock);
805
792 if (S_ISDIR(de->mode)) 806 if (S_ISDIR(de->mode))
793 parent->nlink--; 807 parent->nlink--;
794 de->nlink = 0; 808 de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..02eca2ed9dd7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -111,27 +111,25 @@ int __init proc_init_inodecache(void)
111 return 0; 111 return 0;
112} 112}
113 113
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 114static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 115 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 116 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 117 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 118 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 119 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 120};
128 121
129static void pde_users_dec(struct proc_dir_entry *pde) 122static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 123{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 124 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 125 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 126 complete(pde->pde_unload_completion);
127}
128
129static void pde_users_dec(struct proc_dir_entry *pde)
130{
131 spin_lock(&pde->pde_unload_lock);
132 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 133 spin_unlock(&pde->pde_unload_lock);
136} 134}
137 135
@@ -318,36 +316,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 316 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 317 int rv = 0;
320 int (*open)(struct inode *, struct file *); 318 int (*open)(struct inode *, struct file *);
319 int (*release)(struct inode *, struct file *);
320 struct pde_opener *pdeo;
321
322 /*
323 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
324 * sequence. ->release won't be called because ->proc_fops will be
325 * cleared. Depending on complexity of ->release, consequences vary.
326 *
327 * We can't wait for mercy when close will be done for real, it's
328 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
329 * by hand in remove_proc_entry(). For this, save opener's credentials
330 * for later.
331 */
332 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
333 if (!pdeo)
334 return -ENOMEM;
321 335
322 spin_lock(&pde->pde_unload_lock); 336 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 337 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 338 spin_unlock(&pde->pde_unload_lock);
339 kfree(pdeo);
325 return rv; 340 return rv;
326 } 341 }
327 pde->pde_users++; 342 pde->pde_users++;
328 open = pde->proc_fops->open; 343 open = pde->proc_fops->open;
344 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 345 spin_unlock(&pde->pde_unload_lock);
330 346
331 if (open) 347 if (open)
332 rv = open(inode, file); 348 rv = open(inode, file);
333 349
334 pde_users_dec(pde); 350 spin_lock(&pde->pde_unload_lock);
351 if (rv == 0 && release) {
352 /* To know what to release. */
353 pdeo->inode = inode;
354 pdeo->file = file;
355 /* Strictly for "too late" ->release in proc_reg_release(). */
356 pdeo->release = release;
357 list_add(&pdeo->lh, &pde->pde_openers);
358 } else
359 kfree(pdeo);
360 __pde_users_dec(pde);
361 spin_unlock(&pde->pde_unload_lock);
335 return rv; 362 return rv;
336} 363}
337 364
365static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
366 struct inode *inode, struct file *file)
367{
368 struct pde_opener *pdeo;
369
370 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
371 if (pdeo->inode == inode && pdeo->file == file)
372 return pdeo;
373 }
374 return NULL;
375}
376
338static int proc_reg_release(struct inode *inode, struct file *file) 377static int proc_reg_release(struct inode *inode, struct file *file)
339{ 378{
340 struct proc_dir_entry *pde = PDE(inode); 379 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 380 int rv = 0;
342 int (*release)(struct inode *, struct file *); 381 int (*release)(struct inode *, struct file *);
382 struct pde_opener *pdeo;
343 383
344 spin_lock(&pde->pde_unload_lock); 384 spin_lock(&pde->pde_unload_lock);
385 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 386 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 387 /*
388 * Can't simply exit, __fput() will think that everything is OK,
389 * and move on to freeing struct file. remove_proc_entry() will
390 * find slacker in opener's list and will try to do non-trivial
391 * things with struct file. Therefore, remove opener from list.
392 *
393 * But if opener is removed from list, who will ->release it?
394 */
395 if (pdeo) {
396 list_del(&pdeo->lh);
397 spin_unlock(&pde->pde_unload_lock);
398 rv = pdeo->release(inode, file);
399 kfree(pdeo);
400 } else
401 spin_unlock(&pde->pde_unload_lock);
347 return rv; 402 return rv;
348 } 403 }
349 pde->pde_users++; 404 pde->pde_users++;
350 release = pde->proc_fops->release; 405 release = pde->proc_fops->release;
406 if (pdeo) {
407 list_del(&pdeo->lh);
408 kfree(pdeo);
409 }
351 spin_unlock(&pde->pde_unload_lock); 410 spin_unlock(&pde->pde_unload_lock);
352 411
353 if (release) 412 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
232#undef K 232#undef K
233} 233}
234 234
235extern const struct seq_operations fragmentation_op;
236static int fragmentation_open(struct inode *inode, struct file *file) 235static int fragmentation_open(struct inode *inode, struct file *file)
237{ 236{
238 (void)inode; 237 (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
246 .release = seq_release, 245 .release = seq_release,
247}; 246};
248 247
249extern const struct seq_operations pagetypeinfo_op;
250static int pagetypeinfo_open(struct inode *inode, struct file *file) 248static int pagetypeinfo_open(struct inode *inode, struct file *file)
251{ 249{
252 return seq_open(file, &pagetypeinfo_op); 250 return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
259 .release = seq_release, 257 .release = seq_release,
260}; 258};
261 259
262extern const struct seq_operations zoneinfo_op;
263static int zoneinfo_open(struct inode *inode, struct file *file) 260static int zoneinfo_open(struct inode *inode, struct file *file)
264{ 261{
265 return seq_open(file, &zoneinfo_op); 262 return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
356 .release = seq_release, 353 .release = seq_release,
357}; 354};
358 355
359extern const struct seq_operations vmstat_op;
360static int vmstat_open(struct inode *inode, struct file *file) 356static int vmstat_open(struct inode *inode, struct file *file)
361{ 357{
362 return seq_open(file, &vmstat_op); 358 return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
468#ifdef CONFIG_MMU 464#ifdef CONFIG_MMU
469static int vmalloc_open(struct inode *inode, struct file *file) 465static int vmalloc_open(struct inode *inode, struct file *file)
470{ 466{
471 return seq_open(file, &vmalloc_op); 467 unsigned int *ptr = NULL;
468 int ret;
469
470 if (NUMA_BUILD)
471 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
472 ret = seq_open(file, &vmalloc_op);
473 if (!ret) {
474 struct seq_file *m = file->private_data;
475 m->private = ptr;
476 } else
477 kfree(ptr);
478 return ret;
472} 479}
473 480
474static const struct file_operations proc_vmalloc_operations = { 481static const struct file_operations proc_vmalloc_operations = {
475 .open = vmalloc_open, 482 .open = vmalloc_open,
476 .read = seq_read, 483 .read = seq_read,
477 .llseek = seq_lseek, 484 .llseek = seq_lseek,
478 .release = seq_release, 485 .release = seq_release_private,
479}; 486};
480#endif 487#endif
481 488
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
186 186
187void sync_dquots(struct super_block *sb, int type) 187void sync_dquots(struct super_block *sb, int type)
188{ 188{
189 int cnt, dirty; 189 int cnt;
190 190
191 if (sb) { 191 if (sb) {
192 if (sb->s_qcop->quota_sync) 192 if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
198restart: 198restart:
199 list_for_each_entry(sb, &super_blocks, s_list) { 199 list_for_each_entry(sb, &super_blocks, s_list) {
200 /* This test just improves performance so it needn't be reliable... */ 200 /* This test just improves performance so it needn't be reliable... */
201 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 201 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 202 if (type != -1 && type != cnt)
203 && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 203 continue;
204 dirty = 1; 204 if (!sb_has_quota_enabled(sb, cnt))
205 if (!dirty) 205 continue;
206 if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
207 list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
208 continue;
209 break;
210 }
211 if (cnt == MAXQUOTAS)
206 continue; 212 continue;
207 sb->s_count++; 213 sb->s_count++;
208 spin_unlock(&sb_lock); 214 spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/quota.h> 3#include <linux/quota.h>
4#include <linux/quotaops.h>
4#include <linux/dqblk_v1.h> 5#include <linux/dqblk_v1.h>
5#include <linux/quotaio_v1.h> 6#include <linux/quotaio_v1.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/quotaops.h>
14 15
15#include <asm/byteorder.h> 16#include <asm/byteorder.h>
16 17
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..c8f60ee183b5 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
34** from within kupdate, it will ignore the immediate flag 34** from within kupdate, it will ignore the immediate flag
35*/ 35*/
36 36
37#include <asm/uaccess.h>
38#include <asm/system.h>
39
40#include <linux/time.h> 37#include <linux/time.h>
41#include <linux/semaphore.h> 38#include <linux/semaphore.h>
42
43#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
45
46#include <linux/kernel.h> 41#include <linux/kernel.h>
47#include <linux/errno.h> 42#include <linux/errno.h>
48#include <linux/fcntl.h> 43#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
54#include <linux/writeback.h> 49#include <linux/writeback.h>
55#include <linux/blkdev.h> 50#include <linux/blkdev.h>
56#include <linux/backing-dev.h> 51#include <linux/backing-dev.h>
52#include <linux/uaccess.h>
53
54#include <asm/system.h>
57 55
58/* gets a struct reiserfs_journal_list * from a list head */ 56/* gets a struct reiserfs_journal_list * from a list head */
59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 57#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
558static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *p_s_sb)
559{ 557{
560 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(p_s_sb, journal.lock_journal);
561 down(&SB_JOURNAL(p_s_sb)->j_lock); 559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
562} 560}
563 561
564/* unlock the current transaction */ 562/* unlock the current transaction */
565static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *p_s_sb)
566{ 564{
567 up(&SB_JOURNAL(p_s_sb)->j_lock); 565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
568} 566}
569 567
570static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
1045 } 1043 }
1046 1044
1047 /* make sure nobody is trying to flush this one at the same time */ 1045 /* make sure nobody is trying to flush this one at the same time */
1048 down(&jl->j_commit_lock); 1046 mutex_lock(&jl->j_commit_mutex);
1049 if (!journal_list_still_alive(s, trans_id)) { 1047 if (!journal_list_still_alive(s, trans_id)) {
1050 up(&jl->j_commit_lock); 1048 mutex_unlock(&jl->j_commit_mutex);
1051 goto put_jl; 1049 goto put_jl;
1052 } 1050 }
1053 BUG_ON(jl->j_trans_id == 0); 1051 BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
1057 if (flushall) { 1055 if (flushall) {
1058 atomic_set(&(jl->j_older_commits_done), 1); 1056 atomic_set(&(jl->j_older_commits_done), 1);
1059 } 1057 }
1060 up(&jl->j_commit_lock); 1058 mutex_unlock(&jl->j_commit_mutex);
1061 goto put_jl; 1059 goto put_jl;
1062 } 1060 }
1063 1061
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
1181 if (flushall) { 1179 if (flushall) {
1182 atomic_set(&(jl->j_older_commits_done), 1); 1180 atomic_set(&(jl->j_older_commits_done), 1);
1183 } 1181 }
1184 up(&jl->j_commit_lock); 1182 mutex_unlock(&jl->j_commit_mutex);
1185 put_jl: 1183 put_jl:
1186 put_journal_list(s, jl); 1184 put_journal_list(s, jl);
1187 1185
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
1411 1409
1412 /* if flushall == 0, the lock is already held */ 1410 /* if flushall == 0, the lock is already held */
1413 if (flushall) { 1411 if (flushall) {
1414 down(&journal->j_flush_sem); 1412 mutex_lock(&journal->j_flush_mutex);
1415 } else if (!down_trylock(&journal->j_flush_sem)) { 1413 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1416 BUG(); 1414 BUG();
1417 } 1415 }
1418 1416
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
1642 jl->j_state = 0; 1640 jl->j_state = 0;
1643 put_journal_list(s, jl); 1641 put_journal_list(s, jl);
1644 if (flushall) 1642 if (flushall)
1645 up(&journal->j_flush_sem); 1643 mutex_unlock(&journal->j_flush_mutex);
1646 put_fs_excl(); 1644 put_fs_excl();
1647 return err; 1645 return err;
1648} 1646}
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
1772 struct reiserfs_journal *journal = SB_JOURNAL(s); 1770 struct reiserfs_journal *journal = SB_JOURNAL(s);
1773 chunk.nr = 0; 1771 chunk.nr = 0;
1774 1772
1775 down(&journal->j_flush_sem); 1773 mutex_lock(&journal->j_flush_mutex);
1776 if (!journal_list_still_alive(s, orig_trans_id)) { 1774 if (!journal_list_still_alive(s, orig_trans_id)) {
1777 goto done; 1775 goto done;
1778 } 1776 }
1779 1777
1780 /* we've got j_flush_sem held, nobody is going to delete any 1778 /* we've got j_flush_mutex held, nobody is going to delete any
1781 * of these lists out from underneath us 1779 * of these lists out from underneath us
1782 */ 1780 */
1783 while ((num_trans && transactions_flushed < num_trans) || 1781 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
1812 } 1810 }
1813 1811
1814 done: 1812 done:
1815 up(&journal->j_flush_sem); 1813 mutex_unlock(&journal->j_flush_mutex);
1816 return ret; 1814 return ret;
1817} 1815}
1818 1816
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2556 INIT_LIST_HEAD(&jl->j_working_list); 2554 INIT_LIST_HEAD(&jl->j_working_list);
2557 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2555 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2558 INIT_LIST_HEAD(&jl->j_bh_list); 2556 INIT_LIST_HEAD(&jl->j_bh_list);
2559 sema_init(&jl->j_commit_lock, 1); 2557 mutex_init(&jl->j_commit_mutex);
2560 SB_JOURNAL(s)->j_num_lists++; 2558 SB_JOURNAL(s)->j_num_lists++;
2561 get_journal_list(jl); 2559 get_journal_list(jl);
2562 return jl; 2560 return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2837 journal->j_last = NULL; 2835 journal->j_last = NULL;
2838 journal->j_first = NULL; 2836 journal->j_first = NULL;
2839 init_waitqueue_head(&(journal->j_join_wait)); 2837 init_waitqueue_head(&(journal->j_join_wait));
2840 sema_init(&journal->j_lock, 1); 2838 mutex_init(&journal->j_mutex);
2841 sema_init(&journal->j_flush_sem, 1); 2839 mutex_init(&journal->j_flush_mutex);
2842 2840
2843 journal->j_trans_id = 10; 2841 journal->j_trans_id = 10;
2844 journal->j_mount_id = 10; 2842 journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4030 * the new transaction is fully setup, and we've already flushed the 4028 * the new transaction is fully setup, and we've already flushed the
4031 * ordered bh list 4029 * ordered bh list
4032 */ 4030 */
4033 down(&jl->j_commit_lock); 4031 mutex_lock(&jl->j_commit_mutex);
4034 4032
4035 /* save the transaction id in case we need to commit it later */ 4033 /* save the transaction id in case we need to commit it later */
4036 commit_trans_id = jl->j_trans_id; 4034 commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4196 lock_kernel(); 4194 lock_kernel();
4197 } 4195 }
4198 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4196 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4199 up(&jl->j_commit_lock); 4197 mutex_unlock(&jl->j_commit_mutex);
4200 4198
4201 /* honor the flush wishes from the caller, simple commits can 4199 /* honor the flush wishes from the caller, simple commits can
4202 ** be done outside the journal lock, they are done below 4200 ** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd1970..2ec748ba0bd3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h>
25#include <linux/vfs.h> 26#include <linux/vfs.h>
26#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
@@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s)
182 int ret = reiserfs_quota_on_mount(s, i); 183 int ret = reiserfs_quota_on_mount(s, i);
183 if (ret < 0) 184 if (ret < 0)
184 reiserfs_warning(s, 185 reiserfs_warning(s,
185 "reiserfs: cannot turn on journalled quota: error %d", 186 "reiserfs: cannot turn on journaled quota: error %d",
186 ret); 187 ret);
187 } 188 }
188 } 189 }
@@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
876 mount options were selected. */ 877 mount options were selected. */
877 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ 878 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
878 char **jdev_name, 879 char **jdev_name,
879 unsigned int *commit_max_age) 880 unsigned int *commit_max_age,
881 char **qf_names,
882 unsigned int *qfmt)
880{ 883{
881 int c; 884 int c;
882 char *arg = NULL; 885 char *arg = NULL;
@@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
992 if (c == 'u' || c == 'g') { 995 if (c == 'u' || c == 'g') {
993 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 996 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
994 997
995 if (sb_any_quota_enabled(s)) { 998 if ((sb_any_quota_enabled(s) ||
999 sb_any_quota_suspended(s)) &&
1000 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
996 reiserfs_warning(s, 1001 reiserfs_warning(s,
997 "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 1002 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
998 return 0; 1003 return 0;
999 } 1004 }
1000 if (*arg) { /* Some filename specified? */ 1005 if (*arg) { /* Some filename specified? */
@@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1011 "reiserfs_parse_options: quotafile must be on filesystem root."); 1016 "reiserfs_parse_options: quotafile must be on filesystem root.");
1012 return 0; 1017 return 0;
1013 } 1018 }
1014 REISERFS_SB(s)->s_qf_names[qtype] = 1019 qf_names[qtype] =
1015 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1020 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1016 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1021 if (!qf_names[qtype]) {
1017 reiserfs_warning(s, 1022 reiserfs_warning(s,
1018 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1023 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1019 return 0; 1024 return 0;
1020 } 1025 }
1021 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1026 strcpy(qf_names[qtype], arg);
1022 *mount_options |= 1 << REISERFS_QUOTA; 1027 *mount_options |= 1 << REISERFS_QUOTA;
1023 } else { 1028 } else {
1024 kfree(REISERFS_SB(s)->s_qf_names[qtype]); 1029 if (qf_names[qtype] !=
1025 REISERFS_SB(s)->s_qf_names[qtype] = NULL; 1030 REISERFS_SB(s)->s_qf_names[qtype])
1031 kfree(qf_names[qtype]);
1032 qf_names[qtype] = NULL;
1026 } 1033 }
1027 } 1034 }
1028 if (c == 'f') { 1035 if (c == 'f') {
1029 if (!strcmp(arg, "vfsold")) 1036 if (!strcmp(arg, "vfsold"))
1030 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1037 *qfmt = QFMT_VFS_OLD;
1031 else if (!strcmp(arg, "vfsv0")) 1038 else if (!strcmp(arg, "vfsv0"))
1032 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1039 *qfmt = QFMT_VFS_V0;
1033 else { 1040 else {
1034 reiserfs_warning(s, 1041 reiserfs_warning(s,
1035 "reiserfs_parse_options: unknown quota format specified."); 1042 "reiserfs_parse_options: unknown quota format specified.");
1036 return 0; 1043 return 0;
1037 } 1044 }
1045 if ((sb_any_quota_enabled(s) ||
1046 sb_any_quota_suspended(s)) &&
1047 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1048 reiserfs_warning(s,
1049 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
1050 return 0;
1051 }
1038 } 1052 }
1039#else 1053#else
1040 if (c == 'u' || c == 'g' || c == 'f') { 1054 if (c == 'u' || c == 'g' || c == 'f') {
1041 reiserfs_warning(s, 1055 reiserfs_warning(s,
1042 "reiserfs_parse_options: journalled quota options not supported."); 1056 "reiserfs_parse_options: journaled quota options not supported.");
1043 return 0; 1057 return 0;
1044 } 1058 }
1045#endif 1059#endif
1046 } 1060 }
1047 1061
1048#ifdef CONFIG_QUOTA 1062#ifdef CONFIG_QUOTA
1049 if (!REISERFS_SB(s)->s_jquota_fmt 1063 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1050 && (REISERFS_SB(s)->s_qf_names[USRQUOTA] 1064 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1051 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
1052 reiserfs_warning(s, 1065 reiserfs_warning(s,
1053 "reiserfs_parse_options: journalled quota format not specified."); 1066 "reiserfs_parse_options: journaled quota format not specified.");
1054 return 0; 1067 return 0;
1055 } 1068 }
1056 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1069 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s)
1130 } 1143 }
1131} 1144}
1132 1145
1146#ifdef CONFIG_QUOTA
1147static void handle_quota_files(struct super_block *s, char **qf_names,
1148 unsigned int *qfmt)
1149{
1150 int i;
1151
1152 for (i = 0; i < MAXQUOTAS; i++) {
1153 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1154 kfree(REISERFS_SB(s)->s_qf_names[i]);
1155 REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
1156 }
1157 REISERFS_SB(s)->s_jquota_fmt = *qfmt;
1158}
1159#endif
1160
1133static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) 1161static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1134{ 1162{
1135 struct reiserfs_super_block *rs; 1163 struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1141 struct reiserfs_journal *journal = SB_JOURNAL(s); 1169 struct reiserfs_journal *journal = SB_JOURNAL(s);
1142 char *new_opts = kstrdup(arg, GFP_KERNEL); 1170 char *new_opts = kstrdup(arg, GFP_KERNEL);
1143 int err; 1171 int err;
1172 char *qf_names[MAXQUOTAS];
1173 unsigned int qfmt = 0;
1144#ifdef CONFIG_QUOTA 1174#ifdef CONFIG_QUOTA
1145 int i; 1175 int i;
1176
1177 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1146#endif 1178#endif
1147 1179
1148 rs = SB_DISK_SUPER_BLOCK(s); 1180 rs = SB_DISK_SUPER_BLOCK(s);
1149 1181
1150 if (!reiserfs_parse_options 1182 if (!reiserfs_parse_options
1151 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1183 (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
1184 qf_names, &qfmt)) {
1152#ifdef CONFIG_QUOTA 1185#ifdef CONFIG_QUOTA
1153 for (i = 0; i < MAXQUOTAS; i++) { 1186 for (i = 0; i < MAXQUOTAS; i++)
1154 kfree(REISERFS_SB(s)->s_qf_names[i]); 1187 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1155 REISERFS_SB(s)->s_qf_names[i] = NULL; 1188 kfree(qf_names[i]);
1156 }
1157#endif 1189#endif
1158 err = -EINVAL; 1190 err = -EINVAL;
1159 goto out_err; 1191 goto out_err;
1160 } 1192 }
1193#ifdef CONFIG_QUOTA
1194 handle_quota_files(s, qf_names, &qfmt);
1195#endif
1161 1196
1162 handle_attrs(s); 1197 handle_attrs(s);
1163 1198
@@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1570 char *jdev_name; 1605 char *jdev_name;
1571 struct reiserfs_sb_info *sbi; 1606 struct reiserfs_sb_info *sbi;
1572 int errval = -EINVAL; 1607 int errval = -EINVAL;
1608 char *qf_names[MAXQUOTAS] = {};
1609 unsigned int qfmt = 0;
1573 1610
1574 save_mount_options(s, data); 1611 save_mount_options(s, data);
1575 1612
@@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1597 jdev_name = NULL; 1634 jdev_name = NULL;
1598 if (reiserfs_parse_options 1635 if (reiserfs_parse_options
1599 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1636 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1600 &commit_max_age) == 0) { 1637 &commit_max_age, qf_names, &qfmt) == 0) {
1601 goto error; 1638 goto error;
1602 } 1639 }
1640#ifdef CONFIG_QUOTA
1641 handle_quota_files(s, qf_names, &qfmt);
1642#endif
1603 1643
1604 if (blocks) { 1644 if (blocks) {
1605 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1645 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1819 1859
1820 return (0); 1860 return (0);
1821 1861
1822 error: 1862error:
1823 if (jinit_done) { /* kill the commit thread, free journal ram */ 1863 if (jinit_done) { /* kill the commit thread, free journal ram */
1824 journal_release_error(NULL, s); 1864 journal_release_error(NULL, s);
1825 } 1865 }
@@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1830#ifdef CONFIG_QUOTA 1870#ifdef CONFIG_QUOTA
1831 { 1871 {
1832 int j; 1872 int j;
1833 for (j = 0; j < MAXQUOTAS; j++) { 1873 for (j = 0; j < MAXQUOTAS; j++)
1834 kfree(sbi->s_qf_names[j]); 1874 kfree(qf_names[j]);
1835 sbi->s_qf_names[j] = NULL;
1836 }
1837 } 1875 }
1838#endif 1876#endif
1839 kfree(sbi); 1877 kfree(sbi);
@@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1980 2018
1981static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2019static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1982{ 2020{
1983 /* Are we journalling quotas? */ 2021 /* Are we journaling quotas? */
1984 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2022 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1985 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2023 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1986 dquot_mark_dquot_dirty(dquot); 2024 dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2026 int err; 2064 int err;
2027 struct nameidata nd; 2065 struct nameidata nd;
2028 struct inode *inode; 2066 struct inode *inode;
2067 struct reiserfs_transaction_handle th;
2029 2068
2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2069 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2031 return -EINVAL; 2070 return -EINVAL;
@@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2053 } 2092 }
2054 mark_inode_dirty(inode); 2093 mark_inode_dirty(inode);
2055 } 2094 }
2056 /* Not journalling quota? No more tests needed... */ 2095 /* Journaling quota? */
2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2096 if (REISERFS_SB(sb)->s_qf_names[type]) {
2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2097 /* Quotafile not of fs root? */
2059 path_put(&nd.path); 2098 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2060 return vfs_quota_on(sb, type, format_id, path, 0); 2099 reiserfs_warning(sb,
2061 }
2062 /* Quotafile not of fs root? */
2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2064 reiserfs_warning(sb,
2065 "reiserfs: Quota file not on filesystem root. " 2100 "reiserfs: Quota file not on filesystem root. "
2066 "Journalled quota will not work."); 2101 "Journalled quota will not work.");
2102 }
2103
2104 /*
2105 * When we journal data on quota file, we have to flush journal to see
2106 * all updates to the file when we bypass pagecache...
2107 */
2108 if (reiserfs_file_data_log(inode)) {
2109 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1);
2111 if (err)
2112 return err;
2113 err = journal_end_sync(&th, sb, 1);
2114 if (err)
2115 return err;
2116 }
2067 path_put(&nd.path); 2117 path_put(&nd.path);
2068 return vfs_quota_on(sb, type, format_id, path, 0); 2118 return vfs_quota_on(sb, type, format_id, path, 0);
2069} 2119}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60b..056008db1377 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
6#include <linux/reiserfs_xattr.h> 6#include <linux/reiserfs_xattr.h>
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9#define XATTR_SECURITY_PREFIX "security."
10
11static int 9static int
12security_get(struct inode *inode, const char *name, void *buffer, size_t size) 10security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 11{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60f..60abe2bb1f98 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
7#include <linux/reiserfs_xattr.h> 7#include <linux/reiserfs_xattr.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10#define XATTR_TRUSTED_PREFIX "trusted."
11
12static int 10static int
13trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
14{ 12{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b11..1384efcb938e 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
10# include <linux/reiserfs_acl.h> 10# include <linux/reiserfs_acl.h>
11#endif 11#endif
12 12
13#define XATTR_USER_PREFIX "user."
14
15static int 13static int
16user_get(struct inode *inode, const char *name, void *buffer, size_t size) 14user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 15{
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..9c39bc7f8431 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
205 .read = signalfd_read, 205 .read = signalfd_read,
206}; 206};
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
209 size_t sizemask, int flags)
209{ 210{
210 sigset_t sigmask; 211 sigset_t sigmask;
211 struct signalfd_ctx *ctx; 212 struct signalfd_ctx *ctx;
212 213
214 /* Check the SFD_* constants for consistency. */
215 BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
216 BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
217
218 if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
219 return -EINVAL;
220
213 if (sizemask != sizeof(sigset_t) || 221 if (sizemask != sizeof(sigset_t) ||
214 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 222 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
215 return -EINVAL; 223 return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
227 * When we call this, the initialization must be complete, since 235 * When we call this, the initialization must be complete, since
228 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
229 */ 237 */
230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK));
231 if (ufd < 0) 240 if (ufd < 0)
232 kfree(ctx); 241 kfree(ctx);
233 } else { 242 } else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
249 258
250 return ufd; 259 return ufd;
251} 260}
261
262asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
263 size_t sizemask)
264{
265 return sys_signalfd4(ufd, user_mask, sizemask, 0);
266}
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dirent.h>
17#include <linux/smb_fs.h> 16#include <linux/smb_fs.h>
18#include <linux/pagemap.h> 17#include <linux/pagemap.h>
19#include <linux/net.h> 18#include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
16#include <linux/stat.h> 16#include <linux/stat.h>
17#include <linux/fcntl.h> 17#include <linux/fcntl.h>
18#include <linux/dcache.h> 18#include <linux/dcache.h>
19#include <linux/dirent.h>
20#include <linux/nls.h> 19#include <linux/nls.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/net.h> 21#include <linux/net.h>
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
70 INIT_LIST_HEAD(&s->s_instances); 70 INIT_LIST_HEAD(&s->s_instances);
71 INIT_HLIST_HEAD(&s->s_anon); 71 INIT_HLIST_HEAD(&s->s_anon);
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 INIT_LIST_HEAD(&s->s_dentry_lru);
73 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
74 mutex_init(&s->s_lock); 75 mutex_init(&s->s_lock);
75 lockdep_set_class(&s->s_umount, &type->s_umount_key); 76 lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9ee..2967562d416f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
139 * before performing the write. 139 * before performing the write.
140 * 140 *
141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the 141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
142 * range which are not presently under writeback. 142 * range which are not presently under writeback. Note that this may block for
143 * significant periods due to exhaustion of disk request structures.
143 * 144 *
144 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range 145 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
145 * after performing the write. 146 * after performing the write.
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..c502c60e4f54 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
184 int ufd; 184 int ufd;
185 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
186 186
187 if (flags) 187 /* Check the TFD_* constants for consistency. */
188 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
189 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
190
191 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
188 return -EINVAL; 192 return -EINVAL;
189 if (clockid != CLOCK_MONOTONIC && 193 if (clockid != CLOCK_MONOTONIC &&
190 clockid != CLOCK_REALTIME) 194 clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
198 ctx->clockid = clockid; 202 ctx->clockid = clockid;
199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 203 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
200 204
201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); 205 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
206 flags & (O_CLOEXEC | O_NONBLOCK));
202 if (ufd < 0) 207 if (ufd < 0)
203 kfree(ctx); 208 kfree(ctx);
204 209
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..227c9d700040 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
76 76
77#include <linux/errno.h> 77#include <linux/errno.h>
78#include <linux/fs.h> 78#include <linux/fs.h>
79#include <linux/quotaops.h>
79#include <linux/slab.h> 80#include <linux/slab.h>
80#include <linux/time.h> 81#include <linux/time.h>
81#include <linux/stat.h> 82#include <linux/stat.h>
@@ -1232,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
1232{ 1233{
1233 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
1234 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1235 struct match_token *tp = tokens; 1236 const struct match_token *tp = tokens;
1236 1237
1237 while (tp->token != Opt_onerror_panic && tp->token != mval) 1238 while (tp->token != Opt_onerror_panic && tp->token != mval)
1238 ++tp; 1239 ++tp;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 621 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 623 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date); 624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
625 de->time = de->ctime = time; 625 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 626 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 627 de->ctime_cs = 0;