Diffstat (limited to 'fs')
-rw-r--r--  fs/anon_inodes.c              |  11
-rw-r--r--  fs/autofs4/autofs_i.h         |  28
-rw-r--r--  fs/autofs4/expire.c           |  91
-rw-r--r--  fs/autofs4/inode.c            |  33
-rw-r--r--  fs/autofs4/root.c             | 589
-rw-r--r--  fs/autofs4/waitq.c            | 267
-rw-r--r--  fs/binfmt_elf.c               |   1
-rw-r--r--  fs/binfmt_misc.c              |  20
-rw-r--r--  fs/coda/psdev.c               |   5
-rw-r--r--  fs/compat.c                   |  22
-rw-r--r--  fs/compat_ioctl.c             |   2
-rw-r--r--  fs/dcache.c                   | 335
-rw-r--r--  fs/debugfs/inode.c            | 114
-rw-r--r--  fs/ecryptfs/Makefile          |   2
-rw-r--r--  fs/ecryptfs/crypto.c          |  37
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h |  23
-rw-r--r--  fs/ecryptfs/file.c            |  17
-rw-r--r--  fs/ecryptfs/inode.c           |  31
-rw-r--r--  fs/ecryptfs/keystore.c        |   9
-rw-r--r--  fs/ecryptfs/kthread.c         | 203
-rw-r--r--  fs/ecryptfs/main.c            |  79
-rw-r--r--  fs/ecryptfs/miscdev.c         |  59
-rw-r--r--  fs/ecryptfs/mmap.c            |  11
-rw-r--r--  fs/eventfd.c                  |  17
-rw-r--r--  fs/eventpoll.c                |  30
-rw-r--r--  fs/exec.c                     |   4
-rw-r--r--  fs/fcntl.c                    |  15
-rw-r--r--  fs/hugetlbfs/inode.c          | 101
-rw-r--r--  fs/inotify_user.c             |  18
-rw-r--r--  fs/open.c                     |   3
-rw-r--r--  fs/partitions/check.c         |   2
-rw-r--r--  fs/pipe.c                     |  35
-rw-r--r--  fs/proc/proc_misc.c           |  19
-rw-r--r--  fs/proc/proc_net.c            |  11
-rw-r--r--  fs/proc/task_mmu.c            |   2
-rw-r--r--  fs/signalfd.c                 |  19
-rw-r--r--  fs/super.c                    |   1
-rw-r--r--  fs/sync.c                     |   3
-rw-r--r--  fs/sysfs/dir.c                |  37
-rw-r--r--  fs/sysfs/file.c               |   5
-rw-r--r--  fs/sysfs/symlink.c            |  41
-rw-r--r--  fs/sysfs/sysfs.h              |   1
-rw-r--r--  fs/timerfd.c                  |   9
-rw-r--r--  fs/ufs/super.c                |   2
44 files changed, 1428 insertions(+), 936 deletions(-)
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..3662dd44896b 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
58 * of the file 58 * of the file
59 * 59 *
60 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
61 * @fops [in] file operations for the new file 61 * @fops: [in] file operations for the new file
62 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv: [in] private data for the new file (will be file's private_data)
63 * @flags: [in] flags
63 * 64 *
64 * Creates a new file by hooking it on a single inode. This is useful for files 65 * Creates a new file by hooking it on a single inode. This is useful for files
65 * that do not need to have a full-fledged inode in order to operate correctly. 66 * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * setup. Returns new descriptor or -error. 69 * setup. Returns new descriptor or -error.
69 */ 70 */
70int anon_inode_getfd(const char *name, const struct file_operations *fops, 71int anon_inode_getfd(const char *name, const struct file_operations *fops,
71 void *priv) 72 void *priv, int flags)
72{ 73{
73 struct qstr this; 74 struct qstr this;
74 struct dentry *dentry; 75 struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
78 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
79 return -ENODEV; 80 return -ENODEV;
80 81
81 error = get_unused_fd(); 82 error = get_unused_fd_flags(flags);
82 if (error < 0) 83 if (error < 0)
83 return error; 84 return error;
84 fd = error; 85 fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
115 file->f_mapping = anon_inode_inode->i_mapping; 116 file->f_mapping = anon_inode_inode->i_mapping;
116 117
117 file->f_pos = 0; 118 file->f_pos = 0;
118 file->f_flags = O_RDWR; 119 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
119 file->f_version = 0; 120 file->f_version = 0;
120 file->private_data = priv; 121 file->private_data = priv;
121 122
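
The change above threads a flags argument through anon_inode_getfd() so callers can pass along the open flags they received from userspace: get_unused_fd_flags() allocates the descriptor with those flags (e.g. O_CLOEXEC) and O_NONBLOCK is preserved in file->f_flags. A minimal sketch of a caller using the new signature; the demo_fops table and function name are hypothetical, only the anon_inode_getfd() prototype comes from the diff above:

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/module.h>

/* Hypothetical file operations for the anonymous file. */
static const struct file_operations demo_fops = {
	.owner = THIS_MODULE,
};

/*
 * Create an anonymous fd wrapping 'priv'.  'flags' would typically be
 * the flags word taken from the creating system call, so something
 * like O_CLOEXEC | O_NONBLOCK can be honoured at fd allocation time.
 */
static int demo_create_fd(void *priv, int flags)
{
	return anon_inode_getfd("[demo]", &demo_fops, priv, flags);
}
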
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa93..69a2f5c92319 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
52 52
53 int flags; 53 int flags;
54 54
55 struct list_head rehash; 55 struct completion expire_complete;
56
57 struct list_head active;
58 struct list_head expiring;
56 59
57 struct autofs_sb_info *sbi; 60 struct autofs_sb_info *sbi;
58 unsigned long last_used; 61 unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
68}; 71};
69 72
70#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 73#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
74#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
71 75
72struct autofs_wait_queue { 76struct autofs_wait_queue {
73 wait_queue_head_t queue; 77 wait_queue_head_t queue;
74 struct autofs_wait_queue *next; 78 struct autofs_wait_queue *next;
75 autofs_wqt_t wait_queue_token; 79 autofs_wqt_t wait_queue_token;
76 /* We use the following to see what we are waiting for */ 80 /* We use the following to see what we are waiting for */
77 unsigned int hash; 81 struct qstr name;
78 unsigned int len;
79 char *name;
80 u32 dev; 82 u32 dev;
81 u64 ino; 83 u64 ino;
82 uid_t uid; 84 uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
85 pid_t tgid; 87 pid_t tgid;
86 /* This is for status reporting upon return */ 88 /* This is for status reporting upon return */
87 int status; 89 int status;
88 atomic_t wait_ctr; 90 unsigned int wait_ctr;
89}; 91};
90 92
91#define AUTOFS_SBI_MAGIC 0x6d4a556d 93#define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
112 struct mutex wq_mutex; 114 struct mutex wq_mutex;
113 spinlock_t fs_lock; 115 spinlock_t fs_lock;
114 struct autofs_wait_queue *queues; /* Wait queue pointer */ 116 struct autofs_wait_queue *queues; /* Wait queue pointer */
115 spinlock_t rehash_lock; 117 spinlock_t lookup_lock;
116 struct list_head rehash_list; 118 struct list_head active_list;
119 struct list_head expiring_list;
117}; 120};
118 121
119static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 122static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
138static inline int autofs4_ispending(struct dentry *dentry) 141static inline int autofs4_ispending(struct dentry *dentry)
139{ 142{
140 struct autofs_info *inf = autofs4_dentry_ino(dentry); 143 struct autofs_info *inf = autofs4_dentry_ino(dentry);
141 int pending = 0;
142 144
143 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 145 if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
144 return 1; 146 return 1;
145 147
146 if (inf) { 148 if (inf->flags & AUTOFS_INF_EXPIRING)
147 spin_lock(&inf->sbi->fs_lock); 149 return 1;
148 pending = inf->flags & AUTOFS_INF_EXPIRING;
149 spin_unlock(&inf->sbi->fs_lock);
150 }
151 150
152 return pending; 151 return 0;
153} 152}
154 153
155static inline void autofs4_copy_atime(struct file *src, struct file *dst) 154static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
164 163
165/* Expiration */ 164/* Expiration */
166int is_autofs4_dentry(struct dentry *); 165int is_autofs4_dentry(struct dentry *);
166int autofs4_expire_wait(struct dentry *dentry);
167int autofs4_expire_run(struct super_block *, struct vfsmount *, 167int autofs4_expire_run(struct super_block *, struct vfsmount *,
168 struct autofs_sb_info *, 168 struct autofs_sb_info *,
169 struct autofs_packet_expire __user *); 169 struct autofs_packet_expire __user *);
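
Note that the reworked autofs4_ispending() above only tests the dentry and autofs_info flags and no longer takes sbi->fs_lock itself; callers that need a stable view of AUTOFS_INF_EXPIRING are now expected to hold the lock around the call, which is what the ->d_revalidate() change in fs/autofs4/root.c further down does. A rough sketch of that calling pattern, assuming autofs_i.h is included; the wrapper name is invented for illustration:

#include "autofs_i.h"

/* Sketch only: sample the pending/expiring state under sbi->fs_lock. */
static int demo_dentry_pending(struct dentry *dentry)
{
	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
	int pending;

	spin_lock(&sbi->fs_lock);
	pending = autofs4_ispending(dentry);
	spin_unlock(&sbi->fs_lock);

	return pending;
}
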
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d8..cdabb796ff01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
259 now = jiffies; 259 now = jiffies;
260 timeout = sbi->exp_timeout; 260 timeout = sbi->exp_timeout;
261 261
262 /* Lock the tree as we must expire as a whole */
263 spin_lock(&sbi->fs_lock); 262 spin_lock(&sbi->fs_lock);
264 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 263 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
265 struct autofs_info *ino = autofs4_dentry_ino(root); 264 struct autofs_info *ino = autofs4_dentry_ino(root);
266 265 if (d_mountpoint(root)) {
267 /* Set this flag early to catch sys_chdir and the like */ 266 ino->flags |= AUTOFS_INF_MOUNTPOINT;
267 root->d_mounted--;
268 }
268 ino->flags |= AUTOFS_INF_EXPIRING; 269 ino->flags |= AUTOFS_INF_EXPIRING;
270 init_completion(&ino->expire_complete);
269 spin_unlock(&sbi->fs_lock); 271 spin_unlock(&sbi->fs_lock);
270 return root; 272 return root;
271 } 273 }
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
292 struct list_head *next; 294 struct list_head *next;
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 295 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 296 int exp_leaves = how & AUTOFS_EXP_LEAVES;
297 struct autofs_info *ino;
298 unsigned int ino_count;
295 299
296 if (!root) 300 if (!root)
297 return NULL; 301 return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
316 dentry = dget(dentry); 320 dentry = dget(dentry);
317 spin_unlock(&dcache_lock); 321 spin_unlock(&dcache_lock);
318 322
323 spin_lock(&sbi->fs_lock);
324 ino = autofs4_dentry_ino(dentry);
325
319 /* 326 /*
320 * Case 1: (i) indirect mount or top level pseudo direct mount 327 * Case 1: (i) indirect mount or top level pseudo direct mount
321 * (autofs-4.1). 328 * (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
326 DPRINTK("checking mountpoint %p %.*s", 333 DPRINTK("checking mountpoint %p %.*s",
327 dentry, (int)dentry->d_name.len, dentry->d_name.name); 334 dentry, (int)dentry->d_name.len, dentry->d_name.name);
328 335
336 /* Path walk currently on this dentry? */
337 ino_count = atomic_read(&ino->count) + 2;
338 if (atomic_read(&dentry->d_count) > ino_count)
339 goto next;
340
329 /* Can we umount this guy */ 341 /* Can we umount this guy */
330 if (autofs4_mount_busy(mnt, dentry)) 342 if (autofs4_mount_busy(mnt, dentry))
331 goto next; 343 goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
343 355
344 /* Case 2: tree mount, expire iff entire tree is not busy */ 356 /* Case 2: tree mount, expire iff entire tree is not busy */
345 if (!exp_leaves) { 357 if (!exp_leaves) {
346 /* Lock the tree as we must expire as a whole */ 358 /* Path walk currently on this dentry? */
347 spin_lock(&sbi->fs_lock); 359 ino_count = atomic_read(&ino->count) + 1;
348 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 360 if (atomic_read(&dentry->d_count) > ino_count)
349 struct autofs_info *inf = autofs4_dentry_ino(dentry); 361 goto next;
350 362
351 /* Set this flag early to catch sys_chdir and the like */ 363 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 364 expired = dentry;
355 goto found; 365 goto found;
356 } 366 }
357 spin_unlock(&sbi->fs_lock);
358 /* 367 /*
359 * Case 3: pseudo direct mount, expire individual leaves 368 * Case 3: pseudo direct mount, expire individual leaves
360 * (autofs-4.1). 369 * (autofs-4.1).
361 */ 370 */
362 } else { 371 } else {
372 /* Path walk currently on this dentry? */
373 ino_count = atomic_read(&ino->count) + 1;
374 if (atomic_read(&dentry->d_count) > ino_count)
375 goto next;
376
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 377 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 378 if (expired) {
365 dput(dentry); 379 dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
367 } 381 }
368 } 382 }
369next: 383next:
384 spin_unlock(&sbi->fs_lock);
370 dput(dentry); 385 dput(dentry);
371 spin_lock(&dcache_lock); 386 spin_lock(&dcache_lock);
372 next = next->next; 387 next = next->next;
@@ -377,12 +392,45 @@ next:
377found: 392found:
378 DPRINTK("returning %p %.*s", 393 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name); 394 expired, (int)expired->d_name.len, expired->d_name.name);
395 ino = autofs4_dentry_ino(expired);
396 ino->flags |= AUTOFS_INF_EXPIRING;
397 init_completion(&ino->expire_complete);
398 spin_unlock(&sbi->fs_lock);
380 spin_lock(&dcache_lock); 399 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 400 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock); 401 spin_unlock(&dcache_lock);
383 return expired; 402 return expired;
384} 403}
385 404
405int autofs4_expire_wait(struct dentry *dentry)
406{
407 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
408 struct autofs_info *ino = autofs4_dentry_ino(dentry);
409 int status;
410
411 /* Block on any pending expire */
412 spin_lock(&sbi->fs_lock);
413 if (ino->flags & AUTOFS_INF_EXPIRING) {
414 spin_unlock(&sbi->fs_lock);
415
416 DPRINTK("waiting for expire %p name=%.*s",
417 dentry, dentry->d_name.len, dentry->d_name.name);
418
419 status = autofs4_wait(sbi, dentry, NFY_NONE);
420 wait_for_completion(&ino->expire_complete);
421
422 DPRINTK("expire done status=%d", status);
423
424 if (d_unhashed(dentry))
425 return -EAGAIN;
426
427 return status;
428 }
429 spin_unlock(&sbi->fs_lock);
430
431 return 0;
432}
433
386/* Perform an expiry operation */ 434/* Perform an expiry operation */
387int autofs4_expire_run(struct super_block *sb, 435int autofs4_expire_run(struct super_block *sb,
388 struct vfsmount *mnt, 436 struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
390 struct autofs_packet_expire __user *pkt_p) 438 struct autofs_packet_expire __user *pkt_p)
391{ 439{
392 struct autofs_packet_expire pkt; 440 struct autofs_packet_expire pkt;
441 struct autofs_info *ino;
393 struct dentry *dentry; 442 struct dentry *dentry;
443 int ret = 0;
394 444
395 memset(&pkt,0,sizeof pkt); 445 memset(&pkt,0,sizeof pkt);
396 446
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
406 dput(dentry); 456 dput(dentry);
407 457
408 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) 458 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
409 return -EFAULT; 459 ret = -EFAULT;
410 460
411 return 0; 461 spin_lock(&sbi->fs_lock);
462 ino = autofs4_dentry_ino(dentry);
463 ino->flags &= ~AUTOFS_INF_EXPIRING;
464 complete_all(&ino->expire_complete);
465 spin_unlock(&sbi->fs_lock);
466
467 return ret;
412} 468}
413 469
414/* Call repeatedly until it returns -EAGAIN, meaning there's nothing 470/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
433 489
434 /* This is synchronous because it makes the daemon a 490 /* This is synchronous because it makes the daemon a
435 little easier */ 491 little easier */
436 ino->flags |= AUTOFS_INF_EXPIRING;
437 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); 492 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
493
494 spin_lock(&sbi->fs_lock);
495 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
496 sb->s_root->d_mounted++;
497 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
498 }
438 ino->flags &= ~AUTOFS_INF_EXPIRING; 499 ino->flags &= ~AUTOFS_INF_EXPIRING;
500 complete_all(&ino->expire_complete);
501 spin_unlock(&sbi->fs_lock);
439 dput(dentry); 502 dput(dentry);
440 } 503 }
441 504
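
The expire rework above replaces the old flag polling with a completion: whoever selects a dentry for expiry sets AUTOFS_INF_EXPIRING and re-arms ino->expire_complete with init_completion(), autofs4_expire_wait() blocks in wait_for_completion(), and the paths that finish the expire clear the flag and wake every waiter with complete_all(). A stripped-down sketch of that pattern using stand-in names, independent of the autofs structures:

#include <linux/completion.h>
#include <linux/spinlock.h>

/* Stand-in for the ino->flags / ino->expire_complete pair used above. */
struct demo_expire {
	spinlock_t lock;		/* cf. sbi->fs_lock */
	int expiring;			/* cf. AUTOFS_INF_EXPIRING */
	struct completion done;		/* cf. ino->expire_complete */
};

static void demo_expire_start(struct demo_expire *e)
{
	spin_lock(&e->lock);
	e->expiring = 1;
	init_completion(&e->done);	/* re-arm before announcing */
	spin_unlock(&e->lock);
}

static void demo_expire_wait(struct demo_expire *e)
{
	spin_lock(&e->lock);
	if (!e->expiring) {
		spin_unlock(&e->lock);
		return;
	}
	spin_unlock(&e->lock);
	wait_for_completion(&e->done);	/* sleeps until complete_all() */
}

static void demo_expire_finish(struct demo_expire *e)
{
	spin_lock(&e->lock);
	e->expiring = 0;
	complete_all(&e->done);		/* release every waiter at once */
	spin_unlock(&e->lock);
}
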
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d236..7bb3e5ba0537 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
24 24
25static void ino_lnkfree(struct autofs_info *ino) 25static void ino_lnkfree(struct autofs_info *ino)
26{ 26{
27 kfree(ino->u.symlink); 27 if (ino->u.symlink) {
28 ino->u.symlink = NULL; 28 kfree(ino->u.symlink);
29 ino->u.symlink = NULL;
30 }
29} 31}
30 32
31struct autofs_info *autofs4_init_ino(struct autofs_info *ino, 33struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
41 if (ino == NULL) 43 if (ino == NULL)
42 return NULL; 44 return NULL;
43 45
44 ino->flags = 0; 46 if (!reinit) {
45 ino->mode = mode; 47 ino->flags = 0;
46 ino->inode = NULL; 48 ino->inode = NULL;
47 ino->dentry = NULL; 49 ino->dentry = NULL;
48 ino->size = 0; 50 ino->size = 0;
49 51 INIT_LIST_HEAD(&ino->active);
50 INIT_LIST_HEAD(&ino->rehash); 52 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0);
54 }
51 55
56 ino->mode = mode;
52 ino->last_used = jiffies; 57 ino->last_used = jiffies;
53 atomic_set(&ino->count, 0);
54 58
55 ino->sbi = sbi; 59 ino->sbi = sbi;
56 60
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
159 if (!sbi) 163 if (!sbi)
160 goto out_kill_sb; 164 goto out_kill_sb;
161 165
162 if (!sbi->catatonic) 166 /* Free wait queues, close pipe */
163 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 167 autofs4_catatonic_mode(sbi);
164 168
165 /* Clean up and release dangling references */ 169 /* Clean up and release dangling references */
166 autofs4_force_release(sbi); 170 autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
338 mutex_init(&sbi->wq_mutex); 342 mutex_init(&sbi->wq_mutex);
339 spin_lock_init(&sbi->fs_lock); 343 spin_lock_init(&sbi->fs_lock);
340 sbi->queues = NULL; 344 sbi->queues = NULL;
341 spin_lock_init(&sbi->rehash_lock); 345 spin_lock_init(&sbi->lookup_lock);
342 INIT_LIST_HEAD(&sbi->rehash_list); 346 INIT_LIST_HEAD(&sbi->active_list);
347 INIT_LIST_HEAD(&sbi->expiring_list);
343 s->s_blocksize = 1024; 348 s->s_blocksize = 1024;
344 s->s_blocksize_bits = 10; 349 s->s_blocksize_bits = 10;
345 s->s_magic = AUTOFS_SUPER_MAGIC; 350 s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb52..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
27static int autofs4_dir_open(struct inode *inode, struct file *file); 27static int autofs4_dir_open(struct inode *inode, struct file *file);
28static int autofs4_dir_close(struct inode *inode, struct file *file);
29static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
30static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
31static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 28static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
32static void *autofs4_follow_link(struct dentry *, struct nameidata *); 29static void *autofs4_follow_link(struct dentry *, struct nameidata *);
33 30
31#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
32#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
33
34const struct file_operations autofs4_root_operations = { 34const struct file_operations autofs4_root_operations = {
35 .open = dcache_dir_open, 35 .open = dcache_dir_open,
36 .release = dcache_dir_close, 36 .release = dcache_dir_close,
37 .read = generic_read_dir, 37 .read = generic_read_dir,
38 .readdir = autofs4_root_readdir, 38 .readdir = dcache_readdir,
39 .ioctl = autofs4_root_ioctl, 39 .ioctl = autofs4_root_ioctl,
40}; 40};
41 41
42const struct file_operations autofs4_dir_operations = { 42const struct file_operations autofs4_dir_operations = {
43 .open = autofs4_dir_open, 43 .open = autofs4_dir_open,
44 .release = autofs4_dir_close, 44 .release = dcache_dir_close,
45 .read = generic_read_dir, 45 .read = generic_read_dir,
46 .readdir = autofs4_dir_readdir, 46 .readdir = dcache_readdir,
47}; 47};
48 48
49const struct inode_operations autofs4_indirect_root_inode_operations = { 49const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
70 .rmdir = autofs4_dir_rmdir, 70 .rmdir = autofs4_dir_rmdir,
71}; 71};
72 72
73static int autofs4_root_readdir(struct file *file, void *dirent,
74 filldir_t filldir)
75{
76 struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
77 int oz_mode = autofs4_oz_mode(sbi);
78
79 DPRINTK("called, filp->f_pos = %lld", file->f_pos);
80
81 /*
82 * Don't set reghost flag if:
83 * 1) f_pos is larger than zero -- we've already been here.
84 * 2) we haven't even enabled reghosting in the 1st place.
85 * 3) this is the daemon doing a readdir
86 */
87 if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
88 sbi->needs_reghost = 1;
89
90 DPRINTK("needs_reghost = %d", sbi->needs_reghost);
91
92 return dcache_readdir(file, dirent, filldir);
93}
94
95static int autofs4_dir_open(struct inode *inode, struct file *file) 73static int autofs4_dir_open(struct inode *inode, struct file *file)
96{ 74{
97 struct dentry *dentry = file->f_path.dentry; 75 struct dentry *dentry = file->f_path.dentry;
98 struct vfsmount *mnt = file->f_path.mnt;
99 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 76 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
100 struct dentry *cursor;
101 int status;
102
103 status = dcache_dir_open(inode, file);
104 if (status)
105 goto out;
106
107 cursor = file->private_data;
108 cursor->d_fsdata = NULL;
109 77
110 DPRINTK("file=%p dentry=%p %.*s", 78 DPRINTK("file=%p dentry=%p %.*s",
111 file, dentry, dentry->d_name.len, dentry->d_name.name); 79 file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
113 if (autofs4_oz_mode(sbi)) 81 if (autofs4_oz_mode(sbi))
114 goto out; 82 goto out;
115 83
116 if (autofs4_ispending(dentry)) { 84 /*
117 DPRINTK("dentry busy"); 85 * An empty directory in an autofs file system is always a
118 dcache_dir_close(inode, file); 86 * mount point. The daemon must have failed to mount this
119 status = -EBUSY; 87 * during lookup so it doesn't exist. This can happen, for
120 goto out; 88 * example, if user space returns an incorrect status for a
121 } 89 * mount request. Otherwise we're doing a readdir on the
122 90 * autofs file system so just let the libfs routines handle
123 status = -ENOENT; 91 * it.
124 if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { 92 */
125 struct nameidata nd; 93 spin_lock(&dcache_lock);
126 int empty, ret; 94 if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
127
128 /* In case there are stale directory dentrys from a failed mount */
129 spin_lock(&dcache_lock);
130 empty = list_empty(&dentry->d_subdirs);
131 spin_unlock(&dcache_lock); 95 spin_unlock(&dcache_lock);
132 96 return -ENOENT;
133 if (!empty)
134 d_invalidate(dentry);
135
136 nd.flags = LOOKUP_DIRECTORY;
137 ret = (dentry->d_op->d_revalidate)(dentry, &nd);
138
139 if (ret <= 0) {
140 if (ret < 0)
141 status = ret;
142 dcache_dir_close(inode, file);
143 goto out;
144 }
145 } 97 }
98 spin_unlock(&dcache_lock);
146 99
147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL;
149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150
151 path_get(&fp_path);
152
153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file);
156 goto out;
157 }
158
159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file);
163 goto out;
164 }
165 cursor->d_fsdata = fp;
166 }
167 return 0;
168out:
169 return status;
170}
171
172static int autofs4_dir_close(struct inode *inode, struct file *file)
173{
174 struct dentry *dentry = file->f_path.dentry;
175 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
176 struct dentry *cursor = file->private_data;
177 int status = 0;
178
179 DPRINTK("file=%p dentry=%p %.*s",
180 file, dentry, dentry->d_name.len, dentry->d_name.name);
181
182 if (autofs4_oz_mode(sbi))
183 goto out;
184
185 if (autofs4_ispending(dentry)) {
186 DPRINTK("dentry busy");
187 status = -EBUSY;
188 goto out;
189 }
190
191 if (d_mountpoint(dentry)) {
192 struct file *fp = cursor->d_fsdata;
193 if (!fp) {
194 status = -ENOENT;
195 goto out;
196 }
197 filp_close(fp, current->files);
198 }
199out:
200 dcache_dir_close(inode, file);
201 return status;
202}
203
204static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
205{
206 struct dentry *dentry = file->f_path.dentry;
207 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
208 struct dentry *cursor = file->private_data;
209 int status;
210
211 DPRINTK("file=%p dentry=%p %.*s",
212 file, dentry, dentry->d_name.len, dentry->d_name.name);
213
214 if (autofs4_oz_mode(sbi))
215 goto out;
216
217 if (autofs4_ispending(dentry)) {
218 DPRINTK("dentry busy");
219 return -EBUSY;
220 }
221
222 if (d_mountpoint(dentry)) {
223 struct file *fp = cursor->d_fsdata;
224
225 if (!fp)
226 return -ENOENT;
227
228 if (!fp->f_op || !fp->f_op->readdir)
229 goto out;
230
231 status = vfs_readdir(fp, filldir, dirent);
232 file->f_pos = fp->f_pos;
233 if (status)
234 autofs4_copy_atime(file, fp);
235 return status;
236 }
237out: 100out:
238 return dcache_readdir(file, dirent, filldir); 101 return dcache_dir_open(inode, file);
239} 102}
240 103
241static int try_to_fill_dentry(struct dentry *dentry, int flags) 104static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 105{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 106 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 107 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 struct dentry *new;
246 int status; 108 int status;
247 109
248 /* Block on any pending expiry here; invalidate the dentry
249 when expiration is done to trigger mount request with a new
250 dentry */
251 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
252 DPRINTK("waiting for expire %p name=%.*s",
253 dentry, dentry->d_name.len, dentry->d_name.name);
254
255 status = autofs4_wait(sbi, dentry, NFY_NONE);
256
257 DPRINTK("expire done status=%d", status);
258
259 /*
260 * If the directory still exists the mount request must
261 * continue otherwise it can't be followed at the right
262 * time during the walk.
263 */
264 status = d_invalidate(dentry);
265 if (status != -EBUSY)
266 return -EAGAIN;
267 }
268
269 DPRINTK("dentry=%p %.*s ino=%p", 110 DPRINTK("dentry=%p %.*s ino=%p",
270 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 111 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
271 112
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
292 return status; 133 return status;
293 } 134 }
294 /* Trigger mount for path component or follow link */ 135 /* Trigger mount for path component or follow link */
295 } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || 136 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
137 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
296 current->link_count) { 138 current->link_count) {
297 DPRINTK("waiting for mount name=%.*s", 139 DPRINTK("waiting for mount name=%.*s",
298 dentry->d_name.len, dentry->d_name.name); 140 dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 162 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
321 spin_unlock(&dentry->d_lock); 163 spin_unlock(&dentry->d_lock);
322 164
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0; 165 return 0;
344} 166}
345 167
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
355 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", 177 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
356 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, 178 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
357 nd->flags); 179 nd->flags);
358 180 /*
359 /* If it's our master or we shouldn't trigger a mount we're done */ 181 * For an expire of a covered direct or offset mount we need
360 lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); 182 * to break out of follow_down() at the autofs mount trigger
361 if (oz_mode || !lookup_type) 183 * (d_mounted--), so we can see the expiring flag, and manage
184 * the blocking and following here until the expire is completed.
185 */
186 if (oz_mode) {
187 spin_lock(&sbi->fs_lock);
188 if (ino->flags & AUTOFS_INF_EXPIRING) {
189 spin_unlock(&sbi->fs_lock);
190 /* Follow down to our covering mount. */
191 if (!follow_down(&nd->path.mnt, &nd->path.dentry))
192 goto done;
193 goto follow;
194 }
195 spin_unlock(&sbi->fs_lock);
362 goto done; 196 goto done;
197 }
363 198
364 /* If an expire request is pending wait for it. */ 199 /* If an expire request is pending everyone must wait. */
365 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 200 autofs4_expire_wait(dentry);
366 DPRINTK("waiting for active request %p name=%.*s",
367 dentry, dentry->d_name.len, dentry->d_name.name);
368
369 status = autofs4_wait(sbi, dentry, NFY_NONE);
370 201
371 DPRINTK("request done status=%d", status); 202 /* We trigger a mount for almost all flags */
372 } 203 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
204 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
205 goto follow;
373 206
374 /* 207 /*
375 * If the dentry contains directories then it is an 208 * If the dentry contains directories then it is an autofs
376 * autofs multi-mount with no root mount offset. So 209 * multi-mount with no root mount offset. So don't try to
377 * don't try to mount it again. 210 * mount it again.
378 */ 211 */
379 spin_lock(&dcache_lock); 212 spin_lock(&dcache_lock);
380 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 213 if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
214 (!d_mountpoint(dentry) && __simple_empty(dentry))) {
381 spin_unlock(&dcache_lock); 215 spin_unlock(&dcache_lock);
382 216
383 status = try_to_fill_dentry(dentry, 0); 217 status = try_to_fill_dentry(dentry, 0);
384 if (status) 218 if (status)
385 goto out_error; 219 goto out_error;
386 220
387 /* 221 goto follow;
388 * The mount succeeded but if there is no root mount
389 * it must be an autofs multi-mount with no root offset
390 * so we don't need to follow the mount.
391 */
392 if (d_mountpoint(dentry)) {
393 if (!autofs4_follow_mount(&nd->path.mnt,
394 &nd->path.dentry)) {
395 status = -ENOENT;
396 goto out_error;
397 }
398 }
399
400 goto done;
401 } 222 }
402 spin_unlock(&dcache_lock); 223 spin_unlock(&dcache_lock);
224follow:
225 /*
226 * If there is no root mount it must be an autofs
227 * multi-mount with no root offset so we don't need
228 * to follow it.
229 */
230 if (d_mountpoint(dentry)) {
231 if (!autofs4_follow_mount(&nd->path.mnt,
232 &nd->path.dentry)) {
233 status = -ENOENT;
234 goto out_error;
235 }
236 }
403 237
404done: 238done:
405 return NULL; 239 return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
424 int status = 1; 258 int status = 1;
425 259
426 /* Pending dentry */ 260 /* Pending dentry */
261 spin_lock(&sbi->fs_lock);
427 if (autofs4_ispending(dentry)) { 262 if (autofs4_ispending(dentry)) {
428 /* The daemon never causes a mount to trigger */ 263 /* The daemon never causes a mount to trigger */
264 spin_unlock(&sbi->fs_lock);
265
429 if (oz_mode) 266 if (oz_mode)
430 return 1; 267 return 1;
431 268
432 /* 269 /*
270 * If the directory has gone away due to an expire
271 * we have been called as ->d_revalidate() and so
272 * we need to return false and proceed to ->lookup().
273 */
274 if (autofs4_expire_wait(dentry) == -EAGAIN)
275 return 0;
276
277 /*
433 * A zero status is success otherwise we have a 278 * A zero status is success otherwise we have a
434 * negative error code. 279 * negative error code.
435 */ 280 */
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
437 if (status == 0) 282 if (status == 0)
438 return 1; 283 return 1;
439 284
440 /*
441 * A status of EAGAIN here means that the dentry has gone
442 * away while waiting for an expire to complete. If we are
443 * racing with expire lookup will wait for it so this must
444 * be a revalidate and we need to send it to lookup.
445 */
446 if (status == -EAGAIN)
447 return 0;
448
449 return status; 285 return status;
450 } 286 }
287 spin_unlock(&sbi->fs_lock);
451 288
452 /* Negative dentry.. invalidate if "old" */ 289 /* Negative dentry.. invalidate if "old" */
453 if (dentry->d_inode == NULL) 290 if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
461 DPRINTK("dentry=%p %.*s, emptydir", 298 DPRINTK("dentry=%p %.*s, emptydir",
462 dentry, dentry->d_name.len, dentry->d_name.name); 299 dentry, dentry->d_name.len, dentry->d_name.name);
463 spin_unlock(&dcache_lock); 300 spin_unlock(&dcache_lock);
301
464 /* The daemon never causes a mount to trigger */ 302 /* The daemon never causes a mount to trigger */
465 if (oz_mode) 303 if (oz_mode)
466 return 1; 304 return 1;
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
493 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); 331 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
494 332
495 if (sbi) { 333 if (sbi) {
496 spin_lock(&sbi->rehash_lock); 334 spin_lock(&sbi->lookup_lock);
497 if (!list_empty(&inf->rehash)) 335 if (!list_empty(&inf->active))
498 list_del(&inf->rehash); 336 list_del(&inf->active);
499 spin_unlock(&sbi->rehash_lock); 337 if (!list_empty(&inf->expiring))
338 list_del(&inf->expiring);
339 spin_unlock(&sbi->lookup_lock);
500 } 340 }
501 341
502 inf->dentry = NULL; 342 inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
518 .d_release = autofs4_dentry_release, 358 .d_release = autofs4_dentry_release,
519}; 359};
520 360
521static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 361static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
522{ 362{
523 unsigned int len = name->len; 363 unsigned int len = name->len;
524 unsigned int hash = name->hash; 364 unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
526 struct list_head *p, *head; 366 struct list_head *p, *head;
527 367
528 spin_lock(&dcache_lock); 368 spin_lock(&dcache_lock);
529 spin_lock(&sbi->rehash_lock); 369 spin_lock(&sbi->lookup_lock);
530 head = &sbi->rehash_list; 370 head = &sbi->active_list;
531 list_for_each(p, head) { 371 list_for_each(p, head) {
532 struct autofs_info *ino; 372 struct autofs_info *ino;
533 struct dentry *dentry; 373 struct dentry *dentry;
534 struct qstr *qstr; 374 struct qstr *qstr;
535 375
536 ino = list_entry(p, struct autofs_info, rehash); 376 ino = list_entry(p, struct autofs_info, active);
377 dentry = ino->dentry;
378
379 spin_lock(&dentry->d_lock);
380
381 /* Already gone? */
382 if (atomic_read(&dentry->d_count) == 0)
383 goto next;
384
385 qstr = &dentry->d_name;
386
387 if (dentry->d_name.hash != hash)
388 goto next;
389 if (dentry->d_parent != parent)
390 goto next;
391
392 if (qstr->len != len)
393 goto next;
394 if (memcmp(qstr->name, str, len))
395 goto next;
396
397 if (d_unhashed(dentry)) {
398 dget(dentry);
399 spin_unlock(&dentry->d_lock);
400 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&dcache_lock);
402 return dentry;
403 }
404next:
405 spin_unlock(&dentry->d_lock);
406 }
407 spin_unlock(&sbi->lookup_lock);
408 spin_unlock(&dcache_lock);
409
410 return NULL;
411}
412
413static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
414{
415 unsigned int len = name->len;
416 unsigned int hash = name->hash;
417 const unsigned char *str = name->name;
418 struct list_head *p, *head;
419
420 spin_lock(&dcache_lock);
421 spin_lock(&sbi->lookup_lock);
422 head = &sbi->expiring_list;
423 list_for_each(p, head) {
424 struct autofs_info *ino;
425 struct dentry *dentry;
426 struct qstr *qstr;
427
428 ino = list_entry(p, struct autofs_info, expiring);
537 dentry = ino->dentry; 429 dentry = ino->dentry;
538 430
539 spin_lock(&dentry->d_lock); 431 spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
555 goto next; 447 goto next;
556 448
557 if (d_unhashed(dentry)) { 449 if (d_unhashed(dentry)) {
558 struct inode *inode = dentry->d_inode;
559
560 ino = autofs4_dentry_ino(dentry);
561 list_del_init(&ino->rehash);
562 dget(dentry); 450 dget(dentry);
563 /*
564 * Make the rehashed dentry negative so the VFS
565 * behaves as it should.
566 */
567 if (inode) {
568 dentry->d_inode = NULL;
569 list_del_init(&dentry->d_alias);
570 spin_unlock(&dentry->d_lock);
571 spin_unlock(&sbi->rehash_lock);
572 spin_unlock(&dcache_lock);
573 iput(inode);
574 return dentry;
575 }
576 spin_unlock(&dentry->d_lock); 451 spin_unlock(&dentry->d_lock);
577 spin_unlock(&sbi->rehash_lock); 452 spin_unlock(&sbi->lookup_lock);
578 spin_unlock(&dcache_lock); 453 spin_unlock(&dcache_lock);
579 return dentry; 454 return dentry;
580 } 455 }
581next: 456next:
582 spin_unlock(&dentry->d_lock); 457 spin_unlock(&dentry->d_lock);
583 } 458 }
584 spin_unlock(&sbi->rehash_lock); 459 spin_unlock(&sbi->lookup_lock);
585 spin_unlock(&dcache_lock); 460 spin_unlock(&dcache_lock);
586 461
587 return NULL; 462 return NULL;
@@ -591,7 +466,8 @@ next:
591static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 466static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
592{ 467{
593 struct autofs_sb_info *sbi; 468 struct autofs_sb_info *sbi;
594 struct dentry *unhashed; 469 struct autofs_info *ino;
470 struct dentry *expiring, *unhashed;
595 int oz_mode; 471 int oz_mode;
596 472
597 DPRINTK("name = %.*s", 473 DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
607 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 483 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
608 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 484 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
609 485
610 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); 486 expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
611 if (!unhashed) { 487 if (expiring) {
488 /*
489 * If we are racing with expire the request might not
490 * be quite complete but the directory has been removed
491 * so it must have been successful, so just wait for it.
492 */
493 ino = autofs4_dentry_ino(expiring);
494 autofs4_expire_wait(expiring);
495 spin_lock(&sbi->lookup_lock);
496 if (!list_empty(&ino->expiring))
497 list_del_init(&ino->expiring);
498 spin_unlock(&sbi->lookup_lock);
499 dput(expiring);
500 }
501
502 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
503 if (unhashed)
504 dentry = unhashed;
505 else {
612 /* 506 /*
613 * Mark the dentry incomplete but don't hash it. We do this 507 * Mark the dentry incomplete but don't hash it. We do this
614 * to serialize our inode creation operations (symlink and 508 * to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
622 */ 516 */
623 dentry->d_op = &autofs4_root_dentry_operations; 517 dentry->d_op = &autofs4_root_dentry_operations;
624 518
625 dentry->d_fsdata = NULL;
626 d_instantiate(dentry, NULL);
627 } else {
628 struct autofs_info *ino = autofs4_dentry_ino(unhashed);
629 DPRINTK("rehash %p with %p", dentry, unhashed);
630 /* 519 /*
631 * If we are racing with expire the request might not 520 * And we need to ensure that the same dentry is used for
632 * be quite complete but the directory has been removed 521 * all following lookup calls until it is hashed so that
633 * so it must have been successful, so just wait for it. 522 * the dentry flags are persistent throughout the request.
634 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
635 * before continuing as revalidate may fail when calling
636 * try_to_fill_dentry (returning EAGAIN) if we don't.
637 */ 523 */
638 while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 524 ino = autofs4_init_ino(NULL, sbi, 0555);
639 DPRINTK("wait for incomplete expire %p name=%.*s", 525 if (!ino)
640 unhashed, unhashed->d_name.len, 526 return ERR_PTR(-ENOMEM);
641 unhashed->d_name.name); 527
642 autofs4_wait(sbi, unhashed, NFY_NONE); 528 dentry->d_fsdata = ino;
643 DPRINTK("request completed"); 529 ino->dentry = dentry;
644 } 530
645 dentry = unhashed; 531 spin_lock(&sbi->lookup_lock);
532 list_add(&ino->active, &sbi->active_list);
533 spin_unlock(&sbi->lookup_lock);
534
535 d_instantiate(dentry, NULL);
646 } 536 }
647 537
648 if (!oz_mode) { 538 if (!oz_mode) {
649 spin_lock(&dentry->d_lock); 539 spin_lock(&dentry->d_lock);
650 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 540 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
651 spin_unlock(&dentry->d_lock); 541 spin_unlock(&dentry->d_lock);
652 } 542 if (dentry->d_op && dentry->d_op->d_revalidate) {
653 543 mutex_unlock(&dir->i_mutex);
654 if (dentry->d_op && dentry->d_op->d_revalidate) { 544 (dentry->d_op->d_revalidate)(dentry, nd);
655 mutex_unlock(&dir->i_mutex); 545 mutex_lock(&dir->i_mutex);
656 (dentry->d_op->d_revalidate)(dentry, nd); 546 }
657 mutex_lock(&dir->i_mutex);
658 } 547 }
659 548
660 /* 549 /*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
673 return ERR_PTR(-ERESTARTNOINTR); 562 return ERR_PTR(-ERESTARTNOINTR);
674 } 563 }
675 } 564 }
676 spin_lock(&dentry->d_lock); 565 if (!oz_mode) {
677 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 566 spin_lock(&dentry->d_lock);
678 spin_unlock(&dentry->d_lock); 567 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
568 spin_unlock(&dentry->d_lock);
569 }
679 } 570 }
680 571
681 /* 572 /*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
706 } 597 }
707 598
708 if (unhashed) 599 if (unhashed)
709 return dentry; 600 return unhashed;
710 601
711 return NULL; 602 return NULL;
712} 603}
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
728 return -EACCES; 619 return -EACCES;
729 620
730 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); 621 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
731 if (ino == NULL) 622 if (!ino)
732 return -ENOSPC; 623 return -ENOMEM;
733 624
734 ino->size = strlen(symname); 625 spin_lock(&sbi->lookup_lock);
735 ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); 626 if (!list_empty(&ino->active))
627 list_del_init(&ino->active);
628 spin_unlock(&sbi->lookup_lock);
736 629
737 if (cp == NULL) { 630 ino->size = strlen(symname);
738 kfree(ino); 631 cp = kmalloc(ino->size + 1, GFP_KERNEL);
739 return -ENOSPC; 632 if (!cp) {
633 if (!dentry->d_fsdata)
634 kfree(ino);
635 return -ENOMEM;
740 } 636 }
741 637
742 strcpy(cp, symname); 638 strcpy(cp, symname);
743 639
744 inode = autofs4_get_inode(dir->i_sb, ino); 640 inode = autofs4_get_inode(dir->i_sb, ino);
641 if (!inode) {
642 kfree(cp);
643 if (!dentry->d_fsdata)
644 kfree(ino);
645 return -ENOMEM;
646 }
745 d_add(dentry, inode); 647 d_add(dentry, inode);
746 648
747 if (dir == dir->i_sb->s_root->d_inode) 649 if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
757 atomic_inc(&p_ino->count); 659 atomic_inc(&p_ino->count);
758 ino->inode = inode; 660 ino->inode = inode;
759 661
662 ino->u.symlink = cp;
760 dir->i_mtime = CURRENT_TIME; 663 dir->i_mtime = CURRENT_TIME;
761 664
762 return 0; 665 return 0;
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
769 * that the file no longer exists. However, doing that means that the 672 * that the file no longer exists. However, doing that means that the
770 * VFS layer can turn the dentry into a negative dentry. We don't want 673 * VFS layer can turn the dentry into a negative dentry. We don't want
771 * this, because the unlink is probably the result of an expire. 674 * this, because the unlink is probably the result of an expire.
772 * We simply d_drop it and add it to a rehash candidates list in the 675 * We simply d_drop it and add it to a expiring list in the super block,
773 * super block, which allows the dentry lookup to reuse it retaining 676 * which allows the dentry lookup to check for an incomplete expire.
774 * the flags, such as expire in progress, in case we're racing with expire.
775 * 677 *
776 * If a process is blocked on the dentry waiting for the expire to finish, 678 * If a process is blocked on the dentry waiting for the expire to finish,
777 * it will invalidate the dentry and try to mount with a new one. 679 * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
801 dir->i_mtime = CURRENT_TIME; 703 dir->i_mtime = CURRENT_TIME;
802 704
803 spin_lock(&dcache_lock); 705 spin_lock(&dcache_lock);
804 spin_lock(&sbi->rehash_lock); 706 spin_lock(&sbi->lookup_lock);
805 list_add(&ino->rehash, &sbi->rehash_list); 707 if (list_empty(&ino->expiring))
806 spin_unlock(&sbi->rehash_lock); 708 list_add(&ino->expiring, &sbi->expiring_list);
709 spin_unlock(&sbi->lookup_lock);
807 spin_lock(&dentry->d_lock); 710 spin_lock(&dentry->d_lock);
808 __d_drop(dentry); 711 __d_drop(dentry);
809 spin_unlock(&dentry->d_lock); 712 spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
829 spin_unlock(&dcache_lock); 732 spin_unlock(&dcache_lock);
830 return -ENOTEMPTY; 733 return -ENOTEMPTY;
831 } 734 }
832 spin_lock(&sbi->rehash_lock); 735 spin_lock(&sbi->lookup_lock);
833 list_add(&ino->rehash, &sbi->rehash_list); 736 if (list_empty(&ino->expiring))
834 spin_unlock(&sbi->rehash_lock); 737 list_add(&ino->expiring, &sbi->expiring_list);
738 spin_unlock(&sbi->lookup_lock);
835 spin_lock(&dentry->d_lock); 739 spin_lock(&dentry->d_lock);
836 __d_drop(dentry); 740 __d_drop(dentry);
837 spin_unlock(&dentry->d_lock); 741 spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
866 dentry, dentry->d_name.len, dentry->d_name.name); 770 dentry, dentry->d_name.len, dentry->d_name.name);
867 771
868 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); 772 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
869 if (ino == NULL) 773 if (!ino)
870 return -ENOSPC; 774 return -ENOMEM;
775
776 spin_lock(&sbi->lookup_lock);
777 if (!list_empty(&ino->active))
778 list_del_init(&ino->active);
779 spin_unlock(&sbi->lookup_lock);
871 780
872 inode = autofs4_get_inode(dir->i_sb, ino); 781 inode = autofs4_get_inode(dir->i_sb, ino);
782 if (!inode) {
783 if (!dentry->d_fsdata)
784 kfree(ino);
785 return -ENOMEM;
786 }
873 d_add(dentry, inode); 787 d_add(dentry, inode);
874 788
875 if (dir == dir->i_sb->s_root->d_inode) 789 if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
922} 836}
923 837
924/* 838/*
925 * Tells the daemon whether we need to reghost or not. Also, clears
926 * the reghost_needed flag.
927 */
928static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
929{
930 int status;
931
932 DPRINTK("returning %d", sbi->needs_reghost);
933
934 status = put_user(sbi->needs_reghost, p);
935 if (status)
936 return status;
937
938 sbi->needs_reghost = 0;
939 return 0;
940}
941
942/*
943 * Enable / Disable reghosting ioctl() operation
944 */
945static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
946{
947 int status;
948 int val;
949
950 status = get_user(val, p);
951
952 DPRINTK("reghost = %d", val);
953
954 if (status)
955 return status;
956
957 /* turn on/off reghosting, with the val */
958 sbi->reghost_enabled = val;
959 return 0;
960}
961
962/*
963* Tells the daemon whether it can umount the autofs mount. 839* Tells the daemon whether it can umount the autofs mount.
964*/ 840*/
965static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) 841static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
1023 case AUTOFS_IOC_SETTIMEOUT: 899 case AUTOFS_IOC_SETTIMEOUT:
1024 return autofs4_get_set_timeout(sbi, p); 900 return autofs4_get_set_timeout(sbi, p);
1025 901
1026 case AUTOFS_IOC_TOGGLEREGHOST:
1027 return autofs4_toggle_reghost(sbi, p);
1028 case AUTOFS_IOC_ASKREGHOST:
1029 return autofs4_ask_reghost(sbi, p);
1030
1031 case AUTOFS_IOC_ASKUMOUNT: 902 case AUTOFS_IOC_ASKUMOUNT:
1032 return autofs4_ask_umount(filp->f_path.mnt, p); 903 return autofs4_ask_umount(filp->f_path.mnt, p);
1033 904
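
Taken together, the root.c changes replace the old rehash list with two lists hung off the super block: unlink/rmdir put the autofs_info on sbi->expiring_list, lookup scans sbi->active_list and sbi->expiring_list for a matching unhashed dentry, and autofs4_dentry_release() unlinks the entry again, all under sbi->lookup_lock. A condensed sketch of that lookup discipline with an invented function name; the real autofs4_lookup_expiring() above additionally holds dcache_lock and the per-dentry d_lock and skips dentries whose d_count has already dropped to zero:

static struct dentry *demo_find_expiring(struct autofs_sb_info *sbi,
					 struct dentry *parent,
					 struct qstr *name)
{
	struct autofs_info *ino;
	struct dentry *found = NULL;

	spin_lock(&sbi->lookup_lock);
	list_for_each_entry(ino, &sbi->expiring_list, expiring) {
		struct dentry *dentry = ino->dentry;

		if (dentry->d_parent != parent)
			continue;
		if (dentry->d_name.hash != name->hash ||
		    dentry->d_name.len != name->len ||
		    memcmp(dentry->d_name.name, name->name, name->len))
			continue;

		/* Grab a reference before dropping the lock. */
		found = dget(dentry);
		break;
	}
	spin_unlock(&sbi->lookup_lock);

	return found;
}
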
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6d..35216d18d8b5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
28{ 28{
29 struct autofs_wait_queue *wq, *nwq; 29 struct autofs_wait_queue *wq, *nwq;
30 30
31 mutex_lock(&sbi->wq_mutex);
32 if (sbi->catatonic) {
33 mutex_unlock(&sbi->wq_mutex);
34 return;
35 }
36
31 DPRINTK("entering catatonic mode"); 37 DPRINTK("entering catatonic mode");
32 38
33 sbi->catatonic = 1; 39 sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
36 while (wq) { 42 while (wq) {
37 nwq = wq->next; 43 nwq = wq->next;
38 wq->status = -ENOENT; /* Magic is gone - report failure */ 44 wq->status = -ENOENT; /* Magic is gone - report failure */
39 kfree(wq->name); 45 if (wq->name.name) {
40 wq->name = NULL; 46 kfree(wq->name.name);
47 wq->name.name = NULL;
48 }
49 wq->wait_ctr--;
41 wake_up_interruptible(&wq->queue); 50 wake_up_interruptible(&wq->queue);
42 wq = nwq; 51 wq = nwq;
43 } 52 }
44 fput(sbi->pipe); /* Close the pipe */ 53 fput(sbi->pipe); /* Close the pipe */
45 sbi->pipe = NULL; 54 sbi->pipe = NULL;
55 sbi->pipefd = -1;
56 mutex_unlock(&sbi->wq_mutex);
46} 57}
47 58
48static int autofs4_write(struct file *file, const void *addr, int bytes) 59static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
89 union autofs_packet_union v4_pkt; 100 union autofs_packet_union v4_pkt;
90 union autofs_v5_packet_union v5_pkt; 101 union autofs_v5_packet_union v5_pkt;
91 } pkt; 102 } pkt;
103 struct file *pipe = NULL;
92 size_t pktsz; 104 size_t pktsz;
93 105
94 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", 106 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
95 wq->wait_queue_token, wq->len, wq->name, type); 107 wq->wait_queue_token, wq->name.len, wq->name.name, type);
96 108
97 memset(&pkt,0,sizeof pkt); /* For security reasons */ 109 memset(&pkt,0,sizeof pkt); /* For security reasons */
98 110
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
107 pktsz = sizeof(*mp); 119 pktsz = sizeof(*mp);
108 120
109 mp->wait_queue_token = wq->wait_queue_token; 121 mp->wait_queue_token = wq->wait_queue_token;
110 mp->len = wq->len; 122 mp->len = wq->name.len;
111 memcpy(mp->name, wq->name, wq->len); 123 memcpy(mp->name, wq->name.name, wq->name.len);
112 mp->name[wq->len] = '\0'; 124 mp->name[wq->name.len] = '\0';
113 break; 125 break;
114 } 126 }
115 case autofs_ptype_expire_multi: 127 case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
119 pktsz = sizeof(*ep); 131 pktsz = sizeof(*ep);
120 132
121 ep->wait_queue_token = wq->wait_queue_token; 133 ep->wait_queue_token = wq->wait_queue_token;
122 ep->len = wq->len; 134 ep->len = wq->name.len;
123 memcpy(ep->name, wq->name, wq->len); 135 memcpy(ep->name, wq->name.name, wq->name.len);
124 ep->name[wq->len] = '\0'; 136 ep->name[wq->name.len] = '\0';
125 break; 137 break;
126 } 138 }
127 /* 139 /*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
138 pktsz = sizeof(*packet); 150 pktsz = sizeof(*packet);
139 151
140 packet->wait_queue_token = wq->wait_queue_token; 152 packet->wait_queue_token = wq->wait_queue_token;
141 packet->len = wq->len; 153 packet->len = wq->name.len;
142 memcpy(packet->name, wq->name, wq->len); 154 memcpy(packet->name, wq->name.name, wq->name.len);
143 packet->name[wq->len] = '\0'; 155 packet->name[wq->name.len] = '\0';
144 packet->dev = wq->dev; 156 packet->dev = wq->dev;
145 packet->ino = wq->ino; 157 packet->ino = wq->ino;
146 packet->uid = wq->uid; 158 packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 return; 166 return;
155 } 167 }
156 168
157 if (autofs4_write(sbi->pipe, &pkt, pktsz)) 169 /* Check if we have become catatonic */
158 autofs4_catatonic_mode(sbi); 170 mutex_lock(&sbi->wq_mutex);
171 if (!sbi->catatonic) {
172 pipe = sbi->pipe;
173 get_file(pipe);
174 }
175 mutex_unlock(&sbi->wq_mutex);
176
177 if (pipe) {
178 if (autofs4_write(pipe, &pkt, pktsz))
179 autofs4_catatonic_mode(sbi);
180 fput(pipe);
181 }
159} 182}
160 183
161static int autofs4_getpath(struct autofs_sb_info *sbi, 184static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
191} 214}
192 215
193static struct autofs_wait_queue * 216static struct autofs_wait_queue *
194autofs4_find_wait(struct autofs_sb_info *sbi, 217autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
195 char *name, unsigned int hash, unsigned int len)
196{ 218{
197 struct autofs_wait_queue *wq; 219 struct autofs_wait_queue *wq;
198 220
199 for (wq = sbi->queues; wq; wq = wq->next) { 221 for (wq = sbi->queues; wq; wq = wq->next) {
200 if (wq->hash == hash && 222 if (wq->name.hash == qstr->hash &&
201 wq->len == len && 223 wq->name.len == qstr->len &&
202 wq->name && !memcmp(wq->name, name, len)) 224 wq->name.name &&
225 !memcmp(wq->name.name, qstr->name, qstr->len))
203 break; 226 break;
204 } 227 }
205 return wq; 228 return wq;
206} 229}
207 230
208int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, 231/*
209 enum autofs_notify notify) 232 * Check if we have a valid request.
233 * Returns
234 * 1 if the request should continue.
235 * In this case we can return an autofs_wait_queue entry if one is
236 * found or NULL to indicate a new wait needs to be created.
237 * 0 or a negative errno if the request shouldn't continue.
238 */
239static int validate_request(struct autofs_wait_queue **wait,
240 struct autofs_sb_info *sbi,
241 struct qstr *qstr,
242 struct dentry*dentry, enum autofs_notify notify)
210{ 243{
211 struct autofs_info *ino;
212 struct autofs_wait_queue *wq; 244 struct autofs_wait_queue *wq;
213 char *name; 245 struct autofs_info *ino;
214 unsigned int len = 0;
215 unsigned int hash = 0;
216 int status, type;
217
218 /* In catatonic mode, we don't wait for nobody */
219 if (sbi->catatonic)
220 return -ENOENT;
221
222 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
223 if (!name)
224 return -ENOMEM;
225 246
226 /* If this is a direct mount request create a dummy name */ 247 /* Wait in progress, continue; */
227 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) 248 wq = autofs4_find_wait(sbi, qstr);
228 len = sprintf(name, "%p", dentry); 249 if (wq) {
229 else { 250 *wait = wq;
230 len = autofs4_getpath(sbi, dentry, &name); 251 return 1;
231 if (!len) {
232 kfree(name);
233 return -ENOENT;
234 }
235 } 252 }
236 hash = full_name_hash(name, len);
237 253
238 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 254 *wait = NULL;
239 kfree(name);
240 return -EINTR;
241 }
242 255
243 wq = autofs4_find_wait(sbi, name, hash, len); 256 /* If we don't yet have any info this is a new request */
244 ino = autofs4_dentry_ino(dentry); 257 ino = autofs4_dentry_ino(dentry);
245 if (!wq && ino && notify == NFY_NONE) { 258 if (!ino)
259 return 1;
260
261 /*
262 * If we've been asked to wait on an existing expire (NFY_NONE)
263 * but there is no wait in the queue ...
264 */
265 if (notify == NFY_NONE) {
246 /* 266 /*
 247 * Either we've beaten the pending expire to post its 267
248 * wait or it finished while we waited on the mutex. 268 * wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
253 while (ino->flags & AUTOFS_INF_EXPIRING) { 273 while (ino->flags & AUTOFS_INF_EXPIRING) {
254 mutex_unlock(&sbi->wq_mutex); 274 mutex_unlock(&sbi->wq_mutex);
255 schedule_timeout_interruptible(HZ/10); 275 schedule_timeout_interruptible(HZ/10);
256 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 276 if (mutex_lock_interruptible(&sbi->wq_mutex))
257 kfree(name);
258 return -EINTR; 277 return -EINTR;
278
279 wq = autofs4_find_wait(sbi, qstr);
280 if (wq) {
281 *wait = wq;
282 return 1;
259 } 283 }
260 wq = autofs4_find_wait(sbi, name, hash, len);
261 if (wq)
262 break;
263 } 284 }
264 285
265 /* 286 /*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
267 * cases where we wait on NFY_NONE neither depend on the 288 * cases where we wait on NFY_NONE neither depend on the
268 * return status of the wait. 289 * return status of the wait.
269 */ 290 */
270 if (!wq) { 291 return 0;
292 }
293
294 /*
295 * If we've been asked to trigger a mount and the request
296 * completed while we waited on the mutex ...
297 */
298 if (notify == NFY_MOUNT) {
299 /*
300 * If the dentry isn't hashed just go ahead and try the
301 * mount again with a new wait (not much else we can do).
302 */
303 if (!d_unhashed(dentry)) {
304 /*
305 * But if the dentry is hashed, that means that we
306 * got here through the revalidate path. Thus, we
307 * need to check if the dentry has been mounted
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 }
315
316 return 1;
317}
318
319int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
320 enum autofs_notify notify)
321{
322 struct autofs_wait_queue *wq;
323 struct qstr qstr;
324 char *name;
325 int status, ret, type;
326
 327 /* In catatonic mode, we don't wait for anybody */
328 if (sbi->catatonic)
329 return -ENOENT;
330
331 if (!dentry->d_inode) {
332 /*
333 * A wait for a negative dentry is invalid for certain
334 * cases. A direct or offset mount "always" has its mount
335 * point directory created and so the request dentry must
336 * be positive or the map key doesn't exist. The situation
 337 * is very similar for indirect mounts except only dentries
338 * in the root of the autofs file system may be negative.
339 */
340 if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT;
344 }
345
346 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
347 if (!name)
348 return -ENOMEM;
349
350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
352 qstr.len = sprintf(name, "%p", dentry);
353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name);
355 if (!qstr.len) {
271 kfree(name); 356 kfree(name);
272 mutex_unlock(&sbi->wq_mutex); 357 return -ENOENT;
273 return 0;
274 } 358 }
275 } 359 }
360 qstr.name = name;
361 qstr.hash = full_name_hash(name, qstr.len);
362
363 if (mutex_lock_interruptible(&sbi->wq_mutex)) {
364 kfree(qstr.name);
365 return -EINTR;
366 }
367
368 ret = validate_request(&wq, sbi, &qstr, dentry, notify);
369 if (ret <= 0) {
370 if (ret == 0)
371 mutex_unlock(&sbi->wq_mutex);
372 kfree(qstr.name);
373 return ret;
374 }
276 375
277 if (!wq) { 376 if (!wq) {
278 /* Create a new wait queue */ 377 /* Create a new wait queue */
279 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); 378 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
280 if (!wq) { 379 if (!wq) {
281 kfree(name); 380 kfree(qstr.name);
282 mutex_unlock(&sbi->wq_mutex); 381 mutex_unlock(&sbi->wq_mutex);
283 return -ENOMEM; 382 return -ENOMEM;
284 } 383 }
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
289 wq->next = sbi->queues; 388 wq->next = sbi->queues;
290 sbi->queues = wq; 389 sbi->queues = wq;
291 init_waitqueue_head(&wq->queue); 390 init_waitqueue_head(&wq->queue);
292 wq->hash = hash; 391 memcpy(&wq->name, &qstr, sizeof(struct qstr));
293 wq->name = name;
294 wq->len = len;
295 wq->dev = autofs4_get_dev(sbi); 392 wq->dev = autofs4_get_dev(sbi);
296 wq->ino = autofs4_get_ino(sbi); 393 wq->ino = autofs4_get_ino(sbi);
297 wq->uid = current->uid; 394 wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
299 wq->pid = current->pid; 396 wq->pid = current->pid;
300 wq->tgid = current->tgid; 397 wq->tgid = current->tgid;
301 wq->status = -EINTR; /* Status return if interrupted */ 398 wq->status = -EINTR; /* Status return if interrupted */
302 atomic_set(&wq->wait_ctr, 2); 399 wq->wait_ctr = 2;
303 mutex_unlock(&sbi->wq_mutex); 400 mutex_unlock(&sbi->wq_mutex);
304 401
305 if (sbi->version < 5) { 402 if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
319 } 416 }
320 417
321 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", 418 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
322 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 419 (unsigned long) wq->wait_queue_token, wq->name.len,
420 wq->name.name, notify);
323 421
324 /* autofs4_notify_daemon() may block */ 422 /* autofs4_notify_daemon() may block */
325 autofs4_notify_daemon(sbi, wq, type); 423 autofs4_notify_daemon(sbi, wq, type);
326 } else { 424 } else {
327 atomic_inc(&wq->wait_ctr); 425 wq->wait_ctr++;
328 mutex_unlock(&sbi->wq_mutex); 426 mutex_unlock(&sbi->wq_mutex);
329 kfree(name); 427 kfree(qstr.name);
330 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", 428 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
331 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 429 (unsigned long) wq->wait_queue_token, wq->name.len,
332 } 430 wq->name.name, notify);
333
334 /* wq->name is NULL if and only if the lock is already released */
335
336 if (sbi->catatonic) {
337 /* We might have slept, so check again for catatonic mode */
338 wq->status = -ENOENT;
339 kfree(wq->name);
340 wq->name = NULL;
341 } 431 }
342 432
343 if (wq->name) { 433 /*
434 * wq->name.name is NULL iff the lock is already released
435 * or the mount has been made catatonic.
436 */
437 if (wq->name.name) {
344 /* Block all but "shutdown" signals while waiting */ 438 /* Block all but "shutdown" signals while waiting */
345 sigset_t oldset; 439 sigset_t oldset;
346 unsigned long irqflags; 440 unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
351 recalc_sigpending(); 445 recalc_sigpending();
352 spin_unlock_irqrestore(&current->sighand->siglock, irqflags); 446 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
353 447
354 wait_event_interruptible(wq->queue, wq->name == NULL); 448 wait_event_interruptible(wq->queue, wq->name.name == NULL);
355 449
356 spin_lock_irqsave(&current->sighand->siglock, irqflags); 450 spin_lock_irqsave(&current->sighand->siglock, irqflags);
357 current->blocked = oldset; 451 current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
364 status = wq->status; 458 status = wq->status;
365 459
366 /* Are we the last process to need status? */ 460 /* Are we the last process to need status? */
367 if (atomic_dec_and_test(&wq->wait_ctr)) 461 mutex_lock(&sbi->wq_mutex);
462 if (!--wq->wait_ctr)
368 kfree(wq); 463 kfree(wq);
464 mutex_unlock(&sbi->wq_mutex);
369 465
370 return status; 466 return status;
371} 467}
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
387 } 483 }
388 484
389 *wql = wq->next; /* Unlink from chain */ 485 *wql = wq->next; /* Unlink from chain */
390 mutex_unlock(&sbi->wq_mutex); 486 kfree(wq->name.name);
391 kfree(wq->name); 487 wq->name.name = NULL; /* Do not wait on this queue */
392 wq->name = NULL; /* Do not wait on this queue */
393
394 wq->status = status; 488 wq->status = status;
395 489 wake_up_interruptible(&wq->queue);
396 if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */ 490 if (!--wq->wait_ctr)
397 kfree(wq); 491 kfree(wq);
398 else 492 mutex_unlock(&sbi->wq_mutex);
399 wake_up_interruptible(&wq->queue);
400 493
401 return 0; 494 return 0;
402} 495}
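Taken together, the waitq.c hunks replace the atomic wait_ctr with a plain counter that is only ever touched under sbi->wq_mutex, and make wq->name.name == NULL the single condition waiters sleep on. A condensed sketch of both sides of the protocol as it now stands (extracted from the hunks above, not a drop-in replacement):

	/* waiter side: tail of autofs4_wait() */
	wait_event_interruptible(wq->queue, wq->name.name == NULL);
	status = wq->status;
	mutex_lock(&sbi->wq_mutex);
	if (!--wq->wait_ctr)		/* last user frees the entry */
		kfree(wq);
	mutex_unlock(&sbi->wq_mutex);

	/* releaser side: autofs4_wait_release(), wq_mutex already held */
	kfree(wq->name.name);
	wq->name.name = NULL;		/* satisfies the wait condition above */
	wq->status = status;
	wake_up_interruptible(&wq->queue);
	if (!--wq->wait_ctr)
		kfree(wq);
	mutex_unlock(&sbi->wq_mutex);

Because every manipulation of wait_ctr is serialised on wq_mutex, the counter no longer needs to be atomic, and whichever side drops the last reference does the kfree() only after the other side has released the mutex and is finished with the entry.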
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d48ff5f370f4..639d2d8b5710 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -204,6 +204,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
204 NEW_AUX_ENT(AT_GID, tsk->gid); 204 NEW_AUX_ENT(AT_GID, tsk->gid);
205 NEW_AUX_ENT(AT_EGID, tsk->egid); 205 NEW_AUX_ENT(AT_EGID, tsk->egid);
206 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 206 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
207 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
207 if (k_platform) { 208 if (k_platform) {
208 NEW_AUX_ENT(AT_PLATFORM, 209 NEW_AUX_ENT(AT_PLATFORM,
209 (elf_addr_t)(unsigned long)u_platform); 210 (elf_addr_t)(unsigned long)u_platform);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c5..756205314c24 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/fs.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32 33
@@ -535,31 +536,16 @@ static ssize_t
535bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 536bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
536{ 537{
537 Node *e = file->f_path.dentry->d_inode->i_private; 538 Node *e = file->f_path.dentry->d_inode->i_private;
538 loff_t pos = *ppos;
539 ssize_t res; 539 ssize_t res;
540 char *page; 540 char *page;
541 int len;
542 541
543 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 542 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
544 return -ENOMEM; 543 return -ENOMEM;
545 544
546 entry_status(e, page); 545 entry_status(e, page);
547 len = strlen(page);
548 546
549 res = -EINVAL; 547 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
550 if (pos < 0) 548
551 goto out;
552 res = 0;
553 if (pos >= len)
554 goto out;
555 if (len < pos + nbytes)
556 nbytes = len - pos;
557 res = -EFAULT;
558 if (copy_to_user(buf, page + pos, nbytes))
559 goto out;
560 *ppos = pos + nbytes;
561 res = nbytes;
562out:
563 free_page((unsigned long) page); 549 free_page((unsigned long) page);
564 return res; 550 return res;
565} 551}
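bm_entry_read() now delegates the bounds checking and copy_to_user() to simple_read_from_buffer(). A minimal sketch of the resulting shape for a read-only status file (hypothetical names, shown only to illustrate the helper's semantics):

static const char my_status[] = "enabled\n";

static ssize_t my_read(struct file *file, char __user *buf,
		       size_t nbytes, loff_t *ppos)
{
	/* handles *ppos < 0, reads past the end, short copies and the
	 * *ppos update, and returns the number of bytes copied */
	return simple_read_from_buffer(buf, nbytes, ppos,
				       my_status, sizeof(my_status) - 1);
}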
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index e3eb3556622b..40c36f7352a6 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,9 @@ static int init_coda_psdev(void)
362 goto out_chrdev; 362 goto out_chrdev;
363 } 363 }
364 for (i = 0; i < MAX_CODADEVS; i++) 364 for (i = 0; i < MAX_CODADEVS; i++)
365 device_create(coda_psdev_class, NULL, 365 device_create_drvdata(coda_psdev_class, NULL,
366 MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); 366 MKDEV(CODA_PSDEV_MAJOR, i),
367 NULL, "cfs%d", i);
367 coda_sysctl_init(); 368 coda_sysctl_init();
368 goto out; 369 goto out;
369 370
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc6..106eba28ec5a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
197{ 197{
198 198
199 if (sizeof ubuf->f_blocks == 4) { 199 if (sizeof ubuf->f_blocks == 4) {
200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
201 0xffffffff00000000ULL) 201 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
202 return -EOVERFLOW; 202 return -EOVERFLOW;
203 /* f_files and f_ffree may be -1; it's okay 203 /* f_files and f_ffree may be -1; it's okay
204 * to stuff that into 32 bits */ 204 * to stuff that into 32 bits */
@@ -271,8 +271,8 @@ out:
271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) 271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
272{ 272{
273 if (sizeof ubuf->f_blocks == 4) { 273 if (sizeof ubuf->f_blocks == 4) {
274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
275 0xffffffff00000000ULL) 275 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
276 return -EOVERFLOW; 276 return -EOVERFLOW;
277 /* f_files and f_ffree may be -1; it's okay 277 /* f_files and f_ffree may be -1; it's okay
278 * to stuff that into 32 bits */ 278 * to stuff that into 32 bits */
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2131 2131
2132#ifdef CONFIG_SIGNALFD 2132#ifdef CONFIG_SIGNALFD
2133 2133
2134asmlinkage long compat_sys_signalfd(int ufd, 2134asmlinkage long compat_sys_signalfd4(int ufd,
2135 const compat_sigset_t __user *sigmask, 2135 const compat_sigset_t __user *sigmask,
2136 compat_size_t sigsetsize) 2136 compat_size_t sigsetsize, int flags)
2137{ 2137{
2138 compat_sigset_t ss32; 2138 compat_sigset_t ss32;
2139 sigset_t tmp; 2139 sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) 2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
2149 return -EFAULT; 2149 return -EFAULT;
2150 2150
2151 return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); 2151 return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
2152} 2152}
2153 2153
2154asmlinkage long compat_sys_signalfd(int ufd,
2155 const compat_sigset_t __user *sigmask,
2156 compat_size_t sigsetsize)
2157{
2158 return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
2159}
2154#endif /* CONFIG_SIGNALFD */ 2160#endif /* CONFIG_SIGNALFD */
2155 2161
2156#ifdef CONFIG_TIMERFD 2162#ifdef CONFIG_TIMERFD
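The signalfd change follows the pattern used for the new *4 syscalls in this series: the flag-taking variant carries the implementation and the legacy entry point becomes a one-line wrapper passing flags == 0, so existing callers keep their old semantics. The native side in fs/signalfd.c (listed in the diffstat but not shown in this excerpt) presumably mirrors the compat wrapper above, along the lines of:

asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
			     size_t sizemask)
{
	/* legacy entry point: identical behaviour, no new flags */
	return sys_signalfd4(ufd, user_mask, sizemask, 0);
}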
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a9..18e2c548161d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2297,8 +2297,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
2298COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) 2298COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
2299COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) 2299COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
2300COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
2301COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
2302COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) 2300COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
2303/* Raw devices */ 2301/* Raw devices */
2304COMPATIBLE_IOCTL(RAW_SETBIND) 2302COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..3818d6ab76ca 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
61static unsigned int d_hash_mask __read_mostly; 61static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 62static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 63static struct hlist_head *dentry_hashtable __read_mostly;
64static LIST_HEAD(dentry_unused);
65 64
66/* Statistics gathering. */ 65/* Statistics gathering. */
67struct dentry_stat_t dentry_stat = { 66struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 95 call_rcu(&dentry->d_u.d_rcu, d_callback);
97} 96}
98 97
99static void dentry_lru_remove(struct dentry *dentry)
100{
101 if (!list_empty(&dentry->d_lru)) {
102 list_del_init(&dentry->d_lru);
103 dentry_stat.nr_unused--;
104 }
105}
106
107/* 98/*
108 * Release the dentry's inode, using the filesystem 99 * Release the dentry's inode, using the filesystem
109 * d_iput() operation if defined. 100 * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
130 } 121 }
131} 122}
132 123
124/*
125 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
126 */
127static void dentry_lru_add(struct dentry *dentry)
128{
129 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
130 dentry->d_sb->s_nr_dentry_unused++;
131 dentry_stat.nr_unused++;
132}
133
134static void dentry_lru_add_tail(struct dentry *dentry)
135{
136 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
137 dentry->d_sb->s_nr_dentry_unused++;
138 dentry_stat.nr_unused++;
139}
140
141static void dentry_lru_del(struct dentry *dentry)
142{
143 if (!list_empty(&dentry->d_lru)) {
144 list_del(&dentry->d_lru);
145 dentry->d_sb->s_nr_dentry_unused--;
146 dentry_stat.nr_unused--;
147 }
148}
149
150static void dentry_lru_del_init(struct dentry *dentry)
151{
152 if (likely(!list_empty(&dentry->d_lru))) {
153 list_del_init(&dentry->d_lru);
154 dentry->d_sb->s_nr_dentry_unused--;
155 dentry_stat.nr_unused--;
156 }
157}
158
133/** 159/**
134 * d_kill - kill dentry and return parent 160 * d_kill - kill dentry and return parent
135 * @dentry: dentry to kill 161 * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
212 goto kill_it; 238 goto kill_it;
213 if (list_empty(&dentry->d_lru)) { 239 if (list_empty(&dentry->d_lru)) {
214 dentry->d_flags |= DCACHE_REFERENCED; 240 dentry->d_flags |= DCACHE_REFERENCED;
215 list_add(&dentry->d_lru, &dentry_unused); 241 dentry_lru_add(dentry);
216 dentry_stat.nr_unused++;
217 } 242 }
218 spin_unlock(&dentry->d_lock); 243 spin_unlock(&dentry->d_lock);
219 spin_unlock(&dcache_lock); 244 spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
222unhash_it: 247unhash_it:
223 __d_drop(dentry); 248 __d_drop(dentry);
224kill_it: 249kill_it:
225 dentry_lru_remove(dentry); 250 /* if dentry was on the d_lru list delete it from there */
251 dentry_lru_del(dentry);
226 dentry = d_kill(dentry); 252 dentry = d_kill(dentry);
227 if (dentry) 253 if (dentry)
228 goto repeat; 254 goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
290static inline struct dentry * __dget_locked(struct dentry *dentry) 316static inline struct dentry * __dget_locked(struct dentry *dentry)
291{ 317{
292 atomic_inc(&dentry->d_count); 318 atomic_inc(&dentry->d_count);
293 dentry_lru_remove(dentry); 319 dentry_lru_del_init(dentry);
294 return dentry; 320 return dentry;
295} 321}
296 322
@@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry)
406 432
407 if (dentry->d_op && dentry->d_op->d_delete) 433 if (dentry->d_op && dentry->d_op->d_delete)
408 dentry->d_op->d_delete(dentry); 434 dentry->d_op->d_delete(dentry);
409 dentry_lru_remove(dentry); 435 dentry_lru_del_init(dentry);
410 __d_drop(dentry); 436 __d_drop(dentry);
411 dentry = d_kill(dentry); 437 dentry = d_kill(dentry);
412 spin_lock(&dcache_lock); 438 spin_lock(&dcache_lock);
413 } 439 }
414} 440}
415 441
416/** 442/*
417 * prune_dcache - shrink the dcache 443 * Shrink the dentry LRU on a given superblock.
418 * @count: number of entries to try and free 444 * @sb : superblock to shrink dentry LRU.
419 * @sb: if given, ignore dentries for other superblocks 445 * @count: If count is NULL, we prune all dentries on superblock.
420 * which are being unmounted. 446 * @flags: If flags is non-zero, we need to do special processing based on
421 * 447 * which flags are set. This means we don't need to maintain multiple
422 * Shrink the dcache. This is done when we need 448 * similar copies of this loop.
423 * more memory, or simply when we need to unmount
424 * something (at which point we need to unuse
425 * all dentries).
426 *
427 * This function may fail to free any resources if
428 * all the dentries are in use.
429 */ 449 */
430 450static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
431static void prune_dcache(int count, struct super_block *sb)
432{ 451{
433 spin_lock(&dcache_lock); 452 LIST_HEAD(referenced);
434 for (; count ; count--) { 453 LIST_HEAD(tmp);
435 struct dentry *dentry; 454 struct dentry *dentry;
436 struct list_head *tmp; 455 int cnt = 0;
437 struct rw_semaphore *s_umount;
438
439 cond_resched_lock(&dcache_lock);
440 456
441 tmp = dentry_unused.prev; 457 BUG_ON(!sb);
442 if (sb) { 458 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
443 /* Try to find a dentry for this sb, but don't try 459 spin_lock(&dcache_lock);
444 * too hard, if they aren't near the tail they will 460 if (count != NULL)
445 * be moved down again soon 461 /* called from prune_dcache() and shrink_dcache_parent() */
462 cnt = *count;
463restart:
464 if (count == NULL)
465 list_splice_init(&sb->s_dentry_lru, &tmp);
466 else {
467 while (!list_empty(&sb->s_dentry_lru)) {
468 dentry = list_entry(sb->s_dentry_lru.prev,
469 struct dentry, d_lru);
470 BUG_ON(dentry->d_sb != sb);
471
472 spin_lock(&dentry->d_lock);
473 /*
474 * If we are honouring the DCACHE_REFERENCED flag and
475 * the dentry has this flag set, don't free it. Clear
476 * the flag and put it back on the LRU.
446 */ 477 */
447 int skip = count; 478 if ((flags & DCACHE_REFERENCED)
448 while (skip && tmp != &dentry_unused && 479 && (dentry->d_flags & DCACHE_REFERENCED)) {
449 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { 480 dentry->d_flags &= ~DCACHE_REFERENCED;
450 skip--; 481 list_move_tail(&dentry->d_lru, &referenced);
451 tmp = tmp->prev; 482 spin_unlock(&dentry->d_lock);
483 } else {
484 list_move_tail(&dentry->d_lru, &tmp);
485 spin_unlock(&dentry->d_lock);
486 cnt--;
487 if (!cnt)
488 break;
452 } 489 }
453 } 490 }
454 if (tmp == &dentry_unused) 491 }
455 break; 492 while (!list_empty(&tmp)) {
456 list_del_init(tmp); 493 dentry = list_entry(tmp.prev, struct dentry, d_lru);
457 prefetch(dentry_unused.prev); 494 dentry_lru_del_init(dentry);
458 dentry_stat.nr_unused--; 495 spin_lock(&dentry->d_lock);
459 dentry = list_entry(tmp, struct dentry, d_lru);
460
461 spin_lock(&dentry->d_lock);
462 /* 496 /*
463 * We found an inuse dentry which was not removed from 497 * We found an inuse dentry which was not removed from
464 * dentry_unused because of laziness during lookup. Do not free 498 * the LRU because of laziness during lookup. Do not free
465 * it - just keep it off the dentry_unused list. 499 * it - just keep it off the LRU list.
466 */ 500 */
467 if (atomic_read(&dentry->d_count)) { 501 if (atomic_read(&dentry->d_count)) {
468 spin_unlock(&dentry->d_lock); 502 spin_unlock(&dentry->d_lock);
469 continue; 503 continue;
470 } 504 }
471 /* If the dentry was recently referenced, don't free it. */ 505 prune_one_dentry(dentry);
472 if (dentry->d_flags & DCACHE_REFERENCED) { 506 /* dentry->d_lock was dropped in prune_one_dentry() */
473 dentry->d_flags &= ~DCACHE_REFERENCED; 507 cond_resched_lock(&dcache_lock);
474 list_add(&dentry->d_lru, &dentry_unused); 508 }
475 dentry_stat.nr_unused++; 509 if (count == NULL && !list_empty(&sb->s_dentry_lru))
476 spin_unlock(&dentry->d_lock); 510 goto restart;
511 if (count != NULL)
512 *count = cnt;
513 if (!list_empty(&referenced))
514 list_splice(&referenced, &sb->s_dentry_lru);
515 spin_unlock(&dcache_lock);
516}
517
518/**
519 * prune_dcache - shrink the dcache
520 * @count: number of entries to try to free
521 *
522 * Shrink the dcache. This is done when we need more memory, or simply when we
523 * need to unmount something (at which point we need to unuse all dentries).
524 *
525 * This function may fail to free any resources if all the dentries are in use.
526 */
527static void prune_dcache(int count)
528{
529 struct super_block *sb;
530 int w_count;
531 int unused = dentry_stat.nr_unused;
532 int prune_ratio;
533 int pruned;
534
535 if (unused == 0 || count == 0)
536 return;
537 spin_lock(&dcache_lock);
538restart:
539 if (count >= unused)
540 prune_ratio = 1;
541 else
542 prune_ratio = unused / count;
543 spin_lock(&sb_lock);
544 list_for_each_entry(sb, &super_blocks, s_list) {
545 if (sb->s_nr_dentry_unused == 0)
477 continue; 546 continue;
478 } 547 sb->s_count++;
 479 /* 548 /* Now, we reclaim unused dentries with fairness.
 480 * If the dentry is not DCACHED_REFERENCED, it is time 549 * We reclaim the same percentage from each superblock.
 481 * to remove it from the dcache, provided the super block is 550 * We calculate the number of dentries to scan on this sb
482 * NULL (which means we are trying to reclaim memory) 551 * as follows, but the implementation is arranged to avoid
483 * or this dentry belongs to the same super block that 552 * overflows:
484 * we want to shrink. 553 * number of dentries to scan on this sb =
485 */ 554 * count * (number of dentries on this sb /
486 /* 555 * number of dentries in the machine)
487 * If this dentry is for "my" filesystem, then I can prune it
488 * without taking the s_umount lock (I already hold it).
489 */ 556 */
490 if (sb && dentry->d_sb == sb) { 557 spin_unlock(&sb_lock);
491 prune_one_dentry(dentry); 558 if (prune_ratio != 1)
492 continue; 559 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
493 } 560 else
561 w_count = sb->s_nr_dentry_unused;
562 pruned = w_count;
494 /* 563 /*
495 * ...otherwise we need to be sure this filesystem isn't being 564 * We need to be sure this filesystem isn't being unmounted,
496 * unmounted, otherwise we could race with 565 * otherwise we could race with generic_shutdown_super(), and
497 * generic_shutdown_super(), and end up holding a reference to 566 * end up holding a reference to an inode while the filesystem
498 * an inode while the filesystem is unmounted. 567 * is unmounted. So we try to get s_umount, and make sure
499 * So we try to get s_umount, and make sure s_root isn't NULL. 568 * s_root isn't NULL.
500 * (Take a local copy of s_umount to avoid a use-after-free of
501 * `dentry').
502 */ 569 */
503 s_umount = &dentry->d_sb->s_umount; 570 if (down_read_trylock(&sb->s_umount)) {
504 if (down_read_trylock(s_umount)) { 571 if ((sb->s_root != NULL) &&
505 if (dentry->d_sb->s_root != NULL) { 572 (!list_empty(&sb->s_dentry_lru))) {
506 prune_one_dentry(dentry); 573 spin_unlock(&dcache_lock);
507 up_read(s_umount); 574 __shrink_dcache_sb(sb, &w_count,
508 continue; 575 DCACHE_REFERENCED);
576 pruned -= w_count;
577 spin_lock(&dcache_lock);
509 } 578 }
510 up_read(s_umount); 579 up_read(&sb->s_umount);
511 } 580 }
512 spin_unlock(&dentry->d_lock); 581 spin_lock(&sb_lock);
582 count -= pruned;
513 /* 583 /*
514 * Insert dentry at the head of the list as inserting at the 584 * restart only when sb is no longer on the list and
515 * tail leads to a cycle. 585 * we have more work to do.
516 */ 586 */
517 list_add(&dentry->d_lru, &dentry_unused); 587 if (__put_super_and_need_restart(sb) && count > 0) {
518 dentry_stat.nr_unused++; 588 spin_unlock(&sb_lock);
589 goto restart;
590 }
519 } 591 }
592 spin_unlock(&sb_lock);
520 spin_unlock(&dcache_lock); 593 spin_unlock(&dcache_lock);
521} 594}
522 595
523/*
524 * Shrink the dcache for the specified super block.
525 * This allows us to unmount a device without disturbing
526 * the dcache for the other devices.
527 *
528 * This implementation makes just two traversals of the
529 * unused list. On the first pass we move the selected
530 * dentries to the most recent end, and on the second
531 * pass we free them. The second pass must restart after
532 * each dput(), but since the target dentries are all at
533 * the end, it's really just a single traversal.
534 */
535
536/** 596/**
537 * shrink_dcache_sb - shrink dcache for a superblock 597 * shrink_dcache_sb - shrink dcache for a superblock
538 * @sb: superblock 598 * @sb: superblock
@@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb)
541 * is used to free the dcache before unmounting a file 601 * is used to free the dcache before unmounting a file
542 * system 602 * system
543 */ 603 */
544
545void shrink_dcache_sb(struct super_block * sb) 604void shrink_dcache_sb(struct super_block * sb)
546{ 605{
547 struct list_head *tmp, *next; 606 __shrink_dcache_sb(sb, NULL, 0);
548 struct dentry *dentry;
549
550 /*
551 * Pass one ... move the dentries for the specified
552 * superblock to the most recent end of the unused list.
553 */
554 spin_lock(&dcache_lock);
555 list_for_each_prev_safe(tmp, next, &dentry_unused) {
556 dentry = list_entry(tmp, struct dentry, d_lru);
557 if (dentry->d_sb != sb)
558 continue;
559 list_move_tail(tmp, &dentry_unused);
560 }
561
562 /*
563 * Pass two ... free the dentries for this superblock.
564 */
565repeat:
566 list_for_each_prev_safe(tmp, next, &dentry_unused) {
567 dentry = list_entry(tmp, struct dentry, d_lru);
568 if (dentry->d_sb != sb)
569 continue;
570 dentry_stat.nr_unused--;
571 list_del_init(tmp);
572 spin_lock(&dentry->d_lock);
573 if (atomic_read(&dentry->d_count)) {
574 spin_unlock(&dentry->d_lock);
575 continue;
576 }
577 prune_one_dentry(dentry);
578 cond_resched_lock(&dcache_lock);
579 goto repeat;
580 }
581 spin_unlock(&dcache_lock);
582} 607}
583 608
584/* 609/*
@@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
595 620
596 /* detach this root from the system */ 621 /* detach this root from the system */
597 spin_lock(&dcache_lock); 622 spin_lock(&dcache_lock);
598 dentry_lru_remove(dentry); 623 dentry_lru_del_init(dentry);
599 __d_drop(dentry); 624 __d_drop(dentry);
600 spin_unlock(&dcache_lock); 625 spin_unlock(&dcache_lock);
601 626
@@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
609 spin_lock(&dcache_lock); 634 spin_lock(&dcache_lock);
610 list_for_each_entry(loop, &dentry->d_subdirs, 635 list_for_each_entry(loop, &dentry->d_subdirs,
611 d_u.d_child) { 636 d_u.d_child) {
612 dentry_lru_remove(loop); 637 dentry_lru_del_init(loop);
613 __d_drop(loop); 638 __d_drop(loop);
614 cond_resched_lock(&dcache_lock); 639 cond_resched_lock(&dcache_lock);
615 } 640 }
@@ -791,14 +816,13 @@ resume:
791 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 816 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
792 next = tmp->next; 817 next = tmp->next;
793 818
794 dentry_lru_remove(dentry); 819 dentry_lru_del_init(dentry);
795 /* 820 /*
796 * move only zero ref count dentries to the end 821 * move only zero ref count dentries to the end
797 * of the unused list for prune_dcache 822 * of the unused list for prune_dcache
798 */ 823 */
799 if (!atomic_read(&dentry->d_count)) { 824 if (!atomic_read(&dentry->d_count)) {
800 list_add_tail(&dentry->d_lru, &dentry_unused); 825 dentry_lru_add_tail(dentry);
801 dentry_stat.nr_unused++;
802 found++; 826 found++;
803 } 827 }
804 828
@@ -840,10 +864,11 @@ out:
840 864
841void shrink_dcache_parent(struct dentry * parent) 865void shrink_dcache_parent(struct dentry * parent)
842{ 866{
867 struct super_block *sb = parent->d_sb;
843 int found; 868 int found;
844 869
845 while ((found = select_parent(parent)) != 0) 870 while ((found = select_parent(parent)) != 0)
846 prune_dcache(found, parent->d_sb); 871 __shrink_dcache_sb(sb, &found, 0);
847} 872}
848 873
849/* 874/*
@@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
863 if (nr) { 888 if (nr) {
864 if (!(gfp_mask & __GFP_FS)) 889 if (!(gfp_mask & __GFP_FS))
865 return -1; 890 return -1;
866 prune_dcache(nr, NULL); 891 prune_dcache(nr);
867 } 892 }
868 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 893 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
869} 894}
@@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1215 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while 1240 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1216 * lookup is going on. 1241 * lookup is going on.
1217 * 1242 *
1218 * dentry_unused list is not updated even if lookup finds the required dentry 1243 * The dentry unused LRU is not updated even if lookup finds the required dentry
1219 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1244 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1220 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1245 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1221 * acquisition. 1246 * acquisition.
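The LRU helpers and __shrink_dcache_sb() above depend on per-superblock state that is declared outside fs/ and therefore does not appear in this diffstat. Presumably struct super_block gains two fields along these lines:

	struct list_head	s_dentry_lru;		/* unused dentry LRU */
	int			s_nr_dentry_unused;	/* # of dentries on the LRU */

with the matching INIT_LIST_HEAD(&s->s_dentry_lru) in alloc_super() most likely being the single-line fs/super.c change in the diffstat above.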
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e9602d85c11d..08e28c9bb416 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
309} 309}
310EXPORT_SYMBOL_GPL(debugfs_create_symlink); 310EXPORT_SYMBOL_GPL(debugfs_create_symlink);
311 311
312static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
313{
314 int ret = 0;
315
316 if (debugfs_positive(dentry)) {
317 if (dentry->d_inode) {
318 dget(dentry);
319 switch (dentry->d_inode->i_mode & S_IFMT) {
320 case S_IFDIR:
321 ret = simple_rmdir(parent->d_inode, dentry);
322 break;
323 case S_IFLNK:
324 kfree(dentry->d_inode->i_private);
325 /* fall through */
326 default:
327 simple_unlink(parent->d_inode, dentry);
328 break;
329 }
330 if (!ret)
331 d_delete(dentry);
332 dput(dentry);
333 }
334 }
335}
336
312/** 337/**
313 * debugfs_remove - removes a file or directory from the debugfs filesystem 338 * debugfs_remove - removes a file or directory from the debugfs filesystem
 314 * @dentry: a pointer to the dentry of the file or directory to be 339
@@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink);
325void debugfs_remove(struct dentry *dentry) 350void debugfs_remove(struct dentry *dentry)
326{ 351{
327 struct dentry *parent; 352 struct dentry *parent;
328 int ret = 0;
329 353
330 if (!dentry) 354 if (!dentry)
331 return; 355 return;
@@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry)
335 return; 359 return;
336 360
337 mutex_lock(&parent->d_inode->i_mutex); 361 mutex_lock(&parent->d_inode->i_mutex);
338 if (debugfs_positive(dentry)) { 362 __debugfs_remove(dentry, parent);
339 if (dentry->d_inode) { 363 mutex_unlock(&parent->d_inode->i_mutex);
340 dget(dentry); 364 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
341 switch (dentry->d_inode->i_mode & S_IFMT) { 365}
342 case S_IFDIR: 366EXPORT_SYMBOL_GPL(debugfs_remove);
343 ret = simple_rmdir(parent->d_inode, dentry); 367
344 break; 368/**
345 case S_IFLNK: 369 * debugfs_remove_recursive - recursively removes a directory
 346 kfree(dentry->d_inode->i_private); 370 * @dentry: a pointer to the dentry of the directory to be removed.
347 /* fall through */ 371 *
348 default: 372 * This function recursively removes a directory tree in debugfs that
349 simple_unlink(parent->d_inode, dentry); 373 * was previously created with a call to another debugfs function
374 * (like debugfs_create_file() or variants thereof.)
375 *
376 * This function is required to be called in order for the file to be
 377 * removed; no automatic cleanup of files will happen when a module is
 378 * removed, so you are responsible here.
379 */
380void debugfs_remove_recursive(struct dentry *dentry)
381{
382 struct dentry *child;
383 struct dentry *parent;
384
385 if (!dentry)
386 return;
387
388 parent = dentry->d_parent;
389 if (!parent || !parent->d_inode)
390 return;
391
392 parent = dentry;
393 mutex_lock(&parent->d_inode->i_mutex);
394
395 while (1) {
396 /*
 397 * When all dentries under "parent" have been removed,
398 * walk up the tree until we reach our starting point.
399 */
400 if (list_empty(&parent->d_subdirs)) {
401 mutex_unlock(&parent->d_inode->i_mutex);
402 if (parent == dentry)
350 break; 403 break;
351 } 404 parent = parent->d_parent;
352 if (!ret) 405 mutex_lock(&parent->d_inode->i_mutex);
353 d_delete(dentry); 406 }
354 dput(dentry); 407 child = list_entry(parent->d_subdirs.next, struct dentry,
408 d_u.d_child);
409
410 /*
411 * If "child" isn't empty, walk down the tree and
412 * remove all its descendants first.
413 */
414 if (!list_empty(&child->d_subdirs)) {
415 mutex_unlock(&parent->d_inode->i_mutex);
416 parent = child;
417 mutex_lock(&parent->d_inode->i_mutex);
418 continue;
355 } 419 }
420 __debugfs_remove(child, parent);
421 if (parent->d_subdirs.next == &child->d_u.d_child) {
422 /*
423 * Avoid infinite loop if we fail to remove
424 * one dentry.
425 */
426 mutex_unlock(&parent->d_inode->i_mutex);
427 break;
428 }
429 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
356 } 430 }
431
432 parent = dentry->d_parent;
433 mutex_lock(&parent->d_inode->i_mutex);
434 __debugfs_remove(dentry, parent);
357 mutex_unlock(&parent->d_inode->i_mutex); 435 mutex_unlock(&parent->d_inode->i_mutex);
358 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 436 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
359} 437}
360EXPORT_SYMBOL_GPL(debugfs_remove); 438EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
361 439
362/** 440/**
363 * debugfs_rename - rename a file/directory in the debugfs filesystem 441 * debugfs_rename - rename a file/directory in the debugfs filesystem
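A usage sketch for the new debugfs_remove_recursive() (hypothetical module and names, shown only to illustrate the intended call pattern):

#include <linux/module.h>
#include <linux/debugfs.h>

static struct dentry *my_debug_dir;
static u32 my_counter;

static int __init my_init(void)
{
	my_debug_dir = debugfs_create_dir("my_driver", NULL);
	if (!my_debug_dir)
		return -ENODEV;
	debugfs_create_u32("counter", 0444, my_debug_dir, &my_counter);
	return 0;
}

static void __exit my_exit(void)
{
	/* tears down "counter" and then "my_driver" itself, so the module
	 * does not need to keep and remove every dentry individually */
	debugfs_remove_recursive(my_debug_dir);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");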
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd4884..b4755a85996e 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869a..7b99917ffadc 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
38static int 39static int
@@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data)
1032{ 1033{
1033 u32 m_1, m_2; 1034 u32 m_1, m_2;
1034 1035
1035 memcpy(&m_1, data, 4); 1036 m_1 = get_unaligned_be32(data);
1036 m_1 = be32_to_cpu(m_1); 1037 m_2 = get_unaligned_be32(data + 4);
1037 memcpy(&m_2, (data + 4), 4);
1038 m_2 = be32_to_cpu(m_2);
1039 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) 1038 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
1040 return 1; 1039 return 1;
1041 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " 1040 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
1073 int i; 1072 int i;
1074 u32 flags; 1073 u32 flags;
1075 1074
1076 memcpy(&flags, page_virt, 4); 1075 flags = get_unaligned_be32(page_virt);
1077 flags = be32_to_cpu(flags);
1078 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1076 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1079 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1077 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1080 if (flags & ecryptfs_flag_map[i].file_flag) { 1078 if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1100 1098
1101 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1099 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1102 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); 1100 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1103 m_1 = cpu_to_be32(m_1); 1101 put_unaligned_be32(m_1, page_virt);
1104 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1102 page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
1105 m_2 = cpu_to_be32(m_2); 1103 put_unaligned_be32(m_2, page_virt);
1106 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1107 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1104 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1105}
1110 1106
@@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1121 flags |= ecryptfs_flag_map[i].file_flag; 1117 flags |= ecryptfs_flag_map[i].file_flag;
1122 /* Version is in top 8 bits of the 32-bit flag vector */ 1118 /* Version is in top 8 bits of the 32-bit flag vector */
1123 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); 1119 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1124 flags = cpu_to_be32(flags); 1120 put_unaligned_be32(flags, page_virt);
1125 memcpy(page_virt, &flags, 4);
1126 (*written) = 4; 1121 (*written) = 4;
1127} 1122}
1128 1123
@@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt,
1238 num_header_extents_at_front = 1233 num_header_extents_at_front =
1239 (u16)(crypt_stat->num_header_bytes_at_front 1234 (u16)(crypt_stat->num_header_bytes_at_front
1240 / crypt_stat->extent_size); 1235 / crypt_stat->extent_size);
1241 header_extent_size = cpu_to_be32(header_extent_size); 1236 put_unaligned_be32(header_extent_size, virt);
1242 memcpy(virt, &header_extent_size, 4);
1243 virt += 4; 1237 virt += 4;
1244 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); 1238 put_unaligned_be16(num_header_extents_at_front, virt);
1245 memcpy(virt, &num_header_extents_at_front, 2);
1246 (*written) = 6; 1239 (*written) = 6;
1247} 1240}
1248 1241
@@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1410 u32 header_extent_size; 1403 u32 header_extent_size;
1411 u16 num_header_extents_at_front; 1404 u16 num_header_extents_at_front;
1412 1405
1413 memcpy(&header_extent_size, virt, sizeof(u32)); 1406 header_extent_size = get_unaligned_be32(virt);
1414 header_extent_size = be32_to_cpu(header_extent_size); 1407 virt += sizeof(__be32);
1415 virt += sizeof(u32); 1408 num_header_extents_at_front = get_unaligned_be16(virt);
1416 memcpy(&num_header_extents_at_front, virt, sizeof(u16));
1417 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1418 crypt_stat->num_header_bytes_at_front = 1409 crypt_stat->num_header_bytes_at_front =
1419 (((size_t)num_header_extents_at_front 1410 (((size_t)num_header_extents_at_front
1420 * (size_t)header_extent_size)); 1411 * (size_t)header_extent_size));
1421 (*bytes_read) = (sizeof(u32) + sizeof(u16)); 1412 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1422 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1413 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1423 && (crypt_stat->num_header_bytes_at_front 1414 && (crypt_stat->num_header_bytes_at_front
1424 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1415 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
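The crypto.c conversions all follow one pattern: an open-coded memcpy() plus cpu_to_be32()/be32_to_cpu() pair becomes a single put_unaligned_be32()/get_unaligned_be32() call, which is also safe on architectures that fault on misaligned accesses. A minimal sketch (illustrative helper names):

#include <linux/types.h>
#include <asm/unaligned.h>

/* read a big-endian 32-bit field from an arbitrary header offset */
static u32 read_be32_field(const char *virt)
{
	return get_unaligned_be32(virt);	/* was: memcpy + be32_to_cpu */
}

/* write the field back in big-endian order */
static void write_be32_field(char *virt, u32 val)
{
	put_unaligned_be32(val, virt);		/* was: cpu_to_be32 + memcpy */
}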
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e05..b73fb752c5f8 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
559extern struct kmem_cache *ecryptfs_key_sig_cache; 559extern struct kmem_cache *ecryptfs_key_sig_cache;
560extern struct kmem_cache *ecryptfs_global_auth_tok_cache; 560extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
561extern struct kmem_cache *ecryptfs_key_tfm_cache; 561extern struct kmem_cache *ecryptfs_key_tfm_cache;
562extern struct kmem_cache *ecryptfs_open_req_cache;
562 563
564struct ecryptfs_open_req {
565#define ECRYPTFS_REQ_PROCESSED 0x00000001
566#define ECRYPTFS_REQ_DROPPED 0x00000002
567#define ECRYPTFS_REQ_ZOMBIE 0x00000004
568 u32 flags;
569 struct file **lower_file;
570 struct dentry *lower_dentry;
571 struct vfsmount *lower_mnt;
572 wait_queue_head_t wait;
573 struct mutex mux;
574 struct list_head kthread_ctl_list;
575};
576
577#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001
563int ecryptfs_interpose(struct dentry *hidden_dentry, 578int ecryptfs_interpose(struct dentry *hidden_dentry,
564 struct dentry *this_dentry, struct super_block *sb, 579 struct dentry *this_dentry, struct super_block *sb,
565 int flag); 580 u32 flags);
566int ecryptfs_fill_zeros(struct file *file, loff_t new_length); 581int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
567int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, 582int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
568 const char *name, int length, 583 const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
690int 705int
691ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, 706ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
692 struct user_namespace *user_ns, struct pid *pid); 707 struct user_namespace *user_ns, struct pid *pid);
708int ecryptfs_init_kthread(void);
709void ecryptfs_destroy_kthread(void);
710int ecryptfs_privileged_open(struct file **lower_file,
711 struct dentry *lower_dentry,
712 struct vfsmount *lower_mnt);
713int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
693 714
694#endif /* #ifndef ECRYPTFS_KERNEL_H */ 715#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668f..9244d653743e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
192 | ECRYPTFS_ENCRYPTED); 192 | ECRYPTFS_ENCRYPTED);
193 } 193 }
194 mutex_unlock(&crypt_stat->cs_mutex); 194 mutex_unlock(&crypt_stat->cs_mutex);
195 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
196 && !(file->f_flags & O_RDONLY)) {
197 rc = -EPERM;
198 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
199 "file must hence be opened RO\n", __func__);
200 goto out;
201 }
202 if (!ecryptfs_inode_to_private(inode)->lower_file) {
203 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
204 if (rc) {
205 printk(KERN_ERR "%s: Error attempting to initialize "
206 "the persistent file for the dentry with name "
207 "[%s]; rc = [%d]\n", __func__,
208 ecryptfs_dentry->d_name.name, rc);
209 goto out;
210 }
211 }
195 ecryptfs_set_file_lower( 212 ecryptfs_set_file_lower(
196 file, ecryptfs_inode_to_private(inode)->lower_file); 213 file, ecryptfs_inode_to_private(inode)->lower_file);
197 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 214 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aae..d755455e3bff 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/mount.h> 31#include <linux/mount.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <asm/unaligned.h>
34#include "ecryptfs_kernel.h" 35#include "ecryptfs_kernel.h"
35 36
36static struct dentry *lock_parent(struct dentry *dentry) 37static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
188 "context; rc = [%d]\n", rc); 189 "context; rc = [%d]\n", rc);
189 goto out; 190 goto out;
190 } 191 }
192 if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
193 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
194 if (rc) {
195 printk(KERN_ERR "%s: Error attempting to initialize "
196 "the persistent file for the dentry with name "
197 "[%s]; rc = [%d]\n", __func__,
198 ecryptfs_dentry->d_name.name, rc);
199 goto out;
200 }
201 }
191 rc = ecryptfs_write_metadata(ecryptfs_dentry); 202 rc = ecryptfs_write_metadata(ecryptfs_dentry);
192 if (rc) { 203 if (rc) {
193 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 204 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
307 d_add(dentry, NULL); 318 d_add(dentry, NULL);
308 goto out; 319 goto out;
309 } 320 }
310 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); 321 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
322 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
311 if (rc) { 323 if (rc) {
312 ecryptfs_printk(KERN_ERR, "Error interposing\n"); 324 ecryptfs_printk(KERN_ERR, "Error interposing\n");
313 goto out_dput; 325 goto out;
314 } 326 }
315 if (S_ISDIR(lower_inode->i_mode)) { 327 if (S_ISDIR(lower_inode->i_mode)) {
316 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); 328 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
336 rc = -ENOMEM; 348 rc = -ENOMEM;
337 ecryptfs_printk(KERN_ERR, 349 ecryptfs_printk(KERN_ERR,
338 "Cannot ecryptfs_kmalloc a page\n"); 350 "Cannot ecryptfs_kmalloc a page\n");
339 goto out_dput; 351 goto out;
340 } 352 }
341 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 353 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
342 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 354 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
343 ecryptfs_set_default_sizes(crypt_stat); 355 ecryptfs_set_default_sizes(crypt_stat);
356 if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
357 rc = ecryptfs_init_persistent_file(dentry);
358 if (rc) {
359 printk(KERN_ERR "%s: Error attempting to initialize "
360 "the persistent file for the dentry with name "
361 "[%s]; rc = [%d]\n", __func__,
362 dentry->d_name.name, rc);
363 goto out;
364 }
365 }
344 rc = ecryptfs_read_and_validate_header_region(page_virt, 366 rc = ecryptfs_read_and_validate_header_region(page_virt,
345 dentry->d_inode); 367 dentry->d_inode);
346 if (rc) { 368 if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
364 else 386 else
365 file_size = i_size_read(lower_dentry->d_inode); 387 file_size = i_size_read(lower_dentry->d_inode);
366 } else { 388 } else {
367 memcpy(&file_size, page_virt, sizeof(file_size)); 389 file_size = get_unaligned_be64(page_virt);
368 file_size = be64_to_cpu(file_size);
369 } 390 }
370 i_size_write(dentry->d_inode, (loff_t)file_size); 391 i_size_write(dentry->d_inode, (loff_t)file_size);
371 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 392 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180be..f5b76a331b9c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
44 int rc = 0; 44 int rc = 0;
45 45
46 switch (err_code) { 46 switch (err_code) {
47 case ENOKEY: 47 case -ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n"); 48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT; 49 rc = -ENOENT;
50 break; 50 break;
51 case EKEYEXPIRED: 51 case -EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n"); 52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME; 53 rc = -ETIME;
54 break; 54 break;
55 case EKEYREVOKED: 55 case -EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n"); 56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL; 57 rc = -EINVAL;
58 break; 58 break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { 963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
964 printk(KERN_ERR "Could not find key with description: [%s]\n", 964 printk(KERN_ERR "Could not find key with description: [%s]\n",
965 sig); 965 sig);
966 process_request_key_err(PTR_ERR(*auth_tok_key)); 966 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
967 rc = -EINVAL;
968 goto out; 967 goto out;
969 } 968 }
970 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 969 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
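The sign change in the process_request_key_err() switch matters because request_key() reports failure as an ERR_PTR-encoded negative errno, so PTR_ERR() already yields -ENOKEY, -EKEYEXPIRED and friends. A condensed, illustrative sketch of the corrected lookup path (find_auth_tok_key is a made-up wrapper; the request_key() call is reconstructed for context and is not part of the hunk shown):

static int find_auth_tok_key(struct key **auth_tok_key, char *sig)
{
	*auth_tok_key = request_key(&key_type_user, sig, NULL);
	if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
		printk(KERN_ERR "Could not find key with description: [%s]\n",
		       sig);
		/* the switch now matches the negative constants and its
		 * translated error is propagated to the caller */
		return process_request_key_err(PTR_ERR(*auth_tok_key));
	}
	return 0;
}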
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 000000000000..c440c6b58b2d
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <linux/kthread.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
26#include <linux/mount.h>
27#include "ecryptfs_kernel.h"
28
29struct kmem_cache *ecryptfs_open_req_cache;
30
31static struct ecryptfs_kthread_ctl {
32#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
33 u32 flags;
34 struct mutex mux;
35 struct list_head req_list;
36 wait_queue_head_t wait;
37} ecryptfs_kthread_ctl;
38
39static struct task_struct *ecryptfs_kthread;
40
41/**
42 * ecryptfs_threadfn
43 * @ignored: ignored
44 *
45 * The eCryptfs kernel thread that has the responsibility of getting
46 * the lower persistent file with RW permissions.
47 *
48 * Returns zero on success; non-zero otherwise
49 */
50static int ecryptfs_threadfn(void *ignored)
51{
52 set_freezable();
53 while (1) {
54 struct ecryptfs_open_req *req;
55
56 wait_event_freezable(
57 ecryptfs_kthread_ctl.wait,
58 (!list_empty(&ecryptfs_kthread_ctl.req_list)
59 || kthread_should_stop()));
60 mutex_lock(&ecryptfs_kthread_ctl.mux);
61 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
62 mutex_unlock(&ecryptfs_kthread_ctl.mux);
63 goto out;
64 }
65 while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
66 req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
67 struct ecryptfs_open_req,
68 kthread_ctl_list);
69 mutex_lock(&req->mux);
70 list_del(&req->kthread_ctl_list);
71 if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
72 dget(req->lower_dentry);
73 mntget(req->lower_mnt);
74 (*req->lower_file) = dentry_open(
75 req->lower_dentry, req->lower_mnt,
76 (O_RDWR | O_LARGEFILE));
77 req->flags |= ECRYPTFS_REQ_PROCESSED;
78 }
79 wake_up(&req->wait);
80 mutex_unlock(&req->mux);
81 }
82 mutex_unlock(&ecryptfs_kthread_ctl.mux);
83 }
84out:
85 return 0;
86}
87
88int ecryptfs_init_kthread(void)
89{
90 int rc = 0;
91
92 mutex_init(&ecryptfs_kthread_ctl.mux);
93 init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
94 INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
95 ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
96 "ecryptfs-kthread");
97 if (IS_ERR(ecryptfs_kthread)) {
98 rc = PTR_ERR(ecryptfs_kthread);
99 printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
100 "\n", __func__, rc);
101 }
102 return rc;
103}
104
105void ecryptfs_destroy_kthread(void)
106{
107 struct ecryptfs_open_req *req;
108
109 mutex_lock(&ecryptfs_kthread_ctl.mux);
110 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
111 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
112 kthread_ctl_list) {
113 mutex_lock(&req->mux);
114 req->flags |= ECRYPTFS_REQ_ZOMBIE;
115 wake_up(&req->wait);
116 mutex_unlock(&req->mux);
117 }
118 mutex_unlock(&ecryptfs_kthread_ctl.mux);
119 kthread_stop(ecryptfs_kthread);
120 wake_up(&ecryptfs_kthread_ctl.wait);
121}
122
123/**
124 * ecryptfs_privileged_open
125 * @lower_file: Result of dentry_open by root on lower dentry
126 * @lower_dentry: Lower dentry for file to open
127 * @lower_mnt: Lower vfsmount for file to open
128 *
 129 * This function gets an r/w file opened against the lower dentry.
130 *
131 * Returns zero on success; non-zero otherwise
132 */
133int ecryptfs_privileged_open(struct file **lower_file,
134 struct dentry *lower_dentry,
135 struct vfsmount *lower_mnt)
136{
137 struct ecryptfs_open_req *req;
138 int rc = 0;
139
140	/* Corresponding dput() and mntput() are done when the
141	 * persistent file is fput() as the eCryptfs inode is
142	 * destroyed. */
143 dget(lower_dentry);
144 mntget(lower_mnt);
145 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
146 (O_RDWR | O_LARGEFILE));
147 if (!IS_ERR(*lower_file))
148 goto out;
149 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
150 if (!req) {
151 rc = -ENOMEM;
152 goto out;
153 }
154 mutex_init(&req->mux);
155 req->lower_file = lower_file;
156 req->lower_dentry = lower_dentry;
157 req->lower_mnt = lower_mnt;
158 init_waitqueue_head(&req->wait);
159 req->flags = 0;
160 mutex_lock(&ecryptfs_kthread_ctl.mux);
161 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
162 rc = -EIO;
163 mutex_unlock(&ecryptfs_kthread_ctl.mux);
164 printk(KERN_ERR "%s: We are in the middle of shutting down; "
165 "aborting privileged request to open lower file\n",
166 __func__);
167 goto out_free;
168 }
169 list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
170 mutex_unlock(&ecryptfs_kthread_ctl.mux);
171 wake_up(&ecryptfs_kthread_ctl.wait);
172 wait_event(req->wait, (req->flags != 0));
173 mutex_lock(&req->mux);
174 BUG_ON(req->flags == 0);
175 if (req->flags & ECRYPTFS_REQ_DROPPED
176 || req->flags & ECRYPTFS_REQ_ZOMBIE) {
177 rc = -EIO;
178 printk(KERN_WARNING "%s: Privileged open request dropped\n",
179 __func__);
180 goto out_unlock;
181 }
182 if (IS_ERR(*req->lower_file)) {
183 rc = PTR_ERR(*req->lower_file);
184 dget(lower_dentry);
185 mntget(lower_mnt);
186 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
187 (O_RDONLY | O_LARGEFILE));
188 if (IS_ERR(*lower_file)) {
189 rc = PTR_ERR(*req->lower_file);
190 (*lower_file) = NULL;
191 printk(KERN_WARNING "%s: Error attempting privileged "
192 "open of lower file with either RW or RO "
193 "perms; rc = [%d]. Giving up.\n",
194 __func__, rc);
195 }
196 }
197out_unlock:
198 mutex_unlock(&req->mux);
199out_free:
200 kmem_cache_free(ecryptfs_open_req_cache, req);
201out:
202 return rc;
203}
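
The request handshake above follows a standard pattern: ecryptfs_privileged_open() queues a request, wakes the kthread, and sleeps in wait_event() until req->flags becomes non-zero, while the kthread sets ECRYPTFS_REQ_PROCESSED (or the shutdown path sets ECRYPTFS_REQ_ZOMBIE) before waking the waiter. For readers who find the flow easier to follow without the freezer and shutdown handling, a rough userspace analogue with pthreads; open_req and REQ_PROCESSED below are illustrative names, not kernel symbols.

/* build: cc handshake.c -lpthread */
#include <pthread.h>
#include <stdio.h>

#define REQ_PROCESSED 0x1

struct open_req {
	unsigned int flags;
	pthread_mutex_t mux;
	pthread_cond_t wait;
};

static void *worker(void *arg)
{
	struct open_req *req = arg;

	pthread_mutex_lock(&req->mux);
	/* ... do the privileged work here ... */
	req->flags |= REQ_PROCESSED;		/* like ECRYPTFS_REQ_PROCESSED */
	pthread_cond_signal(&req->wait);	/* like wake_up(&req->wait) */
	pthread_mutex_unlock(&req->mux);
	return NULL;
}

int main(void)
{
	struct open_req req = { 0, PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER };
	pthread_t t;

	pthread_create(&t, NULL, worker, &req);
	pthread_mutex_lock(&req.mux);
	while (req.flags == 0)			/* wait_event(req->wait, flags != 0) */
		pthread_cond_wait(&req.wait, &req.mux);
	pthread_mutex_unlock(&req.mux);
	pthread_join(t, NULL);
	printf("request processed: flags=0x%x\n", req.flags);
	return 0;
}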
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601eb..6f403cfba14f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
117 * 117 *
118 * Returns zero on success; non-zero otherwise 118 * Returns zero on success; non-zero otherwise
119 */ 119 */
120static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 120int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
121{ 121{
122 struct ecryptfs_inode_info *inode_info = 122 struct ecryptfs_inode_info *inode_info =
123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
131 131
132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
133 /* Corresponding dput() and mntput() are done when the 133 rc = ecryptfs_privileged_open(&inode_info->lower_file,
134 * persistent file is fput() when the eCryptfs inode 134 lower_dentry, lower_mnt);
135 * is destroyed. */ 135 if (rc || IS_ERR(inode_info->lower_file)) {
136 dget(lower_dentry);
137 mntget(lower_mnt);
138 inode_info->lower_file = dentry_open(lower_dentry,
139 lower_mnt,
140 (O_RDWR | O_LARGEFILE));
141 if (IS_ERR(inode_info->lower_file)) {
142 dget(lower_dentry);
143 mntget(lower_mnt);
144 inode_info->lower_file = dentry_open(lower_dentry,
145 lower_mnt,
146 (O_RDONLY
147 | O_LARGEFILE));
148 }
149 if (IS_ERR(inode_info->lower_file)) {
150 printk(KERN_ERR "Error opening lower persistent file " 136 printk(KERN_ERR "Error opening lower persistent file "
151 "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", 137 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
152 lower_dentry, lower_mnt); 138 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
153 rc = PTR_ERR(inode_info->lower_file); 139 rc = PTR_ERR(inode_info->lower_file);
154 inode_info->lower_file = NULL; 140 inode_info->lower_file = NULL;
155 } 141 }
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
163 * @lower_dentry: Existing dentry in the lower filesystem 149 * @lower_dentry: Existing dentry in the lower filesystem
164 * @dentry: ecryptfs' dentry 150 * @dentry: ecryptfs' dentry
165 * @sb: ecryptfs's super_block 151 * @sb: ecryptfs's super_block
166 * @flag: If set to true, then d_add is called, else d_instantiate is called 152 * @flags: flags to govern behavior of interpose procedure
167 * 153 *
168 * Interposes upper and lower dentries. 154 * Interposes upper and lower dentries.
169 * 155 *
170 * Returns zero on success; non-zero otherwise 156 * Returns zero on success; non-zero otherwise
171 */ 157 */
172int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, 158int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
173 struct super_block *sb, int flag) 159 struct super_block *sb, u32 flags)
174{ 160{
175 struct inode *lower_inode; 161 struct inode *lower_inode;
176 struct inode *inode; 162 struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
207 init_special_inode(inode, lower_inode->i_mode, 193 init_special_inode(inode, lower_inode->i_mode,
208 lower_inode->i_rdev); 194 lower_inode->i_rdev);
209 dentry->d_op = &ecryptfs_dops; 195 dentry->d_op = &ecryptfs_dops;
210 if (flag) 196 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
211 d_add(dentry, inode); 197 d_add(dentry, inode);
212 else 198 else
213 d_instantiate(dentry, inode); 199 d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
215 /* This size will be overwritten for real files w/ headers and 201 /* This size will be overwritten for real files w/ headers and
216 * other metadata */ 202 * other metadata */
217 fsstack_copy_inode_size(inode, lower_inode); 203 fsstack_copy_inode_size(inode, lower_inode);
218 rc = ecryptfs_init_persistent_file(dentry);
219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out;
224 }
225out: 204out:
226 return rc; 205 return rc;
227} 206}
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
262 "session keyring for sig specified in mount " 241 "session keyring for sig specified in mount "
263 "option: [%s]\n", global_auth_tok->sig); 242 "option: [%s]\n", global_auth_tok->sig);
264 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 243 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
265 rc = 0; 244 goto out;
266 } else 245 } else
267 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 246 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
268 } 247 }
248out:
269 return rc; 249 return rc;
270} 250}
271 251
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
314 char *cipher_name_dst; 294 char *cipher_name_dst;
315 char *cipher_name_src; 295 char *cipher_name_src;
316 char *cipher_key_bytes_src; 296 char *cipher_key_bytes_src;
317 int cipher_name_len;
318 297
319 if (!options) { 298 if (!options) {
320 rc = -EINVAL; 299 rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
395 goto out; 374 goto out;
396 } 375 }
397 if (!cipher_name_set) { 376 if (!cipher_name_set) {
398 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 377 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
399 if (unlikely(cipher_name_len 378
400 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { 379 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
401 rc = -EINVAL; 380
402 BUG(); 381 strcpy(mount_crypt_stat->global_default_cipher_name,
403 goto out; 382 ECRYPTFS_DEFAULT_CIPHER);
404 }
405 memcpy(mount_crypt_stat->global_default_cipher_name,
406 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
407 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
408 = '\0';
409 } 383 }
410 if (!cipher_key_bytes_set) { 384 if (!cipher_key_bytes_set) {
411 mount_crypt_stat->global_default_cipher_key_size = 0; 385 mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
430 printk(KERN_WARNING "One or more global auth toks could not " 404 printk(KERN_WARNING "One or more global auth toks could not "
431 "properly register; rc = [%d]\n", rc); 405 "properly register; rc = [%d]\n", rc);
432 } 406 }
433 rc = 0;
434out: 407out:
435 return rc; 408 return rc;
436} 409}
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
679 .name = "ecryptfs_key_tfm_cache", 652 .name = "ecryptfs_key_tfm_cache",
680 .size = sizeof(struct ecryptfs_key_tfm), 653 .size = sizeof(struct ecryptfs_key_tfm),
681 }, 654 },
655 {
656 .cache = &ecryptfs_open_req_cache,
657 .name = "ecryptfs_open_req_cache",
658 .size = sizeof(struct ecryptfs_open_req),
659 },
682}; 660};
683 661
684static void ecryptfs_free_kmem_caches(void) 662static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
795 printk(KERN_ERR "sysfs registration failed\n"); 773 printk(KERN_ERR "sysfs registration failed\n");
796 goto out_unregister_filesystem; 774 goto out_unregister_filesystem;
797 } 775 }
776 rc = ecryptfs_init_kthread();
777 if (rc) {
778 printk(KERN_ERR "%s: kthread initialization failed; "
779 "rc = [%d]\n", __func__, rc);
780 goto out_do_sysfs_unregistration;
781 }
798 rc = ecryptfs_init_messaging(ecryptfs_transport); 782 rc = ecryptfs_init_messaging(ecryptfs_transport);
799 if (rc) { 783 if (rc) {
800 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " 784 printk(KERN_ERR "Failure occurred while attempting to "
801 "initialize the eCryptfs netlink socket\n"); 785 "initialize the eCryptfs netlink socket\n");
802 goto out_do_sysfs_unregistration; 786 goto out_destroy_kthread;
803 } 787 }
804 rc = ecryptfs_init_crypto(); 788 rc = ecryptfs_init_crypto();
805 if (rc) { 789 if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
814 goto out; 798 goto out;
815out_release_messaging: 799out_release_messaging:
816 ecryptfs_release_messaging(ecryptfs_transport); 800 ecryptfs_release_messaging(ecryptfs_transport);
801out_destroy_kthread:
802 ecryptfs_destroy_kthread();
817out_do_sysfs_unregistration: 803out_do_sysfs_unregistration:
818 do_sysfs_unregistration(); 804 do_sysfs_unregistration();
819out_unregister_filesystem: 805out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
833 printk(KERN_ERR "Failure whilst attempting to destroy crypto; " 819 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
834 "rc = [%d]\n", rc); 820 "rc = [%d]\n", rc);
835 ecryptfs_release_messaging(ecryptfs_transport); 821 ecryptfs_release_messaging(ecryptfs_transport);
822 ecryptfs_destroy_kthread();
836 do_sysfs_unregistration(); 823 do_sysfs_unregistration();
837 unregister_filesystem(&ecryptfs_fs_type); 824 unregister_filesystem(&ecryptfs_fs_type);
838 ecryptfs_free_kmem_caches(); 825 ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e6..b484792a0996 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
358} 358}
359 359
360/** 360/**
361 * ecryptfs_miscdev_helo
362 * @euid: effective user id of miscdevess sending helo packet
363 * @user_ns: The namespace in which @euid applies
364 * @pid: miscdevess id of miscdevess sending helo packet
365 *
366 * Returns zero on success; non-zero otherwise
367 */
368static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
369 struct pid *pid)
370{
371 int rc;
372
373 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
374 pid);
375 if (rc)
376 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
377 return rc;
378}
379
380/**
381 * ecryptfs_miscdev_quit
382 * @euid: effective user id of miscdevess sending quit packet
383 * @user_ns: The namespace in which @euid applies
384 * @pid: miscdevess id of miscdevess sending quit packet
385 *
386 * Returns zero on success; non-zero otherwise
387 */
388static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
389 struct pid *pid)
390{
391 int rc;
392
393 rc = ecryptfs_process_quit(euid, user_ns, pid);
394 if (rc)
395 printk(KERN_WARNING
396 "Error processing QUIT message; rc = [%d]\n", rc);
397 return rc;
398}
399
400/**
401 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon 361 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
402 * @data: Bytes comprising struct ecryptfs_message 362 * @data: Bytes comprising struct ecryptfs_message
403 * @data_size: sizeof(struct ecryptfs_message) + data len 363 * @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
512 __func__, rc); 472 __func__, rc);
513 break; 473 break;
514 case ECRYPTFS_MSG_HELO: 474 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT: 475 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break; 476 break;
536 default: 477 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev " 478 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8ba..245c2dc02d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
32#include <linux/file.h> 32#include <linux/file.h>
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/scatterlist.h> 34#include <linux/scatterlist.h>
35#include <asm/unaligned.h>
35#include "ecryptfs_kernel.h" 36#include "ecryptfs_kernel.h"
36 37
37/** 38/**
@@ -372,7 +373,6 @@ out:
372 */ 373 */
373static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) 374static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
374{ 375{
375 u64 file_size;
376 char *file_size_virt; 376 char *file_size_virt;
377 int rc; 377 int rc;
378 378
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
381 rc = -ENOMEM; 381 rc = -ENOMEM;
382 goto out; 382 goto out;
383 } 383 }
384 file_size = (u64)i_size_read(ecryptfs_inode); 384 put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
385 file_size = cpu_to_be64(file_size);
386 memcpy(file_size_virt, &file_size, sizeof(u64));
387 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, 385 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
388 sizeof(u64)); 386 sizeof(u64));
389 kfree(file_size_virt); 387 kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
403 struct dentry *lower_dentry = 401 struct dentry *lower_dentry =
404 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; 402 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
405 struct inode *lower_inode = lower_dentry->d_inode; 403 struct inode *lower_inode = lower_dentry->d_inode;
406 u64 file_size;
407 int rc; 404 int rc;
408 405
409 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { 406 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
424 xattr_virt, PAGE_CACHE_SIZE); 421 xattr_virt, PAGE_CACHE_SIZE);
425 if (size < 0) 422 if (size < 0)
426 size = 8; 423 size = 8;
427 file_size = (u64)i_size_read(ecryptfs_inode); 424 put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
428 file_size = cpu_to_be64(file_size);
429 memcpy(xattr_virt, &file_size, sizeof(u64));
430 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, 425 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
431 xattr_virt, size, 0); 426 xattr_virt, size, 0);
432 mutex_unlock(&lower_inode->i_mutex); 427 mutex_unlock(&lower_inode->i_mutex);
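
The two mmap.c hunks swap the open-coded cpu_to_be64() plus memcpy() sequence for put_unaligned_be64(), which stores the 64-bit i_size big-endian at a destination that need not be 8-byte aligned. A small standalone sketch of the equivalent byte layout, using a hypothetical helper name rather than the kernel's <asm/unaligned.h> implementation:

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of what the kernel helper does: write val big-endian,
 * byte by byte, so alignment of the destination does not matter. */
static void put_unaligned_be64_sketch(uint64_t val, void *p)
{
	unsigned char *b = p;
	int i;

	for (i = 0; i < 8; i++)
		b[i] = (unsigned char)(val >> (56 - 8 * i));
}

int main(void)
{
	unsigned char buf[8];

	put_unaligned_be64_sketch(0x0123456789abcdefULL, buf);
	printf("%02x %02x ... %02x\n", buf[0], buf[1], buf[7]);
	return 0;
}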
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..08bf558d0408 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
198 return file; 198 return file;
199} 199}
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd2(unsigned int count, int flags)
202{ 202{
203 int fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 205
206 /* Check the EFD_* constants for consistency. */
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
211 return -EINVAL;
212
206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
207 if (!ctx) 214 if (!ctx)
208 return -ENOMEM; 215 return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
214 * When we call this, the initialization must be complete, since 221 * When we call this, the initialization must be complete, since
215 * anon_inode_getfd() will install the fd. 222 * anon_inode_getfd() will install the fd.
216 */ 223 */
217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); 224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK));
218 if (fd < 0) 226 if (fd < 0)
219 kfree(ctx); 227 kfree(ctx);
220 return fd; 228 return fd;
221} 229}
222 230
231asmlinkage long sys_eventfd(unsigned int count)
232{
233 return sys_eventfd2(count, 0);
234}
235
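
sys_eventfd2() rejects anything outside EFD_CLOEXEC | EFD_NONBLOCK, and the BUILD_BUG_ON() lines pin those constants to O_CLOEXEC and O_NONBLOCK so they can be passed straight through to anon_inode_getfd(). A minimal userspace sketch, assuming a libc that already exposes the flag-taking eventfd() wrapper (older libcs would need syscall(__NR_eventfd2, ...)):

#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val = 1;
	int fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);

	if (fd < 0) {
		perror("eventfd");
		return 1;
	}
	if (write(fd, &val, sizeof(val)) != sizeof(val))	/* post one event */
		perror("write");
	if (read(fd, &val, sizeof(val)) == sizeof(val))		/* drains counter */
		printf("counter read back: %llu\n", (unsigned long long)val);
	close(fd);
	return 0;
}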
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..0c87474f7917 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,20 +1046,25 @@ retry:
1046 * RB tree. With the current implementation, the "size" parameter is ignored 1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks). 1047 * (besides sanity checks).
1048 */ 1048 */
1049asmlinkage long sys_epoll_create(int size) 1049asmlinkage long sys_epoll_create1(int flags)
1050{ 1050{
1051 int error, fd = -1; 1051 int error, fd = -1;
1052 struct eventpoll *ep; 1052 struct eventpoll *ep;
1053 1053
1054 /* Check the EPOLL_* constant for consistency. */
1055 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1056
1057 if (flags & ~EPOLL_CLOEXEC)
1058 return -EINVAL;
1059
1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1060 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1055 current, size)); 1061 current, flags));
1056 1062
1057 /* 1063 /*
1058 * Sanity check on the size parameter, and create the internal data 1064 * Create the internal data structure ( "struct eventpoll" ).
1059 * structure ( "struct eventpoll" ).
1060 */ 1065 */
1061 error = -EINVAL; 1066 error = ep_alloc(&ep);
1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) { 1067 if (error < 0) {
1063 fd = error; 1068 fd = error;
1064 goto error_return; 1069 goto error_return;
1065 } 1070 }
@@ -1068,17 +1073,26 @@ asmlinkage long sys_epoll_create(int size)
1068 * Creates all the items needed to setup an eventpoll file. That is, 1073 * Creates all the items needed to setup an eventpoll file. That is,
1069 * a file structure and a free file descriptor. 1074 * a file structure and a free file descriptor.
1070 */ 1075 */
1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); 1076 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1077 flags & O_CLOEXEC);
1072 if (fd < 0) 1078 if (fd < 0)
1073 ep_free(ep); 1079 ep_free(ep);
1074 1080
1075error_return: 1081error_return:
1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1082 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1077 current, size, fd)); 1083 current, flags, fd));
1078 1084
1079 return fd; 1085 return fd;
1080} 1086}
1081 1087
1088asmlinkage long sys_epoll_create(int size)
1089{
1090 if (size < 0)
1091 return -EINVAL;
1092
1093 return sys_epoll_create1(0);
1094}
1095
1082/* 1096/*
1083 * The following function implements the controller interface for 1097 * The following function implements the controller interface for
1084 * the eventpoll file that enables the insertion/removal/change of 1098 * the eventpoll file that enables the insertion/removal/change of
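
sys_epoll_create1() drops the long-ignored size hint and takes EPOLL_CLOEXEC instead, while sys_epoll_create() keeps rejecting size < 0 and then forwards to the new call. A usage sketch, assuming a libc wrapper named epoll_create1():

#include <sys/epoll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* The descriptor is created with close-on-exec already set, so there
	 * is no window for a concurrent fork()/exec() to leak it. */
	int epfd = epoll_create1(EPOLL_CLOEXEC);

	if (epfd < 0) {
		perror("epoll_create1");
		return 1;
	}
	close(epfd);
	return 0;
}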
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..190ed1f92774 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
541 /* 541 /*
542 * when the old and new regions overlap clear from new_end. 542 * when the old and new regions overlap clear from new_end.
543 */ 543 */
544 free_pgd_range(&tlb, new_end, old_end, new_end, 544 free_pgd_range(tlb, new_end, old_end, new_end,
545 vma->vm_next ? vma->vm_next->vm_start : 0); 545 vma->vm_next ? vma->vm_next->vm_start : 0);
546 } else { 546 } else {
547 /* 547 /*
@@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
550 * have constraints on va-space that make this illegal (IA64) - 550 * have constraints on va-space that make this illegal (IA64) -
551 * for the others its just a little faster. 551 * for the others its just a little faster.
552 */ 552 */
553 free_pgd_range(&tlb, old_start, old_end, new_end, 553 free_pgd_range(tlb, old_start, old_end, new_end,
554 vma->vm_next ? vma->vm_next->vm_start : 0); 554 vma->vm_next ? vma->vm_next->vm_start : 0);
555 } 555 }
556 tlb_finish_mmu(tlb, new_end, old_end); 556 tlb_finish_mmu(tlb, new_end, old_end);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..9679fcbdeaa0 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
125 return fd; 125 return fd;
126} 126}
127 127
128asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 128asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
129{ 129{
130 int err = -EBADF; 130 int err = -EBADF;
131 struct file * file, *tofree; 131 struct file * file, *tofree;
132 struct files_struct * files = current->files; 132 struct files_struct * files = current->files;
133 struct fdtable *fdt; 133 struct fdtable *fdt;
134 134
135 if ((flags & ~O_CLOEXEC) != 0)
136 return -EINVAL;
137
135 spin_lock(&files->file_lock); 138 spin_lock(&files->file_lock);
136 if (!(file = fcheck(oldfd))) 139 if (!(file = fcheck(oldfd)))
137 goto out_unlock; 140 goto out_unlock;
@@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
163 166
164 rcu_assign_pointer(fdt->fd[newfd], file); 167 rcu_assign_pointer(fdt->fd[newfd], file);
165 FD_SET(newfd, fdt->open_fds); 168 FD_SET(newfd, fdt->open_fds);
166 FD_CLR(newfd, fdt->close_on_exec); 169 if (flags & O_CLOEXEC)
170 FD_SET(newfd, fdt->close_on_exec);
171 else
172 FD_CLR(newfd, fdt->close_on_exec);
167 spin_unlock(&files->file_lock); 173 spin_unlock(&files->file_lock);
168 174
169 if (tofree) 175 if (tofree)
@@ -181,6 +187,11 @@ out_fput:
181 goto out; 187 goto out;
182} 188}
183 189
190asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
191{
192 return sys_dup3(oldfd, newfd, 0);
193}
194
184asmlinkage long sys_dup(unsigned int fildes) 195asmlinkage long sys_dup(unsigned int fildes)
185{ 196{
186 int ret = -EBADF; 197 int ret = -EBADF;
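
sys_dup3() is sys_dup2() plus a flags argument; the only accepted flag is O_CLOEXEC, which sets the close-on-exec bit on newfd inside the same file_lock critical section instead of requiring a separate fcntl(). A userspace sketch, assuming a libc that exposes dup3():

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Duplicate stdout onto descriptor 10 with close-on-exec set
	 * atomically, avoiding the dup2() + fcntl(F_SETFD) race. */
	int fd = dup3(STDOUT_FILENO, 10, O_CLOEXEC);

	if (fd < 0) {
		perror("dup3");
		return 1;
	}
	dprintf(fd, "hello via dup3\n");
	close(fd);
	return 0;
}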
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..dbd01d262ca4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
53enum { 53enum {
54 Opt_size, Opt_nr_inodes, 54 Opt_size, Opt_nr_inodes,
55 Opt_mode, Opt_uid, Opt_gid, 55 Opt_mode, Opt_uid, Opt_gid,
56 Opt_pagesize,
56 Opt_err, 57 Opt_err,
57}; 58};
58 59
@@ -62,6 +63,7 @@ static match_table_t tokens = {
62 {Opt_mode, "mode=%o"}, 63 {Opt_mode, "mode=%o"},
63 {Opt_uid, "uid=%u"}, 64 {Opt_uid, "uid=%u"},
64 {Opt_gid, "gid=%u"}, 65 {Opt_gid, "gid=%u"},
66 {Opt_pagesize, "pagesize=%s"},
65 {Opt_err, NULL}, 67 {Opt_err, NULL},
66}; 68};
67 69
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
80 struct inode *inode = file->f_path.dentry->d_inode; 82 struct inode *inode = file->f_path.dentry->d_inode;
81 loff_t len, vma_len; 83 loff_t len, vma_len;
82 int ret; 84 int ret;
85 struct hstate *h = hstate_file(file);
83 86
84 /* 87 /*
85 * vma address alignment (but not the pgoff alignment) has 88 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 95 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
93 vma->vm_ops = &hugetlb_vm_ops; 96 vma->vm_ops = &hugetlb_vm_ops;
94 97
95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 98 if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
96 return -EINVAL; 99 return -EINVAL;
97 100
98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 101 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
103 ret = -ENOMEM; 106 ret = -ENOMEM;
104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 107 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
105 108
106 if (vma->vm_flags & VM_MAYSHARE && 109 if (hugetlb_reserve_pages(inode,
107 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 110 vma->vm_pgoff >> huge_page_order(h),
108 len >> HPAGE_SHIFT)) 111 len >> huge_page_shift(h), vma))
109 goto out; 112 goto out;
110 113
111 ret = 0; 114 ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
130 struct mm_struct *mm = current->mm; 133 struct mm_struct *mm = current->mm;
131 struct vm_area_struct *vma; 134 struct vm_area_struct *vma;
132 unsigned long start_addr; 135 unsigned long start_addr;
136 struct hstate *h = hstate_file(file);
133 137
134 if (len & ~HPAGE_MASK) 138 if (len & ~huge_page_mask(h))
135 return -EINVAL; 139 return -EINVAL;
136 if (len > TASK_SIZE) 140 if (len > TASK_SIZE)
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 if (flags & MAP_FIXED) { 143 if (flags & MAP_FIXED) {
140 if (prepare_hugepage_range(addr, len)) 144 if (prepare_hugepage_range(file, addr, len))
141 return -EINVAL; 145 return -EINVAL;
142 return addr; 146 return addr;
143 } 147 }
144 148
145 if (addr) { 149 if (addr) {
146 addr = ALIGN(addr, HPAGE_SIZE); 150 addr = ALIGN(addr, huge_page_size(h));
147 vma = find_vma(mm, addr); 151 vma = find_vma(mm, addr);
148 if (TASK_SIZE - len >= addr && 152 if (TASK_SIZE - len >= addr &&
149 (!vma || addr + len <= vma->vm_start)) 153 (!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
156 start_addr = TASK_UNMAPPED_BASE; 160 start_addr = TASK_UNMAPPED_BASE;
157 161
158full_search: 162full_search:
159 addr = ALIGN(start_addr, HPAGE_SIZE); 163 addr = ALIGN(start_addr, huge_page_size(h));
160 164
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 165 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */ 166 /* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
174 178
175 if (!vma || addr + len <= vma->vm_start) 179 if (!vma || addr + len <= vma->vm_start)
176 return addr; 180 return addr;
177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 181 addr = ALIGN(vma->vm_end, huge_page_size(h));
178 } 182 }
179} 183}
180#endif 184#endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 229static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos) 230 size_t len, loff_t *ppos)
227{ 231{
232 struct hstate *h = hstate_file(filp);
228 struct address_space *mapping = filp->f_mapping; 233 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host; 234 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT; 235 unsigned long index = *ppos >> huge_page_shift(h);
231 unsigned long offset = *ppos & ~HPAGE_MASK; 236 unsigned long offset = *ppos & ~huge_page_mask(h);
232 unsigned long end_index; 237 unsigned long end_index;
233 loff_t isize; 238 loff_t isize;
234 ssize_t retval = 0; 239 ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
243 if (!isize) 248 if (!isize)
244 goto out; 249 goto out;
245 250
246 end_index = (isize - 1) >> HPAGE_SHIFT; 251 end_index = (isize - 1) >> huge_page_shift(h);
247 for (;;) { 252 for (;;) {
248 struct page *page; 253 struct page *page;
249 int nr, ret; 254 unsigned long nr, ret;
250 255
251 /* nr is the maximum number of bytes to copy from this page */ 256 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE; 257 nr = huge_page_size(h);
253 if (index >= end_index) { 258 if (index >= end_index) {
254 if (index > end_index) 259 if (index > end_index)
255 goto out; 260 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1; 261 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
257 if (nr <= offset) { 262 if (nr <= offset) {
258 goto out; 263 goto out;
259 } 264 }
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
287 offset += ret; 292 offset += ret;
288 retval += ret; 293 retval += ret;
289 len -= ret; 294 len -= ret;
290 index += offset >> HPAGE_SHIFT; 295 index += offset >> huge_page_shift(h);
291 offset &= ~HPAGE_MASK; 296 offset &= ~huge_page_mask(h);
292 297
293 if (page) 298 if (page)
294 page_cache_release(page); 299 page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
298 break; 303 break;
299 } 304 }
300out: 305out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; 306 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
302 mutex_unlock(&inode->i_mutex); 307 mutex_unlock(&inode->i_mutex);
303 return retval; 308 return retval;
304} 309}
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
339 344
340static void truncate_hugepages(struct inode *inode, loff_t lstart) 345static void truncate_hugepages(struct inode *inode, loff_t lstart)
341{ 346{
347 struct hstate *h = hstate_inode(inode);
342 struct address_space *mapping = &inode->i_data; 348 struct address_space *mapping = &inode->i_data;
343 const pgoff_t start = lstart >> HPAGE_SHIFT; 349 const pgoff_t start = lstart >> huge_page_shift(h);
344 struct pagevec pvec; 350 struct pagevec pvec;
345 pgoff_t next; 351 pgoff_t next;
346 int i, freed = 0; 352 int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
441 v_offset = 0; 447 v_offset = 0;
442 448
443 __unmap_hugepage_range(vma, 449 __unmap_hugepage_range(vma,
444 vma->vm_start + v_offset, vma->vm_end); 450 vma->vm_start + v_offset, vma->vm_end, NULL);
445 } 451 }
446} 452}
447 453
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
449{ 455{
450 pgoff_t pgoff; 456 pgoff_t pgoff;
451 struct address_space *mapping = inode->i_mapping; 457 struct address_space *mapping = inode->i_mapping;
458 struct hstate *h = hstate_inode(inode);
452 459
453 BUG_ON(offset & ~HPAGE_MASK); 460 BUG_ON(offset & ~huge_page_mask(h));
454 pgoff = offset >> PAGE_SHIFT; 461 pgoff = offset >> PAGE_SHIFT;
455 462
456 i_size_write(inode, offset); 463 i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
465static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 472static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 473{
467 struct inode *inode = dentry->d_inode; 474 struct inode *inode = dentry->d_inode;
475 struct hstate *h = hstate_inode(inode);
468 int error; 476 int error;
469 unsigned int ia_valid = attr->ia_valid; 477 unsigned int ia_valid = attr->ia_valid;
470 478
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
476 484
477 if (ia_valid & ATTR_SIZE) { 485 if (ia_valid & ATTR_SIZE) {
478 error = -EINVAL; 486 error = -EINVAL;
479 if (!(attr->ia_size & ~HPAGE_MASK)) 487 if (!(attr->ia_size & ~huge_page_mask(h)))
480 error = hugetlb_vmtruncate(inode, attr->ia_size); 488 error = hugetlb_vmtruncate(inode, attr->ia_size);
481 if (error) 489 if (error)
482 goto out; 490 goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
610static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 618static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
611{ 619{
612 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 620 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
621 struct hstate *h = hstate_inode(dentry->d_inode);
613 622
614 buf->f_type = HUGETLBFS_MAGIC; 623 buf->f_type = HUGETLBFS_MAGIC;
615 buf->f_bsize = HPAGE_SIZE; 624 buf->f_bsize = huge_page_size(h);
616 if (sbinfo) { 625 if (sbinfo) {
617 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
618 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
743 char *p, *rest; 752 char *p, *rest;
744 substring_t args[MAX_OPT_ARGS]; 753 substring_t args[MAX_OPT_ARGS];
745 int option; 754 int option;
755 unsigned long long size = 0;
756 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
746 757
747 if (!options) 758 if (!options)
748 return 0; 759 return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
773 break; 784 break;
774 785
775 case Opt_size: { 786 case Opt_size: {
776 unsigned long long size;
777 /* memparse() will accept a K/M/G without a digit */ 787 /* memparse() will accept a K/M/G without a digit */
778 if (!isdigit(*args[0].from)) 788 if (!isdigit(*args[0].from))
779 goto bad_val; 789 goto bad_val;
780 size = memparse(args[0].from, &rest); 790 size = memparse(args[0].from, &rest);
781 if (*rest == '%') { 791 setsize = SIZE_STD;
782 size <<= HPAGE_SHIFT; 792 if (*rest == '%')
783 size *= max_huge_pages; 793 setsize = SIZE_PERCENT;
784 do_div(size, 100);
785 }
786 pconfig->nr_blocks = (size >> HPAGE_SHIFT);
787 break; 794 break;
788 } 795 }
789 796
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
794 pconfig->nr_inodes = memparse(args[0].from, &rest); 801 pconfig->nr_inodes = memparse(args[0].from, &rest);
795 break; 802 break;
796 803
804 case Opt_pagesize: {
805 unsigned long ps;
806 ps = memparse(args[0].from, &rest);
807 pconfig->hstate = size_to_hstate(ps);
808 if (!pconfig->hstate) {
809 printk(KERN_ERR
810 "hugetlbfs: Unsupported page size %lu MB\n",
811 ps >> 20);
812 return -EINVAL;
813 }
814 break;
815 }
816
797 default: 817 default:
798 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 818 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
799 p); 819 p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
801 break; 821 break;
802 } 822 }
803 } 823 }
824
825 /* Do size after hstate is set up */
826 if (setsize > NO_SIZE) {
827 struct hstate *h = pconfig->hstate;
828 if (setsize == SIZE_PERCENT) {
829 size <<= huge_page_shift(h);
830 size *= h->max_huge_pages;
831 do_div(size, 100);
832 }
833 pconfig->nr_blocks = (size >> huge_page_shift(h));
834 }
835
804 return 0; 836 return 0;
805 837
806bad_val: 838bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
825 config.uid = current->fsuid; 857 config.uid = current->fsuid;
826 config.gid = current->fsgid; 858 config.gid = current->fsgid;
827 config.mode = 0755; 859 config.mode = 0755;
860 config.hstate = &default_hstate;
828 ret = hugetlbfs_parse_options(data, &config); 861 ret = hugetlbfs_parse_options(data, &config);
829 if (ret) 862 if (ret)
830 return ret; 863 return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833 if (!sbinfo) 866 if (!sbinfo)
834 return -ENOMEM; 867 return -ENOMEM;
835 sb->s_fs_info = sbinfo; 868 sb->s_fs_info = sbinfo;
869 sbinfo->hstate = config.hstate;
836 spin_lock_init(&sbinfo->stat_lock); 870 spin_lock_init(&sbinfo->stat_lock);
837 sbinfo->max_blocks = config.nr_blocks; 871 sbinfo->max_blocks = config.nr_blocks;
838 sbinfo->free_blocks = config.nr_blocks; 872 sbinfo->free_blocks = config.nr_blocks;
839 sbinfo->max_inodes = config.nr_inodes; 873 sbinfo->max_inodes = config.nr_inodes;
840 sbinfo->free_inodes = config.nr_inodes; 874 sbinfo->free_inodes = config.nr_inodes;
841 sb->s_maxbytes = MAX_LFS_FILESIZE; 875 sb->s_maxbytes = MAX_LFS_FILESIZE;
842 sb->s_blocksize = HPAGE_SIZE; 876 sb->s_blocksize = huge_page_size(config.hstate);
843 sb->s_blocksize_bits = HPAGE_SHIFT; 877 sb->s_blocksize_bits = huge_page_shift(config.hstate);
844 sb->s_magic = HUGETLBFS_MAGIC; 878 sb->s_magic = HUGETLBFS_MAGIC;
845 sb->s_op = &hugetlbfs_ops; 879 sb->s_op = &hugetlbfs_ops;
846 sb->s_time_gran = 1; 880 sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
942 goto out_dentry; 976 goto out_dentry;
943 977
944 error = -ENOMEM; 978 error = -ENOMEM;
945 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 979 if (hugetlb_reserve_pages(inode, 0,
980 size >> huge_page_shift(hstate_inode(inode)), NULL))
946 goto out_inode; 981 goto out_inode;
947 982
948 d_instantiate(dentry, inode); 983 d_instantiate(dentry, inode);
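
With the new Opt_pagesize handling, a hugetlbfs mount can select any huge page size the kernel has an hstate for, and size= percentages are resolved only after that hstate is known. A hedged example of mounting with an explicit page size via mount(2); the mount point and the availability of 2 MB pages are assumptions, not something this patch guarantees:

#include <sys/mount.h>
#include <stdio.h>

int main(void)
{
	/* Needs CAP_SYS_ADMIN, an existing /mnt/huge directory, and a kernel
	 * hstate for 2 MB pages; size=50% is interpreted against that hstate. */
	if (mount("none", "/mnt/huge", "hugetlbfs", 0,
		  "pagesize=2M,size=50%") < 0) {
		perror("mount hugetlbfs");
		return 1;
	}
	return 0;
}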
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..fe79c25d95dc 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
566 .destroy_watch = free_inotify_user_watch, 566 .destroy_watch = free_inotify_user_watch,
567}; 567};
568 568
569asmlinkage long sys_inotify_init(void) 569asmlinkage long sys_inotify_init1(int flags)
570{ 570{
571 struct inotify_device *dev; 571 struct inotify_device *dev;
572 struct inotify_handle *ih; 572 struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
574 struct file *filp; 574 struct file *filp;
575 int fd, ret; 575 int fd, ret;
576 576
577 fd = get_unused_fd(); 577 /* Check the IN_* constants for consistency. */
578 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
579 BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
580
581 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
582 return -EINVAL;
583
584 fd = get_unused_fd_flags(flags & O_CLOEXEC);
578 if (fd < 0) 585 if (fd < 0)
579 return fd; 586 return fd;
580 587
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
610 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 617 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
611 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 618 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
612 filp->f_mode = FMODE_READ; 619 filp->f_mode = FMODE_READ;
613 filp->f_flags = O_RDONLY; 620 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
614 filp->private_data = dev; 621 filp->private_data = dev;
615 622
616 INIT_LIST_HEAD(&dev->events); 623 INIT_LIST_HEAD(&dev->events);
@@ -638,6 +645,11 @@ out_put_fd:
638 return ret; 645 return ret;
639} 646}
640 647
648asmlinkage long sys_inotify_init(void)
649{
650 return sys_inotify_init1(0);
651}
652
641asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 653asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
642{ 654{
643 struct inode *inode; 655 struct inode *inode;
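
sys_inotify_init1() mirrors the other new flag-taking syscalls: IN_CLOEXEC and IN_NONBLOCK are checked against O_CLOEXEC and O_NONBLOCK at build time and then applied to the descriptor and the file's f_flags. A userspace sketch, assuming an inotify_init1() wrapper in libc:

#include <sys/inotify.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);

	if (fd < 0) {
		perror("inotify_init1");
		return 1;
	}
	/* With IN_NONBLOCK, reads return -EAGAIN instead of sleeping. */
	if (inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0)
		perror("inotify_add_watch");
	close(fd);
	return 0;
}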
diff --git a/fs/open.c b/fs/open.c
index a99ad09c3197..bb98d2fe809f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
64 memcpy(buf, &st, sizeof(st)); 64 memcpy(buf, &st, sizeof(st));
65 else { 65 else {
66 if (sizeof buf->f_blocks == 4) { 66 if (sizeof buf->f_blocks == 4) {
67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 67 if ((st.f_blocks | st.f_bfree | st.f_bavail |
68 st.f_bsize | st.f_frsize) &
68 0xffffffff00000000ULL) 69 0xffffffff00000000ULL)
69 return -EOVERFLOW; 70 return -EOVERFLOW;
70 /* 71 /*
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6149e4b58c88..efef715135d3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -401,7 +401,7 @@ void register_disk(struct gendisk *disk)
401 disk->dev.parent = disk->driverfs_dev; 401 disk->dev.parent = disk->driverfs_dev;
402 disk->dev.devt = MKDEV(disk->major, disk->first_minor); 402 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
403 403
404 strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); 404 strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
405 /* ewww... some of these buggers have / in the name... */ 405 /* ewww... some of these buggers have / in the name... */
406 s = strchr(disk->dev.bus_id, '/'); 406 s = strchr(disk->dev.bus_id, '/');
407 if (s) 407 if (s)
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..10c4e9aa5c49 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -950,7 +950,7 @@ fail_inode:
950 return NULL; 950 return NULL;
951} 951}
952 952
953struct file *create_write_pipe(void) 953struct file *create_write_pipe(int flags)
954{ 954{
955 int err; 955 int err;
956 struct inode *inode; 956 struct inode *inode;
@@ -983,7 +983,7 @@ struct file *create_write_pipe(void)
983 goto err_dentry; 983 goto err_dentry;
984 f->f_mapping = inode->i_mapping; 984 f->f_mapping = inode->i_mapping;
985 985
986 f->f_flags = O_WRONLY; 986 f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
987 f->f_version = 0; 987 f->f_version = 0;
988 988
989 return f; 989 return f;
@@ -1007,7 +1007,7 @@ void free_write_pipe(struct file *f)
1007 put_filp(f); 1007 put_filp(f);
1008} 1008}
1009 1009
1010struct file *create_read_pipe(struct file *wrf) 1010struct file *create_read_pipe(struct file *wrf, int flags)
1011{ 1011{
1012 struct file *f = get_empty_filp(); 1012 struct file *f = get_empty_filp();
1013 if (!f) 1013 if (!f)
@@ -1019,7 +1019,7 @@ struct file *create_read_pipe(struct file *wrf)
1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1020 1020
1021 f->f_pos = 0; 1021 f->f_pos = 0;
1022 f->f_flags = O_RDONLY; 1022 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1023 f->f_op = &read_pipe_fops; 1023 f->f_op = &read_pipe_fops;
1024 f->f_mode = FMODE_READ; 1024 f->f_mode = FMODE_READ;
1025 f->f_version = 0; 1025 f->f_version = 0;
@@ -1027,26 +1027,29 @@ struct file *create_read_pipe(struct file *wrf)
1027 return f; 1027 return f;
1028} 1028}
1029 1029
1030int do_pipe(int *fd) 1030int do_pipe_flags(int *fd, int flags)
1031{ 1031{
1032 struct file *fw, *fr; 1032 struct file *fw, *fr;
1033 int error; 1033 int error;
1034 int fdw, fdr; 1034 int fdw, fdr;
1035 1035
1036 fw = create_write_pipe(); 1036 if (flags & ~(O_CLOEXEC | O_NONBLOCK))
1037 return -EINVAL;
1038
1039 fw = create_write_pipe(flags);
1037 if (IS_ERR(fw)) 1040 if (IS_ERR(fw))
1038 return PTR_ERR(fw); 1041 return PTR_ERR(fw);
1039 fr = create_read_pipe(fw); 1042 fr = create_read_pipe(fw, flags);
1040 error = PTR_ERR(fr); 1043 error = PTR_ERR(fr);
1041 if (IS_ERR(fr)) 1044 if (IS_ERR(fr))
1042 goto err_write_pipe; 1045 goto err_write_pipe;
1043 1046
1044 error = get_unused_fd(); 1047 error = get_unused_fd_flags(flags);
1045 if (error < 0) 1048 if (error < 0)
1046 goto err_read_pipe; 1049 goto err_read_pipe;
1047 fdr = error; 1050 fdr = error;
1048 1051
1049 error = get_unused_fd(); 1052 error = get_unused_fd_flags(flags);
1050 if (error < 0) 1053 if (error < 0)
1051 goto err_fdr; 1054 goto err_fdr;
1052 fdw = error; 1055 fdw = error;
@@ -1074,16 +1077,21 @@ int do_pipe(int *fd)
1074 return error; 1077 return error;
1075} 1078}
1076 1079
1080int do_pipe(int *fd)
1081{
1082 return do_pipe_flags(fd, 0);
1083}
1084
1077/* 1085/*
1078 * sys_pipe() is the normal C calling standard for creating 1086 * sys_pipe() is the normal C calling standard for creating
1079 * a pipe. It's not the way Unix traditionally does this, though. 1087 * a pipe. It's not the way Unix traditionally does this, though.
1080 */ 1088 */
1081asmlinkage long __weak sys_pipe(int __user *fildes) 1089asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
1082{ 1090{
1083 int fd[2]; 1091 int fd[2];
1084 int error; 1092 int error;
1085 1093
1086 error = do_pipe(fd); 1094 error = do_pipe_flags(fd, flags);
1087 if (!error) { 1095 if (!error) {
1088 if (copy_to_user(fildes, fd, sizeof(fd))) { 1096 if (copy_to_user(fildes, fd, sizeof(fd))) {
1089 sys_close(fd[0]); 1097 sys_close(fd[0]);
@@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
1094 return error; 1102 return error;
1095} 1103}
1096 1104
1105asmlinkage long __weak sys_pipe(int __user *fildes)
1106{
1107 return sys_pipe2(fildes, 0);
1108}
1109
1097/* 1110/*
1098 * pipefs should _never_ be mounted by userland - too much of security hassle, 1111 * pipefs should _never_ be mounted by userland - too much of security hassle,
1099 * no real gain from having the whole whorehouse mounted. So we don't need 1112 * no real gain from having the whole whorehouse mounted. So we don't need
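
do_pipe_flags() threads O_CLOEXEC and O_NONBLOCK through create_write_pipe(), create_read_pipe() and get_unused_fd_flags(), and sys_pipe2() exposes that to userspace while sys_pipe() stays a zero-flags wrapper. A minimal sketch, assuming a libc pipe2() wrapper:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd[2];

	/* Both ends come back non-blocking and close-on-exec in one call. */
	if (pipe2(fd, O_NONBLOCK | O_CLOEXEC) < 0) {
		perror("pipe2");
		return 1;
	}
	if (write(fd[1], "x", 1) != 1)
		perror("write");
	close(fd[0]);
	close(fd[1]);
	return 0;
}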
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
232#undef K 232#undef K
233} 233}
234 234
235extern const struct seq_operations fragmentation_op;
236static int fragmentation_open(struct inode *inode, struct file *file) 235static int fragmentation_open(struct inode *inode, struct file *file)
237{ 236{
238 (void)inode; 237 (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
246 .release = seq_release, 245 .release = seq_release,
247}; 246};
248 247
249extern const struct seq_operations pagetypeinfo_op;
250static int pagetypeinfo_open(struct inode *inode, struct file *file) 248static int pagetypeinfo_open(struct inode *inode, struct file *file)
251{ 249{
252 return seq_open(file, &pagetypeinfo_op); 250 return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
259 .release = seq_release, 257 .release = seq_release,
260}; 258};
261 259
262extern const struct seq_operations zoneinfo_op;
263static int zoneinfo_open(struct inode *inode, struct file *file) 260static int zoneinfo_open(struct inode *inode, struct file *file)
264{ 261{
265 return seq_open(file, &zoneinfo_op); 262 return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
356 .release = seq_release, 353 .release = seq_release,
357}; 354};
358 355
359extern const struct seq_operations vmstat_op;
360static int vmstat_open(struct inode *inode, struct file *file) 356static int vmstat_open(struct inode *inode, struct file *file)
361{ 357{
362 return seq_open(file, &vmstat_op); 358 return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
468#ifdef CONFIG_MMU 464#ifdef CONFIG_MMU
469static int vmalloc_open(struct inode *inode, struct file *file) 465static int vmalloc_open(struct inode *inode, struct file *file)
470{ 466{
471 return seq_open(file, &vmalloc_op); 467 unsigned int *ptr = NULL;
468 int ret;
469
470 if (NUMA_BUILD)
471 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
472 ret = seq_open(file, &vmalloc_op);
473 if (!ret) {
474 struct seq_file *m = file->private_data;
475 m->private = ptr;
476 } else
477 kfree(ptr);
478 return ret;
472} 479}
473 480
474static const struct file_operations proc_vmalloc_operations = { 481static const struct file_operations proc_vmalloc_operations = {
475 .open = vmalloc_open, 482 .open = vmalloc_open,
476 .read = seq_read, 483 .read = seq_read,
477 .llseek = seq_lseek, 484 .llseek = seq_lseek,
478 .release = seq_release, 485 .release = seq_release_private,
479}; 486};
480#endif 487#endif
481 488
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b224a28e0c15..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
27#include "internal.h" 27#include "internal.h"
28 28
29 29
30static struct net *get_proc_net(const struct inode *inode)
31{
32 return maybe_get_net(PDE_NET(PDE(inode)));
33}
34
30int seq_open_net(struct inode *ino, struct file *f, 35int seq_open_net(struct inode *ino, struct file *f,
31 const struct seq_operations *ops, int size) 36 const struct seq_operations *ops, int size)
32{ 37{
@@ -185,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
185} 190}
186EXPORT_SYMBOL_GPL(proc_net_remove); 191EXPORT_SYMBOL_GPL(proc_net_remove);
187 192
188struct net *get_proc_net(const struct inode *inode)
189{
190 return maybe_get_net(PDE_NET(PDE(inode)));
191}
192EXPORT_SYMBOL_GPL(get_proc_net);
193
194static __net_init int proc_net_ns_init(struct net *net) 193static __net_init int proc_net_ns_init(struct net *net)
195{ 194{
196 struct proc_dir_entry *netd, *net_statd; 195 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede3..7546a918f790 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
636 struct pagemapread pm; 636 struct pagemapread pm;
637 int pagecount; 637 int pagecount;
638 int ret = -ESRCH; 638 int ret = -ESRCH;
639 struct mm_walk pagemap_walk; 639 struct mm_walk pagemap_walk = {};
640 unsigned long src; 640 unsigned long src;
641 unsigned long svpfn; 641 unsigned long svpfn;
642 unsigned long start_vaddr; 642 unsigned long start_vaddr;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..9c39bc7f8431 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
205 .read = signalfd_read, 205 .read = signalfd_read,
206}; 206};
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
209 size_t sizemask, int flags)
209{ 210{
210 sigset_t sigmask; 211 sigset_t sigmask;
211 struct signalfd_ctx *ctx; 212 struct signalfd_ctx *ctx;
212 213
214 /* Check the SFD_* constants for consistency. */
215 BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
216 BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
217
218 if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
219 return -EINVAL;
220
213 if (sizemask != sizeof(sigset_t) || 221 if (sizemask != sizeof(sigset_t) ||
214 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 222 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
215 return -EINVAL; 223 return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
227 * When we call this, the initialization must be complete, since 235 * When we call this, the initialization must be complete, since
228 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
229 */ 237 */
230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK));
231 if (ufd < 0) 240 if (ufd < 0)
232 kfree(ctx); 241 kfree(ctx);
233 } else { 242 } else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
249 258
250 return ufd; 259 return ufd;
251} 260}
261
262asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
263 size_t sizemask)
264{
265 return sys_signalfd4(ufd, user_mask, sizemask, 0);
266}
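
sys_signalfd4() adds SFD_CLOEXEC and SFD_NONBLOCK on top of the existing mask handling, and sys_signalfd() simply calls it with flags of zero. A userspace sketch, assuming the usual libc signalfd() wrapper (which ends up in the new syscall on kernels that have it):

#include <sys/signalfd.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo si;
	int fd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	sigprocmask(SIG_BLOCK, &mask, NULL);	/* signals must be blocked */

	fd = signalfd(-1, &mask, SFD_CLOEXEC);
	if (fd < 0) {
		perror("signalfd");
		return 1;
	}
	if (read(fd, &si, sizeof(si)) == sizeof(si))	/* blocks until ^C */
		printf("got signal %u\n", si.ssi_signo);
	close(fd);
	return 0;
}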
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
70 INIT_LIST_HEAD(&s->s_instances); 70 INIT_LIST_HEAD(&s->s_instances);
71 INIT_HLIST_HEAD(&s->s_anon); 71 INIT_HLIST_HEAD(&s->s_anon);
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 INIT_LIST_HEAD(&s->s_dentry_lru);
73 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
74 mutex_init(&s->s_lock); 75 mutex_init(&s->s_lock);
75 lockdep_set_class(&s->s_umount, &type->s_umount_key); 76 lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9ee..2967562d416f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
139 * before performing the write. 139 * before performing the write.
140 * 140 *
141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the 141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
142 * range which are not presently under writeback. 142 * range which are not presently under writeback. Note that this may block for
143 * significant periods due to exhaustion of disk request structures.
143 * 144 *
144 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range 145 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
145 * after performing the write. 146 * after performing the write.
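
The sync.c change is documentation only, warning that SYNC_FILE_RANGE_WRITE can block when the block layer runs out of request structures. For reference, a small sketch of the call it documents; the file name and byte range are arbitrary:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/example.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "data", 4) != 4)
		perror("write");
	/* Starts writeback of the first 4 bytes; per the updated comment this
	 * may still block if disk request structures are exhausted. */
	if (sync_file_range(fd, 0, 4, SYNC_FILE_RANGE_WRITE) < 0)
		perror("sync_file_range");
	close(fd);
	return 0;
}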
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8c0e4b92574f..c1a7efb310bf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
398} 398}
399 399
400/** 400/**
401 * sysfs_add_one - add sysfs_dirent to parent 401 * __sysfs_add_one - add sysfs_dirent to parent without warning
402 * @acxt: addrm context to use 402 * @acxt: addrm context to use
403 * @sd: sysfs_dirent to be added 403 * @sd: sysfs_dirent to be added
404 * 404 *
@@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
417 * 0 on success, -EEXIST if entry with the given name already 417 * 0 on success, -EEXIST if entry with the given name already
418 * exists. 418 * exists.
419 */ 419 */
420int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 420int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
421{ 421{
422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
423 return -EEXIST; 423 return -EEXIST;
@@ -435,6 +435,39 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
435} 435}
436 436
437/** 437/**
438 * sysfs_add_one - add sysfs_dirent to parent
439 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be added
441 *
442 * Get @acxt->parent_sd and set sd->s_parent to it and increment
443 * nlink of parent inode if @sd is a directory and link into the
444 * children list of the parent.
445 *
446 * This function should be called between calls to
447 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
448 * passed the same @acxt as passed to sysfs_addrm_start().
449 *
450 * LOCKING:
451 * Determined by sysfs_addrm_start().
452 *
453 * RETURNS:
454 * 0 on success, -EEXIST if entry with the given name already
455 * exists.
456 */
457int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
458{
459 int ret;
460
461 ret = __sysfs_add_one(acxt, sd);
462 if (ret == -EEXIST) {
463 printk(KERN_WARNING "sysfs: duplicate filename '%s' "
464 "cannot be created\n", sd->s_name);
465 WARN_ON(1);
466 }
467 return ret;
468}
469
470/**
438 * sysfs_remove_one - remove sysfs_dirent from parent 471 * sysfs_remove_one - remove sysfs_dirent from parent
439 * @acxt: addrm context to use 472 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be removed 473 * @sd: sysfs_dirent to be removed
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e7735f643cd1..3f07893ff896 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,6 +14,7 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
17#include <linux/namei.h> 18#include <linux/namei.h>
18#include <linux/poll.h> 19#include <linux/poll.h>
19#include <linux/list.h> 20#include <linux/list.h>
@@ -585,9 +586,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
585 586
586 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 587 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
587 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 588 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
588 rc = notify_change(victim, &newattrs); 589 newattrs.ia_ctime = current_fs_time(inode->i_sb);
590 rc = sysfs_setattr(victim, &newattrs);
589 591
590 if (rc == 0) { 592 if (rc == 0) {
593 fsnotify_change(victim, newattrs.ia_valid);
591 mutex_lock(&sysfs_mutex); 594 mutex_lock(&sysfs_mutex);
592 victim_sd->s_mode = newattrs.ia_mode; 595 victim_sd->s_mode = newattrs.ia_mode;
593 mutex_unlock(&sysfs_mutex); 596 mutex_unlock(&sysfs_mutex);
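
For context, sysfs_chmod_file() is the entry point drivers use to adjust an attribute's permissions at run time. A hedged sketch of such a caller; the device, attribute, and helper names are placeholders, not anything defined by this patch:

	/* Illustrative only: make a previously read-only attribute writable
	 * by root. "dev_attr_enable" stands in for a DEVICE_ATTR() the
	 * driver already declares. */
	static int example_unlock_attr(struct device *dev)
	{
		return sysfs_chmod_file(&dev->kobj, &dev_attr_enable.attr,
					S_IRUGO | S_IWUSR);
	}
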
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 817f5966edca..a3ba217fbe74 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,13 +19,8 @@
19 19
20#include "sysfs.h" 20#include "sysfs.h"
21 21
22/** 22static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
23 * sysfs_create_link - create symlink between two objects. 23 const char *name, int warn)
24 * @kobj: object whose directory we're creating the link in.
25 * @target: object we're pointing to.
26 * @name: name of the symlink.
27 */
28int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
29{ 24{
30 struct sysfs_dirent *parent_sd = NULL; 25 struct sysfs_dirent *parent_sd = NULL;
31 struct sysfs_dirent *target_sd = NULL; 26 struct sysfs_dirent *target_sd = NULL;
@@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
65 target_sd = NULL; /* reference is now owned by the symlink */ 60 target_sd = NULL; /* reference is now owned by the symlink */
66 61
67 sysfs_addrm_start(&acxt, parent_sd); 62 sysfs_addrm_start(&acxt, parent_sd);
68 error = sysfs_add_one(&acxt, sd); 63 if (warn)
64 error = sysfs_add_one(&acxt, sd);
65 else
66 error = __sysfs_add_one(&acxt, sd);
69 sysfs_addrm_finish(&acxt); 67 sysfs_addrm_finish(&acxt);
70 68
71 if (error) 69 if (error)
@@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
80} 78}
81 79
82/** 80/**
81 * sysfs_create_link - create symlink between two objects.
82 * @kobj: object whose directory we're creating the link in.
83 * @target: object we're pointing to.
84 * @name: name of the symlink.
85 */
86int sysfs_create_link(struct kobject *kobj, struct kobject *target,
87 const char *name)
88{
89 return sysfs_do_create_link(kobj, target, name, 1);
90}
91
92/**
93 * sysfs_create_link_nowarn - create symlink between two objects.
94 * @kobj: object whose directory we're creating the link in.
95 * @target: object we're pointing to.
96 * @name: name of the symlink.
97 *
 98 * This function does the same as sysfs_create_link(), but it
99 * doesn't warn if the link already exists.
100 */
101int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
102 const char *name)
103{
104 return sysfs_do_create_link(kobj, target, name, 0);
105}
106
107/**
83 * sysfs_remove_link - remove symlink in object's directory. 108 * sysfs_remove_link - remove symlink in object's directory.
84 * @kobj: object we're acting for. 109 * @kobj: object we're acting for.
85 * @name: name of the symlink to remove. 110 * @name: name of the symlink to remove.
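
The only difference between the two exported helpers is the -EEXIST warning. A hedged sketch of a caller that may legitimately attempt the same link twice and therefore prefers the silent variant (the kobject parameters and the "device" name are illustrative):

	/* Illustrative only: create a symlink that may already exist, e.g.
	 * on a re-registration path, without triggering the sysfs WARN. */
	static int example_link_device(struct kobject *parent,
				       struct kobject *target)
	{
		int error;

		error = sysfs_create_link_nowarn(parent, target, "device");
		if (error && error != -EEXIST)
			return error;
		return 0;
	}
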
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce4e15f8aaeb..a5db496f71c7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
107void sysfs_put_active_two(struct sysfs_dirent *sd); 107void sysfs_put_active_two(struct sysfs_dirent *sd);
108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, 108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
109 struct sysfs_dirent *parent_sd); 109 struct sysfs_dirent *parent_sd);
110int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
110int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 111int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
111void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 112void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
112void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 113void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..c502c60e4f54 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
184 int ufd; 184 int ufd;
185 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
186 186
187 if (flags) 187 /* Check the TFD_* constants for consistency. */
188 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
189 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
190
191 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
188 return -EINVAL; 192 return -EINVAL;
189 if (clockid != CLOCK_MONOTONIC && 193 if (clockid != CLOCK_MONOTONIC &&
190 clockid != CLOCK_REALTIME) 194 clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
198 ctx->clockid = clockid; 202 ctx->clockid = clockid;
199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 203 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
200 204
201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); 205 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
206 flags & (O_CLOEXEC | O_NONBLOCK));
202 if (ufd < 0) 207 if (ufd < 0)
203 kfree(ctx); 208 kfree(ctx);
204 209
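
From user space the new flags map directly onto descriptor semantics. A minimal sketch, assuming a libc that ships the timerfd wrappers in <sys/timerfd.h>:

	#include <sys/timerfd.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Close-on-exec, non-blocking timer descriptor: these flags
		 * are accepted now that the EINVAL check permits them. */
		int fd = timerfd_create(CLOCK_MONOTONIC,
					TFD_CLOEXEC | TFD_NONBLOCK);
		struct itimerspec its = {
			.it_value = { .tv_sec = 1 },	/* fire once after 1s */
		};

		if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0) {
			perror("timerfd");
			return 1;
		}
		close(fd);
		return 0;
	}
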
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..506f724055c2 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1232,7 +1232,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
1232{ 1232{
1233 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1233 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
1234 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1234 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1235 struct match_token *tp = tokens; 1235 const struct match_token *tp = tokens;
1236 1236
1237 while (tp->token != Opt_onerror_panic && tp->token != mval) 1237 while (tp->token != Opt_onerror_panic && tp->token != mval)
1238 ++tp; 1238 ++tp;