aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/adfs.h1
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/compat_ioctl.c153
-rw-r--r--fs/dquot.c18
-rw-r--r--fs/exec.c16
-rw-r--r--fs/ext2/CHANGES157
-rw-r--r--fs/ext2/balloc.c73
-rw-r--r--fs/ext2/ialloc.c40
-rw-r--r--fs/ext2/super.c16
-rw-r--r--fs/ext3/balloc.c73
-rw-r--r--fs/ext3/ialloc.c41
-rw-r--r--fs/ext3/super.c17
-rw-r--r--fs/fat/inode.c11
-rw-r--r--fs/hfs/hfs_fs.h1
-rw-r--r--fs/hfs/inode.c1
-rw-r--r--fs/hfsplus/bnode.c1
-rw-r--r--fs/hfsplus/dir.c1
-rw-r--r--fs/hfsplus/extents.c1
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/inode.c1
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/hfsplus/wrapper.c1
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/file.c7
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/jfs/namei.c3
-rw-r--r--fs/namei.c66
-rw-r--r--fs/namespace.c699
-rw-r--r--fs/ncpfs/ioctl.c34
-rw-r--r--fs/open.c14
-rw-r--r--fs/pnode.c305
-rw-r--r--fs/pnode.h37
-rw-r--r--fs/proc/base.c62
-rw-r--r--fs/reiserfs/file.c4
-rw-r--r--fs/seq_file.c12
-rw-r--r--fs/super.c1
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/xfs.h7
-rw-r--r--fs/xfs/xfs_dmapi.h1
43 files changed, 999 insertions, 891 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index bbc3cc63854..89c849da850 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,7 +32,6 @@
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
34#include <linux/inet.h> 34#include <linux/inet.h>
35#include <linux/version.h>
36#include <linux/list.h> 35#include <linux/list.h>
37#include <asm/uaccess.h> 36#include <asm/uaccess.h>
38#include <linux/idr.h> 37#include <linux/idr.h>
diff --git a/fs/Makefile b/fs/Makefile
index 1972da18627..4c265575907 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o 13 ioprio.o pnode.o
14 14
15obj-$(CONFIG_INOTIFY) += inotify.o 15obj-$(CONFIG_INOTIFY) += inotify.o
16obj-$(CONFIG_EPOLL) += eventpoll.o 16obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index fd528433de4..f6cd01352cc 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -12,7 +12,6 @@
12#define ADFS_NDA_PUBLIC_READ (1 << 5) 12#define ADFS_NDA_PUBLIC_READ (1 << 5)
13#define ADFS_NDA_PUBLIC_WRITE (1 << 6) 13#define ADFS_NDA_PUBLIC_WRITE (1 << 6)
14 14
15#include <linux/version.h>
16#include "dir_f.h" 15#include "dir_f.h"
17 16
18struct buffer_head; 17struct buffer_head;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 8ae0db6cd69..2568eb41cb3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -150,7 +150,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
150 150
151 /* if the binary is not readable than enforce mm->dumpable=0 151 /* if the binary is not readable than enforce mm->dumpable=0
152 regardless of the interpreter's permissions */ 152 regardless of the interpreter's permissions */
153 if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL)) 153 if (file_permission(bprm->file, MAY_READ))
154 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; 154 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
155 155
156 allow_write_access(bprm->file); 156 allow_write_access(bprm->file);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 4909754ea84..26300fccb4f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -840,146 +840,6 @@ static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
840 return err ? -EFAULT : 0; 840 return err ? -EFAULT : 0;
841} 841}
842 842
843struct fb_fix_screeninfo32 {
844 char id[16];
845 compat_caddr_t smem_start;
846 u32 smem_len;
847 u32 type;
848 u32 type_aux;
849 u32 visual;
850 u16 xpanstep;
851 u16 ypanstep;
852 u16 ywrapstep;
853 u32 line_length;
854 compat_caddr_t mmio_start;
855 u32 mmio_len;
856 u32 accel;
857 u16 reserved[3];
858};
859
860struct fb_cmap32 {
861 u32 start;
862 u32 len;
863 compat_caddr_t red;
864 compat_caddr_t green;
865 compat_caddr_t blue;
866 compat_caddr_t transp;
867};
868
869static int fb_getput_cmap(unsigned int fd, unsigned int cmd, unsigned long arg)
870{
871 struct fb_cmap_user __user *cmap;
872 struct fb_cmap32 __user *cmap32;
873 __u32 data;
874 int err;
875
876 cmap = compat_alloc_user_space(sizeof(*cmap));
877 cmap32 = compat_ptr(arg);
878
879 if (copy_in_user(&cmap->start, &cmap32->start, 2 * sizeof(__u32)))
880 return -EFAULT;
881
882 if (get_user(data, &cmap32->red) ||
883 put_user(compat_ptr(data), &cmap->red) ||
884 get_user(data, &cmap32->green) ||
885 put_user(compat_ptr(data), &cmap->green) ||
886 get_user(data, &cmap32->blue) ||
887 put_user(compat_ptr(data), &cmap->blue) ||
888 get_user(data, &cmap32->transp) ||
889 put_user(compat_ptr(data), &cmap->transp))
890 return -EFAULT;
891
892 err = sys_ioctl(fd, cmd, (unsigned long) cmap);
893
894 if (!err) {
895 if (copy_in_user(&cmap32->start,
896 &cmap->start,
897 2 * sizeof(__u32)))
898 err = -EFAULT;
899 }
900 return err;
901}
902
903static int do_fscreeninfo_to_user(struct fb_fix_screeninfo *fix,
904 struct fb_fix_screeninfo32 __user *fix32)
905{
906 __u32 data;
907 int err;
908
909 err = copy_to_user(&fix32->id, &fix->id, sizeof(fix32->id));
910
911 data = (__u32) (unsigned long) fix->smem_start;
912 err |= put_user(data, &fix32->smem_start);
913
914 err |= put_user(fix->smem_len, &fix32->smem_len);
915 err |= put_user(fix->type, &fix32->type);
916 err |= put_user(fix->type_aux, &fix32->type_aux);
917 err |= put_user(fix->visual, &fix32->visual);
918 err |= put_user(fix->xpanstep, &fix32->xpanstep);
919 err |= put_user(fix->ypanstep, &fix32->ypanstep);
920 err |= put_user(fix->ywrapstep, &fix32->ywrapstep);
921 err |= put_user(fix->line_length, &fix32->line_length);
922
923 data = (__u32) (unsigned long) fix->mmio_start;
924 err |= put_user(data, &fix32->mmio_start);
925
926 err |= put_user(fix->mmio_len, &fix32->mmio_len);
927 err |= put_user(fix->accel, &fix32->accel);
928 err |= copy_to_user(fix32->reserved, fix->reserved,
929 sizeof(fix->reserved));
930
931 return err;
932}
933
934static int fb_get_fscreeninfo(unsigned int fd, unsigned int cmd, unsigned long arg)
935{
936 mm_segment_t old_fs;
937 struct fb_fix_screeninfo fix;
938 struct fb_fix_screeninfo32 __user *fix32;
939 int err;
940
941 fix32 = compat_ptr(arg);
942
943 old_fs = get_fs();
944 set_fs(KERNEL_DS);
945 err = sys_ioctl(fd, cmd, (unsigned long) &fix);
946 set_fs(old_fs);
947
948 if (!err)
949 err = do_fscreeninfo_to_user(&fix, fix32);
950
951 return err;
952}
953
954static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
955{
956 int err;
957
958 switch (cmd) {
959 case FBIOGET_FSCREENINFO:
960 err = fb_get_fscreeninfo(fd,cmd, arg);
961 break;
962
963 case FBIOGETCMAP:
964 case FBIOPUTCMAP:
965 err = fb_getput_cmap(fd, cmd, arg);
966 break;
967
968 default:
969 do {
970 static int count;
971 if (++count <= 20)
972 printk("%s: Unknown fb ioctl cmd fd(%d) "
973 "cmd(%08x) arg(%08lx)\n",
974 __FUNCTION__, fd, cmd, arg);
975 } while(0);
976 err = -ENOSYS;
977 break;
978 };
979
980 return err;
981}
982
983static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 843static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
984{ 844{
985 mm_segment_t old_fs = get_fs(); 845 mm_segment_t old_fs = get_fs();
@@ -2953,10 +2813,7 @@ HANDLE_IOCTL(BLKGETSIZE, w_long)
2953HANDLE_IOCTL(0x1260, broken_blkgetsize) 2813HANDLE_IOCTL(0x1260, broken_blkgetsize)
2954HANDLE_IOCTL(BLKFRAGET, w_long) 2814HANDLE_IOCTL(BLKFRAGET, w_long)
2955HANDLE_IOCTL(BLKSECTGET, w_long) 2815HANDLE_IOCTL(BLKSECTGET, w_long)
2956HANDLE_IOCTL(FBIOGET_FSCREENINFO, fb_ioctl_trans)
2957HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) 2816HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
2958HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans)
2959HANDLE_IOCTL(FBIOPUTCMAP, fb_ioctl_trans)
2960HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) 2817HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
2961HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans) 2818HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans)
2962HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) 2819HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
@@ -3051,6 +2908,16 @@ HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
3051COMPATIBLE_IOCTL(TIOCGLTC) 2908COMPATIBLE_IOCTL(TIOCGLTC)
3052COMPATIBLE_IOCTL(TIOCSLTC) 2909COMPATIBLE_IOCTL(TIOCSLTC)
3053#endif 2910#endif
2911#ifdef TIOCSTART
2912/*
2913 * For these two we have defintions in ioctls.h and/or termios.h on
2914 * some architectures but no actual implemention. Some applications
2915 * like bash call them if they are defined in the headers, so we provide
2916 * entries here to avoid syslog message spew.
2917 */
2918COMPATIBLE_IOCTL(TIOCSTART)
2919COMPATIBLE_IOCTL(TIOCSTOP)
2920#endif
3054/* Usbdevfs */ 2921/* Usbdevfs */
3055HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control) 2922HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control)
3056HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk) 2923HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk)
diff --git a/fs/dquot.c b/fs/dquot.c
index afa06a89346..05b60283c9c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1321,13 +1321,11 @@ int vfs_quota_off(struct super_block *sb, int type)
1321 int cnt; 1321 int cnt;
1322 struct quota_info *dqopt = sb_dqopt(sb); 1322 struct quota_info *dqopt = sb_dqopt(sb);
1323 struct inode *toputinode[MAXQUOTAS]; 1323 struct inode *toputinode[MAXQUOTAS];
1324 struct vfsmount *toputmnt[MAXQUOTAS];
1325 1324
1326 /* We need to serialize quota_off() for device */ 1325 /* We need to serialize quota_off() for device */
1327 down(&dqopt->dqonoff_sem); 1326 down(&dqopt->dqonoff_sem);
1328 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1327 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1329 toputinode[cnt] = NULL; 1328 toputinode[cnt] = NULL;
1330 toputmnt[cnt] = NULL;
1331 if (type != -1 && cnt != type) 1329 if (type != -1 && cnt != type)
1332 continue; 1330 continue;
1333 if (!sb_has_quota_enabled(sb, cnt)) 1331 if (!sb_has_quota_enabled(sb, cnt))
@@ -1348,9 +1346,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1348 put_quota_format(dqopt->info[cnt].dqi_format); 1346 put_quota_format(dqopt->info[cnt].dqi_format);
1349 1347
1350 toputinode[cnt] = dqopt->files[cnt]; 1348 toputinode[cnt] = dqopt->files[cnt];
1351 toputmnt[cnt] = dqopt->mnt[cnt];
1352 dqopt->files[cnt] = NULL; 1349 dqopt->files[cnt] = NULL;
1353 dqopt->mnt[cnt] = NULL;
1354 dqopt->info[cnt].dqi_flags = 0; 1350 dqopt->info[cnt].dqi_flags = 0;
1355 dqopt->info[cnt].dqi_igrace = 0; 1351 dqopt->info[cnt].dqi_igrace = 0;
1356 dqopt->info[cnt].dqi_bgrace = 0; 1352 dqopt->info[cnt].dqi_bgrace = 0;
@@ -1358,10 +1354,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1358 } 1354 }
1359 up(&dqopt->dqonoff_sem); 1355 up(&dqopt->dqonoff_sem);
1360 /* Sync the superblock so that buffers with quota data are written to 1356 /* Sync the superblock so that buffers with quota data are written to
1361 * disk (and so userspace sees correct data afterwards). 1357 * disk (and so userspace sees correct data afterwards). */
1362 * The reference to vfsmnt we are still holding protects us from
1363 * umount (we don't have it only when quotas are turned on/off for
1364 * journal replay but in that case we are guarded by the fs anyway). */
1365 if (sb->s_op->sync_fs) 1358 if (sb->s_op->sync_fs)
1366 sb->s_op->sync_fs(sb, 1); 1359 sb->s_op->sync_fs(sb, 1);
1367 sync_blockdev(sb->s_bdev); 1360 sync_blockdev(sb->s_bdev);
@@ -1385,10 +1378,6 @@ int vfs_quota_off(struct super_block *sb, int type)
1385 iput(toputinode[cnt]); 1378 iput(toputinode[cnt]);
1386 } 1379 }
1387 up(&dqopt->dqonoff_sem); 1380 up(&dqopt->dqonoff_sem);
1388 /* We don't hold the reference when we turned on quotas
1389 * just for the journal replay... */
1390 if (toputmnt[cnt])
1391 mntput(toputmnt[cnt]);
1392 } 1381 }
1393 if (sb->s_bdev) 1382 if (sb->s_bdev)
1394 invalidate_bdev(sb->s_bdev, 0); 1383 invalidate_bdev(sb->s_bdev, 0);
@@ -1503,11 +1492,8 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
1503 /* Quota file not on the same filesystem? */ 1492 /* Quota file not on the same filesystem? */
1504 if (nd.mnt->mnt_sb != sb) 1493 if (nd.mnt->mnt_sb != sb)
1505 error = -EXDEV; 1494 error = -EXDEV;
1506 else { 1495 else
1507 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); 1496 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
1508 if (!error)
1509 sb_dqopt(sb)->mnt[type] = mntget(nd.mnt);
1510 }
1511out_path: 1497out_path:
1512 path_release(&nd); 1498 path_release(&nd);
1513 return error; 1499 return error;
diff --git a/fs/exec.c b/fs/exec.c
index cd6c574557d..c466fec5de2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -135,7 +135,7 @@ asmlinkage long sys_uselib(const char __user * library)
135 if (!S_ISREG(nd.dentry->d_inode->i_mode)) 135 if (!S_ISREG(nd.dentry->d_inode->i_mode))
136 goto exit; 136 goto exit;
137 137
138 error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd); 138 error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
139 if (error) 139 if (error)
140 goto exit; 140 goto exit;
141 141
@@ -495,7 +495,7 @@ struct file *open_exec(const char *name)
495 file = ERR_PTR(-EACCES); 495 file = ERR_PTR(-EACCES);
496 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && 496 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
497 S_ISREG(inode->i_mode)) { 497 S_ISREG(inode->i_mode)) {
498 int err = permission(inode, MAY_EXEC, &nd); 498 int err = vfs_permission(&nd, MAY_EXEC);
499 if (!err && !(inode->i_mode & 0111)) 499 if (!err && !(inode->i_mode & 0111))
500 err = -EACCES; 500 err = -EACCES;
501 file = ERR_PTR(err); 501 file = ERR_PTR(err);
@@ -590,6 +590,7 @@ static inline int de_thread(struct task_struct *tsk)
590 struct signal_struct *sig = tsk->signal; 590 struct signal_struct *sig = tsk->signal;
591 struct sighand_struct *newsighand, *oldsighand = tsk->sighand; 591 struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
592 spinlock_t *lock = &oldsighand->siglock; 592 spinlock_t *lock = &oldsighand->siglock;
593 struct task_struct *leader = NULL;
593 int count; 594 int count;
594 595
595 /* 596 /*
@@ -665,7 +666,7 @@ static inline int de_thread(struct task_struct *tsk)
665 * and to assume its PID: 666 * and to assume its PID:
666 */ 667 */
667 if (!thread_group_leader(current)) { 668 if (!thread_group_leader(current)) {
668 struct task_struct *leader = current->group_leader, *parent; 669 struct task_struct *parent;
669 struct dentry *proc_dentry1, *proc_dentry2; 670 struct dentry *proc_dentry1, *proc_dentry2;
670 unsigned long exit_state, ptrace; 671 unsigned long exit_state, ptrace;
671 672
@@ -674,6 +675,7 @@ static inline int de_thread(struct task_struct *tsk)
674 * It should already be zombie at this point, most 675 * It should already be zombie at this point, most
675 * of the time. 676 * of the time.
676 */ 677 */
678 leader = current->group_leader;
677 while (leader->exit_state != EXIT_ZOMBIE) 679 while (leader->exit_state != EXIT_ZOMBIE)
678 yield(); 680 yield();
679 681
@@ -733,7 +735,6 @@ static inline int de_thread(struct task_struct *tsk)
733 proc_pid_flush(proc_dentry2); 735 proc_pid_flush(proc_dentry2);
734 736
735 BUG_ON(exit_state != EXIT_ZOMBIE); 737 BUG_ON(exit_state != EXIT_ZOMBIE);
736 release_task(leader);
737 } 738 }
738 739
739 /* 740 /*
@@ -743,8 +744,11 @@ static inline int de_thread(struct task_struct *tsk)
743 sig->flags = 0; 744 sig->flags = 0;
744 745
745no_thread_group: 746no_thread_group:
746 BUG_ON(atomic_read(&sig->count) != 1);
747 exit_itimers(sig); 747 exit_itimers(sig);
748 if (leader)
749 release_task(leader);
750
751 BUG_ON(atomic_read(&sig->count) != 1);
748 752
749 if (atomic_read(&oldsighand->count) == 1) { 753 if (atomic_read(&oldsighand->count) == 1) {
750 /* 754 /*
@@ -892,7 +896,7 @@ int flush_old_exec(struct linux_binprm * bprm)
892 flush_thread(); 896 flush_thread();
893 897
894 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 898 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
895 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || 899 file_permission(bprm->file, MAY_READ) ||
896 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { 900 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
897 suid_keys(current); 901 suid_keys(current);
898 current->mm->dumpable = suid_dumpable; 902 current->mm->dumpable = suid_dumpable;
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES
deleted file mode 100644
index aa5aaf0e591..00000000000
--- a/fs/ext2/CHANGES
+++ /dev/null
@@ -1,157 +0,0 @@
1Changes from version 0.5a to version 0.5b
2=========================================
3 - Now that we have sysctl(), the immutable flag cannot be changed when
4 the system is running at security level > 0.
5 - Some cleanups in the code.
6 - More consistency checks on directories.
7 - The ext2.diff patch from Tom May <ftom@netcom.com> has been
8 integrated. This patch replaces expensive "/" and "%" with
9 cheap ">>" and "&" where possible.
10
11Changes from version 0.5 to version 0.5a
12========================================
13 - Zero the partial block following the end of the file when a file
14 is truncated.
15 - Dates updated in the copyright.
16 - More checks when the filesystem is mounted: the count of blocks,
17 fragments, and inodes per group is checked against the block size.
18 - The buffers used by the error routines are now static variables, to
19 avoid using space on the kernel stack, as requested by Linus.
20 - Some cleanups in the error messages (some versions of syslog contain
21 a bug which truncates an error message if it contains '\n').
22 - Check that no data can be written to a file past the 2GB limit.
23 - The famous readdir() bug has been fixed by Stephen Tweedie.
24 - Added a revision level in the superblock.
25 - Full support for O_SYNC flag of the open system call.
26 - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes
27 ext2fs to consider user #uid like root for the reserved blocks.
28 `resgid' acts the same way with group #gid. New fields in the
29 superblock contain default values for resuid and resgid and can
30 be modified by tune2fs.
31 Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>.
32 - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs
33 to remove the blocks used for FS structures from the total block
34 count in statfs. With `minixdf', ext2fs mimics Minix behavior
35 in statfs (i.e. it returns the total number of blocks on the
36 partition). This is intended to make bde happy :-)
37 - New file attributes:
38 - Immutable files cannot be modified. Data cannot be written to
39 these files. They cannot be removed, renamed and new links cannot
40 be created. Even root cannot modify the files. He has to remove
41 the immutable attribute first.
42 - Append-only files: can only be written in append-mode when writing.
43 They cannot be removed, renamed and new links cannot be created.
44 Note: files may only be added to an append-only directory.
45 - No-dump files: the attribute is not used by the kernel. My port
46 of dump uses it to avoid backing up files which are not important.
47 - New check in ext2_check_dir_entry: the inode number is checked.
48 - Support for big file systems: the copy of the FS descriptor is now
49 dynamically allocated (previous versions used a fixed size array).
50 This allows to mount 2GB+ FS.
51 - Reorganization of the ext2_inode structure to allow other operating
52 systems to create specific fields if they use ext2fs as their native
53 file system. Currently, ext2fs is only implemented in Linux but
54 will soon be part of Gnu Hurd and of Masix.
55
56Changes from version 0.4b to version 0.5
57========================================
58 - New superblock fields: s_lastcheck and s_checkinterval added
59 by Uwe Ohse <uwe@tirka.gun.de> to implement timedependent checks
60 of the file system
61 - Real random numbers for secure rm added by Pierre del Perugia
62 <delperug@gla.ecoledoc.ibp.fr>
63 - The mount warnings related to the state of a fs are not printed
64 if the fs is mounted read-only, idea by Nick Holloway
65 <alfie@dcs.warwick.ac.uk>
66
67Changes from version 0.4a to version 0.4b
68=========================================
69 - Copyrights changed to include the name of my laboratory.
70 - Clean up of balloc.c and ialloc.c.
71 - More consistency checks.
72 - Block preallocation added by Stephen Tweedie.
73 - Direct reads of directories disallowed.
74 - Readahead implemented in readdir by Stephen Tweedie.
75 - Bugs in block and inodes allocation fixed.
76 - Readahead implemented in ext2_find_entry by Chip Salzenberg.
77 - New mount options:
78 `check=none|normal|strict'
79 `debug'
80 `errors=continue|remount-ro|panic'
81 `grpid', `bsdgroups'
82 `nocheck'
83 `nogrpid', `sysvgroups'
84 - truncate() now tries to deallocate contiguous blocks in a single call
85 to ext2_free_blocks().
86 - lots of cosmetic changes.
87
88Changes from version 0.4 to version 0.4a
89========================================
90 - the `sync' option support is now complete. Version 0.4 was not
91 supporting it when truncating a file. I have tested the synchronous
92 writes and they work but they make the system very slow :-( I have
93 to work again on this to make it faster.
94 - when detecting an error on a mounted filesystem, version 0.4 used
95 to try to write a flag in the super block even if the filesystem had
96 been mounted read-only. This is fixed.
97 - the `sb=#' option now causes the kernel code to use the filesystem
98 descriptors located at block #+1. Version 0.4 used the superblock
99 backup located at block # but used the main copy of the descriptors.
100 - a new file attribute `S' is supported. This attribute causes
101 synchronous writes but is applied to a file not to the entire file
102 system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for
103 suggesting it).
104 - the directory cache is inhibited by default. The cache management
105 code seems to be buggy and I have to look at it carefully before
106 using it again.
107 - deleting a file with the `s' attribute (secure deletion) causes its
108 blocks to be overwritten with random values not with zeros (thanks to
109 Michael A. Griffith <grif@cs.ucr.edu> for suggesting it).
110 - lots of cosmetic changes have been made.
111
112Changes from version 0.3 to version 0.4
113=======================================
114 - Three new mount options are supported: `check', `sync' and `sb=#'.
115 `check' tells the kernel code to make more consistency checks
116 when the file system is mounted. Currently, the kernel code checks
117 that the blocks and inodes bitmaps are consistent with the free
118 blocks and inodes counts. More checks will be added in future
119 releases.
120 `sync' tells the kernel code to use synchronous writes when updating
121 an inode, a bitmap, a directory entry or an indirect block. This
122 can make the file system much slower but can be a big win for files
123 recovery in case of a crash (and we can now say to the BSD folks
124 that Linux also supports synchronous updates :-).
125 `sb=#' tells the kernel code to use an alternate super block instead
126 of its master copy. `#' is the number of the block (counted in
127 1024 bytes blocks) which contains the alternate super block.
128 An ext2 file system typically contains backups of the super block
129 at blocks 8193, 16385, and so on.
130 - I have change the meaning of the valid flag used by e2fsck. it
131 now contains the state of the file system. If the kernel code
132 detects an inconsistency while the file system is mounted, it flags
133 it as erroneous and e2fsck will detect that on next run.
134 - The super block now contains a mount counter. This counter is
135 incremented each time the file system is mounted read/write. When
136 this counter becomes bigger than a maximal mount counts (also stored
137 in the super block), e2fsck checks the file system, even if it had
138 been unmounted cleanly, and resets this counter to 0.
139 - File attributes are now supported. One can associate a set of
140 attributes to a file. Three attributes are defined:
141 `c': the file is marked for automatic compression,
142 `s': the file is marked for secure deletion: when the file is
143 deleted, its blocks are zeroed and written back to the disk,
144 `u': the file is marked for undeletion: when the file is deleted,
145 its contents are saved to allow a future undeletion.
146 Currently, only the `s' attribute is implemented in the kernel
147 code. Support for the other attributes will be added in a future
148 release.
149 - a few bugs related to times updates have been fixed by Bruce
150 Evans and me.
151 - a bug related to the links count of deleted inodes has been fixed.
152 Previous versions used to keep the links count set to 1 when a file
153 was deleted. The new version now sets links_count to 0 when deleting
154 the last link.
155 - a race condition when deallocating an inode has been fixed by
156 Stephen Tweedie.
157
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 6591abef64d..bb690806649 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -624,76 +624,3 @@ unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
624 return EXT2_SB(sb)->s_gdb_count; 624 return EXT2_SB(sb)->s_gdb_count;
625} 625}
626 626
627#ifdef CONFIG_EXT2_CHECK
628/* Called at mount-time, super-block is locked */
629void ext2_check_blocks_bitmap (struct super_block * sb)
630{
631 struct buffer_head *bitmap_bh = NULL;
632 struct ext2_super_block * es;
633 unsigned long desc_count, bitmap_count, x, j;
634 unsigned long desc_blocks;
635 struct ext2_group_desc * desc;
636 int i;
637
638 es = EXT2_SB(sb)->s_es;
639 desc_count = 0;
640 bitmap_count = 0;
641 desc = NULL;
642 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
643 desc = ext2_get_group_desc (sb, i, NULL);
644 if (!desc)
645 continue;
646 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
647 brelse(bitmap_bh);
648 bitmap_bh = read_block_bitmap(sb, i);
649 if (!bitmap_bh)
650 continue;
651
652 if (ext2_bg_has_super(sb, i) &&
653 !ext2_test_bit(0, bitmap_bh->b_data))
654 ext2_error(sb, __FUNCTION__,
655 "Superblock in group %d is marked free", i);
656
657 desc_blocks = ext2_bg_num_gdb(sb, i);
658 for (j = 0; j < desc_blocks; j++)
659 if (!ext2_test_bit(j + 1, bitmap_bh->b_data))
660 ext2_error(sb, __FUNCTION__,
661 "Descriptor block #%ld in group "
662 "%d is marked free", j, i);
663
664 if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap),
665 sb, bitmap_bh->b_data))
666 ext2_error(sb, "ext2_check_blocks_bitmap",
667 "Block bitmap for group %d is marked free",
668 i);
669
670 if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap),
671 sb, bitmap_bh->b_data))
672 ext2_error(sb, "ext2_check_blocks_bitmap",
673 "Inode bitmap for group %d is marked free",
674 i);
675
676 for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++)
677 if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j,
678 sb, bitmap_bh->b_data))
679 ext2_error (sb, "ext2_check_blocks_bitmap",
680 "Block #%ld of the inode table in "
681 "group %d is marked free", j, i);
682
683 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
684 if (le16_to_cpu(desc->bg_free_blocks_count) != x)
685 ext2_error (sb, "ext2_check_blocks_bitmap",
686 "Wrong free blocks count for group %d, "
687 "stored = %d, counted = %lu", i,
688 le16_to_cpu(desc->bg_free_blocks_count), x);
689 bitmap_count += x;
690 }
691 if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
692 ext2_error (sb, "ext2_check_blocks_bitmap",
693 "Wrong free blocks count in super block, "
694 "stored = %lu, counted = %lu",
695 (unsigned long)le32_to_cpu(es->s_free_blocks_count),
696 bitmap_count);
697 brelse(bitmap_bh);
698}
699#endif
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index e2d6208633a..74714af4ae6 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -700,43 +700,3 @@ unsigned long ext2_count_dirs (struct super_block * sb)
700 return count; 700 return count;
701} 701}
702 702
703#ifdef CONFIG_EXT2_CHECK
704/* Called at mount-time, super-block is locked */
705void ext2_check_inodes_bitmap (struct super_block * sb)
706{
707 struct ext2_super_block * es = EXT2_SB(sb)->s_es;
708 unsigned long desc_count = 0, bitmap_count = 0;
709 struct buffer_head *bitmap_bh = NULL;
710 int i;
711
712 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
713 struct ext2_group_desc *desc;
714 unsigned x;
715
716 desc = ext2_get_group_desc(sb, i, NULL);
717 if (!desc)
718 continue;
719 desc_count += le16_to_cpu(desc->bg_free_inodes_count);
720 brelse(bitmap_bh);
721 bitmap_bh = read_inode_bitmap(sb, i);
722 if (!bitmap_bh)
723 continue;
724
725 x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
726 if (le16_to_cpu(desc->bg_free_inodes_count) != x)
727 ext2_error (sb, "ext2_check_inodes_bitmap",
728 "Wrong free inodes count in group %d, "
729 "stored = %d, counted = %lu", i,
730 le16_to_cpu(desc->bg_free_inodes_count), x);
731 bitmap_count += x;
732 }
733 brelse(bitmap_bh);
734 if (percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter) !=
735 bitmap_count)
736 ext2_error(sb, "ext2_check_inodes_bitmap",
737 "Wrong free inodes count in super block, "
738 "stored = %lu, counted = %lu",
739 (unsigned long)le32_to_cpu(es->s_free_inodes_count),
740 bitmap_count);
741}
742#endif
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3c0c7c6a5b4..e4ed4b31a43 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -281,7 +281,7 @@ static unsigned long get_sb_block(void **data)
281enum { 281enum {
282 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 282 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
283 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, 283 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
284 Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, 284 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
285 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, 285 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
286 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, 286 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
287 Opt_usrquota, Opt_grpquota 287 Opt_usrquota, Opt_grpquota
@@ -303,7 +303,6 @@ static match_table_t tokens = {
303 {Opt_nouid32, "nouid32"}, 303 {Opt_nouid32, "nouid32"},
304 {Opt_nocheck, "check=none"}, 304 {Opt_nocheck, "check=none"},
305 {Opt_nocheck, "nocheck"}, 305 {Opt_nocheck, "nocheck"},
306 {Opt_check, "check"},
307 {Opt_debug, "debug"}, 306 {Opt_debug, "debug"},
308 {Opt_oldalloc, "oldalloc"}, 307 {Opt_oldalloc, "oldalloc"},
309 {Opt_orlov, "orlov"}, 308 {Opt_orlov, "orlov"},
@@ -376,13 +375,6 @@ static int parse_options (char * options,
376 case Opt_nouid32: 375 case Opt_nouid32:
377 set_opt (sbi->s_mount_opt, NO_UID32); 376 set_opt (sbi->s_mount_opt, NO_UID32);
378 break; 377 break;
379 case Opt_check:
380#ifdef CONFIG_EXT2_CHECK
381 set_opt (sbi->s_mount_opt, CHECK);
382#else
383 printk("EXT2 Check option not supported\n");
384#endif
385 break;
386 case Opt_nocheck: 378 case Opt_nocheck:
387 clear_opt (sbi->s_mount_opt, CHECK); 379 clear_opt (sbi->s_mount_opt, CHECK);
388 break; 380 break;
@@ -503,12 +495,6 @@ static int ext2_setup_super (struct super_block * sb,
503 EXT2_BLOCKS_PER_GROUP(sb), 495 EXT2_BLOCKS_PER_GROUP(sb),
504 EXT2_INODES_PER_GROUP(sb), 496 EXT2_INODES_PER_GROUP(sb),
505 sbi->s_mount_opt); 497 sbi->s_mount_opt);
506#ifdef CONFIG_EXT2_CHECK
507 if (test_opt (sb, CHECK)) {
508 ext2_check_blocks_bitmap (sb);
509 ext2_check_inodes_bitmap (sb);
510 }
511#endif
512 return res; 498 return res;
513} 499}
514 500
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7992d21e0e0..ae1148c24c5 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1517,76 +1517,3 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1517 return EXT3_SB(sb)->s_gdb_count; 1517 return EXT3_SB(sb)->s_gdb_count;
1518} 1518}
1519 1519
1520#ifdef CONFIG_EXT3_CHECK
1521/* Called at mount-time, super-block is locked */
1522void ext3_check_blocks_bitmap (struct super_block * sb)
1523{
1524 struct ext3_super_block *es;
1525 unsigned long desc_count, bitmap_count, x, j;
1526 unsigned long desc_blocks;
1527 struct buffer_head *bitmap_bh = NULL;
1528 struct ext3_group_desc *gdp;
1529 int i;
1530
1531 es = EXT3_SB(sb)->s_es;
1532 desc_count = 0;
1533 bitmap_count = 0;
1534 gdp = NULL;
1535 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
1536 gdp = ext3_get_group_desc (sb, i, NULL);
1537 if (!gdp)
1538 continue;
1539 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1540 brelse(bitmap_bh);
1541 bitmap_bh = read_block_bitmap(sb, i);
1542 if (bitmap_bh == NULL)
1543 continue;
1544
1545 if (ext3_bg_has_super(sb, i) &&
1546 !ext3_test_bit(0, bitmap_bh->b_data))
1547 ext3_error(sb, __FUNCTION__,
1548 "Superblock in group %d is marked free", i);
1549
1550 desc_blocks = ext3_bg_num_gdb(sb, i);
1551 for (j = 0; j < desc_blocks; j++)
1552 if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
1553 ext3_error(sb, __FUNCTION__,
1554 "Descriptor block #%ld in group "
1555 "%d is marked free", j, i);
1556
1557 if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
1558 sb, bitmap_bh->b_data))
1559 ext3_error (sb, "ext3_check_blocks_bitmap",
1560 "Block bitmap for group %d is marked free",
1561 i);
1562
1563 if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
1564 sb, bitmap_bh->b_data))
1565 ext3_error (sb, "ext3_check_blocks_bitmap",
1566 "Inode bitmap for group %d is marked free",
1567 i);
1568
1569 for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
1570 if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
1571 sb, bitmap_bh->b_data))
1572 ext3_error (sb, "ext3_check_blocks_bitmap",
1573 "Block #%d of the inode table in "
1574 "group %d is marked free", j, i);
1575
1576 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
1577 if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
1578 ext3_error (sb, "ext3_check_blocks_bitmap",
1579 "Wrong free blocks count for group %d, "
1580 "stored = %d, counted = %lu", i,
1581 le16_to_cpu(gdp->bg_free_blocks_count), x);
1582 bitmap_count += x;
1583 }
1584 brelse(bitmap_bh);
1585 if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
1586 ext3_error (sb, "ext3_check_blocks_bitmap",
1587 "Wrong free blocks count in super block, "
1588 "stored = %lu, counted = %lu",
1589 (unsigned long)le32_to_cpu(es->s_free_blocks_count),
1590 bitmap_count);
1591}
1592#endif
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index df3f517c54a..9e4a2437621 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -756,44 +756,3 @@ unsigned long ext3_count_dirs (struct super_block * sb)
756 return count; 756 return count;
757} 757}
758 758
759#ifdef CONFIG_EXT3_CHECK
760/* Called at mount-time, super-block is locked */
761void ext3_check_inodes_bitmap (struct super_block * sb)
762{
763 struct ext3_super_block * es;
764 unsigned long desc_count, bitmap_count, x;
765 struct buffer_head *bitmap_bh = NULL;
766 struct ext3_group_desc * gdp;
767 int i;
768
769 es = EXT3_SB(sb)->s_es;
770 desc_count = 0;
771 bitmap_count = 0;
772 gdp = NULL;
773 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
774 gdp = ext3_get_group_desc (sb, i, NULL);
775 if (!gdp)
776 continue;
777 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
778 brelse(bitmap_bh);
779 bitmap_bh = read_inode_bitmap(sb, i);
780 if (!bitmap_bh)
781 continue;
782
783 x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
784 if (le16_to_cpu(gdp->bg_free_inodes_count) != x)
785 ext3_error (sb, "ext3_check_inodes_bitmap",
786 "Wrong free inodes count in group %d, "
787 "stored = %d, counted = %lu", i,
788 le16_to_cpu(gdp->bg_free_inodes_count), x);
789 bitmap_count += x;
790 }
791 brelse(bitmap_bh);
792 if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)
793 ext3_error (sb, "ext3_check_inodes_bitmap",
794 "Wrong free inodes count in super block, "
795 "stored = %lu, counted = %lu",
796 (unsigned long)le32_to_cpu(es->s_free_inodes_count),
797 bitmap_count);
798}
799#endif
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f594989ccb7..4e6730622d9 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,7 +625,7 @@ static struct export_operations ext3_export_ops = {
625enum { 625enum {
626 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 626 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
627 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 627 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
628 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 628 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
629 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 629 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
630 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 630 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
631 Opt_commit, Opt_journal_update, Opt_journal_inum, 631 Opt_commit, Opt_journal_update, Opt_journal_inum,
@@ -652,7 +652,6 @@ static match_table_t tokens = {
652 {Opt_nouid32, "nouid32"}, 652 {Opt_nouid32, "nouid32"},
653 {Opt_nocheck, "nocheck"}, 653 {Opt_nocheck, "nocheck"},
654 {Opt_nocheck, "check=none"}, 654 {Opt_nocheck, "check=none"},
655 {Opt_check, "check"},
656 {Opt_debug, "debug"}, 655 {Opt_debug, "debug"},
657 {Opt_oldalloc, "oldalloc"}, 656 {Opt_oldalloc, "oldalloc"},
658 {Opt_orlov, "orlov"}, 657 {Opt_orlov, "orlov"},
@@ -773,14 +772,6 @@ static int parse_options (char * options, struct super_block *sb,
773 case Opt_nouid32: 772 case Opt_nouid32:
774 set_opt (sbi->s_mount_opt, NO_UID32); 773 set_opt (sbi->s_mount_opt, NO_UID32);
775 break; 774 break;
776 case Opt_check:
777#ifdef CONFIG_EXT3_CHECK
778 set_opt (sbi->s_mount_opt, CHECK);
779#else
780 printk(KERN_ERR
781 "EXT3 Check option not supported\n");
782#endif
783 break;
784 case Opt_nocheck: 775 case Opt_nocheck:
785 clear_opt (sbi->s_mount_opt, CHECK); 776 clear_opt (sbi->s_mount_opt, CHECK);
786 break; 777 break;
@@ -1115,12 +1106,6 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1115 } else { 1106 } else {
1116 printk("internal journal\n"); 1107 printk("internal journal\n");
1117 } 1108 }
1118#ifdef CONFIG_EXT3_CHECK
1119 if (test_opt (sb, CHECK)) {
1120 ext3_check_blocks_bitmap (sb);
1121 ext3_check_inodes_bitmap (sb);
1122 }
1123#endif
1124 return res; 1109 return res;
1125} 1110}
1126 1111
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index e2effe2dc9b..a0f9b9fe130 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -846,7 +846,7 @@ static match_table_t vfat_tokens = {
846 {Opt_err, NULL} 846 {Opt_err, NULL}
847}; 847};
848 848
849static int parse_options(char *options, int is_vfat, int *debug, 849static int parse_options(char *options, int is_vfat, int silent, int *debug,
850 struct fat_mount_options *opts) 850 struct fat_mount_options *opts)
851{ 851{
852 char *p; 852 char *p;
@@ -1008,8 +1008,11 @@ static int parse_options(char *options, int is_vfat, int *debug,
1008 break; 1008 break;
1009 /* unknown option */ 1009 /* unknown option */
1010 default: 1010 default:
1011 printk(KERN_ERR "FAT: Unrecognized mount option \"%s\" " 1011 if (!silent) {
1012 "or missing value\n", p); 1012 printk(KERN_ERR
1013 "FAT: Unrecognized mount option \"%s\" "
1014 "or missing value\n", p);
1015 }
1013 return -EINVAL; 1016 return -EINVAL;
1014 } 1017 }
1015 } 1018 }
@@ -1091,7 +1094,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1091 sb->s_export_op = &fat_export_ops; 1094 sb->s_export_op = &fat_export_ops;
1092 sbi->dir_ops = fs_dir_inode_ops; 1095 sbi->dir_ops = fs_dir_inode_ops;
1093 1096
1094 error = parse_options(data, isvfat, &debug, &sbi->options); 1097 error = parse_options(data, isvfat, silent, &debug, &sbi->options);
1095 if (error) 1098 if (error)
1096 goto out_fail; 1099 goto out_fail;
1097 1100
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index aae019aadf8..cc5dcd52e23 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -9,7 +9,6 @@
9#ifndef _LINUX_HFS_FS_H 9#ifndef _LINUX_HFS_FS_H
10#define _LINUX_HFS_FS_H 10#define _LINUX_HFS_FS_H
11 11
12#include <linux/version.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 3f680c5675b..d499393a8ae 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -12,7 +12,6 @@
12 */ 12 */
13 13
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/version.h>
16#include <linux/mpage.h> 15#include <linux/mpage.h>
17 16
18#include "hfs_fs.h" 17#include "hfs_fs.h"
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index b85abc6e6f8..930cd9212de 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -13,7 +13,6 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/swap.h> 15#include <linux/swap.h>
16#include <linux/version.h>
17 16
18#include "hfsplus_fs.h" 17#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 18#include "hfsplus_raw.h"
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 7bda76667a4..50c8f44b6c6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -13,7 +13,6 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/version.h>
17 16
18#include "hfsplus_fs.h" 17#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 18#include "hfsplus_raw.h"
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index e7235ca79a9..e3ff56a0301 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -11,7 +11,6 @@
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/version.h>
15 14
16#include "hfsplus_fs.h" 15#include "hfsplus_fs.h"
17#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 2bc0cdd30e5..c60e5635498 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,7 +11,6 @@
11#define _LINUX_HFSPLUS_FS_H 11#define _LINUX_HFSPLUS_FS_H
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/version.h>
15#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
16#include "hfsplus_raw.h" 15#include "hfsplus_raw.h"
17 16
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index f205773ddfb..fc98583cf04 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -11,7 +11,6 @@
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/version.h>
15#include <linux/mpage.h> 14#include <linux/mpage.h>
16 15
17#include "hfsplus_fs.h" 16#include "hfsplus_fs.h"
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 452fc1fdbd3..0ce1c455ae5 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -14,7 +14,6 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/version.h>
18#include <linux/vfs.h> 17#include <linux/vfs.h>
19#include <linux/nls.h> 18#include <linux/nls.h>
20 19
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 0c51d6338b0..95455e83923 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -12,7 +12,6 @@
12#include <linux/blkdev.h> 12#include <linux/blkdev.h>
13#include <linux/cdrom.h> 13#include <linux/cdrom.h>
14#include <linux/genhd.h> 14#include <linux/genhd.h>
15#include <linux/version.h>
16#include <asm/unaligned.h> 15#include <asm/unaligned.h>
17 16
18#include "hfsplus_fs.h" 17#include "hfsplus_fs.h"
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a33fb1d9137..4684eb7d48c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/stddef.h> 9#include <linux/stddef.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/version.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index ab144dabd87..7c995ac4081 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -114,11 +114,8 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
114 ssize_t retval; 114 ssize_t retval;
115 115
116 retval = generic_file_write(file, buf, count, ppos); 116 retval = generic_file_write(file, buf, count, ppos);
117 if (retval > 0) { 117 if (retval > 0)
118 struct inode *inode = file->f_dentry->d_inode; 118 hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
119 inode->i_mtime = CURRENT_TIME_SEC;
120 hpfs_i(inode)->i_dirty = 1;
121 }
122 return retval; 119 return retval;
123} 120}
124 121
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e026c807e6b..64983ab5558 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -63,7 +63,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
63 * 63 *
64 * Result is in bytes to be compatible with is_hugepage_mem_enough() 64 * Result is in bytes to be compatible with is_hugepage_mem_enough()
65 */ 65 */
66unsigned long 66static unsigned long
67huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) 67huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
68{ 68{
69 int i; 69 int i;
diff --git a/fs/inotify.c b/fs/inotify.c
index 9fbaebfdf40..bf7ce1d2412 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -372,7 +372,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd)
372 if (error) 372 if (error)
373 return error; 373 return error;
374 /* you can only watch an inode if you have read permissions on it */ 374 /* you can only watch an inode if you have read permissions on it */
375 error = permission(nd->dentry->d_inode, MAY_READ, NULL); 375 error = vfs_permission(nd, MAY_READ);
376 if (error) 376 if (error)
377 path_release(nd); 377 path_release(nd);
378 return error; 378 return error;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 1abe7343f92..4abbe860430 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -827,6 +827,7 @@ static int jfs_link(struct dentry *old_dentry,
827 /* update object inode */ 827 /* update object inode */
828 ip->i_nlink++; /* for new link */ 828 ip->i_nlink++; /* for new link */
829 ip->i_ctime = CURRENT_TIME; 829 ip->i_ctime = CURRENT_TIME;
830 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
830 mark_inode_dirty(dir); 831 mark_inode_dirty(dir);
831 atomic_inc(&ip->i_count); 832 atomic_inc(&ip->i_count);
832 833
@@ -1024,6 +1025,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1024 insert_inode_hash(ip); 1025 insert_inode_hash(ip);
1025 mark_inode_dirty(ip); 1026 mark_inode_dirty(ip);
1026 1027
1028 dip->i_ctime = dip->i_mtime = CURRENT_TIME;
1029 mark_inode_dirty(dip);
1027 /* 1030 /*
1028 * commit update of parent directory and link object 1031 * commit update of parent directory and link object
1029 */ 1032 */
diff --git a/fs/namei.c b/fs/namei.c
index b3f8a1966c9..6dbbd42d8b9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -256,6 +256,38 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
256 return security_inode_permission(inode, mask, nd); 256 return security_inode_permission(inode, mask, nd);
257} 257}
258 258
259/**
260 * vfs_permission - check for access rights to a given path
261 * @nd: lookup result that describes the path
262 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
263 *
264 * Used to check for read/write/execute permissions on a path.
265 * We use "fsuid" for this, letting us set arbitrary permissions
266 * for filesystem access without changing the "normal" uids which
267 * are used for other things.
268 */
269int vfs_permission(struct nameidata *nd, int mask)
270{
271 return permission(nd->dentry->d_inode, mask, nd);
272}
273
274/**
275 * file_permission - check for additional access rights to a given file
276 * @file: file to check access rights for
277 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
278 *
279 * Used to check for read/write/execute permissions on an already opened
280 * file.
281 *
282 * Note:
283 * Do not use this function in new code. All access checks should
284 * be done using vfs_permission().
285 */
286int file_permission(struct file *file, int mask)
287{
288 return permission(file->f_dentry->d_inode, mask, NULL);
289}
290
259/* 291/*
260 * get_write_access() gets write permission for a file. 292 * get_write_access() gets write permission for a file.
261 * put_write_access() releases this write permission. 293 * put_write_access() releases this write permission.
@@ -765,9 +797,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
765 797
766 nd->flags |= LOOKUP_CONTINUE; 798 nd->flags |= LOOKUP_CONTINUE;
767 err = exec_permission_lite(inode, nd); 799 err = exec_permission_lite(inode, nd);
768 if (err == -EAGAIN) { 800 if (err == -EAGAIN)
769 err = permission(inode, MAY_EXEC, nd); 801 err = vfs_permission(nd, MAY_EXEC);
770 }
771 if (err) 802 if (err)
772 break; 803 break;
773 804
@@ -1109,8 +1140,9 @@ int path_lookup_open(const char *name, unsigned int lookup_flags,
1109 * @open_flags: open intent flags 1140 * @open_flags: open intent flags
1110 * @create_mode: create intent flags 1141 * @create_mode: create intent flags
1111 */ 1142 */
1112int path_lookup_create(const char *name, unsigned int lookup_flags, 1143static int path_lookup_create(const char *name, unsigned int lookup_flags,
1113 struct nameidata *nd, int open_flags, int create_mode) 1144 struct nameidata *nd, int open_flags,
1145 int create_mode)
1114{ 1146{
1115 return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, 1147 return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
1116 open_flags, create_mode); 1148 open_flags, create_mode);
@@ -1173,9 +1205,9 @@ out:
1173 return dentry; 1205 return dentry;
1174} 1206}
1175 1207
1176struct dentry * lookup_hash(struct qstr *name, struct dentry * base) 1208struct dentry * lookup_hash(struct nameidata *nd)
1177{ 1209{
1178 return __lookup_hash(name, base, NULL); 1210 return __lookup_hash(&nd->last, nd->dentry, nd);
1179} 1211}
1180 1212
1181/* SMP-safe */ 1213/* SMP-safe */
@@ -1199,7 +1231,7 @@ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
1199 } 1231 }
1200 this.hash = end_name_hash(hash); 1232 this.hash = end_name_hash(hash);
1201 1233
1202 return lookup_hash(&this, base); 1234 return __lookup_hash(&this, base, NULL);
1203access: 1235access:
1204 return ERR_PTR(-EACCES); 1236 return ERR_PTR(-EACCES);
1205} 1237}
@@ -1407,7 +1439,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1407 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1439 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1408 return -EISDIR; 1440 return -EISDIR;
1409 1441
1410 error = permission(inode, acc_mode, nd); 1442 error = vfs_permission(nd, acc_mode);
1411 if (error) 1443 if (error)
1412 return error; 1444 return error;
1413 1445
@@ -1532,7 +1564,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1532 dir = nd->dentry; 1564 dir = nd->dentry;
1533 nd->flags &= ~LOOKUP_PARENT; 1565 nd->flags &= ~LOOKUP_PARENT;
1534 down(&dir->d_inode->i_sem); 1566 down(&dir->d_inode->i_sem);
1535 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1567 path.dentry = lookup_hash(nd);
1536 path.mnt = nd->mnt; 1568 path.mnt = nd->mnt;
1537 1569
1538do_last: 1570do_last:
@@ -1634,7 +1666,7 @@ do_link:
1634 } 1666 }
1635 dir = nd->dentry; 1667 dir = nd->dentry;
1636 down(&dir->d_inode->i_sem); 1668 down(&dir->d_inode->i_sem);
1637 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1669 path.dentry = lookup_hash(nd);
1638 path.mnt = nd->mnt; 1670 path.mnt = nd->mnt;
1639 __putname(nd->last.name); 1671 __putname(nd->last.name);
1640 goto do_last; 1672 goto do_last;
@@ -1666,7 +1698,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1666 /* 1698 /*
1667 * Do the final lookup. 1699 * Do the final lookup.
1668 */ 1700 */
1669 dentry = lookup_hash(&nd->last, nd->dentry); 1701 dentry = lookup_hash(nd);
1670 if (IS_ERR(dentry)) 1702 if (IS_ERR(dentry))
1671 goto fail; 1703 goto fail;
1672 1704
@@ -1901,7 +1933,7 @@ asmlinkage long sys_rmdir(const char __user * pathname)
1901 goto exit1; 1933 goto exit1;
1902 } 1934 }
1903 down(&nd.dentry->d_inode->i_sem); 1935 down(&nd.dentry->d_inode->i_sem);
1904 dentry = lookup_hash(&nd.last, nd.dentry); 1936 dentry = lookup_hash(&nd);
1905 error = PTR_ERR(dentry); 1937 error = PTR_ERR(dentry);
1906 if (!IS_ERR(dentry)) { 1938 if (!IS_ERR(dentry)) {
1907 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1939 error = vfs_rmdir(nd.dentry->d_inode, dentry);
@@ -1970,7 +2002,7 @@ asmlinkage long sys_unlink(const char __user * pathname)
1970 if (nd.last_type != LAST_NORM) 2002 if (nd.last_type != LAST_NORM)
1971 goto exit1; 2003 goto exit1;
1972 down(&nd.dentry->d_inode->i_sem); 2004 down(&nd.dentry->d_inode->i_sem);
1973 dentry = lookup_hash(&nd.last, nd.dentry); 2005 dentry = lookup_hash(&nd);
1974 error = PTR_ERR(dentry); 2006 error = PTR_ERR(dentry);
1975 if (!IS_ERR(dentry)) { 2007 if (!IS_ERR(dentry)) {
1976 /* Why not before? Because we want correct error value */ 2008 /* Why not before? Because we want correct error value */
@@ -2313,7 +2345,7 @@ static inline int do_rename(const char * oldname, const char * newname)
2313 2345
2314 trap = lock_rename(new_dir, old_dir); 2346 trap = lock_rename(new_dir, old_dir);
2315 2347
2316 old_dentry = lookup_hash(&oldnd.last, old_dir); 2348 old_dentry = lookup_hash(&oldnd);
2317 error = PTR_ERR(old_dentry); 2349 error = PTR_ERR(old_dentry);
2318 if (IS_ERR(old_dentry)) 2350 if (IS_ERR(old_dentry))
2319 goto exit3; 2351 goto exit3;
@@ -2333,7 +2365,7 @@ static inline int do_rename(const char * oldname, const char * newname)
2333 error = -EINVAL; 2365 error = -EINVAL;
2334 if (old_dentry == trap) 2366 if (old_dentry == trap)
2335 goto exit4; 2367 goto exit4;
2336 new_dentry = lookup_hash(&newnd.last, new_dir); 2368 new_dentry = lookup_hash(&newnd);
2337 error = PTR_ERR(new_dentry); 2369 error = PTR_ERR(new_dentry);
2338 if (IS_ERR(new_dentry)) 2370 if (IS_ERR(new_dentry))
2339 goto exit4; 2371 goto exit4;
@@ -2536,6 +2568,8 @@ EXPORT_SYMBOL(path_lookup);
2536EXPORT_SYMBOL(path_release); 2568EXPORT_SYMBOL(path_release);
2537EXPORT_SYMBOL(path_walk); 2569EXPORT_SYMBOL(path_walk);
2538EXPORT_SYMBOL(permission); 2570EXPORT_SYMBOL(permission);
2571EXPORT_SYMBOL(vfs_permission);
2572EXPORT_SYMBOL(file_permission);
2539EXPORT_SYMBOL(unlock_rename); 2573EXPORT_SYMBOL(unlock_rename);
2540EXPORT_SYMBOL(vfs_create); 2574EXPORT_SYMBOL(vfs_create);
2541EXPORT_SYMBOL(vfs_follow_link); 2575EXPORT_SYMBOL(vfs_follow_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf7d6f..2019899f2ab 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/unistd.h> 26#include <asm/unistd.h>
27#include "pnode.h"
27 28
28extern int __init init_rootfs(void); 29extern int __init init_rootfs(void);
29 30
@@ -37,33 +38,39 @@ static inline int sysfs_init(void)
37#endif 38#endif
38 39
39/* spinlock for vfsmount related operations, inplace of dcache_lock */ 40/* spinlock for vfsmount related operations, inplace of dcache_lock */
40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 41__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42
43static int event;
41 44
42static struct list_head *mount_hashtable; 45static struct list_head *mount_hashtable;
43static int hash_mask __read_mostly, hash_bits __read_mostly; 46static int hash_mask __read_mostly, hash_bits __read_mostly;
44static kmem_cache_t *mnt_cache; 47static kmem_cache_t *mnt_cache;
48static struct rw_semaphore namespace_sem;
45 49
46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 50static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
47{ 51{
48 unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES); 52 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
49 tmp += ((unsigned long) dentry / L1_CACHE_BYTES); 53 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
50 tmp = tmp + (tmp >> hash_bits); 54 tmp = tmp + (tmp >> hash_bits);
51 return tmp & hash_mask; 55 return tmp & hash_mask;
52} 56}
53 57
54struct vfsmount *alloc_vfsmnt(const char *name) 58struct vfsmount *alloc_vfsmnt(const char *name)
55{ 59{
56 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); 60 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
57 if (mnt) { 61 if (mnt) {
58 memset(mnt, 0, sizeof(struct vfsmount)); 62 memset(mnt, 0, sizeof(struct vfsmount));
59 atomic_set(&mnt->mnt_count,1); 63 atomic_set(&mnt->mnt_count, 1);
60 INIT_LIST_HEAD(&mnt->mnt_hash); 64 INIT_LIST_HEAD(&mnt->mnt_hash);
61 INIT_LIST_HEAD(&mnt->mnt_child); 65 INIT_LIST_HEAD(&mnt->mnt_child);
62 INIT_LIST_HEAD(&mnt->mnt_mounts); 66 INIT_LIST_HEAD(&mnt->mnt_mounts);
63 INIT_LIST_HEAD(&mnt->mnt_list); 67 INIT_LIST_HEAD(&mnt->mnt_list);
64 INIT_LIST_HEAD(&mnt->mnt_expire); 68 INIT_LIST_HEAD(&mnt->mnt_expire);
69 INIT_LIST_HEAD(&mnt->mnt_share);
70 INIT_LIST_HEAD(&mnt->mnt_slave_list);
71 INIT_LIST_HEAD(&mnt->mnt_slave);
65 if (name) { 72 if (name) {
66 int size = strlen(name)+1; 73 int size = strlen(name) + 1;
67 char *newname = kmalloc(size, GFP_KERNEL); 74 char *newname = kmalloc(size, GFP_KERNEL);
68 if (newname) { 75 if (newname) {
69 memcpy(newname, name, size); 76 memcpy(newname, name, size);
@@ -81,36 +88,65 @@ void free_vfsmnt(struct vfsmount *mnt)
81} 88}
82 89
83/* 90/*
84 * Now, lookup_mnt increments the ref count before returning 91 * find the first or last mount at @dentry on vfsmount @mnt depending on
85 * the vfsmount struct. 92 * @dir. If @dir is set return the first mount else return the last mount.
86 */ 93 */
87struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) 94struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
95 int dir)
88{ 96{
89 struct list_head * head = mount_hashtable + hash(mnt, dentry); 97 struct list_head *head = mount_hashtable + hash(mnt, dentry);
90 struct list_head * tmp = head; 98 struct list_head *tmp = head;
91 struct vfsmount *p, *found = NULL; 99 struct vfsmount *p, *found = NULL;
92 100
93 spin_lock(&vfsmount_lock);
94 for (;;) { 101 for (;;) {
95 tmp = tmp->next; 102 tmp = dir ? tmp->next : tmp->prev;
96 p = NULL; 103 p = NULL;
97 if (tmp == head) 104 if (tmp == head)
98 break; 105 break;
99 p = list_entry(tmp, struct vfsmount, mnt_hash); 106 p = list_entry(tmp, struct vfsmount, mnt_hash);
100 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) { 107 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
101 found = mntget(p); 108 found = p;
102 break; 109 break;
103 } 110 }
104 } 111 }
105 spin_unlock(&vfsmount_lock);
106 return found; 112 return found;
107} 113}
108 114
115/*
116 * lookup_mnt increments the ref count before returning
117 * the vfsmount struct.
118 */
119struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
120{
121 struct vfsmount *child_mnt;
122 spin_lock(&vfsmount_lock);
123 if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
124 mntget(child_mnt);
125 spin_unlock(&vfsmount_lock);
126 return child_mnt;
127}
128
109static inline int check_mnt(struct vfsmount *mnt) 129static inline int check_mnt(struct vfsmount *mnt)
110{ 130{
111 return mnt->mnt_namespace == current->namespace; 131 return mnt->mnt_namespace == current->namespace;
112} 132}
113 133
134static void touch_namespace(struct namespace *ns)
135{
136 if (ns) {
137 ns->event = ++event;
138 wake_up_interruptible(&ns->poll);
139 }
140}
141
142static void __touch_namespace(struct namespace *ns)
143{
144 if (ns && ns->event != event) {
145 ns->event = event;
146 wake_up_interruptible(&ns->poll);
147 }
148}
149
114static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) 150static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
115{ 151{
116 old_nd->dentry = mnt->mnt_mountpoint; 152 old_nd->dentry = mnt->mnt_mountpoint;
@@ -122,13 +158,43 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
122 old_nd->dentry->d_mounted--; 158 old_nd->dentry->d_mounted--;
123} 159}
124 160
161void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
162 struct vfsmount *child_mnt)
163{
164 child_mnt->mnt_parent = mntget(mnt);
165 child_mnt->mnt_mountpoint = dget(dentry);
166 dentry->d_mounted++;
167}
168
125static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) 169static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
126{ 170{
127 mnt->mnt_parent = mntget(nd->mnt); 171 mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
128 mnt->mnt_mountpoint = dget(nd->dentry); 172 list_add_tail(&mnt->mnt_hash, mount_hashtable +
129 list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); 173 hash(nd->mnt, nd->dentry));
130 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); 174 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
131 nd->dentry->d_mounted++; 175}
176
177/*
178 * the caller must hold vfsmount_lock
179 */
180static void commit_tree(struct vfsmount *mnt)
181{
182 struct vfsmount *parent = mnt->mnt_parent;
183 struct vfsmount *m;
184 LIST_HEAD(head);
185 struct namespace *n = parent->mnt_namespace;
186
187 BUG_ON(parent == mnt);
188
189 list_add_tail(&head, &mnt->mnt_list);
190 list_for_each_entry(m, &head, mnt_list)
191 m->mnt_namespace = n;
192 list_splice(&head, n->list.prev);
193
194 list_add_tail(&mnt->mnt_hash, mount_hashtable +
195 hash(parent, mnt->mnt_mountpoint));
196 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
197 touch_namespace(n);
132} 198}
133 199
134static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) 200static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -147,8 +213,18 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
147 return list_entry(next, struct vfsmount, mnt_child); 213 return list_entry(next, struct vfsmount, mnt_child);
148} 214}
149 215
150static struct vfsmount * 216static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
151clone_mnt(struct vfsmount *old, struct dentry *root) 217{
218 struct list_head *prev = p->mnt_mounts.prev;
219 while (prev != &p->mnt_mounts) {
220 p = list_entry(prev, struct vfsmount, mnt_child);
221 prev = p->mnt_mounts.prev;
222 }
223 return p;
224}
225
226static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
227 int flag)
152{ 228{
153 struct super_block *sb = old->mnt_sb; 229 struct super_block *sb = old->mnt_sb;
154 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); 230 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
@@ -160,19 +236,34 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
160 mnt->mnt_root = dget(root); 236 mnt->mnt_root = dget(root);
161 mnt->mnt_mountpoint = mnt->mnt_root; 237 mnt->mnt_mountpoint = mnt->mnt_root;
162 mnt->mnt_parent = mnt; 238 mnt->mnt_parent = mnt;
163 mnt->mnt_namespace = current->namespace; 239
240 if (flag & CL_SLAVE) {
241 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
242 mnt->mnt_master = old;
243 CLEAR_MNT_SHARED(mnt);
244 } else {
245 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
246 list_add(&mnt->mnt_share, &old->mnt_share);
247 if (IS_MNT_SLAVE(old))
248 list_add(&mnt->mnt_slave, &old->mnt_slave);
249 mnt->mnt_master = old->mnt_master;
250 }
251 if (flag & CL_MAKE_SHARED)
252 set_mnt_shared(mnt);
164 253
165 /* stick the duplicate mount on the same expiry list 254 /* stick the duplicate mount on the same expiry list
166 * as the original if that was on one */ 255 * as the original if that was on one */
167 spin_lock(&vfsmount_lock); 256 if (flag & CL_EXPIRE) {
168 if (!list_empty(&old->mnt_expire)) 257 spin_lock(&vfsmount_lock);
169 list_add(&mnt->mnt_expire, &old->mnt_expire); 258 if (!list_empty(&old->mnt_expire))
170 spin_unlock(&vfsmount_lock); 259 list_add(&mnt->mnt_expire, &old->mnt_expire);
260 spin_unlock(&vfsmount_lock);
261 }
171 } 262 }
172 return mnt; 263 return mnt;
173} 264}
174 265
175void __mntput(struct vfsmount *mnt) 266static inline void __mntput(struct vfsmount *mnt)
176{ 267{
177 struct super_block *sb = mnt->mnt_sb; 268 struct super_block *sb = mnt->mnt_sb;
178 dput(mnt->mnt_root); 269 dput(mnt->mnt_root);
@@ -180,7 +271,46 @@ void __mntput(struct vfsmount *mnt)
180 deactivate_super(sb); 271 deactivate_super(sb);
181} 272}
182 273
183EXPORT_SYMBOL(__mntput); 274void mntput_no_expire(struct vfsmount *mnt)
275{
276repeat:
277 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
278 if (likely(!mnt->mnt_pinned)) {
279 spin_unlock(&vfsmount_lock);
280 __mntput(mnt);
281 return;
282 }
283 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
284 mnt->mnt_pinned = 0;
285 spin_unlock(&vfsmount_lock);
286 acct_auto_close_mnt(mnt);
287 security_sb_umount_close(mnt);
288 goto repeat;
289 }
290}
291
292EXPORT_SYMBOL(mntput_no_expire);
293
294void mnt_pin(struct vfsmount *mnt)
295{
296 spin_lock(&vfsmount_lock);
297 mnt->mnt_pinned++;
298 spin_unlock(&vfsmount_lock);
299}
300
301EXPORT_SYMBOL(mnt_pin);
302
303void mnt_unpin(struct vfsmount *mnt)
304{
305 spin_lock(&vfsmount_lock);
306 if (mnt->mnt_pinned) {
307 atomic_inc(&mnt->mnt_count);
308 mnt->mnt_pinned--;
309 }
310 spin_unlock(&vfsmount_lock);
311}
312
313EXPORT_SYMBOL(mnt_unpin);
184 314
185/* iterator */ 315/* iterator */
186static void *m_start(struct seq_file *m, loff_t *pos) 316static void *m_start(struct seq_file *m, loff_t *pos)
@@ -189,7 +319,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 struct list_head *p; 319 struct list_head *p;
190 loff_t l = *pos; 320 loff_t l = *pos;
191 321
192 down_read(&n->sem); 322 down_read(&namespace_sem);
193 list_for_each(p, &n->list) 323 list_for_each(p, &n->list)
194 if (!l--) 324 if (!l--)
195 return list_entry(p, struct vfsmount, mnt_list); 325 return list_entry(p, struct vfsmount, mnt_list);
@@ -201,13 +331,12 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
201 struct namespace *n = m->private; 331 struct namespace *n = m->private;
202 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; 332 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
203 (*pos)++; 333 (*pos)++;
204 return p==&n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); 334 return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
205} 335}
206 336
207static void m_stop(struct seq_file *m, void *v) 337static void m_stop(struct seq_file *m, void *v)
208{ 338{
209 struct namespace *n = m->private; 339 up_read(&namespace_sem);
210 up_read(&n->sem);
211} 340}
212 341
213static inline void mangle(struct seq_file *m, const char *s) 342static inline void mangle(struct seq_file *m, const char *s)
@@ -275,35 +404,14 @@ struct seq_operations mounts_op = {
275 */ 404 */
276int may_umount_tree(struct vfsmount *mnt) 405int may_umount_tree(struct vfsmount *mnt)
277{ 406{
278 struct list_head *next; 407 int actual_refs = 0;
279 struct vfsmount *this_parent = mnt; 408 int minimum_refs = 0;
280 int actual_refs; 409 struct vfsmount *p;
281 int minimum_refs;
282 410
283 spin_lock(&vfsmount_lock); 411 spin_lock(&vfsmount_lock);
284 actual_refs = atomic_read(&mnt->mnt_count); 412 for (p = mnt; p; p = next_mnt(p, mnt)) {
285 minimum_refs = 2;
286repeat:
287 next = this_parent->mnt_mounts.next;
288resume:
289 while (next != &this_parent->mnt_mounts) {
290 struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);
291
292 next = next->next;
293
294 actual_refs += atomic_read(&p->mnt_count); 413 actual_refs += atomic_read(&p->mnt_count);
295 minimum_refs += 2; 414 minimum_refs += 2;
296
297 if (!list_empty(&p->mnt_mounts)) {
298 this_parent = p;
299 goto repeat;
300 }
301 }
302
303 if (this_parent != mnt) {
304 next = this_parent->mnt_child.next;
305 this_parent = this_parent->mnt_parent;
306 goto resume;
307 } 415 }
308 spin_unlock(&vfsmount_lock); 416 spin_unlock(&vfsmount_lock);
309 417
@@ -330,45 +438,67 @@ EXPORT_SYMBOL(may_umount_tree);
330 */ 438 */
331int may_umount(struct vfsmount *mnt) 439int may_umount(struct vfsmount *mnt)
332{ 440{
333 if (atomic_read(&mnt->mnt_count) > 2) 441 int ret = 0;
334 return -EBUSY; 442 spin_lock(&vfsmount_lock);
335 return 0; 443 if (propagate_mount_busy(mnt, 2))
444 ret = -EBUSY;
445 spin_unlock(&vfsmount_lock);
446 return ret;
336} 447}
337 448
338EXPORT_SYMBOL(may_umount); 449EXPORT_SYMBOL(may_umount);
339 450
340static void umount_tree(struct vfsmount *mnt) 451void release_mounts(struct list_head *head)
452{
453 struct vfsmount *mnt;
454 while(!list_empty(head)) {
455 mnt = list_entry(head->next, struct vfsmount, mnt_hash);
456 list_del_init(&mnt->mnt_hash);
457 if (mnt->mnt_parent != mnt) {
458 struct dentry *dentry;
459 struct vfsmount *m;
460 spin_lock(&vfsmount_lock);
461 dentry = mnt->mnt_mountpoint;
462 m = mnt->mnt_parent;
463 mnt->mnt_mountpoint = mnt->mnt_root;
464 mnt->mnt_parent = mnt;
465 spin_unlock(&vfsmount_lock);
466 dput(dentry);
467 mntput(m);
468 }
469 mntput(mnt);
470 }
471}
472
473void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
341{ 474{
342 struct vfsmount *p; 475 struct vfsmount *p;
343 LIST_HEAD(kill);
344 476
345 for (p = mnt; p; p = next_mnt(p, mnt)) { 477 for (p = mnt; p; p = next_mnt(p, mnt)) {
346 list_del(&p->mnt_list); 478 list_del(&p->mnt_hash);
347 list_add(&p->mnt_list, &kill); 479 list_add(&p->mnt_hash, kill);
348 p->mnt_namespace = NULL;
349 } 480 }
350 481
351 while (!list_empty(&kill)) { 482 if (propagate)
352 mnt = list_entry(kill.next, struct vfsmount, mnt_list); 483 propagate_umount(kill);
353 list_del_init(&mnt->mnt_list); 484
354 list_del_init(&mnt->mnt_expire); 485 list_for_each_entry(p, kill, mnt_hash) {
355 if (mnt->mnt_parent == mnt) { 486 list_del_init(&p->mnt_expire);
356 spin_unlock(&vfsmount_lock); 487 list_del_init(&p->mnt_list);
357 } else { 488 __touch_namespace(p->mnt_namespace);
358 struct nameidata old_nd; 489 p->mnt_namespace = NULL;
359 detach_mnt(mnt, &old_nd); 490 list_del_init(&p->mnt_child);
360 spin_unlock(&vfsmount_lock); 491 if (p->mnt_parent != p)
361 path_release(&old_nd); 492 mnt->mnt_mountpoint->d_mounted--;
362 } 493 change_mnt_propagation(p, MS_PRIVATE);
363 mntput(mnt);
364 spin_lock(&vfsmount_lock);
365 } 494 }
366} 495}
367 496
368static int do_umount(struct vfsmount *mnt, int flags) 497static int do_umount(struct vfsmount *mnt, int flags)
369{ 498{
370 struct super_block * sb = mnt->mnt_sb; 499 struct super_block *sb = mnt->mnt_sb;
371 int retval; 500 int retval;
501 LIST_HEAD(umount_list);
372 502
373 retval = security_sb_umount(mnt, flags); 503 retval = security_sb_umount(mnt, flags);
374 if (retval) 504 if (retval)
@@ -403,7 +533,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
403 */ 533 */
404 534
405 lock_kernel(); 535 lock_kernel();
406 if( (flags&MNT_FORCE) && sb->s_op->umount_begin) 536 if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
407 sb->s_op->umount_begin(sb); 537 sb->s_op->umount_begin(sb);
408 unlock_kernel(); 538 unlock_kernel();
409 539
@@ -432,29 +562,21 @@ static int do_umount(struct vfsmount *mnt, int flags)
432 return retval; 562 return retval;
433 } 563 }
434 564
435 down_write(&current->namespace->sem); 565 down_write(&namespace_sem);
436 spin_lock(&vfsmount_lock); 566 spin_lock(&vfsmount_lock);
567 event++;
437 568
438 if (atomic_read(&sb->s_active) == 1) {
439 /* last instance - try to be smart */
440 spin_unlock(&vfsmount_lock);
441 lock_kernel();
442 DQUOT_OFF(sb);
443 acct_auto_close(sb);
444 unlock_kernel();
445 security_sb_umount_close(mnt);
446 spin_lock(&vfsmount_lock);
447 }
448 retval = -EBUSY; 569 retval = -EBUSY;
449 if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { 570 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
450 if (!list_empty(&mnt->mnt_list)) 571 if (!list_empty(&mnt->mnt_list))
451 umount_tree(mnt); 572 umount_tree(mnt, 1, &umount_list);
452 retval = 0; 573 retval = 0;
453 } 574 }
454 spin_unlock(&vfsmount_lock); 575 spin_unlock(&vfsmount_lock);
455 if (retval) 576 if (retval)
456 security_sb_umount_busy(mnt); 577 security_sb_umount_busy(mnt);
457 up_write(&current->namespace->sem); 578 up_write(&namespace_sem);
579 release_mounts(&umount_list);
458 return retval; 580 return retval;
459} 581}
460 582
@@ -494,12 +616,11 @@ out:
494#ifdef __ARCH_WANT_SYS_OLDUMOUNT 616#ifdef __ARCH_WANT_SYS_OLDUMOUNT
495 617
496/* 618/*
497 * The 2.0 compatible umount. No flags. 619 * The 2.0 compatible umount. No flags.
498 */ 620 */
499
500asmlinkage long sys_oldumount(char __user * name) 621asmlinkage long sys_oldumount(char __user * name)
501{ 622{
502 return sys_umount(name,0); 623 return sys_umount(name, 0);
503} 624}
504 625
505#endif 626#endif
@@ -516,14 +637,13 @@ static int mount_is_safe(struct nameidata *nd)
516 if (current->uid != nd->dentry->d_inode->i_uid) 637 if (current->uid != nd->dentry->d_inode->i_uid)
517 return -EPERM; 638 return -EPERM;
518 } 639 }
519 if (permission(nd->dentry->d_inode, MAY_WRITE, nd)) 640 if (vfs_permission(nd, MAY_WRITE))
520 return -EPERM; 641 return -EPERM;
521 return 0; 642 return 0;
522#endif 643#endif
523} 644}
524 645
525static int 646static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
526lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
527{ 647{
528 while (1) { 648 while (1) {
529 if (d == dentry) 649 if (d == dentry)
@@ -534,12 +654,16 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
534 } 654 }
535} 655}
536 656
537static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) 657struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
658 int flag)
538{ 659{
539 struct vfsmount *res, *p, *q, *r, *s; 660 struct vfsmount *res, *p, *q, *r, *s;
540 struct nameidata nd; 661 struct nameidata nd;
541 662
542 res = q = clone_mnt(mnt, dentry); 663 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
664 return NULL;
665
666 res = q = clone_mnt(mnt, dentry, flag);
543 if (!q) 667 if (!q)
544 goto Enomem; 668 goto Enomem;
545 q->mnt_mountpoint = mnt->mnt_mountpoint; 669 q->mnt_mountpoint = mnt->mnt_mountpoint;
@@ -550,6 +674,10 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
550 continue; 674 continue;
551 675
552 for (s = r; s; s = next_mnt(s, r)) { 676 for (s = r; s; s = next_mnt(s, r)) {
677 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
678 s = skip_mnt_tree(s);
679 continue;
680 }
553 while (p != s->mnt_parent) { 681 while (p != s->mnt_parent) {
554 p = p->mnt_parent; 682 p = p->mnt_parent;
555 q = q->mnt_parent; 683 q = q->mnt_parent;
@@ -557,7 +685,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
557 p = s; 685 p = s;
558 nd.mnt = q; 686 nd.mnt = q;
559 nd.dentry = p->mnt_mountpoint; 687 nd.dentry = p->mnt_mountpoint;
560 q = clone_mnt(p, p->mnt_root); 688 q = clone_mnt(p, p->mnt_root, flag);
561 if (!q) 689 if (!q)
562 goto Enomem; 690 goto Enomem;
563 spin_lock(&vfsmount_lock); 691 spin_lock(&vfsmount_lock);
@@ -567,15 +695,114 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
567 } 695 }
568 } 696 }
569 return res; 697 return res;
570 Enomem: 698Enomem:
571 if (res) { 699 if (res) {
700 LIST_HEAD(umount_list);
572 spin_lock(&vfsmount_lock); 701 spin_lock(&vfsmount_lock);
573 umount_tree(res); 702 umount_tree(res, 0, &umount_list);
574 spin_unlock(&vfsmount_lock); 703 spin_unlock(&vfsmount_lock);
704 release_mounts(&umount_list);
575 } 705 }
576 return NULL; 706 return NULL;
577} 707}
578 708
709/*
710 * @source_mnt : mount tree to be attached
711 * @nd : place the mount tree @source_mnt is attached
712 * @parent_nd : if non-null, detach the source_mnt from its parent and
713 * store the parent mount and mountpoint dentry.
714 * (done when source_mnt is moved)
715 *
716 * NOTE: in the table below explains the semantics when a source mount
717 * of a given type is attached to a destination mount of a given type.
718 * ---------------------------------------------------------------------------
719 * | BIND MOUNT OPERATION |
720 * |**************************************************************************
721 * | source-->| shared | private | slave | unbindable |
722 * | dest | | | | |
723 * | | | | | | |
724 * | v | | | | |
725 * |**************************************************************************
726 * | shared | shared (++) | shared (+) | shared(+++)| invalid |
727 * | | | | | |
728 * |non-shared| shared (+) | private | slave (*) | invalid |
729 * ***************************************************************************
730 * A bind operation clones the source mount and mounts the clone on the
731 * destination mount.
732 *
733 * (++) the cloned mount is propagated to all the mounts in the propagation
734 * tree of the destination mount and the cloned mount is added to
735 * the peer group of the source mount.
736 * (+) the cloned mount is created under the destination mount and is marked
737 * as shared. The cloned mount is added to the peer group of the source
738 * mount.
739 * (+++) the mount is propagated to all the mounts in the propagation tree
740 * of the destination mount and the cloned mount is made slave
741 * of the same master as that of the source mount. The cloned mount
742 * is marked as 'shared and slave'.
743 * (*) the cloned mount is made a slave of the same master as that of the
744 * source mount.
745 *
746 * ---------------------------------------------------------------------------
747 * | MOVE MOUNT OPERATION |
748 * |**************************************************************************
749 * | source-->| shared | private | slave | unbindable |
750 * | dest | | | | |
751 * | | | | | | |
752 * | v | | | | |
753 * |**************************************************************************
754 * | shared | shared (+) | shared (+) | shared(+++) | invalid |
755 * | | | | | |
756 * |non-shared| shared (+*) | private | slave (*) | unbindable |
757 * ***************************************************************************
758 *
759 * (+) the mount is moved to the destination. And is then propagated to
760 * all the mounts in the propagation tree of the destination mount.
761 * (+*) the mount is moved to the destination.
762 * (+++) the mount is moved to the destination and is then propagated to
763 * all the mounts belonging to the destination mount's propagation tree.
764 * the mount is marked as 'shared and slave'.
765 * (*) the mount continues to be a slave at the new location.
766 *
767 * if the source mount is a tree, the operations explained above is
768 * applied to each mount in the tree.
769 * Must be called without spinlocks held, since this function can sleep
770 * in allocations.
771 */
772static int attach_recursive_mnt(struct vfsmount *source_mnt,
773 struct nameidata *nd, struct nameidata *parent_nd)
774{
775 LIST_HEAD(tree_list);
776 struct vfsmount *dest_mnt = nd->mnt;
777 struct dentry *dest_dentry = nd->dentry;
778 struct vfsmount *child, *p;
779
780 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
781 return -EINVAL;
782
783 if (IS_MNT_SHARED(dest_mnt)) {
784 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
785 set_mnt_shared(p);
786 }
787
788 spin_lock(&vfsmount_lock);
789 if (parent_nd) {
790 detach_mnt(source_mnt, parent_nd);
791 attach_mnt(source_mnt, nd);
792 touch_namespace(current->namespace);
793 } else {
794 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
795 commit_tree(source_mnt);
796 }
797
798 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
799 list_del_init(&child->mnt_hash);
800 commit_tree(child);
801 }
802 spin_unlock(&vfsmount_lock);
803 return 0;
804}
805
579static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) 806static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
580{ 807{
581 int err; 808 int err;
@@ -596,17 +823,8 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
596 goto out_unlock; 823 goto out_unlock;
597 824
598 err = -ENOENT; 825 err = -ENOENT;
599 spin_lock(&vfsmount_lock); 826 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
600 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) { 827 err = attach_recursive_mnt(mnt, nd, NULL);
601 struct list_head head;
602
603 attach_mnt(mnt, nd);
604 list_add_tail(&head, &mnt->mnt_list);
605 list_splice(&head, current->namespace->list.prev);
606 mntget(mnt);
607 err = 0;
608 }
609 spin_unlock(&vfsmount_lock);
610out_unlock: 828out_unlock:
611 up(&nd->dentry->d_inode->i_sem); 829 up(&nd->dentry->d_inode->i_sem);
612 if (!err) 830 if (!err)
@@ -615,6 +833,27 @@ out_unlock:
615} 833}
616 834
617/* 835/*
836 * recursively change the type of the mountpoint.
837 */
838static int do_change_type(struct nameidata *nd, int flag)
839{
840 struct vfsmount *m, *mnt = nd->mnt;
841 int recurse = flag & MS_REC;
842 int type = flag & ~MS_REC;
843
844 if (nd->dentry != nd->mnt->mnt_root)
845 return -EINVAL;
846
847 down_write(&namespace_sem);
848 spin_lock(&vfsmount_lock);
849 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
850 change_mnt_propagation(m, type);
851 spin_unlock(&vfsmount_lock);
852 up_write(&namespace_sem);
853 return 0;
854}
855
856/*
618 * do loopback mount. 857 * do loopback mount.
619 */ 858 */
620static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 859static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
@@ -630,32 +869,34 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
630 if (err) 869 if (err)
631 return err; 870 return err;
632 871
633 down_write(&current->namespace->sem); 872 down_write(&namespace_sem);
634 err = -EINVAL; 873 err = -EINVAL;
635 if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) { 874 if (IS_MNT_UNBINDABLE(old_nd.mnt))
636 err = -ENOMEM; 875 goto out;
637 if (recurse)
638 mnt = copy_tree(old_nd.mnt, old_nd.dentry);
639 else
640 mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
641 }
642 876
643 if (mnt) { 877 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
644 /* stop bind mounts from expiring */ 878 goto out;
879
880 err = -ENOMEM;
881 if (recurse)
882 mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
883 else
884 mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
885
886 if (!mnt)
887 goto out;
888
889 err = graft_tree(mnt, nd);
890 if (err) {
891 LIST_HEAD(umount_list);
645 spin_lock(&vfsmount_lock); 892 spin_lock(&vfsmount_lock);
646 list_del_init(&mnt->mnt_expire); 893 umount_tree(mnt, 0, &umount_list);
647 spin_unlock(&vfsmount_lock); 894 spin_unlock(&vfsmount_lock);
648 895 release_mounts(&umount_list);
649 err = graft_tree(mnt, nd);
650 if (err) {
651 spin_lock(&vfsmount_lock);
652 umount_tree(mnt);
653 spin_unlock(&vfsmount_lock);
654 } else
655 mntput(mnt);
656 } 896 }
657 897
658 up_write(&current->namespace->sem); 898out:
899 up_write(&namespace_sem);
659 path_release(&old_nd); 900 path_release(&old_nd);
660 return err; 901 return err;
661} 902}
@@ -665,12 +906,11 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
665 * If you've mounted a non-root directory somewhere and want to do remount 906 * If you've mounted a non-root directory somewhere and want to do remount
666 * on it - tough luck. 907 * on it - tough luck.
667 */ 908 */
668
669static int do_remount(struct nameidata *nd, int flags, int mnt_flags, 909static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
670 void *data) 910 void *data)
671{ 911{
672 int err; 912 int err;
673 struct super_block * sb = nd->mnt->mnt_sb; 913 struct super_block *sb = nd->mnt->mnt_sb;
674 914
675 if (!capable(CAP_SYS_ADMIN)) 915 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 916 return -EPERM;
@@ -684,13 +924,23 @@ static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
684 down_write(&sb->s_umount); 924 down_write(&sb->s_umount);
685 err = do_remount_sb(sb, flags, data, 0); 925 err = do_remount_sb(sb, flags, data, 0);
686 if (!err) 926 if (!err)
687 nd->mnt->mnt_flags=mnt_flags; 927 nd->mnt->mnt_flags = mnt_flags;
688 up_write(&sb->s_umount); 928 up_write(&sb->s_umount);
689 if (!err) 929 if (!err)
690 security_sb_post_remount(nd->mnt, flags, data); 930 security_sb_post_remount(nd->mnt, flags, data);
691 return err; 931 return err;
692} 932}
693 933
934static inline int tree_contains_unbindable(struct vfsmount *mnt)
935{
936 struct vfsmount *p;
937 for (p = mnt; p; p = next_mnt(p, mnt)) {
938 if (IS_MNT_UNBINDABLE(p))
939 return 1;
940 }
941 return 0;
942}
943
694static int do_move_mount(struct nameidata *nd, char *old_name) 944static int do_move_mount(struct nameidata *nd, char *old_name)
695{ 945{
696 struct nameidata old_nd, parent_nd; 946 struct nameidata old_nd, parent_nd;
@@ -704,8 +954,8 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
704 if (err) 954 if (err)
705 return err; 955 return err;
706 956
707 down_write(&current->namespace->sem); 957 down_write(&namespace_sem);
708 while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 958 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
709 ; 959 ;
710 err = -EINVAL; 960 err = -EINVAL;
711 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) 961 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
@@ -716,39 +966,47 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
716 if (IS_DEADDIR(nd->dentry->d_inode)) 966 if (IS_DEADDIR(nd->dentry->d_inode))
717 goto out1; 967 goto out1;
718 968
719 spin_lock(&vfsmount_lock);
720 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) 969 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
721 goto out2; 970 goto out1;
722 971
723 err = -EINVAL; 972 err = -EINVAL;
724 if (old_nd.dentry != old_nd.mnt->mnt_root) 973 if (old_nd.dentry != old_nd.mnt->mnt_root)
725 goto out2; 974 goto out1;
726 975
727 if (old_nd.mnt == old_nd.mnt->mnt_parent) 976 if (old_nd.mnt == old_nd.mnt->mnt_parent)
728 goto out2; 977 goto out1;
729 978
730 if (S_ISDIR(nd->dentry->d_inode->i_mode) != 979 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
731 S_ISDIR(old_nd.dentry->d_inode->i_mode)) 980 S_ISDIR(old_nd.dentry->d_inode->i_mode))
732 goto out2; 981 goto out1;
733 982 /*
983 * Don't move a mount residing in a shared parent.
984 */
985 if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
986 goto out1;
987 /*
988 * Don't move a mount tree containing unbindable mounts to a destination
989 * mount which is shared.
990 */
991 if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
992 goto out1;
734 err = -ELOOP; 993 err = -ELOOP;
735 for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent) 994 for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
736 if (p == old_nd.mnt) 995 if (p == old_nd.mnt)
737 goto out2; 996 goto out1;
738 err = 0;
739 997
740 detach_mnt(old_nd.mnt, &parent_nd); 998 if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
741 attach_mnt(old_nd.mnt, nd); 999 goto out1;
742 1000
1001 spin_lock(&vfsmount_lock);
743 /* if the mount is moved, it should no longer be expire 1002 /* if the mount is moved, it should no longer be expire
744 * automatically */ 1003 * automatically */
745 list_del_init(&old_nd.mnt->mnt_expire); 1004 list_del_init(&old_nd.mnt->mnt_expire);
746out2:
747 spin_unlock(&vfsmount_lock); 1005 spin_unlock(&vfsmount_lock);
748out1: 1006out1:
749 up(&nd->dentry->d_inode->i_sem); 1007 up(&nd->dentry->d_inode->i_sem);
750out: 1008out:
751 up_write(&current->namespace->sem); 1009 up_write(&namespace_sem);
752 if (!err) 1010 if (!err)
753 path_release(&parent_nd); 1011 path_release(&parent_nd);
754 path_release(&old_nd); 1012 path_release(&old_nd);
@@ -787,9 +1045,9 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
787{ 1045{
788 int err; 1046 int err;
789 1047
790 down_write(&current->namespace->sem); 1048 down_write(&namespace_sem);
791 /* Something was mounted here while we slept */ 1049 /* Something was mounted here while we slept */
792 while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) 1050 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
793 ; 1051 ;
794 err = -EINVAL; 1052 err = -EINVAL;
795 if (!check_mnt(nd->mnt)) 1053 if (!check_mnt(nd->mnt))
@@ -806,25 +1064,28 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
806 goto unlock; 1064 goto unlock;
807 1065
808 newmnt->mnt_flags = mnt_flags; 1066 newmnt->mnt_flags = mnt_flags;
809 newmnt->mnt_namespace = current->namespace; 1067 if ((err = graft_tree(newmnt, nd)))
810 err = graft_tree(newmnt, nd); 1068 goto unlock;
811 1069
812 if (err == 0 && fslist) { 1070 if (fslist) {
813 /* add to the specified expiration list */ 1071 /* add to the specified expiration list */
814 spin_lock(&vfsmount_lock); 1072 spin_lock(&vfsmount_lock);
815 list_add_tail(&newmnt->mnt_expire, fslist); 1073 list_add_tail(&newmnt->mnt_expire, fslist);
816 spin_unlock(&vfsmount_lock); 1074 spin_unlock(&vfsmount_lock);
817 } 1075 }
1076 up_write(&namespace_sem);
1077 return 0;
818 1078
819unlock: 1079unlock:
820 up_write(&current->namespace->sem); 1080 up_write(&namespace_sem);
821 mntput(newmnt); 1081 mntput(newmnt);
822 return err; 1082 return err;
823} 1083}
824 1084
825EXPORT_SYMBOL_GPL(do_add_mount); 1085EXPORT_SYMBOL_GPL(do_add_mount);
826 1086
827static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) 1087static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1088 struct list_head *umounts)
828{ 1089{
829 spin_lock(&vfsmount_lock); 1090 spin_lock(&vfsmount_lock);
830 1091
@@ -841,27 +1102,13 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
841 * Check that it is still dead: the count should now be 2 - as 1102 * Check that it is still dead: the count should now be 2 - as
842 * contributed by the vfsmount parent and the mntget above 1103 * contributed by the vfsmount parent and the mntget above
843 */ 1104 */
844 if (atomic_read(&mnt->mnt_count) == 2) { 1105 if (!propagate_mount_busy(mnt, 2)) {
845 struct nameidata old_nd;
846
847 /* delete from the namespace */ 1106 /* delete from the namespace */
1107 touch_namespace(mnt->mnt_namespace);
848 list_del_init(&mnt->mnt_list); 1108 list_del_init(&mnt->mnt_list);
849 mnt->mnt_namespace = NULL; 1109 mnt->mnt_namespace = NULL;
850 detach_mnt(mnt, &old_nd); 1110 umount_tree(mnt, 1, umounts);
851 spin_unlock(&vfsmount_lock); 1111 spin_unlock(&vfsmount_lock);
852 path_release(&old_nd);
853
854 /*
855 * Now lay it to rest if this was the last ref on the superblock
856 */
857 if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
858 /* last instance - try to be smart */
859 lock_kernel();
860 DQUOT_OFF(mnt->mnt_sb);
861 acct_auto_close(mnt->mnt_sb);
862 unlock_kernel();
863 }
864 mntput(mnt);
865 } else { 1112 } else {
866 /* 1113 /*
867 * Someone brought it back to life whilst we didn't have any 1114 * Someone brought it back to life whilst we didn't have any
@@ -910,6 +1157,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
910 * - dispose of the corpse 1157 * - dispose of the corpse
911 */ 1158 */
912 while (!list_empty(&graveyard)) { 1159 while (!list_empty(&graveyard)) {
1160 LIST_HEAD(umounts);
913 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); 1161 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
914 list_del_init(&mnt->mnt_expire); 1162 list_del_init(&mnt->mnt_expire);
915 1163
@@ -921,13 +1169,12 @@ void mark_mounts_for_expiry(struct list_head *mounts)
921 get_namespace(namespace); 1169 get_namespace(namespace);
922 1170
923 spin_unlock(&vfsmount_lock); 1171 spin_unlock(&vfsmount_lock);
924 down_write(&namespace->sem); 1172 down_write(&namespace_sem);
925 expire_mount(mnt, mounts); 1173 expire_mount(mnt, mounts, &umounts);
926 up_write(&namespace->sem); 1174 up_write(&namespace_sem);
927 1175 release_mounts(&umounts);
928 mntput(mnt); 1176 mntput(mnt);
929 put_namespace(namespace); 1177 put_namespace(namespace);
930
931 spin_lock(&vfsmount_lock); 1178 spin_lock(&vfsmount_lock);
932 } 1179 }
933 1180
@@ -942,8 +1189,8 @@ EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
942 * Note that this function differs from copy_from_user() in that it will oops 1189 * Note that this function differs from copy_from_user() in that it will oops
943 * on bad values of `to', rather than returning a short copy. 1190 * on bad values of `to', rather than returning a short copy.
944 */ 1191 */
945static long 1192static long exact_copy_from_user(void *to, const void __user * from,
946exact_copy_from_user(void *to, const void __user *from, unsigned long n) 1193 unsigned long n)
947{ 1194{
948 char *t = to; 1195 char *t = to;
949 const char __user *f = from; 1196 const char __user *f = from;
@@ -964,12 +1211,12 @@ exact_copy_from_user(void *to, const void __user *from, unsigned long n)
964 return n; 1211 return n;
965} 1212}
966 1213
967int copy_mount_options(const void __user *data, unsigned long *where) 1214int copy_mount_options(const void __user * data, unsigned long *where)
968{ 1215{
969 int i; 1216 int i;
970 unsigned long page; 1217 unsigned long page;
971 unsigned long size; 1218 unsigned long size;
972 1219
973 *where = 0; 1220 *where = 0;
974 if (!data) 1221 if (!data)
975 return 0; 1222 return 0;
@@ -988,7 +1235,7 @@ int copy_mount_options(const void __user *data, unsigned long *where)
988 1235
989 i = size - exact_copy_from_user((void *)page, data, size); 1236 i = size - exact_copy_from_user((void *)page, data, size);
990 if (!i) { 1237 if (!i) {
991 free_page(page); 1238 free_page(page);
992 return -EFAULT; 1239 return -EFAULT;
993 } 1240 }
994 if (i != PAGE_SIZE) 1241 if (i != PAGE_SIZE)
@@ -1011,7 +1258,7 @@ int copy_mount_options(const void __user *data, unsigned long *where)
1011 * Therefore, if this magic number is present, it carries no information 1258 * Therefore, if this magic number is present, it carries no information
1012 * and must be discarded. 1259 * and must be discarded.
1013 */ 1260 */
1014long do_mount(char * dev_name, char * dir_name, char *type_page, 1261long do_mount(char *dev_name, char *dir_name, char *type_page,
1015 unsigned long flags, void *data_page) 1262 unsigned long flags, void *data_page)
1016{ 1263{
1017 struct nameidata nd; 1264 struct nameidata nd;
@@ -1039,7 +1286,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
1039 mnt_flags |= MNT_NODEV; 1286 mnt_flags |= MNT_NODEV;
1040 if (flags & MS_NOEXEC) 1287 if (flags & MS_NOEXEC)
1041 mnt_flags |= MNT_NOEXEC; 1288 mnt_flags |= MNT_NOEXEC;
1042 flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); 1289 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);
1043 1290
1044 /* ... and get the mountpoint */ 1291 /* ... and get the mountpoint */
1045 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1292 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1055,6 +1302,8 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
1055 data_page); 1302 data_page);
1056 else if (flags & MS_BIND) 1303 else if (flags & MS_BIND)
1057 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1304 retval = do_loopback(&nd, dev_name, flags & MS_REC);
1305 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1306 retval = do_change_type(&nd, flags);
1058 else if (flags & MS_MOVE) 1307 else if (flags & MS_MOVE)
1059 retval = do_move_mount(&nd, dev_name); 1308 retval = do_move_mount(&nd, dev_name);
1060 else 1309 else
@@ -1091,14 +1340,16 @@ int copy_namespace(int flags, struct task_struct *tsk)
1091 goto out; 1340 goto out;
1092 1341
1093 atomic_set(&new_ns->count, 1); 1342 atomic_set(&new_ns->count, 1);
1094 init_rwsem(&new_ns->sem);
1095 INIT_LIST_HEAD(&new_ns->list); 1343 INIT_LIST_HEAD(&new_ns->list);
1344 init_waitqueue_head(&new_ns->poll);
1345 new_ns->event = 0;
1096 1346
1097 down_write(&tsk->namespace->sem); 1347 down_write(&namespace_sem);
1098 /* First pass: copy the tree topology */ 1348 /* First pass: copy the tree topology */
1099 new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root); 1349 new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
1350 CL_COPY_ALL | CL_EXPIRE);
1100 if (!new_ns->root) { 1351 if (!new_ns->root) {
1101 up_write(&tsk->namespace->sem); 1352 up_write(&namespace_sem);
1102 kfree(new_ns); 1353 kfree(new_ns);
1103 goto out; 1354 goto out;
1104 } 1355 }
@@ -1132,7 +1383,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
1132 p = next_mnt(p, namespace->root); 1383 p = next_mnt(p, namespace->root);
1133 q = next_mnt(q, new_ns->root); 1384 q = next_mnt(q, new_ns->root);
1134 } 1385 }
1135 up_write(&tsk->namespace->sem); 1386 up_write(&namespace_sem);
1136 1387
1137 tsk->namespace = new_ns; 1388 tsk->namespace = new_ns;
1138 1389
@@ -1161,7 +1412,7 @@ asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1161 unsigned long dev_page; 1412 unsigned long dev_page;
1162 char *dir_page; 1413 char *dir_page;
1163 1414
1164 retval = copy_mount_options (type, &type_page); 1415 retval = copy_mount_options(type, &type_page);
1165 if (retval < 0) 1416 if (retval < 0)
1166 return retval; 1417 return retval;
1167 1418
@@ -1170,17 +1421,17 @@ asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1170 if (IS_ERR(dir_page)) 1421 if (IS_ERR(dir_page))
1171 goto out1; 1422 goto out1;
1172 1423
1173 retval = copy_mount_options (dev_name, &dev_page); 1424 retval = copy_mount_options(dev_name, &dev_page);
1174 if (retval < 0) 1425 if (retval < 0)
1175 goto out2; 1426 goto out2;
1176 1427
1177 retval = copy_mount_options (data, &data_page); 1428 retval = copy_mount_options(data, &data_page);
1178 if (retval < 0) 1429 if (retval < 0)
1179 goto out3; 1430 goto out3;
1180 1431
1181 lock_kernel(); 1432 lock_kernel();
1182 retval = do_mount((char*)dev_page, dir_page, (char*)type_page, 1433 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
1183 flags, (void*)data_page); 1434 flags, (void *)data_page);
1184 unlock_kernel(); 1435 unlock_kernel();
1185 free_page(data_page); 1436 free_page(data_page);
1186 1437
@@ -1249,9 +1500,11 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1249 if (fs) { 1500 if (fs) {
1250 atomic_inc(&fs->count); 1501 atomic_inc(&fs->count);
1251 task_unlock(p); 1502 task_unlock(p);
1252 if (fs->root==old_nd->dentry&&fs->rootmnt==old_nd->mnt) 1503 if (fs->root == old_nd->dentry
1504 && fs->rootmnt == old_nd->mnt)
1253 set_fs_root(fs, new_nd->mnt, new_nd->dentry); 1505 set_fs_root(fs, new_nd->mnt, new_nd->dentry);
1254 if (fs->pwd==old_nd->dentry&&fs->pwdmnt==old_nd->mnt) 1506 if (fs->pwd == old_nd->dentry
1507 && fs->pwdmnt == old_nd->mnt)
1255 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry); 1508 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
1256 put_fs_struct(fs); 1509 put_fs_struct(fs);
1257 } else 1510 } else
@@ -1281,8 +1534,8 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1281 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root 1534 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
1282 * first. 1535 * first.
1283 */ 1536 */
1284 1537asmlinkage long sys_pivot_root(const char __user * new_root,
1285asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *put_old) 1538 const char __user * put_old)
1286{ 1539{
1287 struct vfsmount *tmp; 1540 struct vfsmount *tmp;
1288 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; 1541 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
@@ -1293,14 +1546,15 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1293 1546
1294 lock_kernel(); 1547 lock_kernel();
1295 1548
1296 error = __user_walk(new_root, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); 1549 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
1550 &new_nd);
1297 if (error) 1551 if (error)
1298 goto out0; 1552 goto out0;
1299 error = -EINVAL; 1553 error = -EINVAL;
1300 if (!check_mnt(new_nd.mnt)) 1554 if (!check_mnt(new_nd.mnt))
1301 goto out1; 1555 goto out1;
1302 1556
1303 error = __user_walk(put_old, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); 1557 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
1304 if (error) 1558 if (error)
1305 goto out1; 1559 goto out1;
1306 1560
@@ -1314,9 +1568,13 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1314 user_nd.mnt = mntget(current->fs->rootmnt); 1568 user_nd.mnt = mntget(current->fs->rootmnt);
1315 user_nd.dentry = dget(current->fs->root); 1569 user_nd.dentry = dget(current->fs->root);
1316 read_unlock(&current->fs->lock); 1570 read_unlock(&current->fs->lock);
1317 down_write(&current->namespace->sem); 1571 down_write(&namespace_sem);
1318 down(&old_nd.dentry->d_inode->i_sem); 1572 down(&old_nd.dentry->d_inode->i_sem);
1319 error = -EINVAL; 1573 error = -EINVAL;
1574 if (IS_MNT_SHARED(old_nd.mnt) ||
1575 IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
1576 IS_MNT_SHARED(user_nd.mnt->mnt_parent))
1577 goto out2;
1320 if (!check_mnt(user_nd.mnt)) 1578 if (!check_mnt(user_nd.mnt))
1321 goto out2; 1579 goto out2;
1322 error = -ENOENT; 1580 error = -ENOENT;
@@ -1356,6 +1614,7 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1356 detach_mnt(user_nd.mnt, &root_parent); 1614 detach_mnt(user_nd.mnt, &root_parent);
1357 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ 1615 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
1358 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ 1616 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
1617 touch_namespace(current->namespace);
1359 spin_unlock(&vfsmount_lock); 1618 spin_unlock(&vfsmount_lock);
1360 chroot_fs_refs(&user_nd, &new_nd); 1619 chroot_fs_refs(&user_nd, &new_nd);
1361 security_sb_post_pivotroot(&user_nd, &new_nd); 1620 security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1364,7 +1623,7 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1364 path_release(&parent_nd); 1623 path_release(&parent_nd);
1365out2: 1624out2:
1366 up(&old_nd.dentry->d_inode->i_sem); 1625 up(&old_nd.dentry->d_inode->i_sem);
1367 up_write(&current->namespace->sem); 1626 up_write(&namespace_sem);
1368 path_release(&user_nd); 1627 path_release(&user_nd);
1369 path_release(&old_nd); 1628 path_release(&old_nd);
1370out1: 1629out1:
@@ -1391,7 +1650,8 @@ static void __init init_mount_tree(void)
1391 panic("Can't allocate initial namespace"); 1650 panic("Can't allocate initial namespace");
1392 atomic_set(&namespace->count, 1); 1651 atomic_set(&namespace->count, 1);
1393 INIT_LIST_HEAD(&namespace->list); 1652 INIT_LIST_HEAD(&namespace->list);
1394 init_rwsem(&namespace->sem); 1653 init_waitqueue_head(&namespace->poll);
1654 namespace->event = 0;
1395 list_add(&mnt->mnt_list, &namespace->list); 1655 list_add(&mnt->mnt_list, &namespace->list);
1396 namespace->root = mnt; 1656 namespace->root = mnt;
1397 mnt->mnt_namespace = namespace; 1657 mnt->mnt_namespace = namespace;
@@ -1414,11 +1674,12 @@ void __init mnt_init(unsigned long mempages)
1414 unsigned int nr_hash; 1674 unsigned int nr_hash;
1415 int i; 1675 int i;
1416 1676
1677 init_rwsem(&namespace_sem);
1678
1417 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), 1679 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
1418 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1680 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
1419 1681
1420 mount_hashtable = (struct list_head *) 1682 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
1421 __get_free_page(GFP_ATOMIC);
1422 1683
1423 if (!mount_hashtable) 1684 if (!mount_hashtable)
1424 panic("Failed to allocate mount hash table\n"); 1685 panic("Failed to allocate mount hash table\n");
@@ -1440,7 +1701,7 @@ void __init mnt_init(unsigned long mempages)
1440 * from the number of bits we can fit. 1701 * from the number of bits we can fit.
1441 */ 1702 */
1442 nr_hash = 1UL << hash_bits; 1703 nr_hash = 1UL << hash_bits;
1443 hash_mask = nr_hash-1; 1704 hash_mask = nr_hash - 1;
1444 1705
1445 printk("Mount-cache hash table entries: %d\n", nr_hash); 1706 printk("Mount-cache hash table entries: %d\n", nr_hash);
1446 1707
@@ -1460,12 +1721,14 @@ void __init mnt_init(unsigned long mempages)
1460void __put_namespace(struct namespace *namespace) 1721void __put_namespace(struct namespace *namespace)
1461{ 1722{
1462 struct vfsmount *root = namespace->root; 1723 struct vfsmount *root = namespace->root;
1724 LIST_HEAD(umount_list);
1463 namespace->root = NULL; 1725 namespace->root = NULL;
1464 spin_unlock(&vfsmount_lock); 1726 spin_unlock(&vfsmount_lock);
1465 down_write(&namespace->sem); 1727 down_write(&namespace_sem);
1466 spin_lock(&vfsmount_lock); 1728 spin_lock(&vfsmount_lock);
1467 umount_tree(root); 1729 umount_tree(root, 0, &umount_list);
1468 spin_unlock(&vfsmount_lock); 1730 spin_unlock(&vfsmount_lock);
1469 up_write(&namespace->sem); 1731 up_write(&namespace_sem);
1732 release_mounts(&umount_list);
1470 kfree(namespace); 1733 kfree(namespace);
1471} 1734}
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 88df79356a1..fd3efdca5ae 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -30,11 +30,13 @@
30#define NCP_PACKET_SIZE_INTERNAL 65536 30#define NCP_PACKET_SIZE_INTERNAL 65536
31 31
32static int 32static int
33ncp_get_fs_info(struct ncp_server* server, struct inode* inode, struct ncp_fs_info __user *arg) 33ncp_get_fs_info(struct ncp_server * server, struct file *file,
34 struct ncp_fs_info __user *arg)
34{ 35{
36 struct inode *inode = file->f_dentry->d_inode;
35 struct ncp_fs_info info; 37 struct ncp_fs_info info;
36 38
37 if ((permission(inode, MAY_WRITE, NULL) != 0) 39 if ((file_permission(file, MAY_WRITE) != 0)
38 && (current->uid != server->m.mounted_uid)) { 40 && (current->uid != server->m.mounted_uid)) {
39 return -EACCES; 41 return -EACCES;
40 } 42 }
@@ -58,11 +60,13 @@ ncp_get_fs_info(struct ncp_server* server, struct inode* inode, struct ncp_fs_in
58} 60}
59 61
60static int 62static int
61ncp_get_fs_info_v2(struct ncp_server* server, struct inode* inode, struct ncp_fs_info_v2 __user * arg) 63ncp_get_fs_info_v2(struct ncp_server * server, struct file *file,
64 struct ncp_fs_info_v2 __user * arg)
62{ 65{
66 struct inode *inode = file->f_dentry->d_inode;
63 struct ncp_fs_info_v2 info2; 67 struct ncp_fs_info_v2 info2;
64 68
65 if ((permission(inode, MAY_WRITE, NULL) != 0) 69 if ((file_permission(file, MAY_WRITE) != 0)
66 && (current->uid != server->m.mounted_uid)) { 70 && (current->uid != server->m.mounted_uid)) {
67 return -EACCES; 71 return -EACCES;
68 } 72 }
@@ -190,7 +194,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
190 switch (cmd) { 194 switch (cmd) {
191 case NCP_IOC_NCPREQUEST: 195 case NCP_IOC_NCPREQUEST:
192 196
193 if ((permission(inode, MAY_WRITE, NULL) != 0) 197 if ((file_permission(filp, MAY_WRITE) != 0)
194 && (current->uid != server->m.mounted_uid)) { 198 && (current->uid != server->m.mounted_uid)) {
195 return -EACCES; 199 return -EACCES;
196 } 200 }
@@ -245,16 +249,16 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
245 return ncp_conn_logged_in(inode->i_sb); 249 return ncp_conn_logged_in(inode->i_sb);
246 250
247 case NCP_IOC_GET_FS_INFO: 251 case NCP_IOC_GET_FS_INFO:
248 return ncp_get_fs_info(server, inode, argp); 252 return ncp_get_fs_info(server, filp, argp);
249 253
250 case NCP_IOC_GET_FS_INFO_V2: 254 case NCP_IOC_GET_FS_INFO_V2:
251 return ncp_get_fs_info_v2(server, inode, argp); 255 return ncp_get_fs_info_v2(server, filp, argp);
252 256
253 case NCP_IOC_GETMOUNTUID2: 257 case NCP_IOC_GETMOUNTUID2:
254 { 258 {
255 unsigned long tmp = server->m.mounted_uid; 259 unsigned long tmp = server->m.mounted_uid;
256 260
257 if ( (permission(inode, MAY_READ, NULL) != 0) 261 if ((file_permission(filp, MAY_READ) != 0)
258 && (current->uid != server->m.mounted_uid)) 262 && (current->uid != server->m.mounted_uid))
259 { 263 {
260 return -EACCES; 264 return -EACCES;
@@ -268,7 +272,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
268 { 272 {
269 struct ncp_setroot_ioctl sr; 273 struct ncp_setroot_ioctl sr;
270 274
271 if ( (permission(inode, MAY_READ, NULL) != 0) 275 if ((file_permission(filp, MAY_READ) != 0)
272 && (current->uid != server->m.mounted_uid)) 276 && (current->uid != server->m.mounted_uid))
273 { 277 {
274 return -EACCES; 278 return -EACCES;
@@ -343,7 +347,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
343 347
344#ifdef CONFIG_NCPFS_PACKET_SIGNING 348#ifdef CONFIG_NCPFS_PACKET_SIGNING
345 case NCP_IOC_SIGN_INIT: 349 case NCP_IOC_SIGN_INIT:
346 if ((permission(inode, MAY_WRITE, NULL) != 0) 350 if ((file_permission(filp, MAY_WRITE) != 0)
347 && (current->uid != server->m.mounted_uid)) 351 && (current->uid != server->m.mounted_uid))
348 { 352 {
349 return -EACCES; 353 return -EACCES;
@@ -366,7 +370,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
366 return 0; 370 return 0;
367 371
368 case NCP_IOC_SIGN_WANTED: 372 case NCP_IOC_SIGN_WANTED:
369 if ( (permission(inode, MAY_READ, NULL) != 0) 373 if ((file_permission(filp, MAY_READ) != 0)
370 && (current->uid != server->m.mounted_uid)) 374 && (current->uid != server->m.mounted_uid))
371 { 375 {
372 return -EACCES; 376 return -EACCES;
@@ -379,7 +383,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
379 { 383 {
380 int newstate; 384 int newstate;
381 385
382 if ( (permission(inode, MAY_WRITE, NULL) != 0) 386 if ((file_permission(filp, MAY_WRITE) != 0)
383 && (current->uid != server->m.mounted_uid)) 387 && (current->uid != server->m.mounted_uid))
384 { 388 {
385 return -EACCES; 389 return -EACCES;
@@ -400,7 +404,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
400 404
401#ifdef CONFIG_NCPFS_IOCTL_LOCKING 405#ifdef CONFIG_NCPFS_IOCTL_LOCKING
402 case NCP_IOC_LOCKUNLOCK: 406 case NCP_IOC_LOCKUNLOCK:
403 if ( (permission(inode, MAY_WRITE, NULL) != 0) 407 if ((file_permission(filp, MAY_WRITE) != 0)
404 && (current->uid != server->m.mounted_uid)) 408 && (current->uid != server->m.mounted_uid))
405 { 409 {
406 return -EACCES; 410 return -EACCES;
@@ -605,7 +609,7 @@ outrel:
605#endif /* CONFIG_NCPFS_NLS */ 609#endif /* CONFIG_NCPFS_NLS */
606 610
607 case NCP_IOC_SETDENTRYTTL: 611 case NCP_IOC_SETDENTRYTTL:
608 if ((permission(inode, MAY_WRITE, NULL) != 0) && 612 if ((file_permission(filp, MAY_WRITE) != 0) &&
609 (current->uid != server->m.mounted_uid)) 613 (current->uid != server->m.mounted_uid))
610 return -EACCES; 614 return -EACCES;
611 { 615 {
@@ -635,7 +639,7 @@ outrel:
635 so we have this out of switch */ 639 so we have this out of switch */
636 if (cmd == NCP_IOC_GETMOUNTUID) { 640 if (cmd == NCP_IOC_GETMOUNTUID) {
637 __kernel_uid_t uid = 0; 641 __kernel_uid_t uid = 0;
638 if ((permission(inode, MAY_READ, NULL) != 0) 642 if ((file_permission(filp, MAY_READ) != 0)
639 && (current->uid != server->m.mounted_uid)) { 643 && (current->uid != server->m.mounted_uid)) {
640 return -EACCES; 644 return -EACCES;
641 } 645 }
diff --git a/fs/open.c b/fs/open.c
index 6e8136751e9..f53a5b9ffb7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -240,7 +240,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
240 if (!S_ISREG(inode->i_mode)) 240 if (!S_ISREG(inode->i_mode))
241 goto dput_and_out; 241 goto dput_and_out;
242 242
243 error = permission(inode,MAY_WRITE,&nd); 243 error = vfs_permission(&nd, MAY_WRITE);
244 if (error) 244 if (error)
245 goto dput_and_out; 245 goto dput_and_out;
246 246
@@ -394,7 +394,7 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
394 goto dput_and_out; 394 goto dput_and_out;
395 395
396 if (current->fsuid != inode->i_uid && 396 if (current->fsuid != inode->i_uid &&
397 (error = permission(inode,MAY_WRITE,&nd)) != 0) 397 (error = vfs_permission(&nd, MAY_WRITE)) != 0)
398 goto dput_and_out; 398 goto dput_and_out;
399 } 399 }
400 down(&inode->i_sem); 400 down(&inode->i_sem);
@@ -447,7 +447,7 @@ long do_utimes(char __user * filename, struct timeval * times)
447 goto dput_and_out; 447 goto dput_and_out;
448 448
449 if (current->fsuid != inode->i_uid && 449 if (current->fsuid != inode->i_uid &&
450 (error = permission(inode,MAY_WRITE,&nd)) != 0) 450 (error = vfs_permission(&nd, MAY_WRITE)) != 0)
451 goto dput_and_out; 451 goto dput_and_out;
452 } 452 }
453 down(&inode->i_sem); 453 down(&inode->i_sem);
@@ -506,7 +506,7 @@ asmlinkage long sys_access(const char __user * filename, int mode)
506 506
507 res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 507 res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
508 if (!res) { 508 if (!res) {
509 res = permission(nd.dentry->d_inode, mode, &nd); 509 res = vfs_permission(&nd, mode);
510 /* SuS v2 requires we report a read only fs too */ 510 /* SuS v2 requires we report a read only fs too */
511 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) 511 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
512 && !special_file(nd.dentry->d_inode->i_mode)) 512 && !special_file(nd.dentry->d_inode->i_mode))
@@ -530,7 +530,7 @@ asmlinkage long sys_chdir(const char __user * filename)
530 if (error) 530 if (error)
531 goto out; 531 goto out;
532 532
533 error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); 533 error = vfs_permission(&nd, MAY_EXEC);
534 if (error) 534 if (error)
535 goto dput_and_out; 535 goto dput_and_out;
536 536
@@ -563,7 +563,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
563 if (!S_ISDIR(inode->i_mode)) 563 if (!S_ISDIR(inode->i_mode))
564 goto out_putf; 564 goto out_putf;
565 565
566 error = permission(inode, MAY_EXEC, NULL); 566 error = file_permission(file, MAY_EXEC);
567 if (!error) 567 if (!error)
568 set_fs_pwd(current->fs, mnt, dentry); 568 set_fs_pwd(current->fs, mnt, dentry);
569out_putf: 569out_putf:
@@ -581,7 +581,7 @@ asmlinkage long sys_chroot(const char __user * filename)
581 if (error) 581 if (error)
582 goto out; 582 goto out;
583 583
584 error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); 584 error = vfs_permission(&nd, MAY_EXEC);
585 if (error) 585 if (error)
586 goto dput_and_out; 586 goto dput_and_out;
587 587
diff --git a/fs/pnode.c b/fs/pnode.c
new file mode 100644
index 00000000000..aeeec8ba8dd
--- /dev/null
+++ b/fs/pnode.c
@@ -0,0 +1,305 @@
1/*
2 * linux/fs/pnode.c
3 *
4 * (C) Copyright IBM Corporation 2005.
5 * Released under GPL v2.
6 * Author : Ram Pai (linuxram@us.ibm.com)
7 *
8 */
9#include <linux/namespace.h>
10#include <linux/mount.h>
11#include <linux/fs.h>
12#include "pnode.h"
13
14/* return the next shared peer mount of @p */
15static inline struct vfsmount *next_peer(struct vfsmount *p)
16{
17 return list_entry(p->mnt_share.next, struct vfsmount, mnt_share);
18}
19
20static inline struct vfsmount *first_slave(struct vfsmount *p)
21{
22 return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave);
23}
24
25static inline struct vfsmount *next_slave(struct vfsmount *p)
26{
27 return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
28}
29
30static int do_make_slave(struct vfsmount *mnt)
31{
32 struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master;
33 struct vfsmount *slave_mnt;
34
35 /*
36 * slave 'mnt' to a peer mount that has the
37 * same root dentry. If none is available than
38 * slave it to anything that is available.
39 */
40 while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
41 peer_mnt->mnt_root != mnt->mnt_root) ;
42
43 if (peer_mnt == mnt) {
44 peer_mnt = next_peer(mnt);
45 if (peer_mnt == mnt)
46 peer_mnt = NULL;
47 }
48 list_del_init(&mnt->mnt_share);
49
50 if (peer_mnt)
51 master = peer_mnt;
52
53 if (master) {
54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
55 slave_mnt->mnt_master = master;
56 list_del(&mnt->mnt_slave);
57 list_add(&mnt->mnt_slave, &master->mnt_slave_list);
58 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
59 INIT_LIST_HEAD(&mnt->mnt_slave_list);
60 } else {
61 struct list_head *p = &mnt->mnt_slave_list;
62 while (!list_empty(p)) {
63 slave_mnt = list_entry(p->next,
64 struct vfsmount, mnt_slave);
65 list_del_init(&slave_mnt->mnt_slave);
66 slave_mnt->mnt_master = NULL;
67 }
68 }
69 mnt->mnt_master = master;
70 CLEAR_MNT_SHARED(mnt);
71 INIT_LIST_HEAD(&mnt->mnt_slave_list);
72 return 0;
73}
74
75void change_mnt_propagation(struct vfsmount *mnt, int type)
76{
77 if (type == MS_SHARED) {
78 set_mnt_shared(mnt);
79 return;
80 }
81 do_make_slave(mnt);
82 if (type != MS_SLAVE) {
83 list_del_init(&mnt->mnt_slave);
84 mnt->mnt_master = NULL;
85 if (type == MS_UNBINDABLE)
86 mnt->mnt_flags |= MNT_UNBINDABLE;
87 }
88}
89
90/*
91 * get the next mount in the propagation tree.
92 * @m: the mount seen last
93 * @origin: the original mount from where the tree walk initiated
94 */
95static struct vfsmount *propagation_next(struct vfsmount *m,
96 struct vfsmount *origin)
97{
98 /* are there any slaves of this mount? */
99 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
100 return first_slave(m);
101
102 while (1) {
103 struct vfsmount *next;
104 struct vfsmount *master = m->mnt_master;
105
106 if ( master == origin->mnt_master ) {
107 next = next_peer(m);
108 return ((next == origin) ? NULL : next);
109 } else if (m->mnt_slave.next != &master->mnt_slave_list)
110 return next_slave(m);
111
112 /* back at master */
113 m = master;
114 }
115}
116
117/*
118 * return the source mount to be used for cloning
119 *
120 * @dest the current destination mount
121 * @last_dest the last seen destination mount
122 * @last_src the last seen source mount
123 * @type return CL_SLAVE if the new mount has to be
124 * cloned as a slave.
125 */
126static struct vfsmount *get_source(struct vfsmount *dest,
127 struct vfsmount *last_dest,
128 struct vfsmount *last_src,
129 int *type)
130{
131 struct vfsmount *p_last_src = NULL;
132 struct vfsmount *p_last_dest = NULL;
133 *type = CL_PROPAGATION;;
134
135 if (IS_MNT_SHARED(dest))
136 *type |= CL_MAKE_SHARED;
137
138 while (last_dest != dest->mnt_master) {
139 p_last_dest = last_dest;
140 p_last_src = last_src;
141 last_dest = last_dest->mnt_master;
142 last_src = last_src->mnt_master;
143 }
144
145 if (p_last_dest) {
146 do {
147 p_last_dest = next_peer(p_last_dest);
148 } while (IS_MNT_NEW(p_last_dest));
149 }
150
151 if (dest != p_last_dest) {
152 *type |= CL_SLAVE;
153 return last_src;
154 } else
155 return p_last_src;
156}
157
158/*
159 * mount 'source_mnt' under the destination 'dest_mnt' at
160 * dentry 'dest_dentry'. And propagate that mount to
161 * all the peer and slave mounts of 'dest_mnt'.
162 * Link all the new mounts into a propagation tree headed at
163 * source_mnt. Also link all the new mounts using ->mnt_list
164 * headed at source_mnt's ->mnt_list
165 *
166 * @dest_mnt: destination mount.
167 * @dest_dentry: destination dentry.
168 * @source_mnt: source mount.
169 * @tree_list : list of heads of trees to be attached.
170 */
171int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
172 struct vfsmount *source_mnt, struct list_head *tree_list)
173{
174 struct vfsmount *m, *child;
175 int ret = 0;
176 struct vfsmount *prev_dest_mnt = dest_mnt;
177 struct vfsmount *prev_src_mnt = source_mnt;
178 LIST_HEAD(tmp_list);
179 LIST_HEAD(umount_list);
180
181 for (m = propagation_next(dest_mnt, dest_mnt); m;
182 m = propagation_next(m, dest_mnt)) {
183 int type;
184 struct vfsmount *source;
185
186 if (IS_MNT_NEW(m))
187 continue;
188
189 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
190
191 if (!(child = copy_tree(source, source->mnt_root, type))) {
192 ret = -ENOMEM;
193 list_splice(tree_list, tmp_list.prev);
194 goto out;
195 }
196
197 if (is_subdir(dest_dentry, m->mnt_root)) {
198 mnt_set_mountpoint(m, dest_dentry, child);
199 list_add_tail(&child->mnt_hash, tree_list);
200 } else {
201 /*
202 * This can happen if the parent mount was bind mounted
203 * on some subdirectory of a shared/slave mount.
204 */
205 list_add_tail(&child->mnt_hash, &tmp_list);
206 }
207 prev_dest_mnt = m;
208 prev_src_mnt = child;
209 }
210out:
211 spin_lock(&vfsmount_lock);
212 while (!list_empty(&tmp_list)) {
213 child = list_entry(tmp_list.next, struct vfsmount, mnt_hash);
214 list_del_init(&child->mnt_hash);
215 umount_tree(child, 0, &umount_list);
216 }
217 spin_unlock(&vfsmount_lock);
218 release_mounts(&umount_list);
219 return ret;
220}
221
222/*
223 * return true if the refcount is greater than count
224 */
225static inline int do_refcount_check(struct vfsmount *mnt, int count)
226{
227 int mycount = atomic_read(&mnt->mnt_count);
228 return (mycount > count);
229}
230
231/*
232 * check if the mount 'mnt' can be unmounted successfully.
233 * @mnt: the mount to be checked for unmount
234 * NOTE: unmounting 'mnt' would naturally propagate to all
235 * other mounts its parent propagates to.
236 * Check if any of these mounts that **do not have submounts**
237 * have more references than 'refcnt'. If so return busy.
238 */
239int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
240{
241 struct vfsmount *m, *child;
242 struct vfsmount *parent = mnt->mnt_parent;
243 int ret = 0;
244
245 if (mnt == parent)
246 return do_refcount_check(mnt, refcnt);
247
248 /*
249 * quickly check if the current mount can be unmounted.
250 * If not, we don't have to go checking for all other
251 * mounts
252 */
253 if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
254 return 1;
255
256 for (m = propagation_next(parent, parent); m;
257 m = propagation_next(m, parent)) {
258 child = __lookup_mnt(m, mnt->mnt_mountpoint, 0);
259 if (child && list_empty(&child->mnt_mounts) &&
260 (ret = do_refcount_check(child, 1)))
261 break;
262 }
263 return ret;
264}
265
266/*
267 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
268 * parent propagates to.
269 */
270static void __propagate_umount(struct vfsmount *mnt)
271{
272 struct vfsmount *parent = mnt->mnt_parent;
273 struct vfsmount *m;
274
275 BUG_ON(parent == mnt);
276
277 for (m = propagation_next(parent, parent); m;
278 m = propagation_next(m, parent)) {
279
280 struct vfsmount *child = __lookup_mnt(m,
281 mnt->mnt_mountpoint, 0);
282 /*
283 * umount the child only if the child has no
284 * other children
285 */
286 if (child && list_empty(&child->mnt_mounts)) {
287 list_del(&child->mnt_hash);
288 list_add_tail(&child->mnt_hash, &mnt->mnt_hash);
289 }
290 }
291}
292
293/*
294 * collect all mounts that receive propagation from the mount in @list,
295 * and return these additional mounts in the same list.
296 * @list: the list of mounts to be unmounted.
297 */
298int propagate_umount(struct list_head *list)
299{
300 struct vfsmount *mnt;
301
302 list_for_each_entry(mnt, list, mnt_hash)
303 __propagate_umount(mnt);
304 return 0;
305}
diff --git a/fs/pnode.h b/fs/pnode.h
new file mode 100644
index 00000000000..020e1bb60fd
--- /dev/null
+++ b/fs/pnode.h
@@ -0,0 +1,37 @@
1/*
2 * linux/fs/pnode.h
3 *
4 * (C) Copyright IBM Corporation 2005.
5 * Released under GPL v2.
6 *
7 */
8#ifndef _LINUX_PNODE_H
9#define _LINUX_PNODE_H
10
11#include <linux/list.h>
12#include <linux/mount.h>
13
/*
 * Propagation-state helpers for a struct vfsmount pointer.
 * The argument is parenthesised so that any pointer expression
 * (e.g. "p + 1" or "cond ? a : b") can be passed safely.
 */
#define IS_MNT_SHARED(mnt) ((mnt)->mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(mnt) ((mnt)->mnt_master)
#define IS_MNT_NEW(mnt) (!(mnt)->mnt_namespace)
#define CLEAR_MNT_SHARED(mnt) ((mnt)->mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(mnt) ((mnt)->mnt_flags & MNT_UNBINDABLE)
19
20#define CL_EXPIRE 0x01
21#define CL_SLAVE 0x02
22#define CL_COPY_ALL 0x04
23#define CL_MAKE_SHARED 0x08
24#define CL_PROPAGATION 0x10
25
26static inline void set_mnt_shared(struct vfsmount *mnt)
27{
28 mnt->mnt_flags &= ~MNT_PNODE_MASK;
29 mnt->mnt_flags |= MNT_SHARED;
30}
31
32void change_mnt_propagation(struct vfsmount *, int);
33int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
34 struct list_head *);
35int propagate_umount(struct list_head *);
36int propagate_mount_busy(struct vfsmount *, int);
37#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a170450aadb..634355e1698 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -70,6 +70,7 @@
70#include <linux/seccomp.h> 70#include <linux/seccomp.h>
71#include <linux/cpuset.h> 71#include <linux/cpuset.h>
72#include <linux/audit.h> 72#include <linux/audit.h>
73#include <linux/poll.h>
73#include "internal.h" 74#include "internal.h"
74 75
75/* 76/*
@@ -660,26 +661,38 @@ static struct file_operations proc_smaps_operations = {
660#endif 661#endif
661 662
662extern struct seq_operations mounts_op; 663extern struct seq_operations mounts_op;
664struct proc_mounts {
665 struct seq_file m;
666 int event;
667};
668
663static int mounts_open(struct inode *inode, struct file *file) 669static int mounts_open(struct inode *inode, struct file *file)
664{ 670{
665 struct task_struct *task = proc_task(inode); 671 struct task_struct *task = proc_task(inode);
666 int ret = seq_open(file, &mounts_op); 672 struct namespace *namespace;
673 struct proc_mounts *p;
674 int ret = -EINVAL;
667 675
668 if (!ret) { 676 task_lock(task);
669 struct seq_file *m = file->private_data; 677 namespace = task->namespace;
670 struct namespace *namespace; 678 if (namespace)
671 task_lock(task); 679 get_namespace(namespace);
672 namespace = task->namespace; 680 task_unlock(task);
673 if (namespace) 681
674 get_namespace(namespace); 682 if (namespace) {
675 task_unlock(task); 683 ret = -ENOMEM;
676 684 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
677 if (namespace) 685 if (p) {
678 m->private = namespace; 686 file->private_data = &p->m;
679 else { 687 ret = seq_open(file, &mounts_op);
680 seq_release(inode, file); 688 if (!ret) {
681 ret = -EINVAL; 689 p->m.private = namespace;
690 p->event = namespace->event;
691 return 0;
692 }
693 kfree(p);
682 } 694 }
695 put_namespace(namespace);
683 } 696 }
684 return ret; 697 return ret;
685} 698}
@@ -692,11 +705,30 @@ static int mounts_release(struct inode *inode, struct file *file)
692 return seq_release(inode, file); 705 return seq_release(inode, file);
693} 706}
694 707
708static unsigned mounts_poll(struct file *file, poll_table *wait)
709{
710 struct proc_mounts *p = file->private_data;
711 struct namespace *ns = p->m.private;
712 unsigned res = 0;
713
714 poll_wait(file, &ns->poll, wait);
715
716 spin_lock(&vfsmount_lock);
717 if (p->event != ns->event) {
718 p->event = ns->event;
719 res = POLLERR;
720 }
721 spin_unlock(&vfsmount_lock);
722
723 return res;
724}
725
695static struct file_operations proc_mounts_operations = { 726static struct file_operations proc_mounts_operations = {
696 .open = mounts_open, 727 .open = mounts_open,
697 .read = seq_read, 728 .read = seq_read,
698 .llseek = seq_lseek, 729 .llseek = seq_lseek,
699 .release = mounts_release, 730 .release = mounts_release,
731 .poll = mounts_poll,
700}; 732};
701 733
702#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 734#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index c20babd6216..7892a865b58 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -251,12 +251,12 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
251 blocks_to_allocate, 251 blocks_to_allocate,
252 blocks_to_allocate); 252 blocks_to_allocate);
253 if (res != CARRY_ON) { 253 if (res != CARRY_ON) {
254 res = -ENOSPC; 254 res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
255 pathrelse(&path); 255 pathrelse(&path);
256 goto error_exit; 256 goto error_exit;
257 } 257 }
258 } else { 258 } else {
259 res = -ENOSPC; 259 res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
260 pathrelse(&path); 260 pathrelse(&path);
261 goto error_exit; 261 goto error_exit;
262 } 262 }
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 38ef913767f..7c40570b71d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -28,13 +28,17 @@
28 */ 28 */
29int seq_open(struct file *file, struct seq_operations *op) 29int seq_open(struct file *file, struct seq_operations *op)
30{ 30{
31 struct seq_file *p = kmalloc(sizeof(*p), GFP_KERNEL); 31 struct seq_file *p = file->private_data;
32 if (!p) 32
33 return -ENOMEM; 33 if (!p) {
34 p = kmalloc(sizeof(*p), GFP_KERNEL);
35 if (!p)
36 return -ENOMEM;
37 file->private_data = p;
38 }
34 memset(p, 0, sizeof(*p)); 39 memset(p, 0, sizeof(*p));
35 sema_init(&p->sem, 1); 40 sema_init(&p->sem, 1);
36 p->op = op; 41 p->op = op;
37 file->private_data = p;
38 42
39 /* 43 /*
40 * Wrappers around seq_open(e.g. swaps_open) need to be 44 * Wrappers around seq_open(e.g. swaps_open) need to be
diff --git a/fs/super.c b/fs/super.c
index eed6c313290..6689dded3c8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -171,6 +171,7 @@ void deactivate_super(struct super_block *s)
171 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 171 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
172 s->s_count -= S_BIAS-1; 172 s->s_count -= S_BIAS-1;
173 spin_unlock(&sb_lock); 173 spin_unlock(&sb_lock);
174 DQUOT_OFF(s);
174 down_write(&s->s_umount); 175 down_write(&s->s_umount);
175 fs->kill_sb(s); 176 fs->kill_sb(s);
176 put_filesystem(fs); 177 put_filesystem(fs);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index bb40d63f328..01f520c71dc 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -186,7 +186,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
186{ 186{
187 int result = -EINVAL; 187 int result = -EINVAL;
188 188
189 if ( permission(inode, MAY_READ, NULL) != 0 ) 189 if ( file_permission(filp, MAY_READ) != 0 )
190 { 190 {
191 udf_debug("no permission to access inode %lu\n", 191 udf_debug("no permission to access inode %lu\n",
192 inode->i_ino); 192 inode->i_ino);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 44fed10af0d..d8e21ba0ccc 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -72,7 +72,6 @@
72#include <linux/init.h> 72#include <linux/init.h>
73#include <linux/list.h> 73#include <linux/list.h>
74#include <linux/proc_fs.h> 74#include <linux/proc_fs.h>
75#include <linux/version.h>
76#include <linux/sort.h> 75#include <linux/sort.h>
77 76
78#include <asm/page.h> 77#include <asm/page.h>
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 99b50d2bda9..1a48dbb902a 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -17,12 +17,5 @@
17 */ 17 */
18#ifndef __XFS_H__ 18#ifndef __XFS_H__
19#define __XFS_H__ 19#define __XFS_H__
20
21#include <linux/version.h>
22#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
23#include <linux-2.6/xfs_linux.h> 20#include <linux-2.6/xfs_linux.h>
24#else
25#include <linux-2.4/xfs_linux.h>
26#endif
27
28#endif /* __XFS_H__ */ 21#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 5a5c7a63e80..864bf695568 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,6 +18,7 @@
18#ifndef __XFS_DMAPI_H__ 18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__ 19#define __XFS_DMAPI_H__
20 20
21#include <linux/version.h>
21/* Values used to define the on-disk version of dm_attrname_t. All 22/* Values used to define the on-disk version of dm_attrname_t. All
22 * on-disk attribute names start with the 8-byte string "SGI_DMI_". 23 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
23 * 24 *