/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

/* NOTE: the header names below were lost in extraction; this is the usual
 * set for this version of the file. */
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/seq_file.h>
#include <linux/mnt_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"

/* spinlock for vfsmount related operations, in place of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static int event;

static struct list_head *mount_hashtable __read_mostly;
static int hash_mask __read_mostly, hash_bits __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;

/* /sys/fs */
decl_subsys(fs, NULL, NULL);
EXPORT_SYMBOL_GPL(fs_subsys);

static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> hash_bits);
	return tmp & hash_mask;
}

struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		if (name) {
			int size = strlen(name) + 1;
			char *newname = kmalloc(size, GFP_KERNEL);
			if (newname) {
				memcpy(newname, name, size);
				mnt->mnt_devname = newname;
			}
		}
	}
	return mnt;
}

int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	mnt->mnt_sb = sb;
	mnt->mnt_root = dget(sb->s_root);
	return 0;
}

EXPORT_SYMBOL(simple_set_mnt);

void free_vfsmnt(struct vfsmount *mnt)
{
	kfree(mnt->mnt_devname);
	kmem_cache_free(mnt_cache, mnt);
}

/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir. If @dir is set return the first mount else return the last mount.
 */
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct vfsmount *p, *found = NULL;

	for (;;) {
		tmp = dir ? tmp->next : tmp->prev;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = p;
			break;
		}
	}
	return found;
}
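/*
 * Illustrative note (not in the original source): more than one mount can
 * be attached at the same (parent mount, mountpoint dentry) pair, for
 * instance when propagation or a mount move stacks a second mount on top
 * of an existing one. Since attach_mnt() appends to the hash chain with
 * list_add_tail(), @dir=1 walks forward and returns the earliest-attached
 * match, while @dir=0 walks backward and returns the most recent one.
 */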
/*
 * lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct vfsmount *child_mnt;

	spin_lock(&vfsmount_lock);
	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
		mntget(child_mnt);
	spin_unlock(&vfsmount_lock);
	return child_mnt;
}

static inline int check_mnt(struct vfsmount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
	old_nd->dentry = mnt->mnt_mountpoint;
	old_nd->mnt = mnt->mnt_parent;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt_root;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_hash);
	old_nd->dentry->d_mounted--;
}

void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	child_mnt->mnt_parent = mntget(mnt);
	child_mnt->mnt_mountpoint = dget(dentry);
	dentry->d_mounted++;
}

static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
	mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
	list_add_tail(&mnt->mnt_hash, mount_hashtable +
			hash(nd->mnt, nd->dentry));
	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
}

/*
 * the caller must hold vfsmount_lock
 */
static void commit_tree(struct vfsmount *mnt)
{
	struct vfsmount *parent = mnt->mnt_parent;
	struct vfsmount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;
	list_splice(&head, n->list.prev);

	list_add_tail(&mnt->mnt_hash, mount_hashtable +
				hash(parent, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	touch_mnt_namespace(n);
}

static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct vfsmount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt_sb;
	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

	if (mnt) {
		mnt->mnt_flags = old->mnt_flags;
		atomic_inc(&sb->s_active);
		mnt->mnt_sb = sb;
		mnt->mnt_root = dget(root);
		mnt->mnt_mountpoint = mnt->mnt_root;
		mnt->mnt_parent = mnt;

		if (flag & CL_SLAVE) {
			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
			mnt->mnt_master = old;
			CLEAR_MNT_SHARED(mnt);
		} else {
			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
				list_add(&mnt->mnt_share, &old->mnt_share);
			if (IS_MNT_SLAVE(old))
				list_add(&mnt->mnt_slave, &old->mnt_slave);
			mnt->mnt_master = old->mnt_master;
		}
		if (flag & CL_MAKE_SHARED)
			set_mnt_shared(mnt);

		/* stick the duplicate mount on the same expiry list
		 * as the original if that was on one */
		if (flag & CL_EXPIRE) {
			spin_lock(&vfsmount_lock);
			if (!list_empty(&old->mnt_expire))
				list_add(&mnt->mnt_expire, &old->mnt_expire);
			spin_unlock(&vfsmount_lock);
		}
	}
	return mnt;
}
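/*
 * Illustrative summary (not in the original source) of the clone_mnt()
 * flag combinations handled above:
 *
 *	clone_mnt(old, root, 0)			peer of @old if @old is
 *						shared, private copy otherwise
 *	clone_mnt(old, root, CL_SLAVE)		new mount becomes a slave
 *						with @old as its master
 *	clone_mnt(old, root, CL_MAKE_SHARED)	the copy is marked shared
 *	clone_mnt(old, root, CL_EXPIRE)		the copy inherits @old's
 *						expiry-list membership
 */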
static inline void __mntput(struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;
	dput(mnt->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);
}

void mntput_no_expire(struct vfsmount *mnt)
{
repeat:
	if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
		if (likely(!mnt->mnt_pinned)) {
			spin_unlock(&vfsmount_lock);
			__mntput(mnt);
			return;
		}
		atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
		mnt->mnt_pinned = 0;
		spin_unlock(&vfsmount_lock);
		acct_auto_close_mnt(mnt);
		security_sb_umount_close(mnt);
		goto repeat;
	}
}

EXPORT_SYMBOL(mntput_no_expire);

void mnt_pin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	mnt->mnt_pinned++;
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_pin);

void mnt_unpin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	if (mnt->mnt_pinned) {
		atomic_inc(&mnt->mnt_count);
		mnt->mnt_pinned--;
	}
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_unpin);

/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct mnt_namespace *n = m->private;
	struct list_head *p;
	loff_t l = *pos;

	down_read(&namespace_sem);
	list_for_each(p, &n->list)
		if (!l--)
			return list_entry(p, struct vfsmount, mnt_list);
	return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct mnt_namespace *n = m->private;
	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
	(*pos)++;
	return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}
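/*
 * Illustrative note (not in the original source): seq_escape() emits each
 * of the listed characters as a backslashed octal triplet, so a device or
 * mount point whose name contains a space, e.g. "/mnt/my disk", appears
 * in /proc/mounts as "/mnt/my\040disk"; a tab becomes "\011", a newline
 * "\012" and a backslash "\134". This keeps each record on one
 * space-separated line that userspace can parse unambiguously.
 */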
" ro" : " rw"); for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { if (mnt->mnt_sb->s_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); return err; } struct seq_operations mounts_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_vfsmnt }; static int show_vfsstat(struct seq_file *m, void *v) { struct vfsmount *mnt = v; int err = 0; /* device */ if (mnt->mnt_devname) { seq_puts(m, "device "); mangle(m, mnt->mnt_devname); } else seq_puts(m, "no device"); /* mount point */ seq_puts(m, " mounted on "); seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); seq_putc(m, ' '); /* file system type */ seq_puts(m, "with fstype "); mangle(m, mnt->mnt_sb->s_type->name); /* optional statistics */ if (mnt->mnt_sb->s_op->show_stats) { seq_putc(m, ' '); err = mnt->mnt_sb->s_op->show_stats(m, mnt); } seq_putc(m, '\n'); return err; } struct seq_operations mountstats_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_vfsstat, }; /** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree * * This is called to check if a tree of mounts has any * open files, pwds, chroots or sub mounts that are * busy. */ int may_umount_tree(struct vfsmount *mnt) { int actual_refs = 0; int minimum_refs = 0; struct vfsmount *p; spin_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } spin_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; return 1; } EXPORT_SYMBOL(may_umount_tree); /** * may_umount - check if a mount point is busy * @mnt: root of mount * * This is called to check if a mount point has any * open files, pwds, chroots or sub mounts. If the * mount has sub mounts this will return busy * regardless of whether the sub mounts are busy. * * Doesn't take quota and stuff into account. IOW, in some cases it will * give false negatives. The main reason why it's here is that we need * a non-destructive way to look for easily umountable filesystems. 
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	spin_lock(&vfsmount_lock);
	if (propagate_mount_busy(mnt, 2))
		ret = 0;
	spin_unlock(&vfsmount_lock);
	return ret;
}

EXPORT_SYMBOL(may_umount);

void release_mounts(struct list_head *head)
{
	struct vfsmount *mnt;
	while (!list_empty(head)) {
		mnt = list_entry(head->next, struct vfsmount, mnt_hash);
		list_del_init(&mnt->mnt_hash);
		if (mnt->mnt_parent != mnt) {
			struct dentry *dentry;
			struct vfsmount *m;
			spin_lock(&vfsmount_lock);
			dentry = mnt->mnt_mountpoint;
			m = mnt->mnt_parent;
			mnt->mnt_mountpoint = mnt->mnt_root;
			mnt->mnt_parent = mnt;
			spin_unlock(&vfsmount_lock);
			dput(dentry);
			mntput(m);
		}
		mntput(mnt);
	}
}

void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
	struct vfsmount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		list_move(&p->mnt_hash, kill);

	if (propagate)
		propagate_umount(kill);

	list_for_each_entry(p, kill, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		list_del_init(&p->mnt_child);
		if (p->mnt_parent != p)
			p->mnt_mountpoint->d_mounted--;
		change_mnt_propagation(p, MS_PRIVATE);
	}
}

static int do_umount(struct vfsmount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt_sb;
	int retval;
	LIST_HEAD(umount_list);

	retval = security_sb_umount(mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (mnt == current->fs->rootmnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		if (atomic_read(&mnt->mnt_count) != 2)
			return -EBUSY;

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee, that's tricky, let's do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. That's for the mount program to worry
	 * about for the moment.
	 */
	lock_kernel();
	if (sb->s_op->umount_begin)
		sb->s_op->umount_begin(mnt, flags);
	unlock_kernel();

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(2). Then init(8) could umount root and exec /reboot.
	 */
	if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY)) {
			lock_kernel();
			DQUOT_OFF(sb);
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
			unlock_kernel();
		}
		up_write(&sb->s_umount);
		return retval;
	}

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	event++;

	retval = -EBUSY;
	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 1, &umount_list);
		retval = 0;
	}
	spin_unlock(&vfsmount_lock);
	if (retval)
		security_sb_umount_busy(mnt);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
	return retval;
}
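/*
 * Illustrative sketch (not in the original source): the flag handling in
 * do_umount() corresponds to the umount2(2) userspace interface, e.g.
 *
 *	#include <sys/mount.h>
 *
 *	umount2("/mnt", 0);		// ordinary unmount, -EBUSY if busy
 *	umount2("/mnt", MNT_DETACH);	// lazy: detach now, clean up when
 *					// the last reference is dropped
 *	umount2("/mnt", MNT_EXPIRE);	// -EAGAIN on the first call, then
 *					// unmounts on the second call if
 *					// the mount stayed unused
 */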
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */
asmlinkage long sys_umount(char __user * name, int flags)
{
	struct nameidata nd;
	int retval;

	retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
	if (retval)
		goto out;
	retval = -EINVAL;
	if (nd.dentry != nd.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(nd.mnt))
		goto dput_and_out;

	retval = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(nd.mnt, flags);
dput_and_out:
	path_release_on_umount(&nd);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * The 2.0 compatible umount. No flags.
 */
asmlinkage long sys_oldumount(char __user * name)
{
	return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct nameidata *nd)
{
	if (capable(CAP_SYS_ADMIN))
		return 0;
	return -EPERM;
#ifdef notyet
	if (S_ISLNK(nd->dentry->d_inode->i_mode))
		return -EPERM;
	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
		if (current->uid != nd->dentry->d_inode->i_uid)
			return -EPERM;
	}
	if (vfs_permission(nd, MAY_WRITE))
		return -EPERM;
	return 0;
#endif
}

static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
{
	while (1) {
		if (d == dentry)
			return 1;
		if (d == NULL || d == d->d_parent)
			return 0;
		d = d->d_parent;
	}
}

struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
					int flag)
{
	struct vfsmount *res, *p, *q, *r, *s;
	struct nameidata nd;

	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
		return NULL;

	res = q = clone_mnt(mnt, dentry, flag);
	if (!q)
		goto Enomem;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			nd.mnt = q;
			nd.dentry = p->mnt_mountpoint;
			q = clone_mnt(p, p->mnt_root, flag);
			if (!q)
				goto Enomem;
			spin_lock(&vfsmount_lock);
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, &nd);
			spin_unlock(&vfsmount_lock);
		}
	}
	return res;
Enomem:
	if (res) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(res, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}
	return NULL;
}
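/*
 * Illustrative example (not in the original source): suppose @mnt carries
 * submounts at /a and /a/b plus an unrelated one at /c, and copy_tree() is
 * asked to copy only the subtree rooted at the dentry of /a:
 *
 *	mnt			copy rooted at @dentry (/a)
 *	|-- /a		->	res
 *	|   `-- /a/b	->	`-- copy of /a/b
 *	`-- /c			(skipped: /c does not live below /a)
 *
 * lives_below_in_same_fs() filters /c out, and the p/q walk above
 * re-parents each clone under the copy of its original parent.
 */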
/*
 *  @source_mnt : mount tree to be attached
 *  @nd         : place the mount tree @source_mnt is attached
 *  @parent_nd  : if non-null, detach the source_mnt from its parent and
 *		  store the parent mount and mountpoint dentry.
 *		  (done when source_mnt is moved)
 *
 *  NOTE: the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared       |       private  |       slave    | unbindable |
 * | dest     |              |                |                |            |
 * |   |      |              |                |                |            |
 * |   v      |              |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)  |     shared (+) |     shared(+++)|  invalid   |
 * |          |              |                |                |            |
 * |non-shared| shared (+)   |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 *	 tree of the destination mount and the cloned mount is added to
 *	 the peer group of the source mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *	 as shared. The cloned mount is added to the peer group of the source
 *	 mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *	 of the destination mount and the cloned mount is made slave
 *	 of the same master as that of the source mount. The cloned mount
 *	 is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 *	 source mount.
 *
 * ---------------------------------------------------------------------------
 * |         MOVE MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared       |       private  |       slave    | unbindable |
 * | dest     |              |                |                |            |
 * |   |      |              |                |                |            |
 * |   v      |              |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)   |     shared (+) |    shared(+++) |  invalid   |
 * |          |              |                |                |            |
 * |non-shared| shared (+*)  |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)   the mount is moved to the destination. And is then propagated to
 *	 all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++) the mount is moved to the destination and is then propagated to
 *	 all the mounts belonging to the destination mount's propagation tree.
 *	 the mount is marked as 'shared and slave'.
 * (*)   the mount continues to be a slave at the new location.
 *
 * if the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */
static int attach_recursive_mnt(struct vfsmount *source_mnt,
			struct nameidata *nd, struct nameidata *parent_nd)
{
	LIST_HEAD(tree_list);
	struct vfsmount *dest_mnt = nd->mnt;
	struct dentry *dest_dentry = nd->dentry;
	struct vfsmount *child, *p;

	if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
		return -EINVAL;

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}

	spin_lock(&vfsmount_lock);
	if (parent_nd) {
		detach_mnt(source_mnt, parent_nd);
		attach_mnt(source_mnt, nd);
		touch_mnt_namespace(current->nsproxy->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
		commit_tree(source_mnt);
	}

	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		commit_tree(child);
	}
	spin_unlock(&vfsmount_lock);
	return 0;
}

static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	err = -ENOENT;
	mutex_lock(&nd->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(nd->dentry->d_inode))
		goto out_unlock;

	err = security_sb_check_sb(mnt, nd);
	if (err)
		goto out_unlock;

	err = -ENOENT;
	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
		err = attach_recursive_mnt(mnt, nd, NULL);
out_unlock:
	mutex_unlock(&nd->dentry->d_inode->i_mutex);
	if (!err)
		security_sb_post_addmount(mnt, nd);
	return err;
}
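/*
 * Illustrative sketch (not in the original source): the propagation types
 * in the tables above are what userspace selects through mount(2), e.g.
 *
 *	#include <sys/mount.h>
 *
 *	mount(NULL, "/mnt", NULL, MS_SHARED, NULL);	// make shared
 *	mount(NULL, "/mnt", NULL, MS_SLAVE, NULL);	// make slave
 *	mount(NULL, "/mnt", NULL, MS_PRIVATE, NULL);	// make private
 *	mount(NULL, "/mnt", NULL, MS_UNBINDABLE, NULL);	// make unbindable
 *	mount(NULL, "/mnt", NULL, MS_SHARED | MS_REC, NULL); // whole subtree
 *
 * Such requests are routed to do_change_type() below.
 */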
/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct nameidata *nd, int flag)
{
	struct vfsmount *m, *mnt = nd->mnt;
	int recurse = flag & MS_REC;
	int type = flag & ~MS_REC;

	if (nd->dentry != nd->mnt->mnt_root)
		return -EINVAL;

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);
	return 0;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
	struct nameidata old_nd;
	struct vfsmount *mnt = NULL;
	int err = mount_is_safe(nd);
	if (err)
		return err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
	if (err)
		return err;

	down_write(&namespace_sem);
	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old_nd.mnt))
		goto out;

	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
		goto out;

	err = -ENOMEM;
	if (recurse)
		mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
	else
		mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);

	if (!mnt)
		goto out;

	err = graft_tree(mnt, nd);
	if (err) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(mnt, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}

out:
	up_write(&namespace_sem);
	path_release(&old_nd);
	return err;
}
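/*
 * Illustrative sketch (not in the original source): do_loopback() backs
 * the bind-mount forms of mount(2):
 *
 *	mount("/src", "/dst", NULL, MS_BIND, NULL);	     // one mount
 *	mount("/src", "/dst", NULL, MS_BIND | MS_REC, NULL); // whole tree
 *
 * equivalent to "mount --bind /src /dst" and "mount --rbind /src /dst"
 * from the shell.
 */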
/*
 * drivers/base/node.c - basic Node class support
 */

#include <linux/sysdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/node.h>
#include <linux/hugetlb.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/swap.h>
#include <linux/gfp.h>

static struct sysdev_class_attribute *node_state_attrs[];

static struct sysdev_class node_class = {
	.name = "node",
	.attrs = node_state_attrs,
};

static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
{
	struct node *node_dev = to_node(dev);
	const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id);
	int len;

	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));

	len = type ? cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
		     cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
	buf[len++] = '\n';
	buf[len] = '\0';
	return len;
}

static inline ssize_t node_read_cpumask(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
{
	return node_read_cpumap(dev, 0, buf);
}
static inline ssize_t node_read_cpulist(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
{
	return node_read_cpumap(dev, 1, buf);
}

static SYSDEV_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);

#define K(x) ((x) << (PAGE_SHIFT - 10))
static ssize_t node_read_meminfo(struct sys_device * dev,
			struct sysdev_attribute *attr, char * buf)
{
	int n;
	int nid = dev->id;
	struct sysinfo i;

	si_meminfo_node(&i, nid);

	n = sprintf(buf, "\n"
		       "Node %d MemTotal:       %8lu kB\n"
		       "Node %d MemFree:        %8lu kB\n"
		       "Node %d MemUsed:        %8lu kB\n"
		       "Node %d Active:         %8lu kB\n"
		       "Node %d Inactive:       %8lu kB\n"
		       "Node %d Active(anon):   %8lu kB\n"
		       "Node %d Inactive(anon): %8lu kB\n"
		       "Node %d Active(file):   %8lu kB\n"
		       "Node %d Inactive(file): %8lu kB\n"
		       "Node %d Unevictable:    %8lu kB\n"
		       "Node %d Mlocked:        %8lu kB\n"
#ifdef CONFIG_HIGHMEM
		       "Node %d HighTotal:      %8lu kB\n"
		       "Node %d HighFree:       %8lu kB\n"
		       "Node %d LowTotal:       %8lu kB\n"
		       "Node %d LowFree:        %8lu kB\n"
#endif
		       "Node %d Dirty:          %8lu kB\n"
		       "Node %d Writeback:      %8lu kB\n"
		       "Node %d FilePages:      %8lu kB\n"
		       "Node %d Mapped:         %8lu kB\n"
		       "Node %d AnonPages:      %8lu kB\n"
		       "Node %d Shmem:          %8lu kB\n"
		       "Node %d KernelStack:    %8lu kB\n"
		       "Node %d PageTables:     %8lu kB\n"
		       "Node %d NFS_Unstable:   %8lu kB\n"
		       "Node %d Bounce:         %8lu kB\n"
		       "Node %d WritebackTmp:   %8lu kB\n"
		       "Node %d Slab:           %8lu kB\n"
		       "Node %d SReclaimable:   %8lu kB\n"
		       "Node %d SUnreclaim:     %8lu kB\n",
		       nid, K(i.totalram),
		       nid, K(i.freeram),
		       nid, K(i.totalram - i.freeram),
		       nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
				node_page_state(nid, NR_ACTIVE_FILE)),
		       nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
				node_page_state(nid, NR_INACTIVE_FILE)),
		       nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
		       nid, K(node_page_state(nid, NR_MLOCK)),
#ifdef CONFIG_HIGHMEM
		       nid, K(i.totalhigh),
		       nid, K(i.freehigh),
		       nid, K(i.totalram - i.totalhigh),
		       nid, K(i.freeram - i.freehigh),
#endif
		       nid, K(node_page_state(nid, NR_FILE_DIRTY)),
		       nid, K(node_page_state(nid, NR_WRITEBACK)),
		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
		       nid, K(node_page_state(nid, NR_ANON_PAGES)),
		       nid, K(node_page_state(nid, NR_SHMEM)),
		       nid, node_page_state(nid, NR_KERNEL_STACK) *
				THREAD_SIZE / 1024,
		       nid, K(node_page_state(nid, NR_PAGETABLE)),
		       nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
		       nid, K(node_page_state(nid, NR_BOUNCE)),
		       nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)),
		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
				node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
		       nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
	n += hugetlb_report_node_meminfo(nid, buf + n);
	return n;
}

#undef K
static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
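/*
 * Illustrative sample (not in the original source): with the attributes
 * above registered, each NUMA node exposes read-only sysfs files such as
 *
 *	/sys/devices/system/node/node0/cpumap	bitmask form, e.g. "0000000f"
 *	/sys/devices/system/node/node0/cpulist	list form,    e.g. "0-3"
 *	/sys/devices/system/node/node0/meminfo	per-node /proc/meminfo view
 *
 * with meminfo lines of the form "Node 0 MemTotal:  8388608 kB".
 */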
static ssize_t node_read_numastat(struct sys_device * dev,
				struct sysdev_attribute *attr, char * buf)
{
	return sprintf(buf,
		       "numa_hit %lu\n"
		       "numa_miss %lu\n"
		       "numa_foreign %lu\n"
		       "interleave_hit %lu\n"
		       "local_node %lu\n"
		       "other_node %lu\n",
		       node_page_state(dev->id, NUMA_HIT),
		       node_page_state(dev->id, NUMA_MISS),
		       node_page_state(dev->id, NUMA_FOREIGN),
		       node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
		       node_page_state(dev->id, NUMA_LOCAL),
		       node_page_state(dev->id, NUMA_OTHER));
}
static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);

static ssize_t node_read_distance(struct sys_device * dev,
			struct sysdev_attribute *attr, char * buf)
{
	int nid = dev->id;
	int len = 0;
	int i;

	/*
	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
	 * at the most (distance + space or newline).
	 */