27 files changed, 3140 insertions, 2478 deletions
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index c5182bb2c16c..f87241dfed87 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -342,7 +342,10 @@ kset use:
 When you are finished with the kset, call:
  void kset_unregister(struct kset *kset);
-to destroy it.
+to destroy it.  This removes the kset from sysfs and decrements its reference
+count.  When the reference count goes to zero, the kset will be released.
+Because other references to the kset may still exist, the release may happen
+after kset_unregister() returns.
 An example of using a kset can be seen in the
 samples/kobject/kset-example.c file in the kernel tree.
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c3d4cc972eca..22b3a1191ab3 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -430,7 +430,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
        if (c->x86 >= 0x15)
                snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
-        if (request_firmware(&fw, (const char *)fw_name, device)) {
+        if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
                pr_debug("failed to load file %s\n", fw_name);
                goto out;
        }
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 5fb2cebf556b..a276fa75d9b5 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
        sprintf(name, "intel-ucode/%02x-%02x-%02x",
                c->x86, c->x86_model, c->x86_mask);
-        if (request_firmware(&firmware, name, device)) {
+        if (request_firmware_direct(&firmware, name, device)) {
                pr_debug("data file %s load failed\n", name);
                return UCODE_NFOUND;
        }
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 67b180d855b2..aab43fbb8336 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1603,6 +1603,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
                goto error;
        }
+        device_initialize(dev);
        dev->devt = devt;
        dev->class = class;
        dev->parent = parent;
@@ -1614,7 +1615,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
        if (retval)
                goto error;
-        retval = device_register(dev);
+        retval = device_add(dev);
        if (retval)
                goto error;
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index eb8fb94ae2c5..33b87bf664ab 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -96,6 +96,15 @@ static inline long firmware_loading_timeout(void)
        return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT;
 }
+/* firmware behavior options, OR-ed together and passed around as opt_flags */
+#define FW_OPT_UEVENT   (1U << 0)	/* emit a uevent for the request; also gates firmware name caching */
+#define FW_OPT_NOWAIT   (1U << 1)	/* asynchronous request (sets fw_priv->nowait) */
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+#define FW_OPT_FALLBACK (1U << 2)	/* fall back to the user helper when the direct load fails */
+#else
+#define FW_OPT_FALLBACK 0		/* no user helper: the fallback test is always false */
+#endif
 struct firmware_cache {
        /* firmware_buf instance will be added into the below list */
        spinlock_t lock;
@@ -820,7 +829,7 @@ static void firmware_class_timeout_work(struct work_struct *work)
 static struct firmware_priv *
 fw_create_instance(struct firmware *firmware, const char *fw_name,
-                   struct device *device, bool uevent, bool nowait)
+                   struct device *device, unsigned int opt_flags)
 {
        struct firmware_priv *fw_priv;
        struct device *f_dev;
@@ -832,7 +841,7 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
                goto exit;
        }
-        fw_priv->nowait = nowait;
+        fw_priv->nowait = !!(opt_flags & FW_OPT_NOWAIT);
        fw_priv->fw = firmware;
        INIT_DELAYED_WORK(&fw_priv->timeout_work,
                firmware_class_timeout_work);
@@ -848,8 +857,8 @@ exit:
 }
 /* load a firmware via user helper */
-static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
+static int _request_firmware_load(struct firmware_priv *fw_priv,
-                                  long timeout)
+                                  unsigned int opt_flags, long timeout)
 {
        int retval = 0;
        struct device *f_dev = &fw_priv->dev;
@@ -885,7 +894,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
                goto err_del_bin_attr;
        }
-        if (uevent) {
+        if (opt_flags & FW_OPT_UEVENT) {
                buf->need_uevent = true;
                dev_set_uevent_suppress(f_dev, false);
                dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
@@ -911,16 +920,16 @@ err_put_dev:
 static int fw_load_from_user_helper(struct firmware *firmware,
                                    const char *name, struct device *device,
-                                    bool uevent, bool nowait, long timeout)
+                                    unsigned int opt_flags, long timeout)
 {
        struct firmware_priv *fw_priv;
-        fw_priv = fw_create_instance(firmware, name, device, uevent, nowait);
+        fw_priv = fw_create_instance(firmware, name, device, opt_flags);
        if (IS_ERR(fw_priv))
                return PTR_ERR(fw_priv);
        fw_priv->buf = firmware->priv;
-        return _request_firmware_load(fw_priv, uevent, timeout);
+        return _request_firmware_load(fw_priv, opt_flags, timeout);
 }
 #ifdef CONFIG_PM_SLEEP
@@ -942,7 +951,7 @@ static void kill_requests_without_uevent(void)
 #else /* CONFIG_FW_LOADER_USER_HELPER */
 static inline int
 fw_load_from_user_helper(struct firmware *firmware, const char *name,
-                         struct device *device, bool uevent, bool nowait,
+                         struct device *device, unsigned int opt_flags,
                         long timeout)
 {
        return -ENOENT;
@@ -1023,7 +1032,7 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name,
 }
 static int assign_firmware_buf(struct firmware *fw, struct device *device,
-                                bool skip_cache)
+                               unsigned int opt_flags)
 {
        struct firmware_buf *buf = fw->priv;
@@ -1040,7 +1049,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
         * device may has been deleted already, but the problem
         * should be fixed in devres or driver core.
         */
-        if (device && !skip_cache)
+        /* don't cache firmware handled without uevent */
+        if (device && (opt_flags & FW_OPT_UEVENT))
                fw_add_devm_name(device, buf->fw_id);
        /*
@@ -1061,7 +1071,7 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
 /* called from request_firmware() and request_firmware_work_func() */
 static int
 _request_firmware(const struct firmware **firmware_p, const char *name,
-                  struct device *device, bool uevent, bool nowait)
+                  struct device *device, unsigned int opt_flags)
 {
        struct firmware *fw;
        long timeout;
@@ -1076,7 +1086,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        ret = 0;
        timeout = firmware_loading_timeout();
-        if (nowait) {
+        if (opt_flags & FW_OPT_NOWAIT) {
                timeout = usermodehelper_read_lock_wait(timeout);
                if (!timeout) {
                        dev_dbg(device, "firmware: %s loading timed out\n",
@@ -1095,16 +1105,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        ret = fw_get_filesystem_firmware(device, fw->priv);
        if (ret) {
-                dev_warn(device, "Direct firmware load failed with error %d\n",
+                if (opt_flags & FW_OPT_FALLBACK) {
-                         ret);
+                        dev_warn(device,
-                dev_warn(device, "Falling back to user helper\n");
+                                 "Direct firmware load failed with error %d\n",
-                ret = fw_load_from_user_helper(fw, name, device,
+                                 ret);
-                                               uevent, nowait, timeout);
+                        dev_warn(device, "Falling back to user helper\n");
+                        ret = fw_load_from_user_helper(fw, name, device,
+                                                       opt_flags, timeout);
+                }
        }
-        /* don't cache firmware handled without uevent */
        if (!ret)
-                ret = assign_firmware_buf(fw, device, !uevent);
+                ret = assign_firmware_buf(fw, device, opt_flags);
        usermodehelper_read_unlock();
@@ -1146,12 +1158,37 @@ request_firmware(const struct firmware **firmware_p, const char *name,
        /* Need to pin this module until return */
        __module_get(THIS_MODULE);
-        ret = _request_firmware(firmware_p, name, device, true, false);
+        ret = _request_firmware(firmware_p, name, device,
+                                FW_OPT_UEVENT | FW_OPT_FALLBACK);
        module_put(THIS_MODULE);
        return ret;
 }
 EXPORT_SYMBOL(request_firmware);
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+/**
+ * request_firmware_direct: - load firmware directly without usermode helper
+ * @firmware_p: pointer to firmware image
+ * @name: name of firmware file
+ * @device: device for which firmware is being loaded
+ *
+ * This function works pretty much like request_firmware(), but this doesn't
+ * fall back to usermode helper even if the firmware couldn't be loaded
+ * directly from fs.  Hence it's useful for loading optional firmwares, which
+ * aren't always present, without extra long timeouts of udev.
+ *
+ * The request is issued with FW_OPT_UEVENT only; FW_OPT_FALLBACK is
+ * deliberately left out.  Returns whatever _request_firmware() returns.
+ **/
+int request_firmware_direct(const struct firmware **firmware_p,
+                            const char *name, struct device *device)
+{
+        int ret;
+        __module_get(THIS_MODULE);	/* pin this module until return */
+        ret = _request_firmware(firmware_p, name, device, FW_OPT_UEVENT);
+        module_put(THIS_MODULE);
+        return ret;
+}
+EXPORT_SYMBOL_GPL(request_firmware_direct);
+#endif
 /**
 * release_firmware: - release the resource associated with a firmware image
 * @fw: firmware resource to release
@@ -1174,7 +1211,7 @@ struct firmware_work {
        struct device *device;
        void *context;
        void (*cont)(const struct firmware *fw, void *context);
-        bool uevent;
+        unsigned int opt_flags;
 };
 static void request_firmware_work_func(struct work_struct *work)
@@ -1185,7 +1222,7 @@ static void request_firmware_work_func(struct work_struct *work)
        fw_work = container_of(work, struct firmware_work, work);
        _request_firmware(&fw, fw_work->name, fw_work->device,
-                          fw_work->uevent, true);
+                          fw_work->opt_flags);
        fw_work->cont(fw, fw_work->context);
        put_device(fw_work->device); /* taken in request_firmware_nowait() */
@@ -1233,7 +1270,8 @@ request_firmware_nowait(
        fw_work->device = device;
        fw_work->context = context;
        fw_work->cont = cont;
-        fw_work->uevent = uevent;
+        fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
+                (uevent ? FW_OPT_UEVENT : 0);
        if (!try_module_get(module)) {
                kfree(fw_work);
diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
index eb26d62e5188..e0f1cb3d3598 100644
--- a/drivers/firmware/dmi-sysfs.c
+++ b/drivers/firmware/dmi-sysfs.c
@@ -553,7 +553,7 @@ static const struct bin_attribute dmi_entry_raw_attr = {
 static void dmi_sysfs_entry_release(struct kobject *kobj)
 {
        struct dmi_sysfs_entry *entry = to_entry(kobj);
-        sysfs_remove_bin_file(&entry->kobj, &dmi_entry_raw_attr);
        spin_lock(&entry_list_lock);
        list_del(&entry->list);
        spin_unlock(&entry_list_lock);
@@ -685,6 +685,7 @@ static void __exit dmi_sysfs_exit(void)
        pr_debug("dmi-sysfs: unloading.\n");
        cleanup_entry_list();
        kset_unregister(dmi_kset);
+        kobject_del(dmi_kobj);
        kobject_put(dmi_kobj);
 }
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3ec28f..39a824f44e7c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE)		+= fhandle.o
 obj-y                           += quota/
 obj-$(CONFIG_PROC_FS)           += proc/
-obj-$(CONFIG_SYSFS)             += sysfs/
+obj-$(CONFIG_SYSFS)             += sysfs/ kernfs/
 obj-$(CONFIG_CONFIGFS_FS)       += configfs/
 obj-y                           += devpts/
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile
new file mode 100644
index 000000000000..674337c76673
--- /dev/null
+++ b/fs/kernfs/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the kernfs pseudo filesystem
+#
+obj-y           := mount.o inode.o dir.o file.o symlink.o
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
new file mode 100644
index 000000000000..a441e3be8052
--- /dev/null
+++ b/fs/kernfs/dir.c
@@ -0,0 +1,1020 @@
+/*
+ * fs/kernfs/dir.c - kernfs directory implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include <linux/hash.h>
+#include "kernfs-internal.h"
+DEFINE_MUTEX(sysfs_mutex);	/* held around dirent add/remove, lookup and revalidation in this file */
+#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)	/* rb_node -> enclosing sysfs_dirent */
+/**
+ *      sysfs_name_hash
+ *      @name: Null terminated string to hash
+ *      @ns:   Namespace tag to hash
+ *
+ *      Returns 31 bit hash of ns + name (so it fits in an off_t ).
+ *
+ *      The values 0, 1 and INT_MAX are reserved for magic directory
+ *      entries, so the result is always within [2, INT_MAX - 1].
+ */
+static unsigned int sysfs_name_hash(const char *name, const void *ns)
+{
+        unsigned long hash = init_name_hash();
+        unsigned int len = strlen(name);
+        while (len--)
+                hash = partial_name_hash(*name++, hash);
+        /* fold the namespace tag in so equal names in different ns differ */
+        hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
+        hash &= 0x7fffffffU;
+        /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
+        if (hash < 2)
+                hash += 2;
+        if (hash >= INT_MAX)
+                hash = INT_MAX - 1;
+        return hash;
+}
+/*
+ * Order the key (@hash, @ns, @name) against @sd: hash first, then namespace
+ * tag, then name.  Returns <0, 0 or >0 like strcmp().
+ *
+ * Explicit comparisons are used instead of subtraction: a pointer
+ * difference truncated to int can change sign or collapse to 0 for
+ * distinct namespace tags, which would corrupt the rbtree ordering.
+ */
+static int sysfs_name_compare(unsigned int hash, const char *name,
+                              const void *ns, const struct sysfs_dirent *sd)
+{
+        if (hash < sd->s_hash)
+                return -1;
+        if (hash > sd->s_hash)
+                return 1;
+        if (ns < sd->s_ns)
+                return -1;
+        if (ns > sd->s_ns)
+                return 1;
+        return strcmp(name, sd->s_name);
+}
+/*
+ * Compare two dirents using @left's (hash, ns, name) as the key; the sign
+ * of the result is whatever sysfs_name_compare() reports.
+ */
+static int sysfs_sd_compare(const struct sysfs_dirent *left,
+                            const struct sysfs_dirent *right)
+{
+        return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
+                                  right);
+}
+/**
+ *      sysfs_link_sibling - link sysfs_dirent into sibling rbtree
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Link @sd into its sibling rbtree which starts from
+ *      sd->s_parent->s_dir.children.  The parent's subdirs count is
+ *      only bumped once @sd has actually been inserted, so a failed
+ *      insertion leaves the parent's accounting untouched.
+ *
+ *      Locking:
+ *      mutex_lock(sysfs_mutex)
+ *
+ *      RETURNS:
+ *      0 on success, -EEXIST if an entry comparing equal to @sd is
+ *      already linked under the parent.
+ */
+static int sysfs_link_sibling(struct sysfs_dirent *sd)
+{
+        struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
+        struct rb_node *parent = NULL;
+        while (*node) {
+                struct sysfs_dirent *pos;
+                int result;
+                pos = to_sysfs_dirent(*node);
+                parent = *node;
+                result = sysfs_sd_compare(sd, pos);
+                if (result < 0)
+                        node = &pos->s_rb.rb_left;
+                else if (result > 0)
+                        node = &pos->s_rb.rb_right;
+                else
+                        return -EEXIST;
+        }
+        /* add new node and rebalance the tree */
+        rb_link_node(&sd->s_rb, parent, node);
+        rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
+        /* successfully added, account subdir number */
+        if (sysfs_type(sd) == SYSFS_DIR)
+                sd->s_parent->s_dir.subdirs++;
+        return 0;
+}
+/**
+ *      sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
+ *      @sd: sysfs_dirent of interest
+ *
+ *      Unlink @sd from its sibling rbtree which starts from
+ *      sd->s_parent->s_dir.children.  If @sd is a directory the
+ *      parent's subdirs count is decremented as well.
+ *
+ *      Locking:
+ *      mutex_lock(sysfs_mutex)
+ */
+static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
+{
+        if (sysfs_type(sd) == SYSFS_DIR)
+                sd->s_parent->s_dir.subdirs--;
+        rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
+}
+/**
+ *      sysfs_get_active - get an active reference to sysfs_dirent
+ *      @sd: sysfs_dirent to get an active reference to
+ *
+ *      Get an active reference of @sd.  This function is noop if @sd
+ *      is NULL.
+ *
+ *      The reference is refused once @sd->s_active is negative, which
+ *      is the state sysfs_deactivate() creates by adding
+ *      SD_DEACTIVATED_BIAS (presumably a large negative constant; it
+ *      is defined outside this file).  When SYSFS_FLAG_LOCKDEP is set
+ *      the reference is also reported to lockdep as a read acquisition
+ *      of @sd->dep_map.
+ *
+ *      RETURNS:
+ *      Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
+{
+        if (unlikely(!sd))
+                return NULL;
+        if (!atomic_inc_unless_negative(&sd->s_active))
+                return NULL;
+        if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
+                rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
+        return sd;
+}
+/**
+ *      sysfs_put_active - put an active reference to sysfs_dirent
+ *      @sd: sysfs_dirent to put an active reference to
+ *
+ *      Put an active reference to @sd.  This function is noop if @sd
+ *      is NULL.
+ *
+ *      If the decrement leaves s_active at exactly SD_DEACTIVATED_BIAS,
+ *      sysfs_deactivate() has already added the bias and this was the
+ *      last outstanding active reference, so the completion it
+ *      published in @sd->u.completion is signalled.
+ */
+void sysfs_put_active(struct sysfs_dirent *sd)
+{
+        int v;
+        if (unlikely(!sd))
+                return;
+        if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
+                rwsem_release(&sd->dep_map, 1, _RET_IP_);
+        v = atomic_dec_return(&sd->s_active);
+        if (likely(v != SD_DEACTIVATED_BIAS))
+                return;
+        /* atomic_dec_return() is a mb(), we'll always see the updated
+         * sd->u.completion.
+         */
+        complete(sd->u.completion);
+}
+/**
+ *      sysfs_deactivate - deactivate sysfs_dirent
+ *      @sd: sysfs_dirent to deactivate
+ *
+ *      Deny new active references and drain existing ones.
+ *
+ *      @sd must already be flagged SYSFS_FLAG_REMOVED (BUG otherwise).
+ *      Dirent types without SYSFS_ACTIVE_REF are skipped.  For the
+ *      rest, an on-stack completion is published through
+ *      @sd->u.completion, SD_DEACTIVATED_BIAS is added to s_active,
+ *      and if the result is not exactly the bias (active references
+ *      are still outstanding) the caller sleeps until the last
+ *      sysfs_put_active() completes it.  The rwsem_acquire() /
+ *      lock_contended() / lock_acquired() / rwsem_release() calls only
+ *      describe this wait to lockdep.
+ */
+static void sysfs_deactivate(struct sysfs_dirent *sd)
+{
+        DECLARE_COMPLETION_ONSTACK(wait);
+        int v;
+        BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
+        if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
+                return;
+        sd->u.completion = (void *)&wait;
+        rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
+        /* atomic_add_return() is a mb(), put_active() will always see
+         * the updated sd->u.completion.
+         */
+        v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
+        if (v != SD_DEACTIVATED_BIAS) {
+                lock_contended(&sd->dep_map, _RET_IP_);
+                wait_for_completion(&wait);
+        }
+        lock_acquired(&sd->dep_map, _RET_IP_);
+        rwsem_release(&sd->dep_map, 1, _RET_IP_);
+}
+/**
+ * kernfs_get - get a reference count on a sysfs_dirent
+ * @sd: the target sysfs_dirent
+ *
+ * NULL @sd is accepted and ignored.  Warns if the count is already zero,
+ * since that means the caller did not hold a reference of its own.
+ */
+void kernfs_get(struct sysfs_dirent *sd)
+{
+        if (sd) {
+                WARN_ON(!atomic_read(&sd->s_count));
+                atomic_inc(&sd->s_count);
+        }
+}
+EXPORT_SYMBOL_GPL(kernfs_get);
+/**
+ * kernfs_put - put a reference count on a sysfs_dirent
+ * @sd: the target sysfs_dirent
+ *
+ * Put a reference count of @sd and destroy it if it reached zero.
+ *
+ * NULL @sd is ignored.  Destroying a node releases its symlink target
+ * reference, its copied name, its inode attributes and its inode number,
+ * then drops the reference the node held on its parent; if that was the
+ * parent's last reference the parent is destroyed the same way, and so on
+ * up the chain.  When a node without a parent is destroyed, the
+ * kernfs_root is freed too.  A warning is emitted if a node is freed
+ * without SYSFS_FLAG_REMOVED set.
+ */
+void kernfs_put(struct sysfs_dirent *sd)
+{
+        struct sysfs_dirent *parent_sd;
+        struct kernfs_root *root;
+        if (!sd || !atomic_dec_and_test(&sd->s_count))
+                return;
+        root = kernfs_root(sd);
+ repeat:
+        /* Moving/renaming is always done while holding reference.
+         * sd->s_parent won't change beneath us.
+         */
+        parent_sd = sd->s_parent;
+        WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
+                "sysfs: free using entry: %s/%s\n",
+                parent_sd ? parent_sd->s_name : "", sd->s_name);
+        if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
+                kernfs_put(sd->s_symlink.target_sd);
+        if (sysfs_type(sd) & SYSFS_COPY_NAME)
+                kfree(sd->s_name);
+        if (sd->s_iattr) {
+                if (sd->s_iattr->ia_secdata)
+                        security_release_secctx(sd->s_iattr->ia_secdata,
+                                                sd->s_iattr->ia_secdata_len);
+                simple_xattrs_free(&sd->s_iattr->xattrs);
+        }
+        kfree(sd->s_iattr);
+        ida_simple_remove(&root->ino_ida, sd->s_ino);
+        kmem_cache_free(sysfs_dir_cachep, sd);
+        sd = parent_sd;
+        if (sd) {
+                if (atomic_dec_and_test(&sd->s_count))
+                        goto repeat;
+        } else {
+                /* just released the root sd, free @root too */
+                ida_destroy(&root->ino_ida);
+                kfree(root);
+        }
+}
+EXPORT_SYMBOL_GPL(kernfs_put);
+/*
+ * d_delete hook: returns 0 only while the dentry still has a dirent that
+ * is not flagged SYSFS_FLAG_REMOVED, and 1 otherwise (no dirent attached,
+ * or the dirent has been removed).
+ */
+static int sysfs_dentry_delete(const struct dentry *dentry)
+{
+        struct sysfs_dirent *sd = dentry->d_fsdata;
+        return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
+}
+/*
+ * d_revalidate hook: check under sysfs_mutex that the dirent behind
+ * @dentry is still live, still under the same parent, still has the same
+ * name and (for namespace-enabled parents) still matches the superblock's
+ * namespace tag.
+ *
+ * Returns -ECHILD for LOOKUP_RCU walks, 1 if the dentry is valid (or is
+ * stale but could not be dropped because check_submounts_and_drop()
+ * failed), and 0 if it was stale and has been dropped.
+ */
+static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+        struct sysfs_dirent *sd;
+        if (flags & LOOKUP_RCU)
+                return -ECHILD;
+        sd = dentry->d_fsdata;
+        mutex_lock(&sysfs_mutex);
+        /* The sysfs dirent has been deleted */
+        if (sd->s_flags & SYSFS_FLAG_REMOVED)
+                goto out_bad;
+        /* The sysfs dirent has been moved? */
+        if (dentry->d_parent->d_fsdata != sd->s_parent)
+                goto out_bad;
+        /* The sysfs dirent has been renamed */
+        if (strcmp(dentry->d_name.name, sd->s_name) != 0)
+                goto out_bad;
+        /* The sysfs dirent has been moved to a different namespace */
+        if (sd->s_parent && kernfs_ns_enabled(sd->s_parent) &&
+            sysfs_info(dentry->d_sb)->ns != sd->s_ns)
+                goto out_bad;
+        mutex_unlock(&sysfs_mutex);
+out_valid:
+        return 1;
+out_bad:
+        /* Remove the dentry from the dcache hashes.
+         * If this is a deleted dentry we use d_drop instead of d_delete
+         * so sysfs doesn't need to cope with negative dentries.
+         *
+         * If this is a dentry that has simply been renamed we
+         * use d_drop to remove it from the dcache lookup on its
+         * old parent.  If this dentry persists later when a lookup
+         * is performed at its new name the dentry will be readded
+         * to the dcache hashes.
+         */
+        mutex_unlock(&sysfs_mutex);
+        /* If we have submounts we must allow the vfs caches
+         * to lie about the state of the filesystem to prevent
+         * leaks and other nasty things.
+         */
+        if (check_submounts_and_drop(dentry) != 0)
+                goto out_valid;
+        return 0;
+}
+/* d_release hook: drop the dirent reference stored in dentry->d_fsdata. */
+static void sysfs_dentry_release(struct dentry *dentry)
+{
+        kernfs_put(dentry->d_fsdata);
+}
+/* dentry operations wiring up the revalidate/delete/release hooks above */
+const struct dentry_operations sysfs_dentry_ops = {
+        .d_revalidate   = sysfs_dentry_revalidate,
+        .d_delete       = sysfs_dentry_delete,
+        .d_release      = sysfs_dentry_release,
+};
+/*
+ * Allocate and initialise a sysfs_dirent that is not yet linked anywhere.
+ *
+ * @root: hierarchy whose ino_ida supplies the inode number
+ * @name: entry name; duplicated with kstrdup() when @type has
+ *        SYSFS_COPY_NAME, otherwise the pointer is stored as is
+ * @mode: stored in s_mode
+ * @type: stored in s_flags together with SYSFS_FLAG_REMOVED
+ *
+ * The new dirent starts with s_count 1, s_active 0 and SYSFS_FLAG_REMOVED
+ * set; the flag is cleared later when the dirent is added to the tree.
+ * Returns the dirent, or NULL if any allocation fails (everything
+ * allocated so far is released).
+ */
+struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root,
+                                      const char *name, umode_t mode, int type)
+{
+        char *dup_name = NULL;
+        struct sysfs_dirent *sd;
+        int ret;
+        if (type & SYSFS_COPY_NAME) {
+                name = dup_name = kstrdup(name, GFP_KERNEL);
+                if (!name)
+                        return NULL;
+        }
+        sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
+        if (!sd)
+                goto err_out1;
+        ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+        if (ret < 0)
+                goto err_out2;
+        sd->s_ino = ret;
+        atomic_set(&sd->s_count, 1);
+        atomic_set(&sd->s_active, 0);
+        sd->s_name = name;
+        sd->s_mode = mode;
+        sd->s_flags = type | SYSFS_FLAG_REMOVED;
+        return sd;
+ err_out2:
+        kmem_cache_free(sysfs_dir_cachep, sd);
+ err_out1:
+        kfree(dup_name);
+        return NULL;
+}
+/**
+ *      sysfs_addrm_start - prepare for sysfs_dirent add/remove
+ *      @acxt: pointer to sysfs_addrm_cxt to be used
+ *
+ *      This function is called when the caller is about to add or remove
+ *      sysfs_dirent.  This function acquires sysfs_mutex.  @acxt is used
+ *      to keep and pass context to other addrm functions; it is zeroed
+ *      here, so any previous contents are discarded.
+ *
+ *      LOCKING:
+ *      Kernel thread context (may sleep).  sysfs_mutex is locked on
+ *      return.
+ */
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
+        __acquires(sysfs_mutex)
+{
+        memset(acxt, 0, sizeof(*acxt));
+        mutex_lock(&sysfs_mutex);
+}
+/**
+ *      sysfs_add_one - add sysfs_dirent to parent without warning
+ *      @acxt: addrm context to use
+ *      @sd: sysfs_dirent to be added
+ *      @parent_sd: the parent sysfs_dirent to add @sd to
+ *
+ *      Get @parent_sd and set @sd->s_parent to it, compute @sd's name
+ *      hash and link it into the children rbtree of the parent
+ *      (sysfs_link_sibling() also accounts directories in the parent's
+ *      subdirs count).  On success the parent's ctime/mtime are updated
+ *      if it has inode attributes, and SYSFS_FLAG_REMOVED is cleared
+ *      on @sd.
+ *
+ *      If linking fails, @sd->s_parent stays set and the parent
+ *      reference stays held; it is dropped when @sd itself is freed by
+ *      kernfs_put().
+ *
+ *      This function should be called between calls to
+ *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *      passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *      LOCKING:
+ *      Determined by sysfs_addrm_start().
+ *
+ *      RETURNS:
+ *      0 on success, -EEXIST if entry with the given name already
+ *      exists, -EINVAL if @sd's namespace tag does not match what
+ *      @parent_sd requires or if @parent_sd is not a directory.
+ */
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+                  struct sysfs_dirent *parent_sd)
+{
+        bool has_ns = kernfs_ns_enabled(parent_sd);
+        struct sysfs_inode_attrs *ps_iattr;
+        int ret;
+        if (has_ns != (bool)sd->s_ns) {
+                WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
+                     has_ns ? "required" : "invalid",
+                     parent_sd->s_name, sd->s_name);
+                return -EINVAL;
+        }
+        if (sysfs_type(parent_sd) != SYSFS_DIR)
+                return -EINVAL;
+        sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
+        sd->s_parent = parent_sd;
+        kernfs_get(parent_sd);
+        ret = sysfs_link_sibling(sd);
+        if (ret)
+                return ret;
+        /* Update timestamps on the parent */
+        ps_iattr = parent_sd->s_iattr;
+        if (ps_iattr) {
+                struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
+                ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
+        }
+        /* Mark the entry added into directory tree */
+        sd->s_flags &= ~SYSFS_FLAG_REMOVED;
+        return 0;
+}
+/**
+ *      sysfs_remove_one - remove sysfs_dirent from parent
+ *      @acxt: addrm context to use
+ *      @sd: sysfs_dirent to be removed
+ *
+ *      Unlink @sd from its parent's children rbtree (if it has a
+ *      parent), update the parent's timestamps, mark @sd removed and
+ *      chain it onto @acxt->removed so that sysfs_addrm_finish() can
+ *      deactivate and release it after sysfs_mutex is dropped.
+ *
+ *      This function should be called between calls to
+ *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *      passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *      LOCKING:
+ *      Determined by sysfs_addrm_start().
+ */
+static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
+                             struct sysfs_dirent *sd)
+{
+        struct sysfs_inode_attrs *ps_iattr;
+        /*
+         * Removal can be called multiple times on the same node.  Only the
+         * first invocation is effective and puts the base ref.
+         */
+        if (sd->s_flags & SYSFS_FLAG_REMOVED)
+                return;
+        if (sd->s_parent) {
+                sysfs_unlink_sibling(sd);
+                /* Update timestamps on the parent */
+                ps_iattr = sd->s_parent->s_iattr;
+                if (ps_iattr) {
+                        ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
+                        ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
+                }
+        }
+        sd->s_flags |= SYSFS_FLAG_REMOVED;
+        sd->u.removed_list = acxt->removed;
+        acxt->removed = sd;
+}
+/**
+ *      sysfs_addrm_finish - finish up sysfs_dirent add/remove
+ *      @acxt: addrm context to finish up
+ *
+ *      Finish up sysfs_dirent add/remove.  Resources acquired by
+ *      sysfs_addrm_start() are released and removed sysfs_dirents are
+ *      cleaned up: each dirent queued on @acxt->removed is deactivated
+ *      (which may sleep waiting for active references to drain), has
+ *      sysfs_unmap_bin_file() called on it, and then loses one
+ *      reference.  This happens after sysfs_mutex has been unlocked.
+ *
+ *      LOCKING:
+ *      sysfs_mutex is released.
+ */
+void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
+        __releases(sysfs_mutex)
+{
+        /* release resources acquired by sysfs_addrm_start() */
+        mutex_unlock(&sysfs_mutex);
+        /* kill removed sysfs_dirents */
+        while (acxt->removed) {
+                struct sysfs_dirent *sd = acxt->removed;
+                acxt->removed = sd->u.removed_list;
+                sysfs_deactivate(sd);
+                sysfs_unmap_bin_file(sd);
+                kernfs_put(sd);
+        }
+}
+/**
+ * kernfs_find_ns - find sysfs_dirent with the given name
+ * @parent: sysfs_dirent to search under
+ * @name: name to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for sysfs_dirent with name @name under @parent.  Returns pointer to
+ * the found sysfs_dirent on success, %NULL on failure.
+ *
+ * The caller must hold sysfs_mutex (asserted via lockdep) and no reference
+ * is taken on the returned dirent.  Passing a namespace tag to a parent
+ * without namespaces enabled, or omitting it for one that has them,
+ * triggers a warning and returns %NULL.
+ */
+static struct sysfs_dirent *kernfs_find_ns(struct sysfs_dirent *parent,
+                                           const unsigned char *name,
+                                           const void *ns)
+{
+        struct rb_node *node = parent->s_dir.children.rb_node;
+        bool has_ns = kernfs_ns_enabled(parent);
+        unsigned int hash;
+        lockdep_assert_held(&sysfs_mutex);
+        if (has_ns != (bool)ns) {
+                WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
+                     has_ns ? "required" : "invalid",
+                     parent->s_name, name);
+                return NULL;
+        }
+        hash = sysfs_name_hash(name, ns);
+        while (node) {
+                struct sysfs_dirent *sd;
+                int result;
+                sd = to_sysfs_dirent(node);
+                result = sysfs_name_compare(hash, name, ns, sd);
+                if (result < 0)
+                        node = node->rb_left;
+                else if (result > 0)
+                        node = node->rb_right;
+                else
+                        return sd;
+        }
+        return NULL;
+}
+/**
+ * kernfs_find_and_get_ns - find and get sysfs_dirent with the given name
+ * @parent: sysfs_dirent to search under
+ * @name: name to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for sysfs_dirent with name @name under @parent and get a reference
+ * if found.  This function may sleep and returns pointer to the found
+ * sysfs_dirent on success, %NULL on failure.
+ *
+ * The lookup and the reference grab both happen under sysfs_mutex.  The
+ * caller owns the returned reference and releases it with kernfs_put().
+ */
+struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent,
+                                            const char *name, const void *ns)
+{
+        struct sysfs_dirent *sd;
+        mutex_lock(&sysfs_mutex);
+        sd = kernfs_find_ns(parent, name, ns);
+        kernfs_get(sd);
+        mutex_unlock(&sysfs_mutex);
+        return sd;
+}
+EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
+/**
+ * kernfs_create_root - create a new kernfs hierarchy
+ * @priv: opaque data associated with the new hierarchy's root directory
+ *
+ * Allocates the kernfs_root together with its root directory dirent,
+ * which has an empty name and mode S_IFDIR | S_IRUGO | S_IXUGO and is
+ * marked live immediately (SYSFS_FLAG_REMOVED cleared).
+ *
+ * Returns the root of the new hierarchy on success, ERR_PTR() value on
+ * failure (-ENOMEM is the only error produced here).
+ */
+struct kernfs_root *kernfs_create_root(void *priv)
+{
+        struct kernfs_root *root;
+        struct sysfs_dirent *sd;
+        root = kzalloc(sizeof(*root), GFP_KERNEL);
+        if (!root)
+                return ERR_PTR(-ENOMEM);
+        ida_init(&root->ino_ida);
+        sd = sysfs_new_dirent(root, "", S_IFDIR | S_IRUGO | S_IXUGO, SYSFS_DIR);
+        if (!sd) {
+                ida_destroy(&root->ino_ida);
+                kfree(root);
+                return ERR_PTR(-ENOMEM);
+        }
+        sd->s_flags &= ~SYSFS_FLAG_REMOVED;
+        sd->priv = priv;
+        sd->s_dir.root = root;
+        root->sd = sd;
+        return root;
+}
+/**
+ * kernfs_destroy_root - tear down a kernfs hierarchy
+ * @root: root of the hierarchy to tear down
+ *
+ * Hands @root's directory node to kernfs_remove(), which removes it along
+ * with everything below it.
+ */
+void kernfs_destroy_root(struct kernfs_root *root)
+{
+        struct sysfs_dirent *top = root->sd;
+        /* removing the root directory will also free @root */
+        kernfs_remove(top);
+}
+/**
+ * kernfs_create_dir_ns - create a directory node
+ * @parent: directory under which the new directory is created
+ * @name: name of the new directory
+ * @priv: opaque pointer stored in the new node's ->priv
+ * @ns: optional namespace tag for the new directory
+ *
+ * Returns the new node on success.  On failure returns ERR_PTR(-ENOMEM)
+ * if the node cannot be allocated, or ERR_PTR() of the error reported by
+ * sysfs_add_one().
+ */
+struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent,
+                                          const char *name, void *priv,
+                                          const void *ns)
+{
+        const umode_t dir_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+        struct sysfs_addrm_cxt acxt;
+        struct sysfs_dirent *new_sd;
+        int err;
+        /* allocate and fill in the new node */
+        new_sd = sysfs_new_dirent(kernfs_root(parent), name, dir_mode,
+                                  SYSFS_DIR);
+        if (!new_sd)
+                return ERR_PTR(-ENOMEM);
+        new_sd->s_dir.root = parent->s_dir.root;
+        new_sd->s_ns = ns;
+        new_sd->priv = priv;
+        /* link it under @parent */
+        sysfs_addrm_start(&acxt);
+        err = sysfs_add_one(&acxt, new_sd, parent);
+        sysfs_addrm_finish(&acxt);
+        if (err) {
+                /* linking failed, drop the node we just allocated */
+                kernfs_put(new_sd);
+                return ERR_PTR(err);
+        }
+        return new_sd;
+}
+/*
+ * ->lookup() for sysfs directories.  Finds the sysfs_dirent named by
+ * @dentry under the parent directory (using the superblock's namespace
+ * tag when the parent has namespaces enabled), stores a referenced
+ * pointer to it in @dentry->d_fsdata and attaches an inode.
+ *
+ * Returns the result of d_materialise_unique(), ERR_PTR(-ENOENT) if no
+ * such entry exists, or ERR_PTR(-ENOMEM) if no inode could be obtained.
+ */
+static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
+                                   unsigned int flags)
+{
+        struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+        struct sysfs_dirent *sd;
+        struct inode *inode;
+        struct dentry *ret;
+        const void *ns = NULL;
+        mutex_lock(&sysfs_mutex);
+        if (kernfs_ns_enabled(parent_sd))
+                ns = sysfs_info(dir->i_sb)->ns;
+        sd = kernfs_find_ns(parent_sd, dentry->d_name.name, ns);
+        if (!sd) {
+                /* no such entry */
+                ret = ERR_PTR(-ENOENT);
+                goto out_unlock;
+        }
+        /* the dentry carries its own reference to @sd */
+        kernfs_get(sd);
+        dentry->d_fsdata = sd;
+        /* attach dentry and inode, then instantiate and hash the dentry */
+        inode = sysfs_get_inode(dir->i_sb, sd);
+        if (inode)
+                ret = d_materialise_unique(dentry, inode);
+        else
+                ret = ERR_PTR(-ENOMEM);
+ out_unlock:
+        mutex_unlock(&sysfs_mutex);
+        return ret;
+}
+/*
+ * Inode operation table for sysfs directories.  Name lookup goes through
+ * sysfs_lookup(); permission, attribute and xattr handling use the
+ * sysfs_* helpers named below.
+ */
+const struct inode_operations sysfs_dir_inode_operations = {
+        .lookup         = sysfs_lookup,
+        .permission     = sysfs_permission,
+        .setattr        = sysfs_setattr,
+        .getattr        = sysfs_getattr,
+        .setxattr       = sysfs_setxattr,
+        .removexattr    = sysfs_removexattr,
+        .getxattr       = sysfs_getxattr,
+        .listxattr      = sysfs_listxattr,
+};
+/*
+ * Starting at @pos, keep descending into the first child for as long as
+ * the current node is a directory that has children.  Returns the node
+ * where the descent stops, which is @pos itself if it is not a directory
+ * or has no children.
+ */
+static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
+{
+        struct rb_node *first;
+        while (sysfs_type(pos) == SYSFS_DIR) {
+                first = rb_first(&pos->s_dir.children);
+                if (!first)
+                        break;
+                pos = to_sysfs_dirent(first);
+        }
+        return pos;
+}
+/**
+ * sysfs_next_descendant_post - step a post-order walk of a subtree
+ * @pos: node visited last (%NULL to begin the walk)
+ * @root: sysfs_dirent whose subtree is being walked
+ *
+ * Returns the node to visit after @pos in a post-order traversal of the
+ * subtree rooted at @root.  @root itself is part of the walk and is the
+ * last node returned; %NULL is returned once @root has been visited.
+ * Must be called with sysfs_mutex held.
+ */
+static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
+                                                       struct sysfs_dirent *root)
+{
+        struct rb_node *sibling;
+        lockdep_assert_held(&sysfs_mutex);
+        /* first step: start from the leftmost descendant, possibly @root */
+        if (!pos)
+                return sysfs_leftmost_descendant(root);
+        /* @root is visited last, so the walk is over */
+        if (pos == root)
+                return NULL;
+        /*
+         * Continue with the leftmost descendant of the next sibling, or
+         * move up to the parent once all siblings have been visited.
+         */
+        sibling = rb_next(&pos->s_rb);
+        return sibling ? sysfs_leftmost_descendant(to_sysfs_dirent(sibling))
+                       : pos->s_parent;
+}
+/*
+ * Remove @sd and everything below it, visiting the subtree in post-order
+ * so that children are removed before their parent.  A %NULL @sd is a
+ * no-op.  The successor of each node is looked up before that node is
+ * removed.
+ */
+static void __kernfs_remove(struct sysfs_addrm_cxt *acxt,
+                            struct sysfs_dirent *sd)
+{
+        struct sysfs_dirent *cur, *following;
+        if (!sd)
+                return;
+        pr_debug("sysfs %s: removing\n", sd->s_name);
+        cur = sysfs_next_descendant_post(NULL, sd);
+        while (cur) {
+                following = sysfs_next_descendant_post(cur, sd);
+                sysfs_remove_one(acxt, cur);
+                cur = following;
+        }
+}
+/**
+ * kernfs_remove - recursively remove a sysfs_dirent
+ * @sd: node to remove
+ *
+ * Removes @sd together with all files and directories below it, inside a
+ * single sysfs_addrm_start()/sysfs_addrm_finish() section.
+ */
+void kernfs_remove(struct sysfs_dirent *sd)
+{
+        struct sysfs_addrm_cxt cxt;
+        sysfs_addrm_start(&cxt);
+        __kernfs_remove(&cxt, sd);
+        sysfs_addrm_finish(&cxt);
+}
+/**
+ * kernfs_remove_by_name_ns - look up a child by name and remove it
+ * @dir_sd: directory containing the target
+ * @name: name of the sysfs_dirent to remove
+ * @ns: namespace tag of the sysfs_dirent to remove
+ *
+ * Finds the entry matching @name and @ns under @dir_sd and removes it
+ * along with anything below it.  A %NULL @dir_sd triggers a warning.
+ *
+ * Returns 0 if an entry was removed, -ENOENT if @dir_sd is %NULL or no
+ * matching entry exists.
+ */
+int kernfs_remove_by_name_ns(struct sysfs_dirent *dir_sd, const char *name,
+                             const void *ns)
+{
+        struct sysfs_addrm_cxt acxt;
+        struct sysfs_dirent *victim;
+        int ret = -ENOENT;
+        if (!dir_sd) {
+                WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
+                        name);
+                return -ENOENT;
+        }
+        sysfs_addrm_start(&acxt);
+        victim = kernfs_find_ns(dir_sd, name, ns);
+        if (victim) {
+                __kernfs_remove(&acxt, victim);
+                ret = 0;
+        }
+        sysfs_addrm_finish(&acxt);
+        return ret;
+}
+/**
+ * kernfs_rename_ns - move and rename a sysfs_dirent
+ * @sd: target node
+ * @new_parent: new parent to put @sd under
+ * @new_name: new name
+ * @new_ns: new namespace tag
+ *
+ * The whole operation runs under sysfs_mutex.
+ *
+ * Returns 0 on success or when @sd already has the requested parent,
+ * namespace tag and name, -EEXIST if @new_parent already has an entry
+ * matching @new_name and @new_ns, and -ENOMEM if the new name cannot be
+ * duplicated.
+ */
+int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent,
+                     const char *new_name, const void *new_ns)
+{
+        int error;
+        mutex_lock(&sysfs_mutex);
+        error = 0;
+        if ((sd->s_parent == new_parent) && (sd->s_ns == new_ns) &&
+            (strcmp(sd->s_name, new_name) == 0))
+                goto out;       /* nothing to rename */
+        error = -EEXIST;
+        if (kernfs_find_ns(new_parent, new_name, new_ns))
+                goto out;
+        /* rename sysfs_dirent */
+        if (strcmp(sd->s_name, new_name) != 0) {
+                error = -ENOMEM;
+                /* from here on @new_name refers to our own heap copy */
+                new_name = kstrdup(new_name, GFP_KERNEL);
+                if (!new_name)
+                        goto out;
+                kfree(sd->s_name);
+                sd->s_name = new_name;
+        }
+        /*
+         * Move to the appropriate place in the appropriate directories rbtree.
+         */
+        sysfs_unlink_sibling(sd);
+        /* take the reference on the new parent before dropping the old one */
+        kernfs_get(new_parent);
+        kernfs_put(sd->s_parent);
+        sd->s_ns = new_ns;
+        /* recompute the hash from the (possibly new) name and namespace tag */
+        sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
+        sd->s_parent = new_parent;
+        sysfs_link_sibling(sd);
+        error = 0;
+ out:
+        mutex_unlock(&sysfs_mutex);
+        return error;
+}
+/*
+ * Relationship between s_mode and the DT_xxx types: the directory entry
+ * type is the 4-bit field starting at bit 12 of s_mode.
+ */
+static inline unsigned char dt_type(struct sysfs_dirent *sd)
+{
+        return (sd->s_mode >> 12) & 0xf;
+}
+/*
+ * ->release() for sysfs directories: drop the reference on the readdir
+ * position cached in ->private_data.  Always returns 0.
+ */
+static int sysfs_dir_release(struct inode *inode, struct file *filp)
+{
+        struct sysfs_dirent *cached_pos = filp->private_data;
+        kernfs_put(cached_pos);
+        return 0;
+}
+/*
+ * Work out which entry a directory read should resume from.
+ *
+ * @pos is the entry cached by the previous call (may be %NULL); one
+ * reference on it is dropped here.  It is reused only if it has not been
+ * removed, is still a child of @parent_sd and its hash equals @hash.
+ * Otherwise, provided 1 < @hash < INT_MAX, @parent_sd's children rbtree
+ * is searched by hash.  Finally, entries whose namespace tag differs from
+ * @ns are skipped.  Returns the resulting entry, or %NULL if there is
+ * none.
+ */
+static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
+        struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
+{
+        if (pos) {
+                /* check validity before putting, the put may be the last one */
+                int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
+                        pos->s_parent == parent_sd &&
+                        hash == pos->s_hash;
+                kernfs_put(pos);
+                if (!valid)
+                        pos = NULL;
+        }
+        if (!pos && (hash > 1) && (hash < INT_MAX)) {
+                struct rb_node *node = parent_sd->s_dir.children.rb_node;
+                /*
+                 * Descend by hash.  If no node has exactly @hash, @pos is
+                 * left at the last node visited rather than reset to NULL.
+                 */
+                while (node) {
+                        pos = to_sysfs_dirent(node);
+                        if (hash < pos->s_hash)
+                                node = node->rb_left;
+                        else if (hash > pos->s_hash)
+                                node = node->rb_right;
+                        else
+                                break;
+                }
+        }
+        /* Skip over entries in the wrong namespace */
+        while (pos && pos->s_ns != ns) {
+                struct rb_node *node = rb_next(&pos->s_rb);
+                if (!node)
+                        pos = NULL;
+                else
+                        pos = to_sysfs_dirent(node);
+        }
+        return pos;
+}
+/*
+ * Advance a directory read by one entry: revalidate @pos through
+ * sysfs_dir_pos() and then move to the following entry whose namespace
+ * tag equals @ns.  Despite its name, @ino is handed to sysfs_dir_pos() as
+ * the hash to resume from.  Returns the next entry or %NULL when the
+ * directory is exhausted.
+ */
+static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
+        struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
+{
+        struct rb_node *node;
+        pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
+        if (!pos)
+                return NULL;
+        do {
+                node = rb_next(&pos->s_rb);
+                pos = node ? to_sysfs_dirent(node) : NULL;
+        } while (pos && pos->s_ns != ns);
+        return pos;
+}
+/*
+ * ->iterate() for sysfs directories.  Emits "." and ".." through
+ * dir_emit_dots() and then walks the children with sysfs_dir_pos() and
+ * sysfs_dir_next_pos(), using each entry's hash as the directory offset
+ * (ctx->pos).  Only entries matching the superblock's namespace tag are
+ * reported when the directory has namespaces enabled.  Always returns 0.
+ */
+static int sysfs_readdir(struct file *file, struct dir_context *ctx)
+{
+        struct dentry *dentry = file->f_path.dentry;
+        struct sysfs_dirent *parent_sd = dentry->d_fsdata;
+        /* entry cached by a previous, interrupted call (or NULL) */
+        struct sysfs_dirent *pos = file->private_data;
+        const void *ns = NULL;
+        if (!dir_emit_dots(file, ctx))
+                return 0;
+        mutex_lock(&sysfs_mutex);
+        if (kernfs_ns_enabled(parent_sd))
+                ns = sysfs_info(dentry->d_sb)->ns;
+        for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
+             pos;
+             pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
+                const char *name = pos->s_name;
+                unsigned int type = dt_type(pos);
+                int len = strlen(name);
+                ino_t ino = pos->s_ino;
+                ctx->pos = pos->s_hash;
+                /*
+                 * Cache @pos with a reference of its own before dropping
+                 * sysfs_mutex around dir_emit().  The reference is put by
+                 * the next sysfs_dir_pos() call or by sysfs_dir_release().
+                 */
+                file->private_data = pos;
+                kernfs_get(pos);
+                mutex_unlock(&sysfs_mutex);
+                /* on a failed emit, return with the position still cached */
+                if (!dir_emit(ctx, name, len, ino, type))
+                        return 0;
+                mutex_lock(&sysfs_mutex);
+        }
+        mutex_unlock(&sysfs_mutex);
+        /* end of directory: nothing cached, park the offset at INT_MAX */
+        file->private_data = NULL;
+        ctx->pos = INT_MAX;
+        return 0;
+}
+/*
+ * ->llseek() for sysfs directories: generic_file_llseek() run with the
+ * directory inode's i_mutex held.
+ */
+static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+        struct mutex *dir_lock = &file_inode(file)->i_mutex;
+        loff_t new_pos;
+        mutex_lock(dir_lock);
+        new_pos = generic_file_llseek(file, offset, whence);
+        mutex_unlock(dir_lock);
+        return new_pos;
+}
+/*
+ * File operation table for sysfs directories: listing via sysfs_readdir(),
+ * seeking via sysfs_dir_llseek(), and sysfs_dir_release() to drop the
+ * cached readdir position on close.
+ */
+const struct file_operations sysfs_dir_operations = {
+        .read           = generic_read_dir,
+        .iterate        = sysfs_readdir,
+        .release        = sysfs_dir_release,
+        .llseek         = sysfs_dir_llseek,
+};
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
new file mode 100644
index 000000000000..4a5863b79de9
--- /dev/null
+++ b/fs/kernfs/file.c
@@ -0,0 +1,813 @@
+/*
+ * fs/kernfs/file.c - kernfs file implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include "kernfs-internal.h"
+/*
+ * There's one sysfs_open_file for each open file and one sysfs_open_dirent
+ * for each sysfs_dirent with one or more open files.
+ *
+ * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open is
+ * protected by sysfs_open_dirent_lock.
+ *
+ * filp->private_data points to seq_file whose ->private points to
+ * sysfs_open_file.  sysfs_open_files are chained at
+ * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
+ */
+/* guards sysfs_dirent->s_attr.open */
+static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
+/* guards the sysfs_open_dirent->files lists */
+static DEFINE_MUTEX(sysfs_open_file_mutex);
+/*
+ * Per-sysfs_dirent state shared by all open files of that node.  It is
+ * created by sysfs_get_open_dirent() and freed by sysfs_put_open_dirent()
+ * when @refcnt drops to zero.
+ */
+struct sysfs_open_dirent {
+        atomic_t                refcnt; /* one per open file, plus temporary users */
+        atomic_t                event;  /* starts at 1; compared with sysfs_open_file.event by poll */
+        wait_queue_head_t       poll;   /* wait queue handed to poll_wait() */
+        struct list_head        files; /* goes through sysfs_open_file.list */
+};
+/*
+ * Map a struct file to its sysfs_open_file, which hangs off the seq_file
+ * stored in ->private_data.
+ */
+static struct sysfs_open_file *sysfs_of(struct file *file)
+{
+        struct seq_file *sf = file->private_data;
+        return sf->private;
+}
+/*
+ * Return the kernfs_ops attached to @sd.  The caller must hold an active
+ * reference on @sd; for nodes flagged SYSFS_FLAG_LOCKDEP this is checked
+ * through lockdep.
+ */
+static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
+{
+        const bool lockdep_tracked = sd->s_flags & SYSFS_FLAG_LOCKDEP;
+        if (lockdep_tracked)
+                lockdep_assert_held(sd);
+        return sd->s_attr.ops;
+}
+/*
+ * Finish a seq_file iteration that holds an active reference: run the
+ * optional ->seq_stop() callback and drop the active reference taken in
+ * kernfs_seq_start().  @of->mutex is left alone.
+ *
+ * kernfs_seq_stop() is also invoked after kernfs_seq_start() or
+ * kernfs_seq_next() has failed, so it must be able to tell whether an
+ * active reference is held.  ERR_PTR(-ENODEV) is what kernfs_seq_start()
+ * returns when sysfs_get_active() fails, i.e. when no reference was
+ * taken.  Since an ops ->seq_start()/->seq_next() may return the very same
+ * value, those paths call this helper themselves before passing the error
+ * on, so that ERR_PTR(-ENODEV) always means "nothing left to release".
+ */
+static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
+{
+        struct sysfs_open_file *of = sf->private;
+        const struct kernfs_ops *ops = kernfs_ops(of->sd);
+        if (ops->seq_stop)
+                ops->seq_stop(sf, v);
+        sysfs_put_active(of->sd);
+}
+/*
+ * seq_file ->start().  Takes @of->mutex and an active reference on the
+ * node, then defers to the ops' ->seq_start() if there is one.
+ *
+ * Returns ERR_PTR(-ENODEV) if no active reference could be obtained; in
+ * that case only @of->mutex is held on return and kernfs_seq_stop()
+ * releases it.
+ */
+static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
+{
+        struct sysfs_open_file *of = sf->private;
+        const struct kernfs_ops *ops;
+        /*
+         * @of->mutex nests outside active ref and is just to ensure that
+         * the ops aren't called concurrently for the same open file.
+         */
+        mutex_lock(&of->mutex);
+        if (!sysfs_get_active(of->sd))
+                return ERR_PTR(-ENODEV);
+        ops = kernfs_ops(of->sd);
+        if (ops->seq_start) {
+                void *next = ops->seq_start(sf, ppos);
+                /* see the comment above kernfs_seq_stop_active() */
+                if (next == ERR_PTR(-ENODEV))
+                        kernfs_seq_stop_active(sf, next);
+                return next;
+        } else {
+                /*
+                 * The same behavior and code as single_open().  Returns
+                 * !NULL if pos is at the beginning; otherwise, NULL.
+                 */
+                return NULL + !*ppos;
+        }
+}
+/*
+ * seq_file ->next().  Defers to the ops' ->seq_next() if there is one;
+ * otherwise behaves like single_open() and ends the iteration.
+ */
+static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
+{
+        struct sysfs_open_file *of = sf->private;
+        const struct kernfs_ops *ops = kernfs_ops(of->sd);
+        if (ops->seq_next) {
+                void *next = ops->seq_next(sf, v, ppos);
+                /* see the comment above kernfs_seq_stop_active() */
+                if (next == ERR_PTR(-ENODEV))
+                        kernfs_seq_stop_active(sf, next);
+                return next;
+        } else {
+                /*
+                 * The same behavior and code as single_open(), always
+                 * terminate after the initial read.
+                 */
+                ++*ppos;
+                return NULL;
+        }
+}
+/*
+ * seq_file ->stop().  Releases the active reference unless @v says none
+ * is held (see kernfs_seq_stop_active()), then drops @of->mutex, which
+ * kernfs_seq_start() always takes.
+ */
+static void kernfs_seq_stop(struct seq_file *sf, void *v)
+{
+        struct sysfs_open_file *of = sf->private;
+        if (v != ERR_PTR(-ENODEV))
+                kernfs_seq_stop_active(sf, v);
+        mutex_unlock(&of->mutex);
+}
+/*
+ * seq_file ->show().  Records the node's current event count in the open
+ * file and then lets the ops' ->seq_show() produce the output.
+ */
+static int kernfs_seq_show(struct seq_file *sf, void *v)
+{
+        struct sysfs_open_file *of = sf->private;
+        struct sysfs_dirent *sd = of->sd;
+        of->event = atomic_read(&sd->s_attr.open->event);
+        return sd->s_attr.ops->seq_show(sf, v);
+}
+/*
+ * seq_file iterator used for nodes whose ops provide ->seq_show(); it is
+ * passed to seq_open() by kernfs_file_open().
+ */
+static const struct seq_operations kernfs_seq_ops = {
+        .start = kernfs_seq_start,
+        .next = kernfs_seq_next,
+        .stop = kernfs_seq_stop,
+        .show = kernfs_seq_show,
+};
+/*
+ * As reading a bin file can have side-effects, the exact offset and bytes
+ * specified in read(2) call should be passed to the read callback making
+ * it difficult to use seq_file.  Implement simplistic custom buffering for
+ * bin files.
+ *
+ * At most PAGE_SIZE bytes are read per call.  Returns the number of bytes
+ * copied to @user_buf and advances *@ppos by that amount, or returns
+ * -ENOMEM, -ENODEV (no active reference), -EINVAL (no ->read() op),
+ * -EFAULT, or the negative value returned by ->read().
+ */
+static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
+                                       char __user *user_buf, size_t count,
+                                       loff_t *ppos)
+{
+        ssize_t len = min_t(size_t, count, PAGE_SIZE);
+        const struct kernfs_ops *ops;
+        char *kbuf;
+        kbuf = kmalloc(len, GFP_KERNEL);
+        if (!kbuf)
+                return -ENOMEM;
+        /*
+         * @of->mutex nests outside active ref and is just to ensure that
+         * the ops aren't called concurrently for the same open file.
+         */
+        mutex_lock(&of->mutex);
+        if (sysfs_get_active(of->sd)) {
+                ops = kernfs_ops(of->sd);
+                if (ops->read)
+                        len = ops->read(of, kbuf, len, *ppos);
+                else
+                        len = -EINVAL;
+                sysfs_put_active(of->sd);
+        } else {
+                len = -ENODEV;
+        }
+        mutex_unlock(&of->mutex);
+        /* only hand data to userland if the read itself succeeded */
+        if (len >= 0) {
+                if (copy_to_user(user_buf, kbuf, len))
+                        len = -EFAULT;
+                else
+                        *ppos += len;
+        }
+        kfree(kbuf);
+        return len;
+}
+/**
+ * kernfs_file_read - kernfs vfs read callback
+ * @file: file pointer
+ * @user_buf: userland buffer to read into
+ * @count: number of bytes
+ * @ppos: starting offset
+ *
+ * Nodes flagged SYSFS_FLAG_HAS_SEQ_SHOW are served by seq_read(); all
+ * others go through kernfs_file_direct_read().
+ */
+static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
+                                size_t count, loff_t *ppos)
+{
+        struct sysfs_open_file *of = sysfs_of(file);
+        if (!(of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW))
+                return kernfs_file_direct_read(of, user_buf, count, ppos);
+        return seq_read(file, user_buf, count, ppos);
+}
+/**
+ * kernfs_file_write - kernfs vfs write callback
+ * @file: file pointer
+ * @user_buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
+ *
+ * Copy up to PAGE_SIZE bytes in from userland, NUL-terminate the copy and
+ * pass it to the matching kernfs write operation.
+ *
+ * There is no easy way for us to know if userspace is only doing a partial
+ * write, so we don't support them.  We expect the entire buffer to come on
+ * the first write.  Hint: if you're writing a value, first read the file,
+ * modify only the value you're changing, then write the entire buffer
+ * back.
+ *
+ * Returns what the write operation returned (advancing *@ppos when that
+ * is positive), or -ENOMEM, -EFAULT, -ENODEV (no active reference) or
+ * -EINVAL (no ->write() op).
+ */
+static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
+                                 size_t count, loff_t *ppos)
+{
+        struct sysfs_open_file *of = sysfs_of(file);
+        ssize_t len = min_t(size_t, count, PAGE_SIZE);
+        const struct kernfs_ops *ops;
+        char *kbuf;
+        /* one extra byte for the terminating NUL */
+        kbuf = kmalloc(len + 1, GFP_KERNEL);
+        if (!kbuf)
+                return -ENOMEM;
+        if (copy_from_user(kbuf, user_buf, len)) {
+                len = -EFAULT;
+                goto out_free;
+        }
+        kbuf[len] = '\0';       /* guarantee string termination */
+        /*
+         * @of->mutex nests outside active ref and is just to ensure that
+         * the ops aren't called concurrently for the same open file.
+         */
+        mutex_lock(&of->mutex);
+        if (sysfs_get_active(of->sd)) {
+                ops = kernfs_ops(of->sd);
+                if (ops->write)
+                        len = ops->write(of, kbuf, len, *ppos);
+                else
+                        len = -EINVAL;
+                sysfs_put_active(of->sd);
+        } else {
+                len = -ENODEV;
+        }
+        mutex_unlock(&of->mutex);
+        if (len > 0)
+                *ppos += len;
+out_free:
+        kfree(kbuf);
+        return len;
+}
+/*
+ * vm_ops ->open() wrapper: forward to the wrapped vm_ops' ->open(), if
+ * any, while holding an active reference on the node.  Does nothing when
+ * there are no wrapped vm_ops or no active reference can be taken.
+ */
+static void kernfs_vma_open(struct vm_area_struct *vma)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return;
+        if (of->vm_ops->open)
+                of->vm_ops->open(vma);
+        sysfs_put_active(of->sd);
+}
+/*
+ * vm_ops ->fault() wrapper: forward to the wrapped vm_ops' ->fault()
+ * while holding an active reference on the node.  Returns VM_FAULT_SIGBUS
+ * when there are no wrapped vm_ops, no active reference can be taken, or
+ * the wrapped vm_ops have no ->fault().
+ */
+static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        int ret = VM_FAULT_SIGBUS;
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return VM_FAULT_SIGBUS;
+        if (of->vm_ops->fault)
+                ret = of->vm_ops->fault(vma, vmf);
+        sysfs_put_active(of->sd);
+        return ret;
+}
+/*
+ * vm_ops ->page_mkwrite() wrapper.  With an active reference held, either
+ * forward to the wrapped ->page_mkwrite() or, if there is none, just
+ * update the file times and return 0.  Returns VM_FAULT_SIGBUS when there
+ * are no wrapped vm_ops or no active reference can be taken.
+ */
+static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
+                                   struct vm_fault *vmf)
+{
+        struct file *file = vma->vm_file;
+        struct sysfs_open_file *of = sysfs_of(file);
+        int ret = 0;
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return VM_FAULT_SIGBUS;
+        if (!of->vm_ops->page_mkwrite)
+                file_update_time(file);
+        else
+                ret = of->vm_ops->page_mkwrite(vma, vmf);
+        sysfs_put_active(of->sd);
+        return ret;
+}
+/*
+ * vm_ops ->access() wrapper: forward to the wrapped vm_ops' ->access()
+ * while holding an active reference on the node.  Returns -EINVAL when
+ * there are no wrapped vm_ops, no active reference can be taken, or the
+ * wrapped vm_ops have no ->access().
+ */
+static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
+                             void *buf, int len, int write)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        int ret = -EINVAL;
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return -EINVAL;
+        if (of->vm_ops->access)
+                ret = of->vm_ops->access(vma, addr, buf, len, write);
+        sysfs_put_active(of->sd);
+        return ret;
+}
+#ifdef CONFIG_NUMA
+/*
+ * vm_ops ->set_policy() wrapper: forward to the wrapped vm_ops'
+ * ->set_policy(), if any, while holding an active reference on the node.
+ * Returns 0 when there are no wrapped vm_ops or they lack ->set_policy(),
+ * and -EINVAL when no active reference can be taken.
+ */
+static int kernfs_vma_set_policy(struct vm_area_struct *vma,
+                                 struct mempolicy *new)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        int ret = 0;
+        if (!of->vm_ops)
+                return 0;
+        if (!sysfs_get_active(of->sd))
+                return -EINVAL;
+        if (of->vm_ops->set_policy)
+                ret = of->vm_ops->set_policy(vma, new);
+        sysfs_put_active(of->sd);
+        return ret;
+}
+/*
+ * vm_ops ->get_policy() wrapper: forward to the wrapped vm_ops'
+ * ->get_policy() while holding an active reference on the node.  Falls
+ * back to @vma->vm_policy when there are no wrapped vm_ops, no active
+ * reference can be taken, or the wrapped vm_ops have no ->get_policy().
+ */
+static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
+                                               unsigned long addr)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        struct mempolicy *pol;
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return vma->vm_policy;
+        if (of->vm_ops->get_policy)
+                pol = of->vm_ops->get_policy(vma, addr);
+        else
+                pol = vma->vm_policy;
+        sysfs_put_active(of->sd);
+        return pol;
+}
+/*
+ * vm_ops ->migrate() wrapper: forward to the wrapped vm_ops' ->migrate()
+ * while holding an active reference on the node.  Returns 0 when there
+ * are no wrapped vm_ops, no active reference can be taken, or the wrapped
+ * vm_ops have no ->migrate().
+ */
+static int kernfs_vma_migrate(struct vm_area_struct *vma,
+                              const nodemask_t *from, const nodemask_t *to,
+                              unsigned long flags)
+{
+        struct sysfs_open_file *of = sysfs_of(vma->vm_file);
+        int ret = 0;
+        if (!of->vm_ops || !sysfs_get_active(of->sd))
+                return 0;
+        if (of->vm_ops->migrate)
+                ret = of->vm_ops->migrate(vma, from, to, flags);
+        sysfs_put_active(of->sd);
+        return ret;
+}
+#endif
+/*
+ * vm_ops installed on a vma by kernfs_file_mmap().  Each hook forwards to
+ * the vm_ops saved in sysfs_open_file->vm_ops.  No ->close() is provided;
+ * kernfs_file_mmap() rejects wrapped vm_ops that have one.
+ */
+static const struct vm_operations_struct kernfs_vm_ops = {
+        .open           = kernfs_vma_open,
+        .fault          = kernfs_vma_fault,
+        .page_mkwrite   = kernfs_vma_page_mkwrite,
+        .access         = kernfs_vma_access,
+#ifdef CONFIG_NUMA
+        .set_policy     = kernfs_vma_set_policy,
+        .get_policy     = kernfs_vma_get_policy,
+        .migrate        = kernfs_vma_migrate,
+#endif
+};
+/*
+ * ->mmap() for kernfs files.  Calls the ops' ->mmap() with @of->mutex and
+ * an active reference held and, on success, swaps the vma's vm_ops for
+ * kernfs_vm_ops after saving the originals in @of->vm_ops.
+ *
+ * Returns 0 on success, -ENODEV if no active reference can be taken or
+ * the ops have no ->mmap(), the error returned by ->mmap(), or -EINVAL if
+ * the resulting vm_ops differ from an earlier mapping of the same open
+ * file or provide ->close().
+ */
+static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+        struct sysfs_open_file *of = sysfs_of(file);
+        const struct kernfs_ops *ops;
+        int rc;
+        mutex_lock(&of->mutex);
+        rc = -ENODEV;
+        if (!sysfs_get_active(of->sd))
+                goto out_unlock;
+        ops = kernfs_ops(of->sd);
+        /* rc stays -ENODEV when the ops provide no ->mmap() */
+        if (ops->mmap)
+                rc = ops->mmap(of, vma);
+        if (rc)
+                goto out_put;
+        /*
+         * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+         * to satisfy versions of X which crash if the mmap fails: that
+         * substitutes a new vm_file, and we don't then want bin_vm_ops.
+         */
+        /* NOTE(review): "bin_vm_ops" above presumably means kernfs_vm_ops */
+        if (vma->vm_file != file)
+                goto out_put;
+        /* all mappings of one open file must share the same wrapped vm_ops */
+        rc = -EINVAL;
+        if (of->mmapped && of->vm_ops != vma->vm_ops)
+                goto out_put;
+        /*
+         * It is not possible to successfully wrap close.
+         * So error if someone is trying to use close.
+         */
+        rc = -EINVAL;
+        if (vma->vm_ops && vma->vm_ops->close)
+                goto out_put;
+        rc = 0;
+        of->mmapped = 1;
+        /* remember the real vm_ops and interpose the kernfs wrappers */
+        of->vm_ops = vma->vm_ops;
+        vma->vm_ops = &kernfs_vm_ops;
+out_put:
+        sysfs_put_active(of->sd);
+out_unlock:
+        mutex_unlock(&of->mutex);
+        return rc;
+}
+/**
+ *      sysfs_get_open_dirent - get or create sysfs_open_dirent
+ *      @sd: target sysfs_dirent
+ *      @of: sysfs_open_file for this instance of open
+ *
+ *      If @sd->s_attr.open exists, increment its reference count;
+ *      otherwise, create one.  @of is chained to the files list.
+ *
+ *      The allocation is done with no locks held, after which the
+ *      lookup is retried.  If another opener installed a
+ *      sysfs_open_dirent in the meantime, the spare allocation is freed.
+ *
+ *      LOCKING:
+ *      Kernel thread context (may sleep).
+ *
+ *      RETURNS:
+ *      0 on success, -errno on failure.
+ */
+static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
+                                 struct sysfs_open_file *of)
+{
+        struct sysfs_open_dirent *od, *new_od = NULL;
+ retry:
+        mutex_lock(&sysfs_open_file_mutex);
+        spin_lock_irq(&sysfs_open_dirent_lock);
+        /* second pass: install our allocation unless someone beat us to it */
+        if (!sd->s_attr.open && new_od) {
+                sd->s_attr.open = new_od;
+                new_od = NULL;
+        }
+        od = sd->s_attr.open;
+        if (od) {
+                atomic_inc(&od->refcnt);
+                list_add_tail(&of->list, &od->files);
+        }
+        spin_unlock_irq(&sysfs_open_dirent_lock);
+        mutex_unlock(&sysfs_open_file_mutex);
+        if (od) {
+                /* non-NULL only if our allocation turned out to be unneeded */
+                kfree(new_od);
+                return 0;
+        }
+        /* not there, initialize a new one and retry */
+        new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
+        if (!new_od)
+                return -ENOMEM;
+        /* refcnt starts at 0; the retry pass takes the first reference */
+        atomic_set(&new_od->refcnt, 0);
+        atomic_set(&new_od->event, 1);
+        init_waitqueue_head(&new_od->poll);
+        INIT_LIST_HEAD(&new_od->files);
+        goto retry;
+}
+/**
+ *      sysfs_put_open_dirent - put sysfs_open_dirent
+ *      @sd: target sysfs_dirent
+ *      @of: associated sysfs_open_file, or %NULL if there is none to unlink
+ *
+ *      Drop one reference on @sd->s_attr.open and, if @of is given,
+ *      unlink it from the files list.  When the reference count reaches
+ *      zero, the sysfs_open_dirent is detached from @sd and freed.
+ *
+ *      LOCKING:
+ *      None.
+ */
+static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
+                                  struct sysfs_open_file *of)
+{
+        struct sysfs_open_dirent *od = sd->s_attr.open;
+        struct sysfs_open_dirent *to_free = NULL;
+        unsigned long flags;
+        mutex_lock(&sysfs_open_file_mutex);
+        spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
+        if (of)
+                list_del(&of->list);
+        if (atomic_dec_and_test(&od->refcnt)) {
+                /* last user gone: detach now, free once the locks are dropped */
+                sd->s_attr.open = NULL;
+                to_free = od;
+        }
+        spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
+        mutex_unlock(&sysfs_open_file_mutex);
+        kfree(to_free);
+}
+/*
+ * ->open() for kernfs files.  With an active reference held on the node,
+ * check the requested access mode against both the inode mode and the
+ * operations the node implements, allocate a sysfs_open_file, attach it
+ * to a seq_file and register it with the node's sysfs_open_dirent.
+ *
+ * Returns 0 on success, -ENODEV if no active reference can be taken,
+ * -EACCES if the access mode is not permitted or not supported, -ENOMEM
+ * on allocation failure, or the error from seq_open() or
+ * sysfs_get_open_dirent().
+ */
+static int kernfs_file_open(struct inode *inode, struct file *file)
+{
+        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+        const struct kernfs_ops *ops;
+        struct sysfs_open_file *of;
+        bool has_read, has_write, has_mmap;
+        int error = -EACCES;
+        if (!sysfs_get_active(attr_sd))
+                return -ENODEV;
+        ops = kernfs_ops(attr_sd);
+        /* a node with ->mmap() counts as both readable and writable */
+        has_read = ops->seq_show || ops->read || ops->mmap;
+        has_write = ops->write || ops->mmap;
+        has_mmap = ops->mmap;
+        /* check perms and supported operations */
+        if ((file->f_mode & FMODE_WRITE) &&
+            (!(inode->i_mode & S_IWUGO) || !has_write))
+                goto err_out;
+        if ((file->f_mode & FMODE_READ) &&
+            (!(inode->i_mode & S_IRUGO) || !has_read))
+                goto err_out;
+        /* allocate a sysfs_open_file for the file */
+        error = -ENOMEM;
+        of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
+        if (!of)
+                goto err_out;
+        /*
+         * The following is done to give a different lockdep key to
+         * @of->mutex for files which implement mmap.  This is a rather
+         * crude way to avoid false positive lockdep warning around
+         * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
+         * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
+         * which mm->mmap_sem nests, while holding @of->mutex.  As each
+         * open file has a separate mutex, it's okay as long as those don't
+         * happen on the same file.  At this point, we can't easily give
+         * each file a separate locking class.  Let's differentiate on
+         * whether the file has mmap or not for now.
+         */
+        /*
+         * NOTE(review): the two branches look identical on purpose and
+         * must not be merged; presumably mutex_init() assigns a lockdep
+         * key per call site, so two call sites yield two keys.
+         */
+        if (has_mmap)
+                mutex_init(&of->mutex);
+        else
+                mutex_init(&of->mutex);
+        of->sd = attr_sd;
+        of->file = file;
+        /*
+         * Always instantiate seq_file even if read access doesn't use
+         * seq_file or is not requested.  This unifies private data access
+         * and readable regular files are the vast majority anyway.
+         */
+        if (ops->seq_show)
+                error = seq_open(file, &kernfs_seq_ops);
+        else
+                error = seq_open(file, NULL);
+        if (error)
+                goto err_free;
+        /* this is the link that sysfs_of() follows back to @of */
+        ((struct seq_file *)file->private_data)->private = of;
+        /* seq_file clears PWRITE unconditionally, restore it if WRITE */
+        if (file->f_mode & FMODE_WRITE)
+                file->f_mode |= FMODE_PWRITE;
+        /* make sure we have open dirent struct */
+        error = sysfs_get_open_dirent(attr_sd, of);
+        if (error)
+                goto err_close;
+        /* open succeeded, put active references */
+        sysfs_put_active(attr_sd);
+        return 0;
+err_close:
+        seq_release(inode, file);
+err_free:
+        kfree(of);
+err_out:
+        sysfs_put_active(attr_sd);
+        return error;
+}
+/*
+ * ->release() for kernfs files: unlink the sysfs_open_file from the
+ * node's sysfs_open_dirent and drop that reference, release the seq_file
+ * and free the sysfs_open_file.  Always returns 0.
+ */
+static int kernfs_file_release(struct inode *inode, struct file *filp)
+{
+        struct sysfs_open_file *of = sysfs_of(filp);
+        sysfs_put_open_dirent(filp->f_path.dentry->d_fsdata, of);
+        seq_release(inode, filp);
+        kfree(of);
+        return 0;
+}
+/*
+ * Unmap every mapping established through the open files of @sd.  Does
+ * nothing unless @sd is flagged SYSFS_FLAG_HAS_MMAP and currently has a
+ * sysfs_open_dirent.
+ */
+void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
+{
+        struct sysfs_open_dirent *od;
+        struct sysfs_open_file *of;
+        if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
+                return;
+        /* take a reference so @od stays around while ->files is walked */
+        spin_lock_irq(&sysfs_open_dirent_lock);
+        od = sd->s_attr.open;
+        if (od)
+                atomic_inc(&od->refcnt);
+        spin_unlock_irq(&sysfs_open_dirent_lock);
+        if (!od)
+                return;
+        mutex_lock(&sysfs_open_file_mutex);
+        list_for_each_entry(of, &od->files, list)
+                unmap_mapping_range(file_inode(of->file)->i_mapping, 0, 0, 1);
+        mutex_unlock(&sysfs_open_file_mutex);
+        /* drop the reference taken above; no open file to unlink */
+        sysfs_put_open_dirent(sd, NULL);
+}
+/* Sysfs attribute files are pollable.  The idea is that you read
+ * the content and then you use 'poll' or 'select' to wait for
+ * the content to change.  When the content changes (assuming the
+ * manager for the kobject supports notification), poll will
+ * return POLLERR|POLLPRI, and select will return the fd whether
+ * it is waiting for read, write, or exceptions.
+ * Once poll/select indicates that the value has changed, you
+ * need to close and re-open the file, or seek to 0 and read again.
+ * Reminder: this only works for attributes which actively support
+ * it, and it is not possible to test an attribute from userspace
+ * to see if it supports poll (Neither 'poll' nor 'select' return
+ * an appropriate error code).  When in doubt, set a suitable timeout value.
+ *
+ * Returns DEFAULT_POLLMASK, with POLLERR|POLLPRI added when either an
+ * active reference on the node cannot be obtained or the open dirent's
+ * event counter differs from the value recorded in the open file.
+ */
+static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
+{
+        struct sysfs_open_file *of = sysfs_of(filp);
+        struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+        /*
+         * NOTE(review): read without sysfs_open_dirent_lock; presumably safe
+         * because this open file pins the open dirent - confirm.
+         */
+        struct sysfs_open_dirent *od = attr_sd->s_attr.open;
+        /* hold an active reference on the node while queueing on its waitqueue */
+        if (!sysfs_get_active(attr_sd))
+                goto trigger;
+        poll_wait(filp, &od->poll, wait);
+        sysfs_put_active(attr_sd);
+        /* kernfs_notify() bumps od->event; a mismatch means a notification is pending */
+        if (of->event != atomic_read(&od->event))
+                goto trigger;
+        return DEFAULT_POLLMASK;
+ trigger:
+        return DEFAULT_POLLMASK|POLLERR|POLLPRI;
+}
+/**
+ * kernfs_notify - notify a kernfs file
+ * @sd: file to notify
+ *
+ * Notify @sd such that poll(2) on @sd wakes up.
+ *
+ * Increments the event counter of @sd's open dirent and wakes up its poll
+ * waitqueue.  Nothing happens if @sd has no open dirent.  If @sd is not a
+ * SYSFS_KOBJ_ATTR node, a warning is emitted and the node is left alone.
+ *
+ * sysfs_open_dirent_lock is taken with spin_lock_irqsave(), so the
+ * caller's interrupt state is preserved.
+ */
+void kernfs_notify(struct sysfs_dirent *sd)
+{
+        struct sysfs_open_dirent *od;
+        unsigned long flags;
+        spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
+        if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
+                od = sd->s_attr.open;
+                if (od) {
+                        atomic_inc(&od->event);
+                        wake_up_interruptible(&od->poll);
+                }
+        }
+        spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
+}
+EXPORT_SYMBOL_GPL(kernfs_notify);
+/*
+ * File operations of kernfs regular files.  Reads, writes, mmap, open,
+ * release and poll are routed to the kernfs_file_*() handlers of this
+ * file; seeking uses generic_file_llseek().
+ */
+const struct file_operations kernfs_file_operations = {
+        .read           = kernfs_file_read,
+        .write          = kernfs_file_write,
+        .llseek         = generic_file_llseek,
+        .mmap           = kernfs_file_mmap,
+        .open           = kernfs_file_open,
+        .release        = kernfs_file_release,
+        .poll           = kernfs_file_poll,
+};
+/**
+ * kernfs_create_file_ns_key - create a file
+ * @parent: directory to create the file in
+ * @name: name of the file
+ * @mode: mode of the file
+ * @size: size of the file
+ * @ops: kernfs operations for the file
+ * @priv: private data for the file
+ * @ns: optional namespace tag of the file
+ * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
+ *
+ * Only the S_IALLUGO bits of @mode are used; the node is always created
+ * as a regular file (S_IFREG).  @ops is dereferenced unconditionally and
+ * must not be %NULL.  @key is only looked at when CONFIG_DEBUG_LOCK_ALLOC
+ * is enabled.
+ *
+ * Returns the created node on success, ERR_PTR() value on error:
+ * -ENOMEM if the node cannot be allocated, otherwise the error returned
+ * by sysfs_add_one(), in which case the new node is put again.
+ */
+struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
+                                               const char *name,
+                                               umode_t mode, loff_t size,
+                                               const struct kernfs_ops *ops,
+                                               void *priv, const void *ns,
+                                               struct lock_class_key *key)
+{
+        struct sysfs_addrm_cxt acxt;
+        struct sysfs_dirent *sd;
+        int rc;
+        sd = sysfs_new_dirent(kernfs_root(parent), name,
+                              (mode & S_IALLUGO) | S_IFREG, SYSFS_KOBJ_ATTR);
+        if (!sd)
+                return ERR_PTR(-ENOMEM);
+        sd->s_attr.ops = ops;
+        sd->s_attr.size = size;
+        sd->s_ns = ns;
+        sd->priv = priv;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        if (key) {
+                lockdep_init_map(&sd->dep_map, "s_active", key, 0);
+                sd->s_flags |= SYSFS_FLAG_LOCKDEP;
+        }
+#endif
+        /*
+         * sd->s_attr.ops is accessible only while holding active ref.  We
+         * need to know whether some ops are implemented outside active
+         * ref.  Cache their existence in flags.
+         */
+        if (ops->seq_show)
+                sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
+        if (ops->mmap)
+                sd->s_flags |= SYSFS_FLAG_HAS_MMAP;
+        /* link the fully initialized node into @parent */
+        sysfs_addrm_start(&acxt);
+        rc = sysfs_add_one(&acxt, sd, parent);
+        sysfs_addrm_finish(&acxt);
+        if (rc) {
+                kernfs_put(sd);
+                return ERR_PTR(rc);
+        }
+        return sd;
+}
diff --git a/fs/sysfs/inode.c b/fs/kernfs/inode.c
index 1750f790af3b..18ad431e8c2a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -1,28 +1,22 @@
 /*
- * fs/sysfs/inode.c - basic sysfs inode and dentry operations
+ * fs/kernfs/inode.c - kernfs inode implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
- * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
- *
- * Please see Documentation/filesystems/sysfs.txt for more information.
 */
-#undef DEBUG
 #include <linux/pagemap.h>
-#include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/sysfs.h>
 #include <linux/xattr.h>
 #include <linux/security.h>
-#include "sysfs.h"
+#include "kernfs-internal.h"
 static const struct address_space_operations sysfs_aops = {
        .readpage       = simple_readpage,
@@ -41,22 +35,28 @@ static const struct inode_operations sysfs_inode_operations = {
        .setattr        = sysfs_setattr,
        .getattr        = sysfs_getattr,
        .setxattr       = sysfs_setxattr,
+        .removexattr    = sysfs_removexattr,
+        .getxattr       = sysfs_getxattr,
+        .listxattr      = sysfs_listxattr,
 };
-int __init sysfs_inode_init(void)
+void __init sysfs_inode_init(void)
 {
-        return bdi_init(&sysfs_backing_dev_info);
+        if (bdi_init(&sysfs_backing_dev_info))
+                panic("failed to init sysfs_backing_dev_info");
 }
-static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+static struct sysfs_inode_attrs *sysfs_inode_attrs(struct sysfs_dirent *sd)
 {
-        struct sysfs_inode_attrs *attrs;
        struct iattr *iattrs;
-        attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+        if (sd->s_iattr)
-        if (!attrs)
+                return sd->s_iattr;
+        sd->s_iattr = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+        if (!sd->s_iattr)
                return NULL;
-        iattrs = &attrs->ia_iattr;
+        iattrs = &sd->s_iattr->ia_iattr;
        /* assign default attributes */
        iattrs->ia_mode = sd->s_mode;
@@ -64,26 +64,22 @@ static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
        iattrs->ia_gid = GLOBAL_ROOT_GID;
        iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
-        return attrs;
+        simple_xattrs_init(&sd->s_iattr->xattrs);
+        return sd->s_iattr;
 }
-int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
+static int __kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr)
 {
-        struct sysfs_inode_attrs *sd_attrs;
+        struct sysfs_inode_attrs *attrs;
        struct iattr *iattrs;
        unsigned int ia_valid = iattr->ia_valid;
-        sd_attrs = sd->s_iattr;
+        attrs = sysfs_inode_attrs(sd);
+        if (!attrs)
+                return -ENOMEM;
-        if (!sd_attrs) {
+        iattrs = &attrs->ia_iattr;
-                /* setting attributes for the first time, allocate now */
-                sd_attrs = sysfs_init_inode_attrs(sd);
-                if (!sd_attrs)
-                        return -ENOMEM;
-                sd->s_iattr = sd_attrs;
-        }
-        /* attributes were changed at least once in past */
-        iattrs = &sd_attrs->ia_iattr;
        if (ia_valid & ATTR_UID)
                iattrs->ia_uid = iattr->ia_uid;
@@ -102,6 +98,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
        return 0;
 }
+/**
+ * kernfs_setattr - set iattr on a node
+ * @sd: target node
+ * @iattr: iattr to set
+ *
+ * Locked wrapper around __kernfs_setattr(): sysfs_mutex is taken for the
+ * duration of the update, so the caller must not already hold it.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr)
+{
+        int ret;
+        mutex_lock(&sysfs_mutex);
+        ret = __kernfs_setattr(sd, iattr);
+        mutex_unlock(&sysfs_mutex);
+        return ret;
+}
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
        struct inode *inode = dentry->d_inode;
@@ -116,7 +129,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
        if (error)
                goto out;
-        error = sysfs_sd_setattr(sd, iattr);
+        error = __kernfs_setattr(sd, iattr);
        if (error)
                goto out;
@@ -131,22 +144,19 @@ out:
 static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata,
                               u32 *secdata_len)
 {
-        struct sysfs_inode_attrs *iattrs;
+        struct sysfs_inode_attrs *attrs;
        void *old_secdata;
        size_t old_secdata_len;
-        if (!sd->s_iattr) {
+        attrs = sysfs_inode_attrs(sd);
-                sd->s_iattr = sysfs_init_inode_attrs(sd);
+        if (!attrs)
-                if (!sd->s_iattr)
+                return -ENOMEM;
-                        return -ENOMEM;
-        }
-        iattrs = sd->s_iattr;
+        old_secdata = attrs->ia_secdata;
-        old_secdata = iattrs->ia_secdata;
+        old_secdata_len = attrs->ia_secdata_len;
-        old_secdata_len = iattrs->ia_secdata_len;
-        iattrs->ia_secdata = *secdata;
+        attrs->ia_secdata = *secdata;
-        iattrs->ia_secdata_len = *secdata_len;
+        attrs->ia_secdata_len = *secdata_len;
        *secdata = old_secdata;
        *secdata_len = old_secdata_len;
@@ -157,23 +167,25 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                size_t size, int flags)
 {
        struct sysfs_dirent *sd = dentry->d_fsdata;
+        struct sysfs_inode_attrs *attrs;
        void *secdata;
        int error;
        u32 secdata_len = 0;
-        if (!sd)
+        attrs = sysfs_inode_attrs(sd);
-                return -EINVAL;
+        if (!attrs)
+                return -ENOMEM;
        if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
                const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
                error = security_inode_setsecurity(dentry->d_inode, suffix,
                                                value, size, flags);
                if (error)
-                        goto out;
+                        return error;
                error = security_inode_getsecctx(dentry->d_inode,
                                                &secdata, &secdata_len);
                if (error)
-                        goto out;
+                        return error;
                mutex_lock(&sysfs_mutex);
                error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
@@ -181,10 +193,50 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                if (secdata)
                        security_release_secctx(secdata, secdata_len);
-        } else
+                return error;
-                return -EINVAL;
+        } else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
-out:
+                return simple_xattr_set(&attrs->xattrs, name, value, size,
-        return error;
+                                        flags);
+        }
+        return -EINVAL;
+}
+/*
+ * ->removexattr(): remove the extended attribute @name from the
+ * simple_xattrs store of the node behind @dentry.
+ *
+ * The node's sysfs_inode_attrs is allocated on first use, so -ENOMEM is
+ * possible here; otherwise the result of simple_xattr_remove() is
+ * returned.  No filtering on the attribute name prefix is done here.
+ *
+ * NOTE(review): sysfs_inode_attrs() may allocate sd->s_iattr and is called
+ * here without sysfs_mutex - confirm that callers serialize this.
+ */
+int sysfs_removexattr(struct dentry *dentry, const char *name)
+{
+        struct sysfs_dirent *sd = dentry->d_fsdata;
+        struct sysfs_inode_attrs *attrs;
+        attrs = sysfs_inode_attrs(sd);
+        if (!attrs)
+                return -ENOMEM;
+        return simple_xattr_remove(&attrs->xattrs, name);
+}
+/*
+ * ->getxattr(): look up the extended attribute @name in the simple_xattrs
+ * store of the node behind @dentry and copy it into @buf (@size bytes).
+ *
+ * The node's sysfs_inode_attrs is allocated on first use, so even this
+ * read path can fail with -ENOMEM; otherwise the result of
+ * simple_xattr_get() is returned.
+ *
+ * NOTE(review): sysfs_inode_attrs() may allocate sd->s_iattr and is called
+ * here without sysfs_mutex - confirm that callers serialize this.
+ */
+ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf,
+                       size_t size)
+{
+        struct sysfs_dirent *sd = dentry->d_fsdata;
+        struct sysfs_inode_attrs *attrs;
+        attrs = sysfs_inode_attrs(sd);
+        if (!attrs)
+                return -ENOMEM;
+        return simple_xattr_get(&attrs->xattrs, name, buf, size);
+}
+/*
+ * ->listxattr(): list the extended attributes held in the simple_xattrs
+ * store of the node behind @dentry into @buf (@size bytes).
+ *
+ * The node's sysfs_inode_attrs is allocated on first use, so even this
+ * read path can fail with -ENOMEM; otherwise the result of
+ * simple_xattr_list() is returned.
+ *
+ * NOTE(review): sysfs_inode_attrs() may allocate sd->s_iattr and is called
+ * here without sysfs_mutex - confirm that callers serialize this.
+ */
+ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size)
+{
+        struct sysfs_dirent *sd = dentry->d_fsdata;
+        struct sysfs_inode_attrs *attrs;
+        attrs = sysfs_inode_attrs(sd);
+        if (!attrs)
+                return -ENOMEM;
+        return simple_xattr_list(&attrs->xattrs, buf, size);
 }
 static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -204,17 +256,16 @@ static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
 static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
-        struct sysfs_inode_attrs *iattrs = sd->s_iattr;
+        struct sysfs_inode_attrs *attrs = sd->s_iattr;
        inode->i_mode = sd->s_mode;
-        if (iattrs) {
+        if (attrs) {
                /* sysfs_dirent has non-default attributes
                 * get them from persistent copy in sysfs_dirent
                 */
-                set_inode_attr(inode, &iattrs->ia_iattr);
+                set_inode_attr(inode, &attrs->ia_iattr);
-                security_inode_notifysecctx(inode,
+                security_inode_notifysecctx(inode, attrs->ia_secdata,
-                                            iattrs->ia_secdata,
+                                            attrs->ia_secdata_len);
-                                            iattrs->ia_secdata_len);
        }
        if (sysfs_type(sd) == SYSFS_DIR)
@@ -237,9 +288,8 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
-        struct bin_attribute *bin_attr;
+        kernfs_get(sd);
+        inode->i_private = sd;
-        inode->i_private = sysfs_get(sd);
        inode->i_mapping->a_ops = &sysfs_aops;
        inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
        inode->i_op = &sysfs_inode_operations;
@@ -254,13 +304,8 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
                inode->i_fop = &sysfs_dir_operations;
                break;
        case SYSFS_KOBJ_ATTR:
-                inode->i_size = PAGE_SIZE;
+                inode->i_size = sd->s_attr.size;
-                inode->i_fop = &sysfs_file_operations;
+                inode->i_fop = &kernfs_file_operations;
-                break;
-        case SYSFS_KOBJ_BIN_ATTR:
-                bin_attr = sd->s_attr.bin_attr;
-                inode->i_size = bin_attr->size;
-                inode->i_fop = &sysfs_bin_operations;
                break;
        case SYSFS_KOBJ_LINK:
                inode->i_op = &sysfs_symlink_inode_operations;
@@ -311,7 +356,7 @@ void sysfs_evict_inode(struct inode *inode)
        truncate_inode_pages(&inode->i_data, 0);
        clear_inode(inode);
-        sysfs_put(sd);
+        kernfs_put(sd);
 }
 int sysfs_permission(struct inode *inode, int mask)
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
new file mode 100644
index 000000000000..910e485b7333
--- /dev/null
+++ b/fs/kernfs/kernfs-internal.h
@@ -0,0 +1,122 @@
+/*
+ * fs/kernfs/kernfs-internal.h - kernfs internal header file
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ */
+#ifndef __KERNFS_INTERNAL_H
+#define __KERNFS_INTERNAL_H
+#include <linux/lockdep.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/xattr.h>
+#include <linux/kernfs.h>
+/*
+ * Non-default inode attributes of a node, hung off sysfs_dirent->s_iattr
+ * and allocated lazily the first time they are needed.
+ */
+struct sysfs_inode_attrs {
+        struct iattr            ia_iattr;       /* persistent copy of mode/uid/gid/times */
+        void                    *ia_secdata;    /* security context handed to the LSM */
+        u32                     ia_secdata_len; /* length of @ia_secdata */
+        struct simple_xattrs    xattrs;         /* store behind the xattr inode ops */
+};
+#define SD_DEACTIVATED_BIAS             INT_MIN
+/* SYSFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
+/**
+ * kernfs_root - find out the kernfs_root a sysfs_dirent belongs to
+ * @sd: sysfs_dirent of interest
+ *
+ * Return the kernfs_root @sd belongs to.
+ *
+ * The root pointer is read from a directory node's s_dir: from @sd's
+ * parent if it has one, from @sd itself otherwise.  @sd is dereferenced
+ * unconditionally and must not be %NULL.
+ */
+static inline struct kernfs_root *kernfs_root(struct sysfs_dirent *sd)
+{
+        /* if parent exists, it's always a dir; otherwise, @sd is a dir */
+        if (sd->s_parent)
+                sd = sd->s_parent;
+        return sd->s_dir.root;
+}
+/*
+ * Context structure to be used while adding/removing nodes.
+ *
+ * An instance is passed to sysfs_addrm_start(), then to the individual
+ * operations such as sysfs_add_one(), and finally to sysfs_addrm_finish().
+ */
+struct sysfs_addrm_cxt {
+        /* presumably the nodes removed during this transaction - TODO confirm */
+        struct sysfs_dirent     *removed;
+};
+/*
+ * mount.c
+ */
+/*
+ * Per-super_block data, stored in sb->s_fs_info and accessed through
+ * sysfs_info().
+ */
+struct sysfs_super_info {
+        /*
+         * The root associated with this super_block.  Each super_block is
+         * identified by the root and ns it's associated with.
+         */
+        struct kernfs_root      *root;
+        /*
+         * Each sb is associated with one namespace tag, currently the network
+         * namespace of the task which mounted this sysfs instance.  If multiple
+         * tags become necessary, make the following an array and compare
+         * sysfs_dirent tag against every entry.
+         */
+        const void              *ns;
+};
+/*
+ * Return the sysfs_super_info stored in @SB->s_fs_info.  @SB is
+ * parenthesized so that any pointer expression can be passed safely.
+ */
+#define sysfs_info(SB) ((struct sysfs_super_info *)((SB)->s_fs_info))
+extern struct kmem_cache *sysfs_dir_cachep;
+/*
+ * inode.c
+ */
+struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
+void sysfs_evict_inode(struct inode *inode);
+int sysfs_permission(struct inode *inode, int mask);
+int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                  struct kstat *stat);
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+                   size_t size, int flags);
+int sysfs_removexattr(struct dentry *dentry, const char *name);
+ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf,
+                       size_t size);
+ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size);
+void sysfs_inode_init(void);
+/*
+ * dir.c
+ */
+extern struct mutex sysfs_mutex;
+extern const struct dentry_operations sysfs_dentry_ops;
+extern const struct file_operations sysfs_dir_operations;
+extern const struct inode_operations sysfs_dir_inode_operations;
+struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
+void sysfs_put_active(struct sysfs_dirent *sd);
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+                  struct sysfs_dirent *parent_sd);
+void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
+struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root,
+                                      const char *name, umode_t mode, int type);
+/*
+ * file.c
+ */
+extern const struct file_operations kernfs_file_operations;
+void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
+/*
+ * symlink.c
+ */
+extern const struct inode_operations sysfs_symlink_inode_operations;
+#endif  /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
new file mode 100644
index 000000000000..84c83e24bf25
--- /dev/null
+++ b/fs/kernfs/mount.c
@@ -0,0 +1,165 @@
+/*
+ * fs/kernfs/mount.c - kernfs mount implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/init.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "kernfs-internal.h"
+struct kmem_cache *sysfs_dir_cachep;
+/*
+ * Super block operations, installed as sb->s_op by sysfs_fill_super().
+ * Inode eviction goes through sysfs_evict_inode(); statfs and drop_inode
+ * use the generic VFS helpers.
+ */
+static const struct super_operations sysfs_ops = {
+        .statfs         = simple_statfs,
+        .drop_inode     = generic_delete_inode,
+        .evict_inode    = sysfs_evict_inode,
+};
+/*
+ * Initialize a freshly allocated super_block: set the basic sb fields,
+ * instantiate the inode and dentry of the hierarchy's root node and
+ * install them as sb->s_root.  The sysfs_super_info must already be
+ * attached to @sb (it is read through sysfs_info()).
+ *
+ * Returns 0 on success, -ENOMEM if the root inode or dentry cannot be
+ * created.
+ */
+static int sysfs_fill_super(struct super_block *sb)
+{
+        struct sysfs_super_info *info = sysfs_info(sb);
+        struct inode *inode;
+        struct dentry *root;
+        sb->s_blocksize = PAGE_CACHE_SIZE;
+        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+        sb->s_magic = SYSFS_MAGIC;
+        sb->s_op = &sysfs_ops;
+        sb->s_time_gran = 1;
+        /* get root inode, initialize and unlock it */
+        mutex_lock(&sysfs_mutex);
+        inode = sysfs_get_inode(sb, info->root->sd);
+        mutex_unlock(&sysfs_mutex);
+        if (!inode) {
+                pr_debug("sysfs: could not get root inode\n");
+                return -ENOMEM;
+        }
+        /* instantiate and link root dentry */
+        root = d_make_root(inode);
+        if (!root) {
+                pr_debug("%s: could not get root dentry!\n", __func__);
+                return -ENOMEM;
+        }
+        /* the root dentry holds its own reference on the root node */
+        kernfs_get(info->root->sd);
+        root->d_fsdata = info->root->sd;
+        sb->s_root = root;
+        sb->s_d_op = &sysfs_dentry_ops;
+        return 0;
+}
+/*
+ * sget() comparison callback.  @data is the sysfs_super_info describing
+ * the mount being requested; returns non-zero when @sb was set up for the
+ * same root and the same namespace tag.
+ */
+static int sysfs_test_super(struct super_block *sb, void *data)
+{
+        struct sysfs_super_info *sb_info = sysfs_info(sb);
+        struct sysfs_super_info *info = data;
+        return sb_info->root == info->root && sb_info->ns == info->ns;
+}
+/*
+ * sget() setup callback for a new super_block.  Runs set_anon_super()
+ * and, only if that succeeds, attaches @data (the sysfs_super_info) as
+ * sb->s_fs_info.  Returns the result of set_anon_super().
+ */
+static int sysfs_set_super(struct super_block *sb, void *data)
+{
+        int error;
+        error = set_anon_super(sb, data);
+        if (!error)
+                sb->s_fs_info = data;
+        return error;
+}
+/**
+ * kernfs_super_ns - determine the namespace tag of a kernfs super_block
+ * @sb: super_block of interest
+ *
+ * Return the namespace tag associated with kernfs super_block @sb.  The
+ * tag is read from the sysfs_super_info attached to @sb, so @sb must be a
+ * kernfs super_block.
+ */
+const void *kernfs_super_ns(struct super_block *sb)
+{
+        struct sysfs_super_info *info = sysfs_info(sb);
+        return info->ns;
+}
+/**
+ * kernfs_mount_ns - kernfs mount helper
+ * @fs_type: file_system_type of the fs being mounted
+ * @flags: mount flags specified for the mount
+ * @root: kernfs_root of the hierarchy being mounted
+ * @ns: optional namespace tag of the mount
+ *
+ * This is to be called from each kernfs user's file_system_type->mount()
+ * implementation, which should pass through the specified @fs_type and
+ * @flags, and specify the hierarchy and namespace tag to mount via @root
+ * and @ns, respectively.
+ *
+ * A super_block matching both @root and @ns is reused if sget() finds
+ * one; otherwise a new one is set up and filled.
+ *
+ * The return value can be passed to the vfs layer verbatim.  It is a
+ * referenced root dentry on success and an ERR_PTR() value on failure
+ * (-ENOMEM if the sysfs_super_info cannot be allocated).
+ */
+struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
+                               struct kernfs_root *root, const void *ns)
+{
+        struct super_block *sb;
+        struct sysfs_super_info *info;
+        int error;
+        info = kzalloc(sizeof(*info), GFP_KERNEL);
+        if (!info)
+                return ERR_PTR(-ENOMEM);
+        info->root = root;
+        info->ns = ns;
+        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
+        /* @info is only kept when it ended up attached to the returned sb */
+        if (IS_ERR(sb) || sb->s_fs_info != info)
+                kfree(info);
+        if (IS_ERR(sb))
+                return ERR_CAST(sb);
+        /* a super_block without a root dentry has not been filled yet */
+        if (!sb->s_root) {
+                error = sysfs_fill_super(sb);
+                if (error) {
+                        deactivate_locked_super(sb);
+                        return ERR_PTR(error);
+                }
+                sb->s_flags |= MS_ACTIVE;
+        }
+        return dget(sb->s_root);
+}
+/**
+ * kernfs_kill_sb - kill_sb for kernfs
+ * @sb: super_block being killed
+ *
+ * This can be used directly for file_system_type->kill_sb().  If a kernfs
+ * user needs extra cleanup, it can implement its own kill_sb() and call
+ * this function at the end.
+ *
+ * Frees the sysfs_super_info attached to @sb and puts the root node
+ * reference held through the root dentry's d_fsdata.  sb->s_root is
+ * dereferenced unconditionally.
+ */
+void kernfs_kill_sb(struct super_block *sb)
+{
+        /* both are looked up now because they are used after kill_anon_super() */
+        struct sysfs_super_info *info = sysfs_info(sb);
+        struct sysfs_dirent *root_sd = sb->s_root->d_fsdata;
+        /*
+         * Remove the superblock from fs_supers/s_instances
+         * so we can't find it, before freeing sysfs_super_info.
+         */
+        kill_anon_super(sb);
+        kfree(info);
+        kernfs_put(root_sd);
+}
+/*
+ * Boot-time initialization of kernfs: create the slab cache used for
+ * sysfs_dirent objects and run sysfs_inode_init().  The cache is created
+ * with SLAB_PANIC, and no return value is checked here.
+ */
+void __init kernfs_init(void)
+{
+        sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
+                                              sizeof(struct sysfs_dirent),
+                                              0, SLAB_PANIC, NULL);
+        sysfs_inode_init();
+}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
new file mode 100644
index 000000000000..adf28755b0ee
--- /dev/null
+++ b/fs/kernfs/symlink.c
@@ -0,0 +1,152 @@
+/*
+ * fs/kernfs/symlink.c - kernfs symlink implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/namei.h>
+#include "kernfs-internal.h"
+/**
+ * kernfs_create_link - create a symlink
+ * @parent: directory to create the symlink in
+ * @name: name of the symlink
+ * @target: target node for the symlink to point to
+ *
+ * The symlink is created with mode S_IFLNK|S_IRWXUGO and holds a
+ * reference on @target.  If namespaces are enabled on @parent, the
+ * symlink inherits @target's namespace tag.
+ *
+ * Returns the created node on success, ERR_PTR() value on error:
+ * -ENOMEM if the node cannot be allocated, otherwise the error returned
+ * by sysfs_add_one(), in which case the new node is put again.
+ */
+struct sysfs_dirent *kernfs_create_link(struct sysfs_dirent *parent,
+                                        const char *name,
+                                        struct sysfs_dirent *target)
+{
+        struct sysfs_dirent *sd;
+        struct sysfs_addrm_cxt acxt;
+        int error;
+        sd = sysfs_new_dirent(kernfs_root(parent), name, S_IFLNK|S_IRWXUGO,
+                              SYSFS_KOBJ_LINK);
+        if (!sd)
+                return ERR_PTR(-ENOMEM);
+        if (kernfs_ns_enabled(parent))
+                sd->s_ns = target->s_ns;
+        sd->s_symlink.target_sd = target;
+        kernfs_get(target);     /* ref owned by symlink */
+        sysfs_addrm_start(&acxt);
+        error = sysfs_add_one(&acxt, sd, parent);
+        sysfs_addrm_finish(&acxt);
+        if (!error)
+                return sd;
+        kernfs_put(sd);
+        return ERR_PTR(error);
+}
+/*
+ * sysfs_get_target_path - build the relative path from a symlink to its target
+ * @parent_sd: directory containing the symlink
+ * @target_sd: node the symlink points to
+ * @path: output buffer of at least PATH_MAX bytes, zeroed by the caller
+ *
+ * Emits one "../" for every level between @parent_sd and the closest
+ * ancestor it shares with @target_sd, followed by the '/'-separated names
+ * leading from that ancestor down to @target_sd.
+ *
+ * The target part is written without a terminator; the result is
+ * NUL-terminated only because @path arrives zeroed.  The path therefore
+ * has to stay strictly shorter than PATH_MAX so that at least one zero
+ * byte survives inside the buffer.
+ *
+ * Returns 0 on success, -EINVAL if the computed target part is empty and
+ * -ENAMETOOLONG if the path does not fit into @path.
+ */
+static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
+                                 struct sysfs_dirent *target_sd, char *path)
+{
+        struct sysfs_dirent *base, *sd;
+        char *s = path;
+        int len = 0;
+        /* go up to the root, stop at the base */
+        base = parent_sd;
+        while (base->s_parent) {
+                sd = target_sd->s_parent;
+                while (sd->s_parent && base != sd)
+                        sd = sd->s_parent;
+                if (base == sd)
+                        break;
+                /* strcpy() stores "../" plus a NUL; keep all of it in bounds */
+                if ((s - path) + 3 >= PATH_MAX)
+                        return -ENAMETOOLONG;
+                strcpy(s, "../");
+                s += 3;
+                base = base->s_parent;
+        }
+        /* determine end of target string for reverse fillup */
+        sd = target_sd;
+        while (sd->s_parent && sd != base) {
+                len += strlen(sd->s_name) + 1;
+                sd = sd->s_parent;
+        }
+        /* check limits */
+        if (len < 2)
+                return -EINVAL;
+        len--;
+        /* '>=' rather than '>': one byte must remain for the terminator */
+        if ((s - path) + len >= PATH_MAX)
+                return -ENAMETOOLONG;
+        /* reverse fillup of target string from target to base */
+        sd = target_sd;
+        while (sd->s_parent && sd != base) {
+                int slen = strlen(sd->s_name);
+                len -= slen;
+                /* fixed-length copy, deliberately without a terminator */
+                memcpy(s + len, sd->s_name, slen);
+                if (len)
+                        s[--len] = '/';
+                sd = sd->s_parent;
+        }
+        return 0;
+}
+/*
+ * Write the relative path of the symlink behind @dentry into @path.
+ * The symlink's parent and target are taken from its sysfs_dirent and the
+ * path is computed by sysfs_get_target_path() under sysfs_mutex.
+ *
+ * Returns whatever sysfs_get_target_path() returns.
+ */
+static int sysfs_getlink(struct dentry *dentry, char *path)
+{
+        struct sysfs_dirent *sd = dentry->d_fsdata;
+        struct sysfs_dirent *parent_sd = sd->s_parent;
+        struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
+        int error;
+        mutex_lock(&sysfs_mutex);
+        error = sysfs_get_target_path(parent_sd, target_sd, path);
+        mutex_unlock(&sysfs_mutex);
+        return error;
+}
+/*
+ * ->follow_link(): allocate a zeroed page, fill it with the link target
+ * path and hand it to the VFS through nd_set_link().  On failure the page
+ * is freed again and an ERR_PTR() is set as the link instead (-ENOMEM if
+ * the page cannot be allocated).
+ *
+ * Always returns NULL; the page is released later by sysfs_put_link().
+ */
+static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+        int error = -ENOMEM;
+        unsigned long page = get_zeroed_page(GFP_KERNEL);
+        if (page) {
+                error = sysfs_getlink(dentry, (char *) page);
+                if (error < 0)
+                        free_page((unsigned long)page);
+        }
+        nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
+        return NULL;
+}
+/*
+ * ->put_link(): free the page installed by sysfs_follow_link().  Nothing
+ * is freed when the stored link is an ERR_PTR() value.  @dentry and
+ * @cookie are unused.
+ */
+static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
+                           void *cookie)
+{
+        char *page = nd_get_link(nd);
+        if (!IS_ERR(page))
+                free_page((unsigned long)page);
+}
+/*
+ * Inode operations of kernfs symlinks.  Link resolution is handled by
+ * sysfs_follow_link()/sysfs_put_link() with generic_readlink(); attribute,
+ * xattr and permission handling use the common sysfs_*() inode operations.
+ */
+const struct inode_operations sysfs_symlink_inode_operations = {
+        .setxattr       = sysfs_setxattr,
+        .removexattr    = sysfs_removexattr,
+        .getxattr       = sysfs_getxattr,
+        .listxattr      = sysfs_listxattr,
+        .readlink       = generic_readlink,
+        .follow_link    = sysfs_follow_link,
+        .put_link       = sysfs_put_link,
+        .setattr        = sysfs_setattr,
+        .getattr        = sysfs_getattr,
+        .permission     = sysfs_permission,
+};
diff --git a/fs/namespace.c b/fs/namespace.c
index ac2ce8a766e1..a511ea003f89 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2790,6 +2790,8 @@ void __init mnt_init(void)
        for (u = 0; u < HASH_SIZE; u++)
                INIT_LIST_HEAD(&mountpoint_hashtable[u]);
+        kernfs_init();
        err = sysfs_init();
        if (err)
                printk(KERN_WARNING "%s: sysfs_init error: %d\n",
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac183373..6eff6e1205a5 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,4 @@
 # Makefile for the sysfs virtual filesystem
 #
-obj-y           := inode.o file.o dir.o symlink.o mount.o group.o
+obj-y           := file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d6626e50..2fea501889e7 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -13,446 +13,12 @@
 #undef DEBUG
 #include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/module.h>
 #include <linux/kobject.h>
-#include <linux/namei.h>
-#include <linux/idr.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/security.h>
-#include <linux/hash.h>
 #include "sysfs.h"
-DEFINE_MUTEX(sysfs_mutex);
 DEFINE_SPINLOCK(sysfs_symlink_target_lock);
-#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
-static DEFINE_SPINLOCK(sysfs_ino_lock);
-static DEFINE_IDA(sysfs_ino_ida);
-/**
- *      sysfs_name_hash
- *      @name: Null terminated string to hash
- *      @ns:   Namespace tag to hash
- *
- *      Returns 31 bit hash of ns + name (so it fits in an off_t )
- */
-static unsigned int sysfs_name_hash(const char *name, const void *ns)
-{
-        unsigned long hash = init_name_hash();
-        unsigned int len = strlen(name);
-        while (len--)
-                hash = partial_name_hash(*name++, hash);
-        hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
-        hash &= 0x7fffffffU;
-        /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
-        if (hash < 1)
-                hash += 2;
-        if (hash >= INT_MAX)
-                hash = INT_MAX - 1;
-        return hash;
-}
-static int sysfs_name_compare(unsigned int hash, const char *name,
-                              const void *ns, const struct sysfs_dirent *sd)
-{
-        if (hash != sd->s_hash)
-                return hash - sd->s_hash;
-        if (ns != sd->s_ns)
-                return ns - sd->s_ns;
-        return strcmp(name, sd->s_name);
-}
-static int sysfs_sd_compare(const struct sysfs_dirent *left,
-                            const struct sysfs_dirent *right)
-{
-        return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
-                                  right);
-}
-/**
- *      sysfs_link_sibling - link sysfs_dirent into sibling rbtree
- *      @sd: sysfs_dirent of interest
- *
- *      Link @sd into its sibling rbtree which starts from
- *      sd->s_parent->s_dir.children.
- *
- *      Locking:
- *      mutex_lock(sysfs_mutex)
- *
- *      RETURNS:
- *      0 on susccess -EEXIST on failure.
- */
-static int sysfs_link_sibling(struct sysfs_dirent *sd)
-{
-        struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
-        struct rb_node *parent = NULL;
-        if (sysfs_type(sd) == SYSFS_DIR)
-                sd->s_parent->s_dir.subdirs++;
-        while (*node) {
-                struct sysfs_dirent *pos;
-                int result;
-                pos = to_sysfs_dirent(*node);
-                parent = *node;
-                result = sysfs_sd_compare(sd, pos);
-                if (result < 0)
-                        node = &pos->s_rb.rb_left;
-                else if (result > 0)
-                        node = &pos->s_rb.rb_right;
-                else
-                        return -EEXIST;
-        }
-        /* add new node and rebalance the tree */
-        rb_link_node(&sd->s_rb, parent, node);
-        rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
-        return 0;
-}
-/**
- *      sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
- *      @sd: sysfs_dirent of interest
- *
- *      Unlink @sd from its sibling rbtree which starts from
- *      sd->s_parent->s_dir.children.
- *
- *      Locking:
- *      mutex_lock(sysfs_mutex)
- */
-static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
-{
-        if (sysfs_type(sd) == SYSFS_DIR)
-                sd->s_parent->s_dir.subdirs--;
-        rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
-}
-/**
- *      sysfs_get_active - get an active reference to sysfs_dirent
- *      @sd: sysfs_dirent to get an active reference to
- *
- *      Get an active reference of @sd.  This function is noop if @sd
- *      is NULL.
- *
- *      RETURNS:
- *      Pointer to @sd on success, NULL on failure.
- */
-struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
-{
-        if (unlikely(!sd))
-                return NULL;
-        if (!atomic_inc_unless_negative(&sd->s_active))
-                return NULL;
-        if (likely(!sysfs_ignore_lockdep(sd)))
-                rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
-        return sd;
-}
-/**
- *      sysfs_put_active - put an active reference to sysfs_dirent
- *      @sd: sysfs_dirent to put an active reference to
- *
- *      Put an active reference to @sd.  This function is noop if @sd
- *      is NULL.
- */
-void sysfs_put_active(struct sysfs_dirent *sd)
-{
-        int v;
-        if (unlikely(!sd))
-                return;
-        if (likely(!sysfs_ignore_lockdep(sd)))
-                rwsem_release(&sd->dep_map, 1, _RET_IP_);
-        v = atomic_dec_return(&sd->s_active);
-        if (likely(v != SD_DEACTIVATED_BIAS))
-                return;
-        /* atomic_dec_return() is a mb(), we'll always see the updated
-         * sd->u.completion.
-         */
-        complete(sd->u.completion);
-}
-/**
- *      sysfs_deactivate - deactivate sysfs_dirent
- *      @sd: sysfs_dirent to deactivate
- *
- *      Deny new active references and drain existing ones.
- */
-static void sysfs_deactivate(struct sysfs_dirent *sd)
-{
-        DECLARE_COMPLETION_ONSTACK(wait);
-        int v;
-        BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
-        if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
-                return;
-        sd->u.completion = (void *)&wait;
-        rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
-        /* atomic_add_return() is a mb(), put_active() will always see
-         * the updated sd->u.completion.
-         */
-        v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
-        if (v != SD_DEACTIVATED_BIAS) {
-                lock_contended(&sd->dep_map, _RET_IP_);
-                wait_for_completion(&wait);
-        }
-        lock_acquired(&sd->dep_map, _RET_IP_);
-        rwsem_release(&sd->dep_map, 1, _RET_IP_);
-}
-static int sysfs_alloc_ino(unsigned int *pino)
-{
-        int ino, rc;
- retry:
-        spin_lock(&sysfs_ino_lock);
-        rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
-        spin_unlock(&sysfs_ino_lock);
-        if (rc == -EAGAIN) {
-                if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
-                        goto retry;
-                rc = -ENOMEM;
-        }
-        *pino = ino;
-        return rc;
-}
-static void sysfs_free_ino(unsigned int ino)
-{
-        spin_lock(&sysfs_ino_lock);
-        ida_remove(&sysfs_ino_ida, ino);
-        spin_unlock(&sysfs_ino_lock);
-}
-void release_sysfs_dirent(struct sysfs_dirent *sd)
-{
-        struct sysfs_dirent *parent_sd;
- repeat:
-        /* Moving/renaming is always done while holding reference.
-         * sd->s_parent won't change beneath us.
-         */
-        parent_sd = sd->s_parent;
-        WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
-                "sysfs: free using entry: %s/%s\n",
-                parent_sd ? parent_sd->s_name : "", sd->s_name);
-        if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
-                sysfs_put(sd->s_symlink.target_sd);
-        if (sysfs_type(sd) & SYSFS_COPY_NAME)
-                kfree(sd->s_name);
-        if (sd->s_iattr && sd->s_iattr->ia_secdata)
-                security_release_secctx(sd->s_iattr->ia_secdata,
-                                        sd->s_iattr->ia_secdata_len);
-        kfree(sd->s_iattr);
-        sysfs_free_ino(sd->s_ino);
-        kmem_cache_free(sysfs_dir_cachep, sd);
-        sd = parent_sd;
-        if (sd && atomic_dec_and_test(&sd->s_count))
-                goto repeat;
-}
-static int sysfs_dentry_delete(const struct dentry *dentry)
-{
-        struct sysfs_dirent *sd = dentry->d_fsdata;
-        return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
-}
-static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
-{
-        struct sysfs_dirent *sd;
-        int type;
-        if (flags & LOOKUP_RCU)
-                return -ECHILD;
-        sd = dentry->d_fsdata;
-        mutex_lock(&sysfs_mutex);
-        /* The sysfs dirent has been deleted */
-        if (sd->s_flags & SYSFS_FLAG_REMOVED)
-                goto out_bad;
-        /* The sysfs dirent has been moved? */
-        if (dentry->d_parent->d_fsdata != sd->s_parent)
-                goto out_bad;
-        /* The sysfs dirent has been renamed */
-        if (strcmp(dentry->d_name.name, sd->s_name) != 0)
-                goto out_bad;
-        /* The sysfs dirent has been moved to a different namespace */
-        type = KOBJ_NS_TYPE_NONE;
-        if (sd->s_parent) {
-                type = sysfs_ns_type(sd->s_parent);
-                if (type != KOBJ_NS_TYPE_NONE &&
-                                sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
-                        goto out_bad;
-        }
-        mutex_unlock(&sysfs_mutex);
-out_valid:
-        return 1;
-out_bad:
-        /* Remove the dentry from the dcache hashes.
-         * If this is a deleted dentry we use d_drop instead of d_delete
-         * so sysfs doesn't need to cope with negative dentries.
-         *
-         * If this is a dentry that has simply been renamed we
-         * use d_drop to remove it from the dcache lookup on its
-         * old parent.  If this dentry persists later when a lookup
-         * is performed at its new name the dentry will be readded
-         * to the dcache hashes.
-         */
-        mutex_unlock(&sysfs_mutex);
-        /* If we have submounts we must allow the vfs caches
-         * to lie about the state of the filesystem to prevent
-         * leaks and other nasty things.
-         */
-        if (check_submounts_and_drop(dentry) != 0)
-                goto out_valid;
-        return 0;
-}
-static void sysfs_dentry_release(struct dentry *dentry)
-{
-        sysfs_put(dentry->d_fsdata);
-}
-const struct dentry_operations sysfs_dentry_ops = {
-        .d_revalidate   = sysfs_dentry_revalidate,
-        .d_delete       = sysfs_dentry_delete,
-        .d_release      = sysfs_dentry_release,
-};
-struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
-{
-        char *dup_name = NULL;
-        struct sysfs_dirent *sd;
-        if (type & SYSFS_COPY_NAME) {
-                name = dup_name = kstrdup(name, GFP_KERNEL);
-                if (!name)
-                        return NULL;
-        }
-        sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
-        if (!sd)
-                goto err_out1;
-        if (sysfs_alloc_ino(&sd->s_ino))
-                goto err_out2;
-        atomic_set(&sd->s_count, 1);
-        atomic_set(&sd->s_active, 0);
-        sd->s_name = name;
-        sd->s_mode = mode;
-        sd->s_flags = type | SYSFS_FLAG_REMOVED;
-        return sd;
- err_out2:
-        kmem_cache_free(sysfs_dir_cachep, sd);
- err_out1:
-        kfree(dup_name);
-        return NULL;
-}
-/**
- *      sysfs_addrm_start - prepare for sysfs_dirent add/remove
- *      @acxt: pointer to sysfs_addrm_cxt to be used
- *
- *      This function is called when the caller is about to add or remove
- *      sysfs_dirent.  This function acquires sysfs_mutex.  @acxt is used
- *      to keep and pass context to other addrm functions.
- *
- *      LOCKING:
- *      Kernel thread context (may sleep).  sysfs_mutex is locked on
- *      return.
- */
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
-        __acquires(sysfs_mutex)
-{
-        memset(acxt, 0, sizeof(*acxt));
-        mutex_lock(&sysfs_mutex);
-}
-/**
- *      __sysfs_add_one - add sysfs_dirent to parent without warning
- *      @acxt: addrm context to use
- *      @sd: sysfs_dirent to be added
- *      @parent_sd: the parent sysfs_dirent to add @sd to
- *
- *      Get @parent_sd and set @sd->s_parent to it and increment nlink of
- *      the parent inode if @sd is a directory and link into the children
- *      list of the parent.
- *
- *      This function should be called between calls to
- *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
- *      passed the same @acxt as passed to sysfs_addrm_start().
- *
- *      LOCKING:
- *      Determined by sysfs_addrm_start().
- *
- *      RETURNS:
- *      0 on success, -EEXIST if entry with the given name already
- *      exists.
- */
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
-                    struct sysfs_dirent *parent_sd)
-{
-        struct sysfs_inode_attrs *ps_iattr;
-        int ret;
-        if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
-                WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
-                        sysfs_ns_type(parent_sd) ? "required" : "invalid",
-                        parent_sd->s_name, sd->s_name);
-                return -EINVAL;
-        }
-        sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
-        sd->s_parent = sysfs_get(parent_sd);
-        ret = sysfs_link_sibling(sd);
-        if (ret)
-                return ret;
-        /* Update timestamps on the parent */
-        ps_iattr = parent_sd->s_iattr;
-        if (ps_iattr) {
-                struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
-                ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
-        }
-        /* Mark the entry added into directory tree */
-        sd->s_flags &= ~SYSFS_FLAG_REMOVED;
-        return 0;
-}
 /**
 *      sysfs_pathname - return full path to sysfs dirent
 *      @sd: sysfs_dirent whose path we want
@@ -489,445 +55,33 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
 }
 /**
- *      sysfs_add_one - add sysfs_dirent to parent
- *      @acxt: addrm context to use
- *      @sd: sysfs_dirent to be added
- *      @parent_sd: the parent sysfs_dirent to add @sd to
- *
- *      Get @parent_sd and set @sd->s_parent to it and increment nlink of
- *      the parent inode if @sd is a directory and link into the children
- *      list of the parent.
- *
- *      This function should be called between calls to
- *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
- *      passed the same @acxt as passed to sysfs_addrm_start().
- *
- *      LOCKING:
- *      Determined by sysfs_addrm_start().
- *
- *      RETURNS:
- *      0 on success, -EEXIST if entry with the given name already
- *      exists.
- */
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
-                  struct sysfs_dirent *parent_sd)
-{
-        int ret;
-        ret = __sysfs_add_one(acxt, sd, parent_sd);
-        if (ret == -EEXIST)
-                sysfs_warn_dup(parent_sd, sd->s_name);
-        return ret;
-}
-/**
- *      sysfs_remove_one - remove sysfs_dirent from parent
- *      @acxt: addrm context to use
- *      @sd: sysfs_dirent to be removed
- *
- *      Mark @sd removed and drop nlink of parent inode if @sd is a
- *      directory.  @sd is unlinked from the children list.
- *
- *      This function should be called between calls to
- *      sysfs_addrm_start() and sysfs_addrm_finish() and should be
- *      passed the same @acxt as passed to sysfs_addrm_start().
- *
- *      LOCKING:
- *      Determined by sysfs_addrm_start().
- */
-static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
-                             struct sysfs_dirent *sd)
-{
-        struct sysfs_inode_attrs *ps_iattr;
-        /*
-         * Removal can be called multiple times on the same node.  Only the
-         * first invocation is effective and puts the base ref.
-         */
-        if (sd->s_flags & SYSFS_FLAG_REMOVED)
-                return;
-        sysfs_unlink_sibling(sd);
-        /* Update timestamps on the parent */
-        ps_iattr = sd->s_parent->s_iattr;
-        if (ps_iattr) {
-                struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
-                ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
-        }
-        sd->s_flags |= SYSFS_FLAG_REMOVED;
-        sd->u.removed_list = acxt->removed;
-        acxt->removed = sd;
-}
-/**
- *      sysfs_addrm_finish - finish up sysfs_dirent add/remove
- *      @acxt: addrm context to finish up
- *
- *      Finish up sysfs_dirent add/remove.  Resources acquired by
- *      sysfs_addrm_start() are released and removed sysfs_dirents are
- *      cleaned up.
- *
- *      LOCKING:
- *      sysfs_mutex is released.
- */
-void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
-        __releases(sysfs_mutex)
-{
-        /* release resources acquired by sysfs_addrm_start() */
-        mutex_unlock(&sysfs_mutex);
-        /* kill removed sysfs_dirents */
-        while (acxt->removed) {
-                struct sysfs_dirent *sd = acxt->removed;
-                acxt->removed = sd->u.removed_list;
-                sysfs_deactivate(sd);
-                sysfs_unmap_bin_file(sd);
-                sysfs_put(sd);
-        }
-}
-/**
- *      sysfs_find_dirent - find sysfs_dirent with the given name
- *      @parent_sd: sysfs_dirent to search under
- *      @name: name to look for
- *      @ns: the namespace tag to use
- *
- *      Look for sysfs_dirent with name @name under @parent_sd.
- *
- *      LOCKING:
- *      mutex_lock(sysfs_mutex)
- *
- *      RETURNS:
- *      Pointer to sysfs_dirent if found, NULL if not.
- */
-struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
-                                       const unsigned char *name,
-                                       const void *ns)
-{
-        struct rb_node *node = parent_sd->s_dir.children.rb_node;
-        unsigned int hash;
-        if (!!sysfs_ns_type(parent_sd) != !!ns) {
-                WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
-                        sysfs_ns_type(parent_sd) ? "required" : "invalid",
-                        parent_sd->s_name, name);
-                return NULL;
-        }
-        hash = sysfs_name_hash(name, ns);
-        while (node) {
-                struct sysfs_dirent *sd;
-                int result;
-                sd = to_sysfs_dirent(node);
-                result = sysfs_name_compare(hash, name, ns, sd);
-                if (result < 0)
-                        node = node->rb_left;
-                else if (result > 0)
-                        node = node->rb_right;
-                else
-                        return sd;
-        }
-        return NULL;
-}
-/**
- *      sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
- *      @parent_sd: sysfs_dirent to search under
- *      @name: name to look for
- *      @ns: the namespace tag to use
- *
- *      Look for sysfs_dirent with name @name under @parent_sd and get
- *      it if found.
- *
- *      LOCKING:
- *      Kernel thread context (may sleep).  Grabs sysfs_mutex.
- *
- *      RETURNS:
- *      Pointer to sysfs_dirent if found, NULL if not.
- */
-struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
-                                         const unsigned char *name,
-                                         const void *ns)
-{
-        struct sysfs_dirent *sd;
-        mutex_lock(&sysfs_mutex);
-        sd = sysfs_find_dirent(parent_sd, name, ns);
-        sysfs_get(sd);
-        mutex_unlock(&sysfs_mutex);
-        return sd;
-}
-EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
-static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
-                      enum kobj_ns_type type,
-                      const char *name, const void *ns,
-                      struct sysfs_dirent **p_sd)
-{
-        umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-        struct sysfs_addrm_cxt acxt;
-        struct sysfs_dirent *sd;
-        int rc;
-        /* allocate */
-        sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
-        if (!sd)
-                return -ENOMEM;
-        sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
-        sd->s_ns = ns;
-        sd->s_dir.kobj = kobj;
-        /* link in */
-        sysfs_addrm_start(&acxt);
-        rc = sysfs_add_one(&acxt, sd, parent_sd);
-        sysfs_addrm_finish(&acxt);
-        if (rc == 0)
-                *p_sd = sd;
-        else
-                sysfs_put(sd);
-        return rc;
-}
-int sysfs_create_subdir(struct kobject *kobj, const char *name,
-                        struct sysfs_dirent **p_sd)
-{
-        return create_dir(kobj, kobj->sd,
-                          KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
-}
-/**
- *      sysfs_read_ns_type: return associated ns_type
- *      @kobj: the kobject being queried
- *
- *      Each kobject can be tagged with exactly one namespace type
- *      (i.e. network or user).  Return the ns_type associated with
- *      this object if any
- */
-static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
-{
-        const struct kobj_ns_type_operations *ops;
-        enum kobj_ns_type type;
-        ops = kobj_child_ns_ops(kobj);
-        if (!ops)
-                return KOBJ_NS_TYPE_NONE;
-        type = ops->type;
-        BUG_ON(type <= KOBJ_NS_TYPE_NONE);
-        BUG_ON(type >= KOBJ_NS_TYPES);
-        BUG_ON(!kobj_ns_type_registered(type));
-        return type;
-}
-/**
 * sysfs_create_dir_ns - create a directory for an object with a namespace tag
 * @kobj: object we're creating directory for
 * @ns: the namespace tag to use
 */
 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
-        enum kobj_ns_type type;
        struct sysfs_dirent *parent_sd, *sd;
-        int error = 0;
        BUG_ON(!kobj);
        if (kobj->parent)
                parent_sd = kobj->parent->sd;
        else
-                parent_sd = &sysfs_root;
+                parent_sd = sysfs_root_sd;
        if (!parent_sd)
                return -ENOENT;
-        type = sysfs_read_ns_type(kobj);
+        sd = kernfs_create_dir_ns(parent_sd, kobject_name(kobj), kobj, ns);
+        if (IS_ERR(sd)) {
-        error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd);
+                if (PTR_ERR(sd) == -EEXIST)
-        if (!error)
+                        sysfs_warn_dup(parent_sd, kobject_name(kobj));
-                kobj->sd = sd;
+                return PTR_ERR(sd);
-        return error;
-}
-static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
-                                   unsigned int flags)
-{
-        struct dentry *ret = NULL;
-        struct dentry *parent = dentry->d_parent;
-        struct sysfs_dirent *parent_sd = parent->d_fsdata;
-        struct sysfs_dirent *sd;
-        struct inode *inode;
-        enum kobj_ns_type type;
-        const void *ns;
-        mutex_lock(&sysfs_mutex);
-        type = sysfs_ns_type(parent_sd);
-        ns = sysfs_info(dir->i_sb)->ns[type];
-        sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
-        /* no such entry */
-        if (!sd) {
-                ret = ERR_PTR(-ENOENT);
-                goto out_unlock;
-        }
-        dentry->d_fsdata = sysfs_get(sd);
-        /* attach dentry and inode */
-        inode = sysfs_get_inode(dir->i_sb, sd);
-        if (!inode) {
-                ret = ERR_PTR(-ENOMEM);
-                goto out_unlock;
        }
-        /* instantiate and hash dentry */
+        kobj->sd = sd;
-        ret = d_materialise_unique(dentry, inode);
+        return 0;
- out_unlock:
-        mutex_unlock(&sysfs_mutex);
-        return ret;
-}
-const struct inode_operations sysfs_dir_inode_operations = {
-        .lookup         = sysfs_lookup,
-        .permission     = sysfs_permission,
-        .setattr        = sysfs_setattr,
-        .getattr        = sysfs_getattr,
-        .setxattr       = sysfs_setxattr,
-};
-static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
-{
-        struct sysfs_dirent *last;
-        while (true) {
-                struct rb_node *rbn;
-                last = pos;
-                if (sysfs_type(pos) != SYSFS_DIR)
-                        break;
-                rbn = rb_first(&pos->s_dir.children);
-                if (!rbn)
-                        break;
-                pos = to_sysfs_dirent(rbn);
-        }
-        return last;
-}
-/**
- * sysfs_next_descendant_post - find the next descendant for post-order walk
- * @pos: the current position (%NULL to initiate traversal)
- * @root: sysfs_dirent whose descendants to walk
- *
- * Find the next descendant to visit for post-order traversal of @root's
- * descendants.  @root is included in the iteration and the last node to be
- * visited.
- */
-static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
-                                                       struct sysfs_dirent *root)
-{
-        struct rb_node *rbn;
-        lockdep_assert_held(&sysfs_mutex);
-        /* if first iteration, visit leftmost descendant which may be root */
-        if (!pos)
-                return sysfs_leftmost_descendant(root);
-        /* if we visited @root, we're done */
-        if (pos == root)
-                return NULL;
-        /* if there's an unvisited sibling, visit its leftmost descendant */
-        rbn = rb_next(&pos->s_rb);
-        if (rbn)
-                return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
-        /* no sibling left, visit parent */
-        return pos->s_parent;
-}
-static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
-                           struct sysfs_dirent *sd)
-{
-        struct sysfs_dirent *pos, *next;
-        if (!sd)
-                return;
-        pr_debug("sysfs %s: removing\n", sd->s_name);
-        next = NULL;
-        do {
-                pos = next;
-                next = sysfs_next_descendant_post(pos, sd);
-                if (pos)
-                        sysfs_remove_one(acxt, pos);
-        } while (next);
-}
-/**
- * sysfs_remove - remove a sysfs_dirent recursively
- * @sd: the sysfs_dirent to remove
- *
- * Remove @sd along with all its subdirectories and files.
- */
-void sysfs_remove(struct sysfs_dirent *sd)
-{
-        struct sysfs_addrm_cxt acxt;
-        sysfs_addrm_start(&acxt);
-        __sysfs_remove(&acxt, sd);
-        sysfs_addrm_finish(&acxt);
-}
-/**
- * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
- * @dir_sd: parent of the target
- * @name: name of the sysfs_dirent to remove
- * @ns: namespace tag of the sysfs_dirent to remove
- *
- * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
- * it.  Returns 0 on success, -ENOENT if such entry doesn't exist.
- */
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
-                          const void *ns)
-{
-        struct sysfs_addrm_cxt acxt;
-        struct sysfs_dirent *sd;
-        if (!dir_sd) {
-                WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
-                        name);
-                return -ENOENT;
-        }
-        sysfs_addrm_start(&acxt);
-        sd = sysfs_find_dirent(dir_sd, name, ns);
-        if (sd)
-                __sysfs_remove(&acxt, sd);
-        sysfs_addrm_finish(&acxt);
-        if (sd)
-                return 0;
-        else
-                return -ENOENT;
 }
 /**
@@ -960,60 +114,16 @@ void sysfs_remove_dir(struct kobject *kobj)
        if (sd) {
                WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
-                sysfs_remove(sd);
+                kernfs_remove(sd);
        }
 }
-int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
-                 const char *new_name, const void *new_ns)
-{
-        int error;
-        mutex_lock(&sysfs_mutex);
-        error = 0;
-        if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
-            (strcmp(sd->s_name, new_name) == 0))
-                goto out;       /* nothing to rename */
-        error = -EEXIST;
-        if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
-                goto out;
-        /* rename sysfs_dirent */
-        if (strcmp(sd->s_name, new_name) != 0) {
-                error = -ENOMEM;
-                new_name = kstrdup(new_name, GFP_KERNEL);
-                if (!new_name)
-                        goto out;
-                kfree(sd->s_name);
-                sd->s_name = new_name;
-        }
-        /*
-         * Move to the appropriate place in the appropriate directories rbtree.
-         */
-        sysfs_unlink_sibling(sd);
-        sysfs_get(new_parent_sd);
-        sysfs_put(sd->s_parent);
-        sd->s_ns = new_ns;
-        sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
-        sd->s_parent = new_parent_sd;
-        sysfs_link_sibling(sd);
-        error = 0;
- out:
-        mutex_unlock(&sysfs_mutex);
-        return error;
-}
 int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
                        const void *new_ns)
 {
        struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
-        return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns);
+        return kernfs_rename_ns(kobj->sd, parent_sd, new_name, new_ns);
 }
 int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
@@ -1024,123 +134,7 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
        BUG_ON(!sd->s_parent);
        new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
-                new_parent_kobj->sd : &sysfs_root;
+                new_parent_kobj->sd : sysfs_root_sd;
-        return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns);
+        return kernfs_rename_ns(sd, new_parent_sd, sd->s_name, new_ns);
 }
-/* Relationship between s_mode and the DT_xxx types */
-static inline unsigned char dt_type(struct sysfs_dirent *sd)
-{
-        return (sd->s_mode >> 12) & 15;
-}
-static int sysfs_dir_release(struct inode *inode, struct file *filp)
-{
-        sysfs_put(filp->private_data);
-        return 0;
-}
-static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
-        struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
-{
-        if (pos) {
-                int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
-                        pos->s_parent == parent_sd &&
-                        hash == pos->s_hash;
-                sysfs_put(pos);
-                if (!valid)
-                        pos = NULL;
-        }
-        if (!pos && (hash > 1) && (hash < INT_MAX)) {
-                struct rb_node *node = parent_sd->s_dir.children.rb_node;
-                while (node) {
-                        pos = to_sysfs_dirent(node);
-                        if (hash < pos->s_hash)
-                                node = node->rb_left;
-                        else if (hash > pos->s_hash)
-                                node = node->rb_right;
-                        else
-                                break;
-                }
-        }
-        /* Skip over entries in the wrong namespace */
-        while (pos && pos->s_ns != ns) {
-                struct rb_node *node = rb_next(&pos->s_rb);
-                if (!node)
-                        pos = NULL;
-                else
-                        pos = to_sysfs_dirent(node);
-        }
-        return pos;
-}
-static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
-        struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
-{
-        pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
-        if (pos)
-                do {
-                        struct rb_node *node = rb_next(&pos->s_rb);
-                        if (!node)
-                                pos = NULL;
-                        else
-                                pos = to_sysfs_dirent(node);
-                } while (pos && pos->s_ns != ns);
-        return pos;
-}
-static int sysfs_readdir(struct file *file, struct dir_context *ctx)
-{
-        struct dentry *dentry = file->f_path.dentry;
-        struct sysfs_dirent *parent_sd = dentry->d_fsdata;
-        struct sysfs_dirent *pos = file->private_data;
-        enum kobj_ns_type type;
-        const void *ns;
-        type = sysfs_ns_type(parent_sd);
-        ns = sysfs_info(dentry->d_sb)->ns[type];
-        if (!dir_emit_dots(file, ctx))
-                return 0;
-        mutex_lock(&sysfs_mutex);
-        for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
-             pos;
-             pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
-                const char *name = pos->s_name;
-                unsigned int type = dt_type(pos);
-                int len = strlen(name);
-                ino_t ino = pos->s_ino;
-                ctx->pos = pos->s_hash;
-                file->private_data = sysfs_get(pos);
-                mutex_unlock(&sysfs_mutex);
-                if (!dir_emit(ctx, name, len, ino, type))
-                        return 0;
-                mutex_lock(&sysfs_mutex);
-        }
-        mutex_unlock(&sysfs_mutex);
-        file->private_data = NULL;
-        ctx->pos = INT_MAX;
-        return 0;
-}
-static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
-{
-        struct inode *inode = file_inode(file);
-        loff_t ret;
-        mutex_lock(&inode->i_mutex);
-        ret = generic_file_llseek(file, offset, whence);
-        mutex_unlock(&inode->i_mutex);
-        return ret;
-}
-const struct file_operations sysfs_dir_operations = {
-        .read           = generic_read_dir,
-        .iterate        = sysfs_readdir,
-        .release        = sysfs_dir_release,
-        .llseek         = sysfs_dir_llseek,
-};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 35e7d08fe629..ac77d2be3c31 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,59 +14,12 @@
 #include <linux/kobject.h>
 #include <linux/kallsyms.h>
 #include <linux/slab.h>
-#include <linux/fsnotify.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/limits.h>
-#include <linux/uaccess.h>
 #include <linux/seq_file.h>
-#include <linux/mm.h>
 #include "sysfs.h"
+#include "../kernfs/kernfs-internal.h"
-/*
- * There's one sysfs_open_file for each open file and one sysfs_open_dirent
- * for each sysfs_dirent with one or more open files.
- *
- * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open is
- * protected by sysfs_open_dirent_lock.
- *
- * filp->private_data points to seq_file whose ->private points to
- * sysfs_open_file.  sysfs_open_files are chained at
- * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
- */
-static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
-static DEFINE_MUTEX(sysfs_open_file_mutex);
-struct sysfs_open_dirent {
-        atomic_t                refcnt;
-        atomic_t                event;
-        wait_queue_head_t       poll;
-        struct list_head        files; /* goes through sysfs_open_file.list */
-};
-struct sysfs_open_file {
-        struct sysfs_dirent     *sd;
-        struct file             *file;
-        struct mutex            mutex;
-        int                     event;
-        struct list_head        list;
-        bool                    mmapped;
-        const struct vm_operations_struct *vm_ops;
-};
-static bool sysfs_is_bin(struct sysfs_dirent *sd)
-{
-        return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
-}
-static struct sysfs_open_file *sysfs_of(struct file *file)
-{
-        return ((struct seq_file *)file->private_data)->private;
-}
 /*
 * Determine ktype->sysfs_ops for the given sysfs_dirent.  This function
@@ -74,9 +27,9 @@ static struct sysfs_open_file *sysfs_of(struct file *file)
 */
 static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
 {
-        struct kobject *kobj = sd->s_parent->s_dir.kobj;
+        struct kobject *kobj = sd->s_parent->priv;
-        if (!sysfs_ignore_lockdep(sd))
+        if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
                lockdep_assert_held(sd);
        return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
 }
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
 * details like buffering and seeking.  The following function pipes
 * sysfs_ops->show() result through seq_file.
 */
-static int sysfs_seq_show(struct seq_file *sf, void *v)
+static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
 {
        struct sysfs_open_file *of = sf->private;
-        struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+        struct kobject *kobj = of->sd->s_parent->priv;
-        const struct sysfs_ops *ops;
+        const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
-        char *buf;
        ssize_t count;
+        char *buf;
        /* acquire buffer and ensure that it's >= PAGE_SIZE */
        count = seq_get_buf(sf, &buf);
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
        }
        /*
-         * Need @of->sd for attr and ops, its parent for kobj.  @of->mutex
+         * Invoke show().  Control may reach here via seq file lseek even
-         * nests outside active ref and is just to ensure that the ops
+         * if @ops->show() isn't implemented.
-         * aren't called concurrently for the same open file.
         */
-        mutex_lock(&of->mutex);
+        if (ops->show) {
-        if (!sysfs_get_active(of->sd)) {
+                count = ops->show(kobj, of->sd->priv, buf);
-                mutex_unlock(&of->mutex);
+                if (count < 0)
-                return -ENODEV;
+                        return count;
        }
-        of->event = atomic_read(&of->sd->s_attr.open->event);
-        /*
-         * Lookup @ops and invoke show().  Control may reach here via seq
-         * file lseek even if @ops->show() isn't implemented.
-         */
-        ops = sysfs_file_ops(of->sd);
-        if (ops->show)
-                count = ops->show(kobj, of->sd->s_attr.attr, buf);
-        else
-                count = 0;
-        sysfs_put_active(of->sd);
-        mutex_unlock(&of->mutex);
-        if (count < 0)
-                return count;
        /*
         * The code works fine with PAGE_SIZE return but it's likely to
         * indicate truncated result or overflow in normal use cases.
@@ -144,726 +78,190 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
        return 0;
 }
-/*
+static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf,
- * Read method for bin files.  As reading a bin file can have side-effects,
+                                 size_t count, loff_t pos)
- * the exact offset and bytes specified in read(2) call should be passed to
- * the read callback making it difficult to use seq_file.  Implement
- * simplistic custom buffering for bin files.
- */
-static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
-                              size_t bytes, loff_t *off)
 {
-        struct sysfs_open_file *of = sysfs_of(file);
+        struct bin_attribute *battr = of->sd->priv;
-        struct bin_attribute *battr = of->sd->s_attr.bin_attr;
+        struct kobject *kobj = of->sd->s_parent->priv;
-        struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+        loff_t size = file_inode(of->file)->i_size;
-        loff_t size = file_inode(file)->i_size;
-        int count = min_t(size_t, bytes, PAGE_SIZE);
-        loff_t offs = *off;
-        char *buf;
-        if (!bytes)
+        if (!count)
                return 0;
        if (size) {
-                if (offs > size)
+                if (pos > size)
                        return 0;
-                if (offs + count > size)
+                if (pos + count > size)
-                        count = size - offs;
+                        count = size - pos;
-        }
-        buf = kmalloc(count, GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
-        /* need of->sd for battr, its parent for kobj */
-        mutex_lock(&of->mutex);
-        if (!sysfs_get_active(of->sd)) {
-                count = -ENODEV;
-                mutex_unlock(&of->mutex);
-                goto out_free;
        }
-        if (battr->read)
+        if (!battr->read)
-                count = battr->read(file, kobj, battr, buf, offs, count);
+                return -EIO;
-        else
-                count = -EIO;
-        sysfs_put_active(of->sd);
-        mutex_unlock(&of->mutex);
-        if (count < 0)
-                goto out_free;
-        if (copy_to_user(userbuf, buf, count)) {
-                count = -EFAULT;
-                goto out_free;
-        }
-        pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
-        *off = offs + count;
+        return battr->read(of->file, kobj, battr, buf, pos, count);
- out_free:
-        kfree(buf);
-        return count;
 }
-/**
+/* kernfs write callback for regular sysfs files */
- * flush_write_buffer - push buffer to kobject
+static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf,
- * @of: open file
+                              size_t count, loff_t pos)
- * @buf: data buffer for file
- * @off: file offset to write to
- * @count: number of bytes
- *
- * Get the correct pointers for the kobject and the attribute we're dealing
- * with, then call the store() method for it with @buf.
- */
-static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
-                              size_t count)
 {
-        struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+        const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
-        int rc = 0;
+        struct kobject *kobj = of->sd->s_parent->priv;
-        /*
-         * Need @of->sd for attr and ops, its parent for kobj.  @of->mutex
-         * nests outside active ref and is just to ensure that the ops
-         * aren't called concurrently for the same open file.
-         */
-        mutex_lock(&of->mutex);
-        if (!sysfs_get_active(of->sd)) {
-                mutex_unlock(&of->mutex);
-                return -ENODEV;
-        }
-        if (sysfs_is_bin(of->sd)) {
-                struct bin_attribute *battr = of->sd->s_attr.bin_attr;
-                rc = -EIO;
+        if (!count)
-                if (battr->write)
+                return 0;
-                        rc = battr->write(of->file, kobj, battr, buf, off,
-                                          count);
-        } else {
-                const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
-                rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
-        }
-        sysfs_put_active(of->sd);
-        mutex_unlock(&of->mutex);
-        return rc;
+        return ops->store(kobj, of->sd->priv, buf, count);
 }
-/**
+/* kernfs write callback for bin sysfs files */
- * sysfs_write_file - write an attribute
+static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf,
- * @file: file pointer
+                                  size_t count, loff_t pos)
- * @user_buf: data to write
- * @count: number of bytes
- * @ppos: starting offset
- *
- * Copy data in from userland and pass it to the matching
- * sysfs_ops->store() by invoking flush_write_buffer().
- *
- * There is no easy way for us to know if userspace is only doing a partial
- * write, so we don't support them. We expect the entire buffer to come on
- * the first write.  Hint: if you're writing a value, first read the file,
- * modify only the the value you're changing, then write entire buffer
- * back.
- */
-static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
-                                size_t count, loff_t *ppos)
 {
-        struct sysfs_open_file *of = sysfs_of(file);
+        struct bin_attribute *battr = of->sd->priv;
-        ssize_t len = min_t(size_t, count, PAGE_SIZE);
+        struct kobject *kobj = of->sd->s_parent->priv;
-        loff_t size = file_inode(file)->i_size;
+        loff_t size = file_inode(of->file)->i_size;
-        char *buf;
-        if (sysfs_is_bin(of->sd) && size) {
+        if (size) {
-                if (size <= *ppos)
+                if (size <= pos)
                        return 0;
-                len = min_t(ssize_t, len, size - *ppos);
+                count = min_t(ssize_t, count, size - pos);
        }
+        if (!count)
-        if (!len)
                return 0;
-        buf = kmalloc(len + 1, GFP_KERNEL);
+        if (!battr->write)
-        if (!buf)
+                return -EIO;
-                return -ENOMEM;
-        if (copy_from_user(buf, user_buf, len)) {
+        return battr->write(of->file, kobj, battr, buf, pos, count);
-                len = -EFAULT;
-                goto out_free;
-        }
-        buf[len] = '\0';        /* guarantee string termination */
-        len = flush_write_buffer(of, buf, *ppos, len);
-        if (len > 0)
-                *ppos += len;
-out_free:
-        kfree(buf);
-        return len;
 }
-static void sysfs_bin_vma_open(struct vm_area_struct *vma)
+static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
+                             struct vm_area_struct *vma)
 {
-        struct file *file = vma->vm_file;
+        struct bin_attribute *battr = of->sd->priv;
-        struct sysfs_open_file *of = sysfs_of(file);
+        struct kobject *kobj = of->sd->s_parent->priv;
-        if (!of->vm_ops)
-                return;
-        if (!sysfs_get_active(of->sd))
-                return;
-        if (of->vm_ops->open)
-                of->vm_ops->open(vma);
-        sysfs_put_active(of->sd);
-}
-static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        int ret;
-        if (!of->vm_ops)
-                return VM_FAULT_SIGBUS;
-        if (!sysfs_get_active(of->sd))
-                return VM_FAULT_SIGBUS;
-        ret = VM_FAULT_SIGBUS;
-        if (of->vm_ops->fault)
-                ret = of->vm_ops->fault(vma, vmf);
-        sysfs_put_active(of->sd);
-        return ret;
-}
-static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma,
-                                  struct vm_fault *vmf)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        int ret;
-        if (!of->vm_ops)
-                return VM_FAULT_SIGBUS;
-        if (!sysfs_get_active(of->sd))
-                return VM_FAULT_SIGBUS;
-        ret = 0;
-        if (of->vm_ops->page_mkwrite)
-                ret = of->vm_ops->page_mkwrite(vma, vmf);
-        else
-                file_update_time(file);
-        sysfs_put_active(of->sd);
-        return ret;
-}
-static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
-                            void *buf, int len, int write)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        int ret;
-        if (!of->vm_ops)
-                return -EINVAL;
-        if (!sysfs_get_active(of->sd))
-                return -EINVAL;
-        ret = -EINVAL;
-        if (of->vm_ops->access)
-                ret = of->vm_ops->access(vma, addr, buf, len, write);
-        sysfs_put_active(of->sd);
-        return ret;
-}
-#ifdef CONFIG_NUMA
-static int sysfs_bin_set_policy(struct vm_area_struct *vma,
-                                struct mempolicy *new)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        int ret;
-        if (!of->vm_ops)
-                return 0;
-        if (!sysfs_get_active(of->sd))
-                return -EINVAL;
-        ret = 0;
-        if (of->vm_ops->set_policy)
-                ret = of->vm_ops->set_policy(vma, new);
-        sysfs_put_active(of->sd);
-        return ret;
-}
-static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
-                                              unsigned long addr)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        struct mempolicy *pol;
-        if (!of->vm_ops)
-                return vma->vm_policy;
-        if (!sysfs_get_active(of->sd))
-                return vma->vm_policy;
-        pol = vma->vm_policy;
-        if (of->vm_ops->get_policy)
-                pol = of->vm_ops->get_policy(vma, addr);
-        sysfs_put_active(of->sd);
-        return pol;
-}
-static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
-                             const nodemask_t *to, unsigned long flags)
-{
-        struct file *file = vma->vm_file;
-        struct sysfs_open_file *of = sysfs_of(file);
-        int ret;
-        if (!of->vm_ops)
-                return 0;
-        if (!sysfs_get_active(of->sd))
-                return 0;
-        ret = 0;
-        if (of->vm_ops->migrate)
-                ret = of->vm_ops->migrate(vma, from, to, flags);
-        sysfs_put_active(of->sd);
-        return ret;
-}
-#endif
-static const struct vm_operations_struct sysfs_bin_vm_ops = {
-        .open           = sysfs_bin_vma_open,
-        .fault          = sysfs_bin_fault,
-        .page_mkwrite   = sysfs_bin_page_mkwrite,
-        .access         = sysfs_bin_access,
-#ifdef CONFIG_NUMA
-        .set_policy     = sysfs_bin_set_policy,
-        .get_policy     = sysfs_bin_get_policy,
-        .migrate        = sysfs_bin_migrate,
-#endif
-};
-static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
-{
-        struct sysfs_open_file *of = sysfs_of(file);
-        struct bin_attribute *battr = of->sd->s_attr.bin_attr;
-        struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
-        int rc;
-        mutex_lock(&of->mutex);
-        /* need of->sd for battr, its parent for kobj */
-        rc = -ENODEV;
-        if (!sysfs_get_active(of->sd))
-                goto out_unlock;
        if (!battr->mmap)
-                goto out_put;
+                return -ENODEV;
-        rc = battr->mmap(file, kobj, battr, vma);
-        if (rc)
-                goto out_put;
-        /*
-         * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
-         * to satisfy versions of X which crash if the mmap fails: that
-         * substitutes a new vm_file, and we don't then want bin_vm_ops.
-         */
-        if (vma->vm_file != file)
-                goto out_put;
-        rc = -EINVAL;
-        if (of->mmapped && of->vm_ops != vma->vm_ops)
-                goto out_put;
-        /*
-         * It is not possible to successfully wrap close.
-         * So error if someone is trying to use close.
-         */
-        rc = -EINVAL;
-        if (vma->vm_ops && vma->vm_ops->close)
-                goto out_put;
-        rc = 0;
-        of->mmapped = 1;
-        of->vm_ops = vma->vm_ops;
-        vma->vm_ops = &sysfs_bin_vm_ops;
-out_put:
-        sysfs_put_active(of->sd);
-out_unlock:
-        mutex_unlock(&of->mutex);
-        return rc;
-}
-/**
- *      sysfs_get_open_dirent - get or create sysfs_open_dirent
- *      @sd: target sysfs_dirent
- *      @of: sysfs_open_file for this instance of open
- *
- *      If @sd->s_attr.open exists, increment its reference count;
- *      otherwise, create one.  @of is chained to the files list.
- *
- *      LOCKING:
- *      Kernel thread context (may sleep).
- *
- *      RETURNS:
- *      0 on success, -errno on failure.
- */
-static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
-                                 struct sysfs_open_file *of)
-{
-        struct sysfs_open_dirent *od, *new_od = NULL;
- retry:
-        mutex_lock(&sysfs_open_file_mutex);
-        spin_lock_irq(&sysfs_open_dirent_lock);
-        if (!sd->s_attr.open && new_od) {
-                sd->s_attr.open = new_od;
-                new_od = NULL;
-        }
-        od = sd->s_attr.open;
-        if (od) {
-                atomic_inc(&od->refcnt);
-                list_add_tail(&of->list, &od->files);
-        }
-        spin_unlock_irq(&sysfs_open_dirent_lock);
-        mutex_unlock(&sysfs_open_file_mutex);
-        if (od) {
-                kfree(new_od);
-                return 0;
-        }
-        /* not there, initialize a new one and retry */
-        new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
-        if (!new_od)
-                return -ENOMEM;
-        atomic_set(&new_od->refcnt, 0);
+        return battr->mmap(of->file, kobj, battr, vma);
-        atomic_set(&new_od->event, 1);
-        init_waitqueue_head(&new_od->poll);
-        INIT_LIST_HEAD(&new_od->files);
-        goto retry;
 }
-/**
+void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
- *      sysfs_put_open_dirent - put sysfs_open_dirent
- *      @sd: target sysfs_dirent
- *      @of: associated sysfs_open_file
- *
- *      Put @sd->s_attr.open and unlink @of from the files list.  If
- *      reference count reaches zero, disassociate and free it.
- *
- *      LOCKING:
- *      None.
- */
-static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
-                                  struct sysfs_open_file *of)
 {
-        struct sysfs_open_dirent *od = sd->s_attr.open;
+        struct sysfs_dirent *sd = k->sd, *tmp;
-        unsigned long flags;
-        mutex_lock(&sysfs_open_file_mutex);
-        spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
-        if (of)
+        if (sd && dir)
-                list_del(&of->list);
+                sd = kernfs_find_and_get(sd, dir);
-        if (atomic_dec_and_test(&od->refcnt))
-                sd->s_attr.open = NULL;
        else
-                od = NULL;
+                kernfs_get(sd);
-        spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
-        mutex_unlock(&sysfs_open_file_mutex);
-        kfree(od);
-}
-static int sysfs_open_file(struct inode *inode, struct file *file)
-{
-        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-        struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-        struct sysfs_open_file *of;
-        bool has_read, has_write;
-        int error = -EACCES;
-        /* need attr_sd for attr and ops, its parent for kobj */
-        if (!sysfs_get_active(attr_sd))
-                return -ENODEV;
-        if (sysfs_is_bin(attr_sd)) {
+        if (sd && attr) {
-                struct bin_attribute *battr = attr_sd->s_attr.bin_attr;
+                tmp = kernfs_find_and_get(sd, attr);
+                kernfs_put(sd);
-                has_read = battr->read || battr->mmap;
+                sd = tmp;
-                has_write = battr->write || battr->mmap;
-        } else {
-                const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
-                /* every kobject with an attribute needs a ktype assigned */
-                if (WARN(!ops, KERN_ERR
-                         "missing sysfs attribute operations for kobject: %s\n",
-                         kobject_name(kobj)))
-                        goto err_out;
-                has_read = ops->show;
-                has_write = ops->store;
        }
-        /* check perms and supported operations */
+        if (sd) {
-        if ((file->f_mode & FMODE_WRITE) &&
+                kernfs_notify(sd);
-            (!(inode->i_mode & S_IWUGO) || !has_write))
+                kernfs_put(sd);
-                goto err_out;
-        if ((file->f_mode & FMODE_READ) &&
-            (!(inode->i_mode & S_IRUGO) || !has_read))
-                goto err_out;
-        /* allocate a sysfs_open_file for the file */
-        error = -ENOMEM;
-        of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
-        if (!of)
-                goto err_out;
-        /*
-         * The following is done to give a different lockdep key to
-         * @of->mutex for files which implement mmap.  This is a rather
-         * crude way to avoid false positive lockdep warning around
-         * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
-         * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
-         * which mm->mmap_sem nests, while holding @of->mutex.  As each
-         * open file has a separate mutex, it's okay as long as those don't
-         * happen on the same file.  At this point, we can't easily give
-         * each file a separate locking class.  Let's differentiate on
-         * whether the file is bin or not for now.
-         */
-        if (sysfs_is_bin(attr_sd))
-                mutex_init(&of->mutex);
-        else
-                mutex_init(&of->mutex);
-        of->sd = attr_sd;
-        of->file = file;
-        /*
-         * Always instantiate seq_file even if read access doesn't use
-         * seq_file or is not requested.  This unifies private data access
-         * and readable regular files are the vast majority anyway.
-         */
-        if (sysfs_is_bin(attr_sd))
-                error = single_open(file, NULL, of);
-        else
-                error = single_open(file, sysfs_seq_show, of);
-        if (error)
-                goto err_free;
-        /* seq_file clears PWRITE unconditionally, restore it if WRITE */
-        if (file->f_mode & FMODE_WRITE)
-                file->f_mode |= FMODE_PWRITE;
-        /* make sure we have open dirent struct */
-        error = sysfs_get_open_dirent(attr_sd, of);
-        if (error)
-                goto err_close;
-        /* open succeeded, put active references */
-        sysfs_put_active(attr_sd);
-        return 0;
-err_close:
-        single_release(inode, file);
-err_free:
-        kfree(of);
-err_out:
-        sysfs_put_active(attr_sd);
-        return error;
-}
-static int sysfs_release(struct inode *inode, struct file *filp)
-{
-        struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
-        struct sysfs_open_file *of = sysfs_of(filp);
-        sysfs_put_open_dirent(sd, of);
-        single_release(inode, filp);
-        kfree(of);
-        return 0;
-}
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
-{
-        struct sysfs_open_dirent *od;
-        struct sysfs_open_file *of;
-        if (!sysfs_is_bin(sd))
-                return;
-        spin_lock_irq(&sysfs_open_dirent_lock);
-        od = sd->s_attr.open;
-        if (od)
-                atomic_inc(&od->refcnt);
-        spin_unlock_irq(&sysfs_open_dirent_lock);
-        if (!od)
-                return;
-        mutex_lock(&sysfs_open_file_mutex);
-        list_for_each_entry(of, &od->files, list) {
-                struct inode *inode = file_inode(of->file);
-                unmap_mapping_range(inode->i_mapping, 0, 0, 1);
        }
-        mutex_unlock(&sysfs_open_file_mutex);
-        sysfs_put_open_dirent(sd, NULL);
-}
-/* Sysfs attribute files are pollable.  The idea is that you read
- * the content and then you use 'poll' or 'select' to wait for
- * the content to change.  When the content changes (assuming the
- * manager for the kobject supports notification), poll will
- * return POLLERR|POLLPRI, and select will return the fd whether
- * it is waiting for read, write, or exceptions.
- * Once poll/select indicates that the value has changed, you
- * need to close and re-open the file, or seek to 0 and read again.
- * Reminder: this only works for attributes which actively support
- * it, and it is not possible to test an attribute from userspace
- * to see if it supports poll (Neither 'poll' nor 'select' return
- * an appropriate error code).  When in doubt, set a suitable timeout value.
- */
-static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
-{
-        struct sysfs_open_file *of = sysfs_of(filp);
-        struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
-        struct sysfs_open_dirent *od = attr_sd->s_attr.open;
-        /* need parent for the kobj, grab both */
-        if (!sysfs_get_active(attr_sd))
-                goto trigger;
-        poll_wait(filp, &od->poll, wait);
-        sysfs_put_active(attr_sd);
-        if (of->event != atomic_read(&od->event))
-                goto trigger;
-        return DEFAULT_POLLMASK;
- trigger:
-        return DEFAULT_POLLMASK|POLLERR|POLLPRI;
 }
+EXPORT_SYMBOL_GPL(sysfs_notify);
-void sysfs_notify_dirent(struct sysfs_dirent *sd)
+static const struct kernfs_ops sysfs_file_kfops_empty = {
-{
+};
-        struct sysfs_open_dirent *od;
-        unsigned long flags;
-        spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
-        if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
-                od = sd->s_attr.open;
-                if (od) {
-                        atomic_inc(&od->event);
-                        wake_up_interruptible(&od->poll);
-                }
-        }
-        spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
-}
-EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
-void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
+static const struct kernfs_ops sysfs_file_kfops_ro = {
-{
+        .seq_show       = sysfs_kf_seq_show,
-        struct sysfs_dirent *sd = k->sd;
+};
-        mutex_lock(&sysfs_mutex);
+static const struct kernfs_ops sysfs_file_kfops_wo = {
+        .write          = sysfs_kf_write,
+};
-        if (sd && dir)
+static const struct kernfs_ops sysfs_file_kfops_rw = {
-                sd = sysfs_find_dirent(sd, dir, NULL);
+        .seq_show       = sysfs_kf_seq_show,
-        if (sd && attr)
+        .write          = sysfs_kf_write,
-                sd = sysfs_find_dirent(sd, attr, NULL);
+};
-        if (sd)
-                sysfs_notify_dirent(sd);
-        mutex_unlock(&sysfs_mutex);
+static const struct kernfs_ops sysfs_bin_kfops_ro = {
-}
+        .read           = sysfs_kf_bin_read,
-EXPORT_SYMBOL_GPL(sysfs_notify);
+};
-const struct file_operations sysfs_file_operations = {
+static const struct kernfs_ops sysfs_bin_kfops_wo = {
-        .read           = seq_read,
+        .write          = sysfs_kf_bin_write,
-        .write          = sysfs_write_file,
-        .llseek         = generic_file_llseek,
-        .open           = sysfs_open_file,
-        .release        = sysfs_release,
-        .poll           = sysfs_poll,
 };
-const struct file_operations sysfs_bin_operations = {
+static const struct kernfs_ops sysfs_bin_kfops_rw = {
-        .read           = sysfs_bin_read,
+        .read           = sysfs_kf_bin_read,
-        .write          = sysfs_write_file,
+        .write          = sysfs_kf_bin_write,
-        .llseek         = generic_file_llseek,
+        .mmap           = sysfs_kf_bin_mmap,
-        .mmap           = sysfs_bin_mmap,
-        .open           = sysfs_open_file,
-        .release        = sysfs_release,
-        .poll           = sysfs_poll,
 };
 int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
-                           const struct attribute *attr, int type,
+                           const struct attribute *attr, bool is_bin,
-                           umode_t amode, const void *ns)
+                           umode_t mode, const void *ns)
 {
-        umode_t mode = (amode & S_IALLUGO) | S_IFREG;
+        struct lock_class_key *key = NULL;
-        struct sysfs_addrm_cxt acxt;
+        const struct kernfs_ops *ops;
        struct sysfs_dirent *sd;
-        int rc;
+        loff_t size;
-        sd = sysfs_new_dirent(attr->name, mode, type);
-        if (!sd)
-                return -ENOMEM;
-        sd->s_ns = ns;
+        if (!is_bin) {
-        sd->s_attr.attr = (void *)attr;
+                struct kobject *kobj = dir_sd->priv;
-        sysfs_dirent_init_lockdep(sd);
+                const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
-        sysfs_addrm_start(&acxt);
+                /* every kobject with an attribute needs a ktype assigned */
-        rc = sysfs_add_one(&acxt, sd, dir_sd);
+                if (WARN(!sysfs_ops, KERN_ERR
-        sysfs_addrm_finish(&acxt);
+                         "missing sysfs attribute operations for kobject: %s\n",
+                         kobject_name(kobj)))
-        if (rc)
+                        return -EINVAL;
-                sysfs_put(sd);
+                if (sysfs_ops->show && sysfs_ops->store)
+                        ops = &sysfs_file_kfops_rw;
+                else if (sysfs_ops->show)
+                        ops = &sysfs_file_kfops_ro;
+                else if (sysfs_ops->store)
+                        ops = &sysfs_file_kfops_wo;
+                else
+                        ops = &sysfs_file_kfops_empty;
+                size = PAGE_SIZE;
+        } else {
+                struct bin_attribute *battr = (void *)attr;
+                if ((battr->read && battr->write) || battr->mmap)
+                        ops = &sysfs_bin_kfops_rw;
+                else if (battr->read)
+                        ops = &sysfs_bin_kfops_ro;
+                else if (battr->write)
+                        ops = &sysfs_bin_kfops_wo;
+                else
+                        ops = &sysfs_file_kfops_empty;
+                size = battr->size;
+        }
-        return rc;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        if (!attr->ignore_lockdep)
+                key = attr->key ?: (struct lock_class_key *)&attr->skey;
+#endif
+        sd = kernfs_create_file_ns_key(dir_sd, attr->name, mode, size,
+                                       ops, (void *)attr, ns, key);
+        if (IS_ERR(sd)) {
+                if (PTR_ERR(sd) == -EEXIST)
+                        sysfs_warn_dup(dir_sd, attr->name);
+                return PTR_ERR(sd);
+        }
+        return 0;
 }
 int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
-                   int type)
+                   bool is_bin)
 {
-        return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL);
+        return sysfs_add_file_mode_ns(dir_sd, attr, is_bin, attr->mode, NULL);
 }
 /**
@@ -877,8 +275,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 {
        BUG_ON(!kobj || !kobj->sd || !attr);
-        return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR,
+        return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
-                                      attr->mode, ns);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -909,16 +306,18 @@ int sysfs_add_file_to_group(struct kobject *kobj,
        struct sysfs_dirent *dir_sd;
        int error;
-        if (group)
+        if (group) {
-                dir_sd = sysfs_get_dirent(kobj->sd, group);
+                dir_sd = kernfs_find_and_get(kobj->sd, group);
-        else
+        } else {
-                dir_sd = sysfs_get(kobj->sd);
+                dir_sd = kobj->sd;
+                kernfs_get(dir_sd);
+        }
        if (!dir_sd)
                return -ENOENT;
-        error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
+        error = sysfs_add_file(dir_sd, attr, false);
-        sysfs_put(dir_sd);
+        kernfs_put(dir_sd);
        return error;
 }
@@ -938,19 +337,16 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
        struct iattr newattrs;
        int rc;
-        mutex_lock(&sysfs_mutex);
+        sd = kernfs_find_and_get(kobj->sd, attr->name);
-        rc = -ENOENT;
-        sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
        if (!sd)
-                goto out;
+                return -ENOENT;
        newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE;
-        rc = sysfs_sd_setattr(sd, &newattrs);
- out:
+        rc = kernfs_setattr(sd, &newattrs);
-        mutex_unlock(&sysfs_mutex);
+        kernfs_put(sd);
        return rc;
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -968,7 +364,7 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 {
        struct sysfs_dirent *dir_sd = kobj->sd;
-        sysfs_hash_and_remove(dir_sd, attr->name, ns);
+        kernfs_remove_by_name_ns(dir_sd, attr->name, ns);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
@@ -991,13 +387,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 {
        struct sysfs_dirent *dir_sd;
-        if (group)
+        if (group) {
-                dir_sd = sysfs_get_dirent(kobj->sd, group);
+                dir_sd = kernfs_find_and_get(kobj->sd, group);
-        else
+        } else {
-                dir_sd = sysfs_get(kobj->sd);
+                dir_sd = kobj->sd;
+                kernfs_get(dir_sd);
+        }
        if (dir_sd) {
-                sysfs_hash_and_remove(dir_sd, attr->name, NULL);
+                kernfs_remove_by_name(dir_sd, attr->name);
-                sysfs_put(dir_sd);
+                kernfs_put(dir_sd);
        }
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
@@ -1012,7 +411,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
 {
        BUG_ON(!kobj || !kobj->sd || !attr);
-        return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+        return sysfs_add_file(kobj->sd, &attr->attr, true);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
@@ -1024,7 +423,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
 void sysfs_remove_bin_file(struct kobject *kobj,
                           const struct bin_attribute *attr)
 {
-        sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL);
+        kernfs_remove_by_name(kobj->sd, attr->attr.name);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10e38ce..7177532b8f7b 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
        if (grp->attrs)
                for (attr = grp->attrs; *attr; attr++)
-                        sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
+                        kernfs_remove_by_name(dir_sd, (*attr)->name);
        if (grp->bin_attrs)
                for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
                        sysfs_remove_bin_file(kobj, *bin_attr);
@@ -49,15 +49,13 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
                         * re-adding (if required) the file.
                         */
                        if (update)
-                                sysfs_hash_and_remove(dir_sd, (*attr)->name,
+                                kernfs_remove_by_name(dir_sd, (*attr)->name);
-                                                      NULL);
                        if (grp->is_visible) {
                                mode = grp->is_visible(kobj, *attr, i);
                                if (!mode)
                                        continue;
                        }
-                        error = sysfs_add_file_mode_ns(dir_sd, *attr,
+                        error = sysfs_add_file_mode_ns(dir_sd, *attr, false,
-                                                       SYSFS_KOBJ_ATTR,
                                                       (*attr)->mode | mode,
                                                       NULL);
                        if (unlikely(error))
@@ -102,18 +100,21 @@ static int internal_create_group(struct kobject *kobj, int update,
                return -EINVAL;
        }
        if (grp->name) {
-                error = sysfs_create_subdir(kobj, grp->name, &sd);
+                sd = kernfs_create_dir(kobj->sd, grp->name, kobj);
-                if (error)
+                if (IS_ERR(sd)) {
-                        return error;
+                        if (PTR_ERR(sd) == -EEXIST)
+                                sysfs_warn_dup(kobj->sd, grp->name);
+                        return PTR_ERR(sd);
+                }
        } else
                sd = kobj->sd;
-        sysfs_get(sd);
+        kernfs_get(sd);
        error = create_files(sd, kobj, grp, update);
        if (error) {
                if (grp->name)
-                        sysfs_remove(sd);
+                        kernfs_remove(sd);
        }
-        sysfs_put(sd);
+        kernfs_put(sd);
        return error;
 }
@@ -207,21 +208,23 @@ void sysfs_remove_group(struct kobject *kobj,
        struct sysfs_dirent *sd;
        if (grp->name) {
-                sd = sysfs_get_dirent(dir_sd, grp->name);
+                sd = kernfs_find_and_get(dir_sd, grp->name);
                if (!sd) {
                        WARN(!sd, KERN_WARNING
                             "sysfs group %p not found for kobject '%s'\n",
                             grp, kobject_name(kobj));
                        return;
                }
-        } else
+        } else {
-                sd = sysfs_get(dir_sd);
+                sd = dir_sd;
+                kernfs_get(sd);
+        }
        remove_files(sd, kobj, grp);
        if (grp->name)
-                sysfs_remove(sd);
+                kernfs_remove(sd);
-        sysfs_put(sd);
+        kernfs_put(sd);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_group);
@@ -262,17 +265,17 @@ int sysfs_merge_group(struct kobject *kobj,
        struct attribute *const *attr;
        int i;
-        dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
+        dir_sd = kernfs_find_and_get(kobj->sd, grp->name);
        if (!dir_sd)
                return -ENOENT;
        for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
-                error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
+                error = sysfs_add_file(dir_sd, *attr, false);
        if (error) {
                while (--i >= 0)
-                        sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL);
+                        kernfs_remove_by_name(dir_sd, (*--attr)->name);
        }
-        sysfs_put(dir_sd);
+        kernfs_put(dir_sd);
        return error;
 }
@@ -289,11 +292,11 @@ void sysfs_unmerge_group(struct kobject *kobj,
        struct sysfs_dirent *dir_sd;
        struct attribute *const *attr;
-        dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
+        dir_sd = kernfs_find_and_get(kobj->sd, grp->name);
        if (dir_sd) {
                for (attr = grp->attrs; *attr; ++attr)
-                        sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
+                        kernfs_remove_by_name(dir_sd, (*attr)->name);
-                sysfs_put(dir_sd);
+                kernfs_put(dir_sd);
        }
 }
 EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
@@ -311,12 +314,12 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
        struct sysfs_dirent *dir_sd;
        int error = 0;
-        dir_sd = sysfs_get_dirent(kobj->sd, group_name);
+        dir_sd = kernfs_find_and_get(kobj->sd, group_name);
        if (!dir_sd)
                return -ENOENT;
        error = sysfs_create_link_sd(dir_sd, target, link_name);
-        sysfs_put(dir_sd);
+        kernfs_put(dir_sd);
        return error;
 }
@@ -333,10 +336,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
 {
        struct sysfs_dirent *dir_sd;
-        dir_sd = sysfs_get_dirent(kobj->sd, group_name);
+        dir_sd = kernfs_find_and_get(kobj->sd, group_name);
        if (dir_sd) {
-                sysfs_hash_and_remove(dir_sd, link_name, NULL);
+                kernfs_remove_by_name(dir_sd, link_name);
-                sysfs_put(dir_sd);
+                kernfs_put(dir_sd);
        }
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 834ec2cdb7a3..e7e3aa8e7b78 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -14,146 +14,39 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
-#include <linux/pagemap.h>
 #include <linux/init.h>
-#include <linux/module.h>
-#include <linux/magic.h>
-#include <linux/slab.h>
 #include <linux/user_namespace.h>
 #include "sysfs.h"
+static struct kernfs_root *sysfs_root;
-static struct vfsmount *sysfs_mnt;
+struct sysfs_dirent *sysfs_root_sd;
-struct kmem_cache *sysfs_dir_cachep;
-static const struct super_operations sysfs_ops = {
-        .statfs         = simple_statfs,
-        .drop_inode     = generic_delete_inode,
-        .evict_inode    = sysfs_evict_inode,
-};
-struct sysfs_dirent sysfs_root = {
-        .s_name         = "",
-        .s_count        = ATOMIC_INIT(1),
-        .s_flags        = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
-        .s_mode         = S_IFDIR | S_IRUGO | S_IXUGO,
-        .s_ino          = 1,
-};
-static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
-{
-        struct inode *inode;
-        struct dentry *root;
-        sb->s_blocksize = PAGE_CACHE_SIZE;
-        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-        sb->s_magic = SYSFS_MAGIC;
-        sb->s_op = &sysfs_ops;
-        sb->s_time_gran = 1;
-        /* get root inode, initialize and unlock it */
-        mutex_lock(&sysfs_mutex);
-        inode = sysfs_get_inode(sb, &sysfs_root);
-        mutex_unlock(&sysfs_mutex);
-        if (!inode) {
-                pr_debug("sysfs: could not get root inode\n");
-                return -ENOMEM;
-        }
-        /* instantiate and link root dentry */
-        root = d_make_root(inode);
-        if (!root) {
-                pr_debug("%s: could not get root dentry!\n", __func__);
-                return -ENOMEM;
-        }
-        root->d_fsdata = &sysfs_root;
-        sb->s_root = root;
-        sb->s_d_op = &sysfs_dentry_ops;
-        return 0;
-}
-static int sysfs_test_super(struct super_block *sb, void *data)
-{
-        struct sysfs_super_info *sb_info = sysfs_info(sb);
-        struct sysfs_super_info *info = data;
-        enum kobj_ns_type type;
-        int found = 1;
-        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
-                if (sb_info->ns[type] != info->ns[type])
-                        found = 0;
-        }
-        return found;
-}
-static int sysfs_set_super(struct super_block *sb, void *data)
-{
-        int error;
-        error = set_anon_super(sb, data);
-        if (!error)
-                sb->s_fs_info = data;
-        return error;
-}
-static void free_sysfs_super_info(struct sysfs_super_info *info)
-{
-        int type;
-        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
-                kobj_ns_drop(type, info->ns[type]);
-        kfree(info);
-}
 static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
-        struct sysfs_super_info *info;
+        struct dentry *root;
-        enum kobj_ns_type type;
+        void *ns;
-        struct super_block *sb;
-        int error;
        if (!(flags & MS_KERNMOUNT)) {
                if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
                        return ERR_PTR(-EPERM);
-                for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
+                if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) /* only the net ns type is checked now */
-                        if (!kobj_ns_current_may_mount(type))
+                        return ERR_PTR(-EPERM);
-                                return ERR_PTR(-EPERM);
-                }
-        }
-        info = kzalloc(sizeof(*info), GFP_KERNEL);
-        if (!info)
-                return ERR_PTR(-ENOMEM);
-        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
-                info->ns[type] = kobj_ns_grab_current(type);
-        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
-        if (IS_ERR(sb) || sb->s_fs_info != info)
-                free_sysfs_super_info(info);
-        if (IS_ERR(sb))
-                return ERR_CAST(sb);
-        if (!sb->s_root) {
-                error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
-                if (error) {
-                        deactivate_locked_super(sb);
-                        return ERR_PTR(error);
-                }
-                sb->s_flags |= MS_ACTIVE;
        }
-        return dget(sb->s_root);
+        ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); /* dropped on failure below, else in sysfs_kill_sb() */
+        root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); /* NOTE(review): if this can reuse an existing sb, is the extra ns ref ever dropped? confirm */
+        if (IS_ERR(root))
+                kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
+        return root;
 }
 static void sysfs_kill_sb(struct super_block *sb)
 {
-        struct sysfs_super_info *info = sysfs_info(sb);
-        /* Remove the superblock from fs_supers/s_instances
-         * so we can't find it, before freeing sysfs_super_info.
-         */
-        kill_anon_super(sb);
-        free_sysfs_super_info(info);
+        /*
+         * Read the namespace tag first: once kernfs_kill_sb() has torn the
+         * superblock down, kernfs_super_ns() can no longer safely look at
+         * @sb's private data (doing so afterwards is a use-after-free).
+         */
+        void *ns = (void *)kernfs_super_ns(sb);
+        kernfs_kill_sb(sb);
+        /* Drop the namespace reference grabbed in sysfs_mount(). */
+        kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
 }
 static struct file_system_type sysfs_fs_type = {
@@ -165,48 +58,19 @@ static struct file_system_type sysfs_fs_type = {
 int __init sysfs_init(void)
 {
-        int err = -ENOMEM;
+        int err;
-        sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
+        sysfs_root = kernfs_create_root(NULL);
-                                              sizeof(struct sysfs_dirent),
+        if (IS_ERR(sysfs_root))
-                                              0, 0, NULL);
+                return PTR_ERR(sysfs_root);
-        if (!sysfs_dir_cachep)
-                goto out;
-        err = sysfs_inode_init();
+        sysfs_root_sd = sysfs_root->sd; /* root node; symlink.c falls back to it for a NULL kobject */
-        if (err)
-                goto out_err;
        err = register_filesystem(&sysfs_fs_type);
-        if (!err) {
+        if (err) { /* registration failed */
-                sysfs_mnt = kern_mount(&sysfs_fs_type);
+                kernfs_destroy_root(sysfs_root); /* undo kernfs_create_root() above */
-                if (IS_ERR(sysfs_mnt)) {
+                return err;
-                        printk(KERN_ERR "sysfs: could not mount!\n");
+        }
-                        err = PTR_ERR(sysfs_mnt);
-                        sysfs_mnt = NULL;
-                        unregister_filesystem(&sysfs_fs_type);
-                        goto out_err;
-                }
-        } else
-                goto out_err;
-out:
-        return err;
-out_err:
-        kmem_cache_destroy(sysfs_dir_cachep);
-        sysfs_dir_cachep = NULL;
-        goto out;
-}
-#undef sysfs_get
-struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
-{
-        return __sysfs_get(sd);
-}
-EXPORT_SYMBOL_GPL(sysfs_get);
-#undef sysfs_put
+        return 0;
-void sysfs_put(struct sysfs_dirent *sd)
-{
-        __sysfs_put(sd);
 }
-EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1bf1a09..1b8c9ed8511a 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,11 +11,8 @@
 */
 #include <linux/fs.h>
-#include <linux/gfp.h>
-#include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
-#include <linux/namei.h>
 #include <linux/mutex.h>
 #include <linux/security.h>
@@ -25,11 +22,7 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
                                   struct kobject *target,
                                   const char *name, int warn)
 {
-        struct sysfs_dirent *target_sd = NULL;
+        struct sysfs_dirent *sd, *target_sd = NULL;
-        struct sysfs_dirent *sd = NULL;
-        struct sysfs_addrm_cxt acxt;
-        enum kobj_ns_type ns_type;
-        int error;
        BUG_ON(!name || !parent_sd);
@@ -39,53 +32,24 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
         * sysfs_remove_dir() for details.
         */
        spin_lock(&sysfs_symlink_target_lock);
-        if (target->sd)
+        if (target->sd) {
-                target_sd = sysfs_get(target->sd);
+                target_sd = target->sd;
+                kernfs_get(target_sd);
+        }
        spin_unlock(&sysfs_symlink_target_lock);
-        error = -ENOENT;
        if (!target_sd)
-                goto out_put;
+                return -ENOENT;
-        error = -ENOMEM;
-        sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
-        if (!sd)
-                goto out_put;
-        ns_type = sysfs_ns_type(parent_sd);
+        sd = kernfs_create_link(parent_sd, name, target_sd);
-        if (ns_type)
+        kernfs_put(target_sd);
-                sd->s_ns = target_sd->s_ns;
-        sd->s_symlink.target_sd = target_sd;
-        target_sd = NULL;       /* reference is now owned by the symlink */
-        sysfs_addrm_start(&acxt);
-        /* Symlinks must be between directories with the same ns_type */
-        if (!ns_type ||
-            (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
-                if (warn)
-                        error = sysfs_add_one(&acxt, sd, parent_sd);
-                else
-                        error = __sysfs_add_one(&acxt, sd, parent_sd);
-        } else {
-                error = -EINVAL;
-                WARN(1, KERN_WARNING
-                        "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
-                        parent_sd->s_name,
-                        sd->s_name,
-                        sd->s_symlink.target_sd->s_parent->s_name,
-                        sd->s_symlink.target_sd->s_name);
-        }
-        sysfs_addrm_finish(&acxt);
-        if (error)
+        if (!IS_ERR(sd))
-                goto out_put;
+                return 0;
-        return 0;
+        if (warn && PTR_ERR(sd) == -EEXIST)
+                sysfs_warn_dup(parent_sd, name);
- out_put:
+        return PTR_ERR(sd);
-        sysfs_put(target_sd);
-        sysfs_put(sd);
-        return error;
 }
 /**
@@ -106,7 +70,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
        struct sysfs_dirent *parent_sd = NULL;
        if (!kobj)
-                parent_sd = &sysfs_root;
+                parent_sd = sysfs_root_sd;
        else
                parent_sd = kobj->sd;
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
         * sysfs_remove_dir() for details.
         */
        spin_lock(&sysfs_symlink_target_lock);
-        if (targ->sd && sysfs_ns_type(kobj->sd))
+        if (targ->sd && kernfs_ns_enabled(kobj->sd))
                ns = targ->sd->s_ns;
        spin_unlock(&sysfs_symlink_target_lock);
-        sysfs_hash_and_remove(kobj->sd, name, ns);
+        kernfs_remove_by_name_ns(kobj->sd, name, ns);
 }
 /**
@@ -180,11 +144,11 @@ void sysfs_remove_link(struct kobject *kobj, const char *name)
        struct sysfs_dirent *parent_sd = NULL;
        if (!kobj)
-                parent_sd = &sysfs_root;
+                parent_sd = sysfs_root_sd;
        else
                parent_sd = kobj->sd;
-        sysfs_hash_and_remove(parent_sd, name, NULL);
+        kernfs_remove_by_name(parent_sd, name);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
@@ -206,7 +170,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
        int result;
        if (!kobj)
-                parent_sd = &sysfs_root;
+                parent_sd = sysfs_root_sd;
        else
                parent_sd = kobj->sd;
@@ -214,117 +178,20 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
                old_ns = targ->sd->s_ns;
        result = -ENOENT;
-        sd = sysfs_get_dirent_ns(parent_sd, old, old_ns);
+        sd = kernfs_find_and_get_ns(parent_sd, old, old_ns);
        if (!sd)
                goto out;
        result = -EINVAL;
        if (sysfs_type(sd) != SYSFS_KOBJ_LINK)
                goto out;
-        if (sd->s_symlink.target_sd->s_dir.kobj != targ)
+        if (sd->s_symlink.target_sd->priv != targ)
                goto out;
-        result = sysfs_rename(sd, parent_sd, new, new_ns);
+        result = kernfs_rename_ns(sd, parent_sd, new, new_ns);
 out:
-        sysfs_put(sd);
+        kernfs_put(sd);
        return result;
 }
 EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
-static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
-                                 struct sysfs_dirent *target_sd, char *path)
-{
-        struct sysfs_dirent *base, *sd;
-        char *s = path;
-        int len = 0;
-        /* go up to the root, stop at the base */
-        base = parent_sd;
-        while (base->s_parent) {
-                sd = target_sd->s_parent;
-                while (sd->s_parent && base != sd)
-                        sd = sd->s_parent;
-                if (base == sd)
-                        break;
-                strcpy(s, "../");
-                s += 3;
-                base = base->s_parent;
-        }
-        /* determine end of target string for reverse fillup */
-        sd = target_sd;
-        while (sd->s_parent && sd != base) {
-                len += strlen(sd->s_name) + 1;
-                sd = sd->s_parent;
-        }
-        /* check limits */
-        if (len < 2)
-                return -EINVAL;
-        len--;
-        if ((s - path) + len > PATH_MAX)
-                return -ENAMETOOLONG;
-        /* reverse fillup of target string from target to base */
-        sd = target_sd;
-        while (sd->s_parent && sd != base) {
-                int slen = strlen(sd->s_name);
-                len -= slen;
-                strncpy(s + len, sd->s_name, slen);
-                if (len)
-                        s[--len] = '/';
-                sd = sd->s_parent;
-        }
-        return 0;
-}
-static int sysfs_getlink(struct dentry *dentry, char *path)
-{
-        struct sysfs_dirent *sd = dentry->d_fsdata;
-        struct sysfs_dirent *parent_sd = sd->s_parent;
-        struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
-        int error;
-        mutex_lock(&sysfs_mutex);
-        error = sysfs_get_target_path(parent_sd, target_sd, path);
-        mutex_unlock(&sysfs_mutex);
-        return error;
-}
-static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-        int error = -ENOMEM;
-        unsigned long page = get_zeroed_page(GFP_KERNEL);
-        if (page) {
-                error = sysfs_getlink(dentry, (char *) page);
-                if (error < 0)
-                        free_page((unsigned long)page);
-        }
-        nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
-        return NULL;
-}
-static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
-                           void *cookie)
-{
-        char *page = nd_get_link(nd);
-        if (!IS_ERR(page))
-                free_page((unsigned long)page);
-}
-const struct inode_operations sysfs_symlink_inode_operations = {
-        .setxattr       = sysfs_setxattr,
-        .readlink       = generic_readlink,
-        .follow_link    = sysfs_follow_link,
-        .put_link       = sysfs_put_link,
-        .setattr        = sysfs_setattr,
-        .getattr        = sysfs_getattr,
-        .permission     = sysfs_permission,
-};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fbfb3f6..c8e395b49330 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,248 +8,36 @@
 * This file is released under the GPLv2.
 */
-#include <linux/lockdep.h>
+#ifndef __SYSFS_INTERNAL_H
-#include <linux/kobject_ns.h>
+#define __SYSFS_INTERNAL_H
-#include <linux/fs.h>
-#include <linux/rbtree.h>
-struct sysfs_open_dirent;
+#include <linux/sysfs.h>
-/* type-specific structures for sysfs_dirent->s_* union members */
-struct sysfs_elem_dir {
-        struct kobject          *kobj;
-        unsigned long           subdirs;
-        /* children rbtree starts here and goes through sd->s_rb */
-        struct rb_root          children;
-};
-struct sysfs_elem_symlink {
-        struct sysfs_dirent     *target_sd;
-};
-struct sysfs_elem_attr {
-        union {
-                struct attribute        *attr;
-                struct bin_attribute    *bin_attr;
-        };
-        struct sysfs_open_dirent *open;
-};
-struct sysfs_inode_attrs {
-        struct iattr    ia_iattr;
-        void            *ia_secdata;
-        u32             ia_secdata_len;
-};
-/*
- * sysfs_dirent - the building block of sysfs hierarchy.  Each and
- * every sysfs node is represented by single sysfs_dirent.
- *
- * As long as s_count reference is held, the sysfs_dirent itself is
- * accessible.  Dereferencing s_elem or any other outer entity
- * requires s_active reference.
- */
-struct sysfs_dirent {
-        atomic_t                s_count;
-        atomic_t                s_active;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-        struct lockdep_map      dep_map;
-#endif
-        struct sysfs_dirent     *s_parent;
-        const char              *s_name;
-        struct rb_node          s_rb;
-        union {
-                struct completion       *completion;
-                struct sysfs_dirent     *removed_list;
-        } u;
-        const void              *s_ns; /* namespace tag */
-        unsigned int            s_hash; /* ns + name hash */
-        union {
-                struct sysfs_elem_dir           s_dir;
-                struct sysfs_elem_symlink       s_symlink;
-                struct sysfs_elem_attr          s_attr;
-        };
-        unsigned short          s_flags;
-        umode_t                 s_mode;
-        unsigned int            s_ino;
-        struct sysfs_inode_attrs *s_iattr;
-};
-#define SD_DEACTIVATED_BIAS             INT_MIN
-#define SYSFS_TYPE_MASK                 0x00ff
-#define SYSFS_DIR                       0x0001
-#define SYSFS_KOBJ_ATTR                 0x0002
-#define SYSFS_KOBJ_BIN_ATTR             0x0004
-#define SYSFS_KOBJ_LINK                 0x0008
-#define SYSFS_COPY_NAME                 (SYSFS_DIR | SYSFS_KOBJ_LINK)
-#define SYSFS_ACTIVE_REF                (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
-/* identify any namespace tag on sysfs_dirents */
-#define SYSFS_NS_TYPE_MASK              0xf00
-#define SYSFS_NS_TYPE_SHIFT             8
-#define SYSFS_FLAG_MASK                 ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
-#define SYSFS_FLAG_REMOVED              0x02000
-static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
-{
-        return sd->s_flags & SYSFS_TYPE_MASK;
-}
-/*
- * Return any namespace tags on this dirent.
- * enum kobj_ns_type is defined in linux/kobject.h
- */
-static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
-{
-        return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
-}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define sysfs_dirent_init_lockdep(sd)                           \
-do {                                                            \
-        struct attribute *attr = sd->s_attr.attr;               \
-        struct lock_class_key *key = attr->key;                 \
-        if (!key)                                               \
-                key = &attr->skey;                              \
-                                                                \
-        lockdep_init_map(&sd->dep_map, "s_active", key, 0);     \
-} while (0)
-/* Test for attributes that want to ignore lockdep for read-locking */
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
-        int type = sysfs_type(sd);
-        return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
-                sd->s_attr.attr->ignore_lockdep;
-}
-#else
-#define sysfs_dirent_init_lockdep(sd) do {} while (0)
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
-        return true;
-}
-#endif
-/*
- * Context structure to be used while adding/removing nodes.
- */
-struct sysfs_addrm_cxt {
-        struct sysfs_dirent     *removed;
-};
 /*
 * mount.c
 */
+extern struct sysfs_dirent *sysfs_root_sd;
-/*
- * Each sb is associated with a set of namespace tags (i.e.
- * the network namespace of the task which mounted this sysfs
- * instance).
- */
-struct sysfs_super_info {
-        void *ns[KOBJ_NS_TYPES];
-};
-#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
-extern struct sysfs_dirent sysfs_root;
-extern struct kmem_cache *sysfs_dir_cachep;
 /*
 * dir.c
 */
-extern struct mutex sysfs_mutex;
 extern spinlock_t sysfs_symlink_target_lock;
-extern const struct dentry_operations sysfs_dentry_ops;
-extern const struct file_operations sysfs_dir_operations;
-extern const struct inode_operations sysfs_dir_inode_operations;
-struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
-void sysfs_put_active(struct sysfs_dirent *sd);
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
 void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
-                    struct sysfs_dirent *parent_sd);
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
-                  struct sysfs_dirent *parent_sd);
-void sysfs_remove(struct sysfs_dirent *sd);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
-                          const void *ns);
-void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
-struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
-                                       const unsigned char *name,
-                                       const void *ns);
-struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
-void release_sysfs_dirent(struct sysfs_dirent *sd);
-int sysfs_create_subdir(struct kobject *kobj, const char *name,
-                        struct sysfs_dirent **p_sd);
-int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
-                 const char *new_name, const void *new_ns);
-static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
-{
-        if (sd) {
-                WARN_ON(!atomic_read(&sd->s_count));
-                atomic_inc(&sd->s_count);
-        }
-        return sd;
-}
-#define sysfs_get(sd) __sysfs_get(sd)
-static inline void __sysfs_put(struct sysfs_dirent *sd)
-{
-        if (sd && atomic_dec_and_test(&sd->s_count))
-                release_sysfs_dirent(sd);
-}
-#define sysfs_put(sd) __sysfs_put(sd)
-/*
- * inode.c
- */
-struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
-void sysfs_evict_inode(struct inode *inode);
-int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
-int sysfs_permission(struct inode *inode, int mask);
-int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-                  struct kstat *stat);
-int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
-                   size_t size, int flags);
-int sysfs_inode_init(void);
 /*
 * file.c
 */
-extern const struct file_operations sysfs_file_operations;
-extern const struct file_operations sysfs_bin_operations;
 int sysfs_add_file(struct sysfs_dirent *dir_sd,
-                   const struct attribute *attr, int type);
+                   const struct attribute *attr, bool is_bin);
 int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
-                           const struct attribute *attr, int type,
+                           const struct attribute *attr, bool is_bin,
                           umode_t amode, const void *ns);
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
 /*
 * symlink.c
 */
-extern const struct inode_operations sysfs_symlink_inode_operations;
 int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
                         const char *name);
+#endif  /* __SYSFS_INTERNAL_H */
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index e154c1005cd1..59529330efd6 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -68,4 +68,11 @@ static inline void release_firmware(const struct firmware *fw)
 #endif
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+int request_firmware_direct(const struct firmware **fw, const char *name,
+                            struct device *device);
+#else
+#define request_firmware_direct request_firmware
+#endif
 #endif
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
new file mode 100644
index 000000000000..d65541308419
--- /dev/null
+++ b/include/linux/kernfs.h
@@ -0,0 +1,356 @@
+/*
+ * kernfs.h - pseudo filesystem decoupled from vfs locking
+ *
+ * This file is released under the GPLv2.
+ */
+#ifndef __LINUX_KERNFS_H
+#define __LINUX_KERNFS_H
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/idr.h>
+#include <linux/lockdep.h>
+#include <linux/rbtree.h>
+#include <linux/atomic.h>
+#include <linux/completion.h>
+struct file;
+struct iattr;
+struct seq_file;
+struct vm_area_struct;
+struct super_block;
+struct file_system_type;
+struct sysfs_open_dirent;
+struct sysfs_inode_attrs;
+enum kernfs_node_type {
+        SYSFS_DIR               = 0x0001,
+        SYSFS_KOBJ_ATTR         = 0x0002,
+        SYSFS_KOBJ_LINK         = 0x0004,
+};
+#define SYSFS_TYPE_MASK         0x000f
+#define SYSFS_COPY_NAME         (SYSFS_DIR | SYSFS_KOBJ_LINK)
+#define SYSFS_ACTIVE_REF        SYSFS_KOBJ_ATTR
+#define SYSFS_FLAG_MASK         (~SYSFS_TYPE_MASK)      /* every s_flags bit outside the type field */
+enum kernfs_node_flag {
+        SYSFS_FLAG_REMOVED      = 0x0010,
+        SYSFS_FLAG_NS           = 0x0020,
+        SYSFS_FLAG_HAS_SEQ_SHOW = 0x0040,
+        SYSFS_FLAG_HAS_MMAP     = 0x0080,
+        SYSFS_FLAG_LOCKDEP      = 0x0100,
+};
+/* type-specific structures for sysfs_dirent->s_* union members */
+struct sysfs_elem_dir {
+        unsigned long           subdirs;
+        /* children rbtree starts here and goes through sd->s_rb */
+        struct rb_root          children;
+        /*
+         * The kernfs hierarchy this directory belongs to.  This fits
+         * better directly in sysfs_dirent but is here to save space.
+         */
+        struct kernfs_root      *root;
+};
+struct sysfs_elem_symlink {
+        struct sysfs_dirent     *target_sd;
+};
+struct sysfs_elem_attr {
+        const struct kernfs_ops *ops;
+        struct sysfs_open_dirent *open;
+        loff_t                  size;
+};
+/*
+ * sysfs_dirent - the building block of sysfs hierarchy.  Each and every
+ * sysfs node is represented by single sysfs_dirent.  Most fields are
+ * private to kernfs and shouldn't be accessed directly by kernfs users.
+ *
+ * As long as s_count reference is held, the sysfs_dirent itself is
+ * accessible.  Dereferencing s_elem or any other outer entity
+ * requires s_active reference.
+ */
+struct sysfs_dirent {
+        atomic_t                s_count;
+        atomic_t                s_active;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        struct lockdep_map      dep_map;
+#endif
+        /* the following two fields are published */
+        struct sysfs_dirent     *s_parent;
+        const char              *s_name;
+        struct rb_node          s_rb;
+        union {
+                struct completion       *completion;
+                struct sysfs_dirent     *removed_list;
+        } u;
+        const void              *s_ns; /* namespace tag */
+        unsigned int            s_hash; /* ns + name hash */
+        union {
+                struct sysfs_elem_dir           s_dir;
+                struct sysfs_elem_symlink       s_symlink;
+                struct sysfs_elem_attr          s_attr;
+        };
+        void                    *priv;
+        unsigned short          s_flags;
+        umode_t                 s_mode;
+        unsigned int            s_ino;
+        struct sysfs_inode_attrs *s_iattr;
+};
+struct kernfs_root {
+        /* published fields */
+        struct sysfs_dirent     *sd;
+        /* private fields, do not use outside kernfs proper */
+        struct ida              ino_ida;
+};
+struct sysfs_open_file {
+        /* published fields */
+        struct sysfs_dirent     *sd;
+        struct file             *file;
+        /* private fields, do not use outside kernfs proper */
+        struct mutex            mutex;
+        int                     event;
+        struct list_head        list;
+        bool                    mmapped;
+        const struct vm_operations_struct *vm_ops;
+};
+struct kernfs_ops {
+        /*
+         * Read is handled by either seq_file or read().
+         *
+         * If seq_show() is present, seq_file path is active.  Other seq
+         * operations are optional and if not implemented, the behavior is
+         * equivalent to single_open().  @sf->private points to the
+         * associated sysfs_open_file.
+         *
+         * read() is bounced through kernel buffer and a read larger than
+         * PAGE_SIZE results in partial operation of PAGE_SIZE.
+         */
+        int (*seq_show)(struct seq_file *sf, void *v);
+        void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
+        void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
+        void (*seq_stop)(struct seq_file *sf, void *v);
+        ssize_t (*read)(struct sysfs_open_file *of, char *buf, size_t bytes,
+                        loff_t off);
+        /*
+         * write() is bounced through kernel buffer and a write larger than
+         * PAGE_SIZE results in partial operation of PAGE_SIZE.
+         */
+        ssize_t (*write)(struct sysfs_open_file *of, char *buf, size_t bytes,
+                         loff_t off);
+        int (*mmap)(struct sysfs_open_file *of, struct vm_area_struct *vma);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        struct lock_class_key   lockdep_key;
+#endif
+};
+#ifdef CONFIG_SYSFS
+static inline enum kernfs_node_type sysfs_type(struct sysfs_dirent *sd)
+{
+        return sd->s_flags & SYSFS_TYPE_MASK;
+}
+/**
+ * kernfs_enable_ns - enable namespace under a directory
+ * @sd: directory of interest, should be empty
+ *
+ * This is to be called right after @sd is created to enable namespace
+ * under it.  All children of @sd must have non-NULL namespace tags and
+ * only the ones which match the super_block's tag will be visible.
+ */
+static inline void kernfs_enable_ns(struct sysfs_dirent *sd)
+{
+        WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
+        WARN_ON_ONCE(!RB_EMPTY_ROOT(&sd->s_dir.children));
+        sd->s_flags |= SYSFS_FLAG_NS;
+}
+/**
+ * kernfs_ns_enabled - test whether namespace is enabled
+ * @sd: the node to test
+ *
+ * Test whether namespace filtering is enabled for the children of @sd.
+ */
+static inline bool kernfs_ns_enabled(struct sysfs_dirent *sd)
+{
+        return sd->s_flags & SYSFS_FLAG_NS;
+}
+struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent,
+                                            const char *name, const void *ns);
+void kernfs_get(struct sysfs_dirent *sd);
+void kernfs_put(struct sysfs_dirent *sd);
+struct kernfs_root *kernfs_create_root(void *priv);
+void kernfs_destroy_root(struct kernfs_root *root);
+struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent,
+                                          const char *name, void *priv,
+                                          const void *ns);
+struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
+                                               const char *name,
+                                               umode_t mode, loff_t size,
+                                               const struct kernfs_ops *ops,
+                                               void *priv, const void *ns,
+                                               struct lock_class_key *key);
+struct sysfs_dirent *kernfs_create_link(struct sysfs_dirent *parent,
+                                        const char *name,
+                                        struct sysfs_dirent *target);
+void kernfs_remove(struct sysfs_dirent *sd);
+int kernfs_remove_by_name_ns(struct sysfs_dirent *parent, const char *name,
+                             const void *ns);
+int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent,
+                     const char *new_name, const void *new_ns);
+int kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr);
+void kernfs_notify(struct sysfs_dirent *sd);
+const void *kernfs_super_ns(struct super_block *sb);
+struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
+                               struct kernfs_root *root, const void *ns);
+void kernfs_kill_sb(struct super_block *sb);
+void kernfs_init(void);
+#else   /* CONFIG_SYSFS */
+static inline enum kernfs_node_type sysfs_type(struct sysfs_dirent *sd)
+{ return 0; }   /* whatever */
+static inline void kernfs_enable_ns(struct sysfs_dirent *sd) { }
+static inline bool kernfs_ns_enabled(struct sysfs_dirent *sd)
+{ return false; }
+static inline struct sysfs_dirent *
+kernfs_find_and_get_ns(struct sysfs_dirent *parent, const char *name,
+                       const void *ns)
+{ return NULL; }
+static inline void kernfs_get(struct sysfs_dirent *sd) { }
+static inline void kernfs_put(struct sysfs_dirent *sd) { }
+static inline struct kernfs_root *kernfs_create_root(void *priv)
+{ return ERR_PTR(-ENOSYS); }
+static inline void kernfs_destroy_root(struct kernfs_root *root) { }
+static inline struct sysfs_dirent *
+kernfs_create_dir_ns(struct sysfs_dirent *parent, const char *name, void *priv,
+                     const void *ns)
+{ return ERR_PTR(-ENOSYS); }
+static inline struct sysfs_dirent *
+kernfs_create_file_ns_key(struct sysfs_dirent *parent, const char *name,
+                          umode_t mode, loff_t size,
+                          const struct kernfs_ops *ops, void *priv,
+                          const void *ns, struct lock_class_key *key)
+{ return ERR_PTR(-ENOSYS); }
+static inline struct sysfs_dirent *
+kernfs_create_link(struct sysfs_dirent *parent, const char *name,
+                   struct sysfs_dirent *target)
+{ return ERR_PTR(-ENOSYS); }
+static inline void kernfs_remove(struct sysfs_dirent *sd) { }
+static inline int kernfs_remove_by_name_ns(struct sysfs_dirent *parent,
+                                           const char *name, const void *ns)
+{ return -ENOSYS; }
+static inline int kernfs_rename_ns(struct sysfs_dirent *sd,
+                                   struct sysfs_dirent *new_parent,
+                                   const char *new_name, const void *new_ns)
+{ return -ENOSYS; }
+static inline int kernfs_setattr(struct sysfs_dirent *sd,
+                                 const struct iattr *iattr)
+{ return -ENOSYS; }
+static inline void kernfs_notify(struct sysfs_dirent *sd) { }
+static inline const void *kernfs_super_ns(struct super_block *sb)
+{ return NULL; }
+static inline struct dentry *
+kernfs_mount_ns(struct file_system_type *fs_type, int flags,
+                struct kernfs_root *root, const void *ns)
+{ return ERR_PTR(-ENOSYS); }
+static inline void kernfs_kill_sb(struct super_block *sb) { }
+static inline void kernfs_init(void) { }
+#endif  /* CONFIG_SYSFS */
+static inline struct sysfs_dirent *
+kernfs_find_and_get(struct sysfs_dirent *sd, const char *name)
+{
+        return kernfs_find_and_get_ns(sd, name, NULL);
+}
+static inline struct sysfs_dirent *
+kernfs_create_dir(struct sysfs_dirent *parent, const char *name, void *priv)
+{
+        return kernfs_create_dir_ns(parent, name, priv, NULL);
+}
+static inline struct sysfs_dirent *
+kernfs_create_file_ns(struct sysfs_dirent *parent, const char *name,
+                      umode_t mode, loff_t size, const struct kernfs_ops *ops,
+                      void *priv, const void *ns)
+{
+        struct lock_class_key *key = NULL;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        key = (struct lock_class_key *)&ops->lockdep_key;
+#endif
+        return kernfs_create_file_ns_key(parent, name, mode, size, ops, priv,
+                                         ns, key);
+}
+static inline struct sysfs_dirent *
+kernfs_create_file(struct sysfs_dirent *parent, const char *name, umode_t mode,
+                   loff_t size, const struct kernfs_ops *ops, void *priv)
+{
+        return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
+}
+static inline int kernfs_remove_by_name(struct sysfs_dirent *parent,
+                                        const char *name)
+{
+        return kernfs_remove_by_name_ns(parent, name, NULL);
+}
+static inline struct dentry *
+kernfs_mount(struct file_system_type *fs_type, int flags,
+             struct kernfs_root *root)
+{
+        return kernfs_mount_ns(fs_type, flags, root, NULL);
+}
+#endif  /* __LINUX_KERNFS_H */
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 6695040a0317..cd8f90bf51a7 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -12,6 +12,7 @@
 #ifndef _SYSFS_H_
 #define _SYSFS_H_
+#include <linux/kernfs.h>
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/list.h>
@@ -175,8 +176,6 @@ struct sysfs_ops {
        ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
 };
-struct sysfs_dirent;
 #ifdef CONFIG_SYSFS
 int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -244,12 +243,6 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
                                  const char *link_name);
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
-void sysfs_notify_dirent(struct sysfs_dirent *sd);
-struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
-                                         const unsigned char *name,
-                                         const void *ns);
-struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
-void sysfs_put(struct sysfs_dirent *sd);
 int __must_check sysfs_init(void);
@@ -419,22 +412,6 @@ static inline void sysfs_notify(struct kobject *kobj, const char *dir,
                                const char *attr)
 {
 }
-static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
-{
-}
-static inline struct sysfs_dirent *
-sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, const unsigned char *name,
-                    const void *ns)
-{
-        return NULL;
-}
-static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
-{
-        return NULL;
-}
-static inline void sysfs_put(struct sysfs_dirent *sd)
-{
-}
 static inline int __must_check sysfs_init(void)
 {
@@ -461,10 +438,26 @@ static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target
        return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL);
 }
+static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
+{
+        kernfs_notify(sd);
+}
 static inline struct sysfs_dirent *
 sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name)
 {
-        return sysfs_get_dirent_ns(parent_sd, name, NULL);
+        return kernfs_find_and_get(parent_sd, name);
+}
+static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
+{
+        kernfs_get(sd);
+        return sd;
+}
+static inline void sysfs_put(struct sysfs_dirent *sd)
+{
+        kernfs_put(sd);
 }
 #endif /* _SYSFS_H_ */
diff --git a/lib/kobject.c b/lib/kobject.c
index 5b4b8886435e..94b321f4ac67 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -18,6 +18,7 @@
 #include <linux/export.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
+#include <linux/random.h>
 /**
 * kobject_namespace - return @kobj's namespace tag
@@ -65,13 +66,17 @@ static int populate_dir(struct kobject *kobj)
 static int create_dir(struct kobject *kobj)
 {
+        const struct kobj_ns_type_operations *ops;
        int error;
        error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
-        if (!error) {
+        if (error)
-                error = populate_dir(kobj);
+                return error;
-                if (error)
-                        sysfs_remove_dir(kobj);
+        error = populate_dir(kobj);
+        if (error) {
+                sysfs_remove_dir(kobj);
+                return error;
        }
        /*
@@ -80,7 +85,20 @@ static int create_dir(struct kobject *kobj)
         */
        sysfs_get(kobj->sd);
-        return error;
+        /*
+         * If @kobj has ns_ops, its children need to be filtered based on
+         * their namespace tags.  Enable namespace support on @kobj->sd.
+         */
+        ops = kobj_child_ns_ops(kobj);
+        if (ops) {
+                BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE);
+                BUG_ON(ops->type >= KOBJ_NS_TYPES);
+                BUG_ON(!kobj_ns_type_registered(ops->type));
+                kernfs_enable_ns(kobj->sd);
+        }
+        return 0;
 }
 static int get_kobj_path_length(struct kobject *kobj)
@@ -247,8 +265,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
                return 0;
        kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
-        if (!kobj->name)
+        if (!kobj->name) {
+                kobj->name = old_name;
                return -ENOMEM;
+        }
        /* ewww... some of these buggers have '/' in the name ... */
        while ((s = strchr(kobj->name, '/')))
@@ -625,10 +645,12 @@ static void kobject_release(struct kref *kref)
 {
        struct kobject *kobj = container_of(kref, struct kobject, kref);
 #ifdef CONFIG_DEBUG_KOBJECT_RELEASE
-        pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n",
+        /* vary the release delay between HZ and 4 * HZ jiffies */
+        unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
-                 kobject_name(kobj), kobj, __func__, kobj->parent);
+        pr_info("kobject: '%s' (%p): %s, parent %p (delayed %lu)\n",
+                 kobject_name(kobj), kobj, __func__, kobj->parent, delay);
         INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
-        schedule_delayed_work(&kobj->release, HZ);
+        schedule_delayed_work(&kobj->release, delay);
 #else
        kobject_cleanup(kobj);
 #endif
@@ -835,6 +857,7 @@ void kset_unregister(struct kset *k)
 {
        if (!k)
                return;
+        kobject_del(&k->kobj);
        kobject_put(&k->kobj);
 }
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index d0c687fd9802..5dce351f131f 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -262,6 +262,7 @@ baz_error:
 bar_error:
        destroy_foo_obj(foo_obj);
 foo_error:
+        kset_unregister(example_kset);
        return -EINVAL;
 }