aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kobject.txt5
-rw-r--r--arch/x86/kernel/microcode_amd.c2
-rw-r--r--arch/x86/kernel/microcode_intel.c2
-rw-r--r--drivers/base/core.c3
-rw-r--r--drivers/base/firmware_class.c86
-rw-r--r--drivers/firmware/dmi-sysfs.c3
-rw-r--r--fs/Makefile2
-rw-r--r--fs/kernfs/Makefile5
-rw-r--r--fs/kernfs/dir.c1020
-rw-r--r--fs/kernfs/file.c813
-rw-r--r--fs/kernfs/inode.c (renamed from fs/sysfs/inode.c)179
-rw-r--r--fs/kernfs/kernfs-internal.h122
-rw-r--r--fs/kernfs/mount.c165
-rw-r--r--fs/kernfs/symlink.c152
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/sysfs/Makefile2
-rw-r--r--fs/sysfs/dir.c1030
-rw-r--r--fs/sysfs/file.c929
-rw-r--r--fs/sysfs/group.c59
-rw-r--r--fs/sysfs/mount.c182
-rw-r--r--fs/sysfs/symlink.c179
-rw-r--r--fs/sysfs/sysfs.h228
-rw-r--r--include/linux/firmware.h7
-rw-r--r--include/linux/kernfs.h356
-rw-r--r--include/linux/sysfs.h43
-rw-r--r--lib/kobject.c41
-rw-r--r--samples/kobject/kset-example.c1
27 files changed, 3140 insertions, 2478 deletions
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index c5182bb2c16c..f87241dfed87 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -342,7 +342,10 @@ kset use:
342 342
343When you are finished with the kset, call: 343When you are finished with the kset, call:
344 void kset_unregister(struct kset *kset); 344 void kset_unregister(struct kset *kset);
345to destroy it. 345to destroy it. This removes the kset from sysfs and decrements its reference
346count. When the reference count goes to zero, the kset will be released.
347Because other references to the kset may still exist, the release may happen
348after kset_unregister() returns.
346 349
347An example of using a kset can be seen in the 350An example of using a kset can be seen in the
348samples/kobject/kset-example.c file in the kernel tree. 351samples/kobject/kset-example.c file in the kernel tree.
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c3d4cc972eca..22b3a1191ab3 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -430,7 +430,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
430 if (c->x86 >= 0x15) 430 if (c->x86 >= 0x15)
431 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); 431 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
432 432
433 if (request_firmware(&fw, (const char *)fw_name, device)) { 433 if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
434 pr_debug("failed to load file %s\n", fw_name); 434 pr_debug("failed to load file %s\n", fw_name);
435 goto out; 435 goto out;
436 } 436 }
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 5fb2cebf556b..a276fa75d9b5 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
278 sprintf(name, "intel-ucode/%02x-%02x-%02x", 278 sprintf(name, "intel-ucode/%02x-%02x-%02x",
279 c->x86, c->x86_model, c->x86_mask); 279 c->x86, c->x86_model, c->x86_mask);
280 280
281 if (request_firmware(&firmware, name, device)) { 281 if (request_firmware_direct(&firmware, name, device)) {
282 pr_debug("data file %s load failed\n", name); 282 pr_debug("data file %s load failed\n", name);
283 return UCODE_NFOUND; 283 return UCODE_NFOUND;
284 } 284 }
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 67b180d855b2..aab43fbb8336 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1603,6 +1603,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
1603 goto error; 1603 goto error;
1604 } 1604 }
1605 1605
1606 device_initialize(dev);
1606 dev->devt = devt; 1607 dev->devt = devt;
1607 dev->class = class; 1608 dev->class = class;
1608 dev->parent = parent; 1609 dev->parent = parent;
@@ -1614,7 +1615,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
1614 if (retval) 1615 if (retval)
1615 goto error; 1616 goto error;
1616 1617
1617 retval = device_register(dev); 1618 retval = device_add(dev);
1618 if (retval) 1619 if (retval)
1619 goto error; 1620 goto error;
1620 1621
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index eb8fb94ae2c5..33b87bf664ab 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -96,6 +96,15 @@ static inline long firmware_loading_timeout(void)
96 return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT; 96 return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT;
97} 97}
98 98
99/* firmware behavior options */
100#define FW_OPT_UEVENT (1U << 0)
101#define FW_OPT_NOWAIT (1U << 1)
102#ifdef CONFIG_FW_LOADER_USER_HELPER
103#define FW_OPT_FALLBACK (1U << 2)
104#else
105#define FW_OPT_FALLBACK 0
106#endif
107
99struct firmware_cache { 108struct firmware_cache {
100 /* firmware_buf instance will be added into the below list */ 109 /* firmware_buf instance will be added into the below list */
101 spinlock_t lock; 110 spinlock_t lock;
@@ -820,7 +829,7 @@ static void firmware_class_timeout_work(struct work_struct *work)
820 829
821static struct firmware_priv * 830static struct firmware_priv *
822fw_create_instance(struct firmware *firmware, const char *fw_name, 831fw_create_instance(struct firmware *firmware, const char *fw_name,
823 struct device *device, bool uevent, bool nowait) 832 struct device *device, unsigned int opt_flags)
824{ 833{
825 struct firmware_priv *fw_priv; 834 struct firmware_priv *fw_priv;
826 struct device *f_dev; 835 struct device *f_dev;
@@ -832,7 +841,7 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
832 goto exit; 841 goto exit;
833 } 842 }
834 843
835 fw_priv->nowait = nowait; 844 fw_priv->nowait = !!(opt_flags & FW_OPT_NOWAIT);
836 fw_priv->fw = firmware; 845 fw_priv->fw = firmware;
837 INIT_DELAYED_WORK(&fw_priv->timeout_work, 846 INIT_DELAYED_WORK(&fw_priv->timeout_work,
838 firmware_class_timeout_work); 847 firmware_class_timeout_work);
@@ -848,8 +857,8 @@ exit:
848} 857}
849 858
850/* load a firmware via user helper */ 859/* load a firmware via user helper */
851static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, 860static int _request_firmware_load(struct firmware_priv *fw_priv,
852 long timeout) 861 unsigned int opt_flags, long timeout)
853{ 862{
854 int retval = 0; 863 int retval = 0;
855 struct device *f_dev = &fw_priv->dev; 864 struct device *f_dev = &fw_priv->dev;
@@ -885,7 +894,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
885 goto err_del_bin_attr; 894 goto err_del_bin_attr;
886 } 895 }
887 896
888 if (uevent) { 897 if (opt_flags & FW_OPT_UEVENT) {
889 buf->need_uevent = true; 898 buf->need_uevent = true;
890 dev_set_uevent_suppress(f_dev, false); 899 dev_set_uevent_suppress(f_dev, false);
891 dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id); 900 dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
@@ -911,16 +920,16 @@ err_put_dev:
911 920
912static int fw_load_from_user_helper(struct firmware *firmware, 921static int fw_load_from_user_helper(struct firmware *firmware,
913 const char *name, struct device *device, 922 const char *name, struct device *device,
914 bool uevent, bool nowait, long timeout) 923 unsigned int opt_flags, long timeout)
915{ 924{
916 struct firmware_priv *fw_priv; 925 struct firmware_priv *fw_priv;
917 926
918 fw_priv = fw_create_instance(firmware, name, device, uevent, nowait); 927 fw_priv = fw_create_instance(firmware, name, device, opt_flags);
919 if (IS_ERR(fw_priv)) 928 if (IS_ERR(fw_priv))
920 return PTR_ERR(fw_priv); 929 return PTR_ERR(fw_priv);
921 930
922 fw_priv->buf = firmware->priv; 931 fw_priv->buf = firmware->priv;
923 return _request_firmware_load(fw_priv, uevent, timeout); 932 return _request_firmware_load(fw_priv, opt_flags, timeout);
924} 933}
925 934
926#ifdef CONFIG_PM_SLEEP 935#ifdef CONFIG_PM_SLEEP
@@ -942,7 +951,7 @@ static void kill_requests_without_uevent(void)
942#else /* CONFIG_FW_LOADER_USER_HELPER */ 951#else /* CONFIG_FW_LOADER_USER_HELPER */
943static inline int 952static inline int
944fw_load_from_user_helper(struct firmware *firmware, const char *name, 953fw_load_from_user_helper(struct firmware *firmware, const char *name,
945 struct device *device, bool uevent, bool nowait, 954 struct device *device, unsigned int opt_flags,
946 long timeout) 955 long timeout)
947{ 956{
948 return -ENOENT; 957 return -ENOENT;
@@ -1023,7 +1032,7 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name,
1023} 1032}
1024 1033
1025static int assign_firmware_buf(struct firmware *fw, struct device *device, 1034static int assign_firmware_buf(struct firmware *fw, struct device *device,
1026 bool skip_cache) 1035 unsigned int opt_flags)
1027{ 1036{
1028 struct firmware_buf *buf = fw->priv; 1037 struct firmware_buf *buf = fw->priv;
1029 1038
@@ -1040,7 +1049,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
1040 * device may has been deleted already, but the problem 1049 * device may has been deleted already, but the problem
1041 * should be fixed in devres or driver core. 1050 * should be fixed in devres or driver core.
1042 */ 1051 */
1043 if (device && !skip_cache) 1052 /* don't cache firmware handled without uevent */
1053 if (device && (opt_flags & FW_OPT_UEVENT))
1044 fw_add_devm_name(device, buf->fw_id); 1054 fw_add_devm_name(device, buf->fw_id);
1045 1055
1046 /* 1056 /*
@@ -1061,7 +1071,7 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
1061/* called from request_firmware() and request_firmware_work_func() */ 1071/* called from request_firmware() and request_firmware_work_func() */
1062static int 1072static int
1063_request_firmware(const struct firmware **firmware_p, const char *name, 1073_request_firmware(const struct firmware **firmware_p, const char *name,
1064 struct device *device, bool uevent, bool nowait) 1074 struct device *device, unsigned int opt_flags)
1065{ 1075{
1066 struct firmware *fw; 1076 struct firmware *fw;
1067 long timeout; 1077 long timeout;
@@ -1076,7 +1086,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
1076 1086
1077 ret = 0; 1087 ret = 0;
1078 timeout = firmware_loading_timeout(); 1088 timeout = firmware_loading_timeout();
1079 if (nowait) { 1089 if (opt_flags & FW_OPT_NOWAIT) {
1080 timeout = usermodehelper_read_lock_wait(timeout); 1090 timeout = usermodehelper_read_lock_wait(timeout);
1081 if (!timeout) { 1091 if (!timeout) {
1082 dev_dbg(device, "firmware: %s loading timed out\n", 1092 dev_dbg(device, "firmware: %s loading timed out\n",
@@ -1095,16 +1105,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
1095 1105
1096 ret = fw_get_filesystem_firmware(device, fw->priv); 1106 ret = fw_get_filesystem_firmware(device, fw->priv);
1097 if (ret) { 1107 if (ret) {
1098 dev_warn(device, "Direct firmware load failed with error %d\n", 1108 if (opt_flags & FW_OPT_FALLBACK) {
1099 ret); 1109 dev_warn(device,
1100 dev_warn(device, "Falling back to user helper\n"); 1110 "Direct firmware load failed with error %d\n",
1101 ret = fw_load_from_user_helper(fw, name, device, 1111 ret);
1102 uevent, nowait, timeout); 1112 dev_warn(device, "Falling back to user helper\n");
1113 ret = fw_load_from_user_helper(fw, name, device,
1114 opt_flags, timeout);
1115 }
1103 } 1116 }
1104 1117
1105 /* don't cache firmware handled without uevent */
1106 if (!ret) 1118 if (!ret)
1107 ret = assign_firmware_buf(fw, device, !uevent); 1119 ret = assign_firmware_buf(fw, device, opt_flags);
1108 1120
1109 usermodehelper_read_unlock(); 1121 usermodehelper_read_unlock();
1110 1122
@@ -1146,12 +1158,37 @@ request_firmware(const struct firmware **firmware_p, const char *name,
1146 1158
1147 /* Need to pin this module until return */ 1159 /* Need to pin this module until return */
1148 __module_get(THIS_MODULE); 1160 __module_get(THIS_MODULE);
1149 ret = _request_firmware(firmware_p, name, device, true, false); 1161 ret = _request_firmware(firmware_p, name, device,
1162 FW_OPT_UEVENT | FW_OPT_FALLBACK);
1150 module_put(THIS_MODULE); 1163 module_put(THIS_MODULE);
1151 return ret; 1164 return ret;
1152} 1165}
1153EXPORT_SYMBOL(request_firmware); 1166EXPORT_SYMBOL(request_firmware);
1154 1167
1168#ifdef CONFIG_FW_LOADER_USER_HELPER
1169/**
 1170 * request_firmware_direct: - load firmware directly without usermode helper
1171 * @firmware_p: pointer to firmware image
1172 * @name: name of firmware file
1173 * @device: device for which firmware is being loaded
1174 *
1175 * This function works pretty much like request_firmware(), but this doesn't
1176 * fall back to usermode helper even if the firmware couldn't be loaded
1177 * directly from fs. Hence it's useful for loading optional firmwares, which
1178 * aren't always present, without extra long timeouts of udev.
1179 **/
1180int request_firmware_direct(const struct firmware **firmware_p,
1181 const char *name, struct device *device)
1182{
1183 int ret;
1184 __module_get(THIS_MODULE);
1185 ret = _request_firmware(firmware_p, name, device, FW_OPT_UEVENT);
1186 module_put(THIS_MODULE);
1187 return ret;
1188}
1189EXPORT_SYMBOL_GPL(request_firmware_direct);
1190#endif
1191
1155/** 1192/**
1156 * release_firmware: - release the resource associated with a firmware image 1193 * release_firmware: - release the resource associated with a firmware image
1157 * @fw: firmware resource to release 1194 * @fw: firmware resource to release
@@ -1174,7 +1211,7 @@ struct firmware_work {
1174 struct device *device; 1211 struct device *device;
1175 void *context; 1212 void *context;
1176 void (*cont)(const struct firmware *fw, void *context); 1213 void (*cont)(const struct firmware *fw, void *context);
1177 bool uevent; 1214 unsigned int opt_flags;
1178}; 1215};
1179 1216
1180static void request_firmware_work_func(struct work_struct *work) 1217static void request_firmware_work_func(struct work_struct *work)
@@ -1185,7 +1222,7 @@ static void request_firmware_work_func(struct work_struct *work)
1185 fw_work = container_of(work, struct firmware_work, work); 1222 fw_work = container_of(work, struct firmware_work, work);
1186 1223
1187 _request_firmware(&fw, fw_work->name, fw_work->device, 1224 _request_firmware(&fw, fw_work->name, fw_work->device,
1188 fw_work->uevent, true); 1225 fw_work->opt_flags);
1189 fw_work->cont(fw, fw_work->context); 1226 fw_work->cont(fw, fw_work->context);
1190 put_device(fw_work->device); /* taken in request_firmware_nowait() */ 1227 put_device(fw_work->device); /* taken in request_firmware_nowait() */
1191 1228
@@ -1233,7 +1270,8 @@ request_firmware_nowait(
1233 fw_work->device = device; 1270 fw_work->device = device;
1234 fw_work->context = context; 1271 fw_work->context = context;
1235 fw_work->cont = cont; 1272 fw_work->cont = cont;
1236 fw_work->uevent = uevent; 1273 fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
1274 (uevent ? FW_OPT_UEVENT : 0);
1237 1275
1238 if (!try_module_get(module)) { 1276 if (!try_module_get(module)) {
1239 kfree(fw_work); 1277 kfree(fw_work);
diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
index eb26d62e5188..e0f1cb3d3598 100644
--- a/drivers/firmware/dmi-sysfs.c
+++ b/drivers/firmware/dmi-sysfs.c
@@ -553,7 +553,7 @@ static const struct bin_attribute dmi_entry_raw_attr = {
553static void dmi_sysfs_entry_release(struct kobject *kobj) 553static void dmi_sysfs_entry_release(struct kobject *kobj)
554{ 554{
555 struct dmi_sysfs_entry *entry = to_entry(kobj); 555 struct dmi_sysfs_entry *entry = to_entry(kobj);
556 sysfs_remove_bin_file(&entry->kobj, &dmi_entry_raw_attr); 556
557 spin_lock(&entry_list_lock); 557 spin_lock(&entry_list_lock);
558 list_del(&entry->list); 558 list_del(&entry->list);
559 spin_unlock(&entry_list_lock); 559 spin_unlock(&entry_list_lock);
@@ -685,6 +685,7 @@ static void __exit dmi_sysfs_exit(void)
685 pr_debug("dmi-sysfs: unloading.\n"); 685 pr_debug("dmi-sysfs: unloading.\n");
686 cleanup_entry_list(); 686 cleanup_entry_list();
687 kset_unregister(dmi_kset); 687 kset_unregister(dmi_kset);
688 kobject_del(dmi_kobj);
688 kobject_put(dmi_kobj); 689 kobject_put(dmi_kobj);
689} 690}
690 691
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3ec28f..39a824f44e7c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
53obj-y += quota/ 53obj-y += quota/
54 54
55obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
56obj-$(CONFIG_SYSFS) += sysfs/ 56obj-$(CONFIG_SYSFS) += sysfs/ kernfs/
57obj-$(CONFIG_CONFIGFS_FS) += configfs/ 57obj-$(CONFIG_CONFIGFS_FS) += configfs/
58obj-y += devpts/ 58obj-y += devpts/
59 59
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile
new file mode 100644
index 000000000000..674337c76673
--- /dev/null
+++ b/fs/kernfs/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the kernfs pseudo filesystem
3#
4
5obj-y := mount.o inode.o dir.o file.o symlink.o
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
new file mode 100644
index 000000000000..a441e3be8052
--- /dev/null
+++ b/fs/kernfs/dir.c
@@ -0,0 +1,1020 @@
1/*
2 * fs/kernfs/dir.c - kernfs directory implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/idr.h>
14#include <linux/slab.h>
15#include <linux/security.h>
16#include <linux/hash.h>
17
18#include "kernfs-internal.h"
19
/*
 * Taken by sysfs_addrm_start(), kernfs_find_and_get_ns() and
 * sysfs_dentry_revalidate() in this file; the sibling rbtree helpers below
 * document it as their locking requirement.
 */
DEFINE_MUTEX(sysfs_mutex);

/* Map an rb_node embedded as sysfs_dirent.s_rb back to its sysfs_dirent. */
#define to_sysfs_dirent(node) rb_entry((node), struct sysfs_dirent, s_rb)
23
/**
 *	sysfs_name_hash - hash a (namespace, name) pair
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).  The
 *	values 0, 1 and INT_MAX are reserved for magic directory entries and
 *	are never returned.
 */
static unsigned int sysfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;

	/*
	 * Reserve hash numbers 0, 1 and INT_MAX for magic directory entries.
	 * Both 0 and 1 must be moved out of the way, hence "< 2".
	 */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}
46
47static int sysfs_name_compare(unsigned int hash, const char *name,
48 const void *ns, const struct sysfs_dirent *sd)
49{
50 if (hash != sd->s_hash)
51 return hash - sd->s_hash;
52 if (ns != sd->s_ns)
53 return ns - sd->s_ns;
54 return strcmp(name, sd->s_name);
55}
56
57static int sysfs_sd_compare(const struct sysfs_dirent *left,
58 const struct sysfs_dirent *right)
59{
60 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
61 right);
62}
63
64/**
65 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
66 * @sd: sysfs_dirent of interest
67 *
68 * Link @sd into its sibling rbtree which starts from
69 * sd->s_parent->s_dir.children.
70 *
71 * Locking:
72 * mutex_lock(sysfs_mutex)
73 *
74 * RETURNS:
75 * 0 on susccess -EEXIST on failure.
76 */
77static int sysfs_link_sibling(struct sysfs_dirent *sd)
78{
79 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
80 struct rb_node *parent = NULL;
81
82 if (sysfs_type(sd) == SYSFS_DIR)
83 sd->s_parent->s_dir.subdirs++;
84
85 while (*node) {
86 struct sysfs_dirent *pos;
87 int result;
88
89 pos = to_sysfs_dirent(*node);
90 parent = *node;
91 result = sysfs_sd_compare(sd, pos);
92 if (result < 0)
93 node = &pos->s_rb.rb_left;
94 else if (result > 0)
95 node = &pos->s_rb.rb_right;
96 else
97 return -EEXIST;
98 }
99 /* add new node and rebalance the tree */
100 rb_link_node(&sd->s_rb, parent, node);
101 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
102 return 0;
103}
104
105/**
106 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
107 * @sd: sysfs_dirent of interest
108 *
109 * Unlink @sd from its sibling rbtree which starts from
110 * sd->s_parent->s_dir.children.
111 *
112 * Locking:
113 * mutex_lock(sysfs_mutex)
114 */
115static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
116{
117 if (sysfs_type(sd) == SYSFS_DIR)
118 sd->s_parent->s_dir.subdirs--;
119
120 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
121}
122
123/**
124 * sysfs_get_active - get an active reference to sysfs_dirent
125 * @sd: sysfs_dirent to get an active reference to
126 *
127 * Get an active reference of @sd. This function is noop if @sd
128 * is NULL.
129 *
130 * RETURNS:
131 * Pointer to @sd on success, NULL on failure.
132 */
133struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
134{
135 if (unlikely(!sd))
136 return NULL;
137
138 if (!atomic_inc_unless_negative(&sd->s_active))
139 return NULL;
140
141 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
142 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
143 return sd;
144}
145
146/**
147 * sysfs_put_active - put an active reference to sysfs_dirent
148 * @sd: sysfs_dirent to put an active reference to
149 *
150 * Put an active reference to @sd. This function is noop if @sd
151 * is NULL.
152 */
153void sysfs_put_active(struct sysfs_dirent *sd)
154{
155 int v;
156
157 if (unlikely(!sd))
158 return;
159
160 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
161 rwsem_release(&sd->dep_map, 1, _RET_IP_);
162 v = atomic_dec_return(&sd->s_active);
163 if (likely(v != SD_DEACTIVATED_BIAS))
164 return;
165
166 /* atomic_dec_return() is a mb(), we'll always see the updated
167 * sd->u.completion.
168 */
169 complete(sd->u.completion);
170}
171
172/**
173 * sysfs_deactivate - deactivate sysfs_dirent
174 * @sd: sysfs_dirent to deactivate
175 *
176 * Deny new active references and drain existing ones.
177 */
178static void sysfs_deactivate(struct sysfs_dirent *sd)
179{
180 DECLARE_COMPLETION_ONSTACK(wait);
181 int v;
182
183 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
184
185 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
186 return;
187
188 sd->u.completion = (void *)&wait;
189
190 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
191 /* atomic_add_return() is a mb(), put_active() will always see
192 * the updated sd->u.completion.
193 */
194 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
195
196 if (v != SD_DEACTIVATED_BIAS) {
197 lock_contended(&sd->dep_map, _RET_IP_);
198 wait_for_completion(&wait);
199 }
200
201 lock_acquired(&sd->dep_map, _RET_IP_);
202 rwsem_release(&sd->dep_map, 1, _RET_IP_);
203}
204
205/**
206 * kernfs_get - get a reference count on a sysfs_dirent
207 * @sd: the target sysfs_dirent
208 */
209void kernfs_get(struct sysfs_dirent *sd)
210{
211 if (sd) {
212 WARN_ON(!atomic_read(&sd->s_count));
213 atomic_inc(&sd->s_count);
214 }
215}
216EXPORT_SYMBOL_GPL(kernfs_get);
217
218/**
219 * kernfs_put - put a reference count on a sysfs_dirent
220 * @sd: the target sysfs_dirent
221 *
222 * Put a reference count of @sd and destroy it if it reached zero.
223 */
224void kernfs_put(struct sysfs_dirent *sd)
225{
226 struct sysfs_dirent *parent_sd;
227 struct kernfs_root *root;
228
229 if (!sd || !atomic_dec_and_test(&sd->s_count))
230 return;
231 root = kernfs_root(sd);
232 repeat:
233 /* Moving/renaming is always done while holding reference.
234 * sd->s_parent won't change beneath us.
235 */
236 parent_sd = sd->s_parent;
237
238 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
239 "sysfs: free using entry: %s/%s\n",
240 parent_sd ? parent_sd->s_name : "", sd->s_name);
241
242 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
243 kernfs_put(sd->s_symlink.target_sd);
244 if (sysfs_type(sd) & SYSFS_COPY_NAME)
245 kfree(sd->s_name);
246 if (sd->s_iattr) {
247 if (sd->s_iattr->ia_secdata)
248 security_release_secctx(sd->s_iattr->ia_secdata,
249 sd->s_iattr->ia_secdata_len);
250 simple_xattrs_free(&sd->s_iattr->xattrs);
251 }
252 kfree(sd->s_iattr);
253 ida_simple_remove(&root->ino_ida, sd->s_ino);
254 kmem_cache_free(sysfs_dir_cachep, sd);
255
256 sd = parent_sd;
257 if (sd) {
258 if (atomic_dec_and_test(&sd->s_count))
259 goto repeat;
260 } else {
261 /* just released the root sd, free @root too */
262 ida_destroy(&root->ino_ida);
263 kfree(root);
264 }
265}
266EXPORT_SYMBOL_GPL(kernfs_put);
267
268static int sysfs_dentry_delete(const struct dentry *dentry)
269{
270 struct sysfs_dirent *sd = dentry->d_fsdata;
271 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
272}
273
274static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
275{
276 struct sysfs_dirent *sd;
277
278 if (flags & LOOKUP_RCU)
279 return -ECHILD;
280
281 sd = dentry->d_fsdata;
282 mutex_lock(&sysfs_mutex);
283
284 /* The sysfs dirent has been deleted */
285 if (sd->s_flags & SYSFS_FLAG_REMOVED)
286 goto out_bad;
287
288 /* The sysfs dirent has been moved? */
289 if (dentry->d_parent->d_fsdata != sd->s_parent)
290 goto out_bad;
291
292 /* The sysfs dirent has been renamed */
293 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
294 goto out_bad;
295
296 /* The sysfs dirent has been moved to a different namespace */
297 if (sd->s_parent && kernfs_ns_enabled(sd->s_parent) &&
298 sysfs_info(dentry->d_sb)->ns != sd->s_ns)
299 goto out_bad;
300
301 mutex_unlock(&sysfs_mutex);
302out_valid:
303 return 1;
304out_bad:
305 /* Remove the dentry from the dcache hashes.
306 * If this is a deleted dentry we use d_drop instead of d_delete
307 * so sysfs doesn't need to cope with negative dentries.
308 *
309 * If this is a dentry that has simply been renamed we
310 * use d_drop to remove it from the dcache lookup on its
311 * old parent. If this dentry persists later when a lookup
312 * is performed at its new name the dentry will be readded
313 * to the dcache hashes.
314 */
315 mutex_unlock(&sysfs_mutex);
316
317 /* If we have submounts we must allow the vfs caches
318 * to lie about the state of the filesystem to prevent
319 * leaks and other nasty things.
320 */
321 if (check_submounts_and_drop(dentry) != 0)
322 goto out_valid;
323
324 return 0;
325}
326
327static void sysfs_dentry_release(struct dentry *dentry)
328{
329 kernfs_put(dentry->d_fsdata);
330}
331
332const struct dentry_operations sysfs_dentry_ops = {
333 .d_revalidate = sysfs_dentry_revalidate,
334 .d_delete = sysfs_dentry_delete,
335 .d_release = sysfs_dentry_release,
336};
337
338struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root,
339 const char *name, umode_t mode, int type)
340{
341 char *dup_name = NULL;
342 struct sysfs_dirent *sd;
343 int ret;
344
345 if (type & SYSFS_COPY_NAME) {
346 name = dup_name = kstrdup(name, GFP_KERNEL);
347 if (!name)
348 return NULL;
349 }
350
351 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
352 if (!sd)
353 goto err_out1;
354
355 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
356 if (ret < 0)
357 goto err_out2;
358 sd->s_ino = ret;
359
360 atomic_set(&sd->s_count, 1);
361 atomic_set(&sd->s_active, 0);
362
363 sd->s_name = name;
364 sd->s_mode = mode;
365 sd->s_flags = type | SYSFS_FLAG_REMOVED;
366
367 return sd;
368
369 err_out2:
370 kmem_cache_free(sysfs_dir_cachep, sd);
371 err_out1:
372 kfree(dup_name);
373 return NULL;
374}
375
376/**
377 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
378 * @acxt: pointer to sysfs_addrm_cxt to be used
379 *
380 * This function is called when the caller is about to add or remove
381 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
382 * to keep and pass context to other addrm functions.
383 *
384 * LOCKING:
385 * Kernel thread context (may sleep). sysfs_mutex is locked on
386 * return.
387 */
388void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
389 __acquires(sysfs_mutex)
390{
391 memset(acxt, 0, sizeof(*acxt));
392
393 mutex_lock(&sysfs_mutex);
394}
395
396/**
397 * sysfs_add_one - add sysfs_dirent to parent without warning
398 * @acxt: addrm context to use
399 * @sd: sysfs_dirent to be added
400 * @parent_sd: the parent sysfs_dirent to add @sd to
401 *
402 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
403 * the parent inode if @sd is a directory and link into the children
404 * list of the parent.
405 *
406 * This function should be called between calls to
407 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
408 * passed the same @acxt as passed to sysfs_addrm_start().
409 *
410 * LOCKING:
411 * Determined by sysfs_addrm_start().
412 *
413 * RETURNS:
414 * 0 on success, -EEXIST if entry with the given name already
415 * exists.
416 */
417int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
418 struct sysfs_dirent *parent_sd)
419{
420 bool has_ns = kernfs_ns_enabled(parent_sd);
421 struct sysfs_inode_attrs *ps_iattr;
422 int ret;
423
424 if (has_ns != (bool)sd->s_ns) {
425 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
426 has_ns ? "required" : "invalid",
427 parent_sd->s_name, sd->s_name);
428 return -EINVAL;
429 }
430
431 if (sysfs_type(parent_sd) != SYSFS_DIR)
432 return -EINVAL;
433
434 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
435 sd->s_parent = parent_sd;
436 kernfs_get(parent_sd);
437
438 ret = sysfs_link_sibling(sd);
439 if (ret)
440 return ret;
441
442 /* Update timestamps on the parent */
443 ps_iattr = parent_sd->s_iattr;
444 if (ps_iattr) {
445 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
446 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
447 }
448
449 /* Mark the entry added into directory tree */
450 sd->s_flags &= ~SYSFS_FLAG_REMOVED;
451
452 return 0;
453}
454
455/**
456 * sysfs_remove_one - remove sysfs_dirent from parent
457 * @acxt: addrm context to use
458 * @sd: sysfs_dirent to be removed
459 *
460 * Mark @sd removed and drop nlink of parent inode if @sd is a
461 * directory. @sd is unlinked from the children list.
462 *
463 * This function should be called between calls to
464 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
465 * passed the same @acxt as passed to sysfs_addrm_start().
466 *
467 * LOCKING:
468 * Determined by sysfs_addrm_start().
469 */
470static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
471 struct sysfs_dirent *sd)
472{
473 struct sysfs_inode_attrs *ps_iattr;
474
475 /*
476 * Removal can be called multiple times on the same node. Only the
477 * first invocation is effective and puts the base ref.
478 */
479 if (sd->s_flags & SYSFS_FLAG_REMOVED)
480 return;
481
482 if (sd->s_parent) {
483 sysfs_unlink_sibling(sd);
484
485 /* Update timestamps on the parent */
486 ps_iattr = sd->s_parent->s_iattr;
487 if (ps_iattr) {
488 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
489 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
490 }
491 }
492
493 sd->s_flags |= SYSFS_FLAG_REMOVED;
494 sd->u.removed_list = acxt->removed;
495 acxt->removed = sd;
496}
497
498/**
499 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
500 * @acxt: addrm context to finish up
501 *
502 * Finish up sysfs_dirent add/remove. Resources acquired by
503 * sysfs_addrm_start() are released and removed sysfs_dirents are
504 * cleaned up.
505 *
506 * LOCKING:
507 * sysfs_mutex is released.
508 */
509void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
510 __releases(sysfs_mutex)
511{
512 /* release resources acquired by sysfs_addrm_start() */
513 mutex_unlock(&sysfs_mutex);
514
515 /* kill removed sysfs_dirents */
516 while (acxt->removed) {
517 struct sysfs_dirent *sd = acxt->removed;
518
519 acxt->removed = sd->u.removed_list;
520
521 sysfs_deactivate(sd);
522 sysfs_unmap_bin_file(sd);
523 kernfs_put(sd);
524 }
525}
526
527/**
528 * kernfs_find_ns - find sysfs_dirent with the given name
529 * @parent: sysfs_dirent to search under
530 * @name: name to look for
531 * @ns: the namespace tag to use
532 *
533 * Look for sysfs_dirent with name @name under @parent. Returns pointer to
534 * the found sysfs_dirent on success, %NULL on failure.
535 */
536static struct sysfs_dirent *kernfs_find_ns(struct sysfs_dirent *parent,
537 const unsigned char *name,
538 const void *ns)
539{
540 struct rb_node *node = parent->s_dir.children.rb_node;
541 bool has_ns = kernfs_ns_enabled(parent);
542 unsigned int hash;
543
544 lockdep_assert_held(&sysfs_mutex);
545
546 if (has_ns != (bool)ns) {
547 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
548 has_ns ? "required" : "invalid",
549 parent->s_name, name);
550 return NULL;
551 }
552
553 hash = sysfs_name_hash(name, ns);
554 while (node) {
555 struct sysfs_dirent *sd;
556 int result;
557
558 sd = to_sysfs_dirent(node);
559 result = sysfs_name_compare(hash, name, ns, sd);
560 if (result < 0)
561 node = node->rb_left;
562 else if (result > 0)
563 node = node->rb_right;
564 else
565 return sd;
566 }
567 return NULL;
568}
569
570/**
571 * kernfs_find_and_get_ns - find and get sysfs_dirent with the given name
572 * @parent: sysfs_dirent to search under
573 * @name: name to look for
574 * @ns: the namespace tag to use
575 *
576 * Look for sysfs_dirent with name @name under @parent and get a reference
577 * if found. This function may sleep and returns pointer to the found
578 * sysfs_dirent on success, %NULL on failure.
579 */
580struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent,
581 const char *name, const void *ns)
582{
583 struct sysfs_dirent *sd;
584
585 mutex_lock(&sysfs_mutex);
586 sd = kernfs_find_ns(parent, name, ns);
587 kernfs_get(sd);
588 mutex_unlock(&sysfs_mutex);
589
590 return sd;
591}
592EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
593
594/**
595 * kernfs_create_root - create a new kernfs hierarchy
596 * @priv: opaque data associated with the new directory
597 *
598 * Returns the root of the new hierarchy on success, ERR_PTR() value on
599 * failure.
600 */
601struct kernfs_root *kernfs_create_root(void *priv)
602{
603 struct kernfs_root *root;
604 struct sysfs_dirent *sd;
605
606 root = kzalloc(sizeof(*root), GFP_KERNEL);
607 if (!root)
608 return ERR_PTR(-ENOMEM);
609
610 ida_init(&root->ino_ida);
611
612 sd = sysfs_new_dirent(root, "", S_IFDIR | S_IRUGO | S_IXUGO, SYSFS_DIR);
613 if (!sd) {
614 ida_destroy(&root->ino_ida);
615 kfree(root);
616 return ERR_PTR(-ENOMEM);
617 }
618
619 sd->s_flags &= ~SYSFS_FLAG_REMOVED;
620 sd->priv = priv;
621 sd->s_dir.root = root;
622
623 root->sd = sd;
624
625 return root;
626}
627
628/**
629 * kernfs_destroy_root - destroy a kernfs hierarchy
630 * @root: root of the hierarchy to destroy
631 *
632 * Destroy the hierarchy anchored at @root by removing all existing
633 * directories and destroying @root.
634 */
635void kernfs_destroy_root(struct kernfs_root *root)
636{
637 kernfs_remove(root->sd); /* will also free @root */
638}
639
640/**
641 * kernfs_create_dir_ns - create a directory
642 * @parent: parent in which to create a new directory
643 * @name: name of the new directory
644 * @priv: opaque data associated with the new directory
645 * @ns: optional namespace tag of the directory
646 *
647 * Returns the created node on success, ERR_PTR() value on failure.
648 */
649struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent,
650 const char *name, void *priv,
651 const void *ns)
652{
653 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
654 struct sysfs_addrm_cxt acxt;
655 struct sysfs_dirent *sd;
656 int rc;
657
658 /* allocate */
659 sd = sysfs_new_dirent(kernfs_root(parent), name, mode, SYSFS_DIR);
660 if (!sd)
661 return ERR_PTR(-ENOMEM);
662
663 sd->s_dir.root = parent->s_dir.root;
664 sd->s_ns = ns;
665 sd->priv = priv;
666
667 /* link in */
668 sysfs_addrm_start(&acxt);
669 rc = sysfs_add_one(&acxt, sd, parent);
670 sysfs_addrm_finish(&acxt);
671
672 if (!rc)
673 return sd;
674
675 kernfs_put(sd);
676 return ERR_PTR(rc);
677}
678
679static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
680 unsigned int flags)
681{
682 struct dentry *ret = NULL;
683 struct dentry *parent = dentry->d_parent;
684 struct sysfs_dirent *parent_sd = parent->d_fsdata;
685 struct sysfs_dirent *sd;
686 struct inode *inode;
687 const void *ns = NULL;
688
689 mutex_lock(&sysfs_mutex);
690
691 if (kernfs_ns_enabled(parent_sd))
692 ns = sysfs_info(dir->i_sb)->ns;
693
694 sd = kernfs_find_ns(parent_sd, dentry->d_name.name, ns);
695
696 /* no such entry */
697 if (!sd) {
698 ret = ERR_PTR(-ENOENT);
699 goto out_unlock;
700 }
701 kernfs_get(sd);
702 dentry->d_fsdata = sd;
703
704 /* attach dentry and inode */
705 inode = sysfs_get_inode(dir->i_sb, sd);
706 if (!inode) {
707 ret = ERR_PTR(-ENOMEM);
708 goto out_unlock;
709 }
710
711 /* instantiate and hash dentry */
712 ret = d_materialise_unique(dentry, inode);
713 out_unlock:
714 mutex_unlock(&sysfs_mutex);
715 return ret;
716}
717
718const struct inode_operations sysfs_dir_inode_operations = {
719 .lookup = sysfs_lookup,
720 .permission = sysfs_permission,
721 .setattr = sysfs_setattr,
722 .getattr = sysfs_getattr,
723 .setxattr = sysfs_setxattr,
724 .removexattr = sysfs_removexattr,
725 .getxattr = sysfs_getxattr,
726 .listxattr = sysfs_listxattr,
727};
728
729static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
730{
731 struct sysfs_dirent *last;
732
733 while (true) {
734 struct rb_node *rbn;
735
736 last = pos;
737
738 if (sysfs_type(pos) != SYSFS_DIR)
739 break;
740
741 rbn = rb_first(&pos->s_dir.children);
742 if (!rbn)
743 break;
744
745 pos = to_sysfs_dirent(rbn);
746 }
747
748 return last;
749}
750
751/**
752 * sysfs_next_descendant_post - find the next descendant for post-order walk
753 * @pos: the current position (%NULL to initiate traversal)
754 * @root: sysfs_dirent whose descendants to walk
755 *
756 * Find the next descendant to visit for post-order traversal of @root's
757 * descendants. @root is included in the iteration and the last node to be
758 * visited.
759 */
760static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
761 struct sysfs_dirent *root)
762{
763 struct rb_node *rbn;
764
765 lockdep_assert_held(&sysfs_mutex);
766
767 /* if first iteration, visit leftmost descendant which may be root */
768 if (!pos)
769 return sysfs_leftmost_descendant(root);
770
771 /* if we visited @root, we're done */
772 if (pos == root)
773 return NULL;
774
775 /* if there's an unvisited sibling, visit its leftmost descendant */
776 rbn = rb_next(&pos->s_rb);
777 if (rbn)
778 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
779
780 /* no sibling left, visit parent */
781 return pos->s_parent;
782}
783
784static void __kernfs_remove(struct sysfs_addrm_cxt *acxt,
785 struct sysfs_dirent *sd)
786{
787 struct sysfs_dirent *pos, *next;
788
789 if (!sd)
790 return;
791
792 pr_debug("sysfs %s: removing\n", sd->s_name);
793
794 next = NULL;
795 do {
796 pos = next;
797 next = sysfs_next_descendant_post(pos, sd);
798 if (pos)
799 sysfs_remove_one(acxt, pos);
800 } while (next);
801}
802
803/**
804 * kernfs_remove - remove a sysfs_dirent recursively
805 * @sd: the sysfs_dirent to remove
806 *
807 * Remove @sd along with all its subdirectories and files.
808 */
809void kernfs_remove(struct sysfs_dirent *sd)
810{
811 struct sysfs_addrm_cxt acxt;
812
813 sysfs_addrm_start(&acxt);
814 __kernfs_remove(&acxt, sd);
815 sysfs_addrm_finish(&acxt);
816}
817
818/**
819 * kernfs_remove_by_name_ns - find a sysfs_dirent by name and remove it
820 * @dir_sd: parent of the target
821 * @name: name of the sysfs_dirent to remove
822 * @ns: namespace tag of the sysfs_dirent to remove
823 *
824 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
825 * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
826 */
827int kernfs_remove_by_name_ns(struct sysfs_dirent *dir_sd, const char *name,
828 const void *ns)
829{
830 struct sysfs_addrm_cxt acxt;
831 struct sysfs_dirent *sd;
832
833 if (!dir_sd) {
834 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
835 name);
836 return -ENOENT;
837 }
838
839 sysfs_addrm_start(&acxt);
840
841 sd = kernfs_find_ns(dir_sd, name, ns);
842 if (sd)
843 __kernfs_remove(&acxt, sd);
844
845 sysfs_addrm_finish(&acxt);
846
847 if (sd)
848 return 0;
849 else
850 return -ENOENT;
851}
852
853/**
854 * kernfs_rename_ns - move and rename a kernfs_node
855 * @sd: target node
856 * @new_parent: new parent to put @sd under
857 * @new_name: new name
858 * @new_ns: new namespace tag
859 */
860int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent,
861 const char *new_name, const void *new_ns)
862{
863 int error;
864
865 mutex_lock(&sysfs_mutex);
866
867 error = 0;
868 if ((sd->s_parent == new_parent) && (sd->s_ns == new_ns) &&
869 (strcmp(sd->s_name, new_name) == 0))
870 goto out; /* nothing to rename */
871
872 error = -EEXIST;
873 if (kernfs_find_ns(new_parent, new_name, new_ns))
874 goto out;
875
876 /* rename sysfs_dirent */
877 if (strcmp(sd->s_name, new_name) != 0) {
878 error = -ENOMEM;
879 new_name = kstrdup(new_name, GFP_KERNEL);
880 if (!new_name)
881 goto out;
882
883 kfree(sd->s_name);
884 sd->s_name = new_name;
885 }
886
887 /*
888 * Move to the appropriate place in the appropriate directories rbtree.
889 */
890 sysfs_unlink_sibling(sd);
891 kernfs_get(new_parent);
892 kernfs_put(sd->s_parent);
893 sd->s_ns = new_ns;
894 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
895 sd->s_parent = new_parent;
896 sysfs_link_sibling(sd);
897
898 error = 0;
899 out:
900 mutex_unlock(&sysfs_mutex);
901 return error;
902}
903
904/* Relationship between s_mode and the DT_xxx types */
905static inline unsigned char dt_type(struct sysfs_dirent *sd)
906{
907 return (sd->s_mode >> 12) & 15;
908}
909
910static int sysfs_dir_release(struct inode *inode, struct file *filp)
911{
912 kernfs_put(filp->private_data);
913 return 0;
914}
915
916static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
917 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
918{
919 if (pos) {
920 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
921 pos->s_parent == parent_sd &&
922 hash == pos->s_hash;
923 kernfs_put(pos);
924 if (!valid)
925 pos = NULL;
926 }
927 if (!pos && (hash > 1) && (hash < INT_MAX)) {
928 struct rb_node *node = parent_sd->s_dir.children.rb_node;
929 while (node) {
930 pos = to_sysfs_dirent(node);
931
932 if (hash < pos->s_hash)
933 node = node->rb_left;
934 else if (hash > pos->s_hash)
935 node = node->rb_right;
936 else
937 break;
938 }
939 }
940 /* Skip over entries in the wrong namespace */
941 while (pos && pos->s_ns != ns) {
942 struct rb_node *node = rb_next(&pos->s_rb);
943 if (!node)
944 pos = NULL;
945 else
946 pos = to_sysfs_dirent(node);
947 }
948 return pos;
949}
950
951static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
952 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
953{
954 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
955 if (pos)
956 do {
957 struct rb_node *node = rb_next(&pos->s_rb);
958 if (!node)
959 pos = NULL;
960 else
961 pos = to_sysfs_dirent(node);
962 } while (pos && pos->s_ns != ns);
963 return pos;
964}
965
966static int sysfs_readdir(struct file *file, struct dir_context *ctx)
967{
968 struct dentry *dentry = file->f_path.dentry;
969 struct sysfs_dirent *parent_sd = dentry->d_fsdata;
970 struct sysfs_dirent *pos = file->private_data;
971 const void *ns = NULL;
972
973 if (!dir_emit_dots(file, ctx))
974 return 0;
975 mutex_lock(&sysfs_mutex);
976
977 if (kernfs_ns_enabled(parent_sd))
978 ns = sysfs_info(dentry->d_sb)->ns;
979
980 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
981 pos;
982 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
983 const char *name = pos->s_name;
984 unsigned int type = dt_type(pos);
985 int len = strlen(name);
986 ino_t ino = pos->s_ino;
987
988 ctx->pos = pos->s_hash;
989 file->private_data = pos;
990 kernfs_get(pos);
991
992 mutex_unlock(&sysfs_mutex);
993 if (!dir_emit(ctx, name, len, ino, type))
994 return 0;
995 mutex_lock(&sysfs_mutex);
996 }
997 mutex_unlock(&sysfs_mutex);
998 file->private_data = NULL;
999 ctx->pos = INT_MAX;
1000 return 0;
1001}
1002
1003static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1004{
1005 struct inode *inode = file_inode(file);
1006 loff_t ret;
1007
1008 mutex_lock(&inode->i_mutex);
1009 ret = generic_file_llseek(file, offset, whence);
1010 mutex_unlock(&inode->i_mutex);
1011
1012 return ret;
1013}
1014
1015const struct file_operations sysfs_dir_operations = {
1016 .read = generic_read_dir,
1017 .iterate = sysfs_readdir,
1018 .release = sysfs_dir_release,
1019 .llseek = sysfs_dir_llseek,
1020};
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
new file mode 100644
index 000000000000..4a5863b79de9
--- /dev/null
+++ b/fs/kernfs/file.c
@@ -0,0 +1,813 @@
1/*
2 * fs/kernfs/file.c - kernfs file implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <linux/slab.h>
14#include <linux/poll.h>
15#include <linux/pagemap.h>
16#include <linux/sched.h>
17
18#include "kernfs-internal.h"
19
20/*
21 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
22 * for each sysfs_dirent with one or more open files.
23 *
24 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
25 * protected by sysfs_open_dirent_lock.
26 *
27 * filp->private_data points to seq_file whose ->private points to
28 * sysfs_open_file. sysfs_open_files are chained at
29 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
30 */
31static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
32static DEFINE_MUTEX(sysfs_open_file_mutex);
33
34struct sysfs_open_dirent {
35 atomic_t refcnt;
36 atomic_t event;
37 wait_queue_head_t poll;
38 struct list_head files; /* goes through sysfs_open_file.list */
39};
40
41static struct sysfs_open_file *sysfs_of(struct file *file)
42{
43 return ((struct seq_file *)file->private_data)->private;
44}
45
46/*
47 * Determine the kernfs_ops for the given sysfs_dirent. This function must
48 * be called while holding an active reference.
49 */
50static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
51{
52 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
53 lockdep_assert_held(sd);
54 return sd->s_attr.ops;
55}
56
57static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
58{
59 struct sysfs_open_file *of = sf->private;
60 const struct kernfs_ops *ops;
61
62 /*
63 * @of->mutex nests outside active ref and is just to ensure that
64 * the ops aren't called concurrently for the same open file.
65 */
66 mutex_lock(&of->mutex);
67 if (!sysfs_get_active(of->sd))
68 return ERR_PTR(-ENODEV);
69
70 ops = kernfs_ops(of->sd);
71 if (ops->seq_start) {
72 return ops->seq_start(sf, ppos);
73 } else {
74 /*
75 * The same behavior and code as single_open(). Returns
76 * !NULL if pos is at the beginning; otherwise, NULL.
77 */
78 return NULL + !*ppos;
79 }
80}
81
82static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
83{
84 struct sysfs_open_file *of = sf->private;
85 const struct kernfs_ops *ops = kernfs_ops(of->sd);
86
87 if (ops->seq_next) {
88 return ops->seq_next(sf, v, ppos);
89 } else {
90 /*
91 * The same behavior and code as single_open(), always
92 * terminate after the initial read.
93 */
94 ++*ppos;
95 return NULL;
96 }
97}
98
99static void kernfs_seq_stop(struct seq_file *sf, void *v)
100{
101 struct sysfs_open_file *of = sf->private;
102 const struct kernfs_ops *ops = kernfs_ops(of->sd);
103
104 if (ops->seq_stop)
105 ops->seq_stop(sf, v);
106
107 sysfs_put_active(of->sd);
108 mutex_unlock(&of->mutex);
109}
110
111static int kernfs_seq_show(struct seq_file *sf, void *v)
112{
113 struct sysfs_open_file *of = sf->private;
114
115 of->event = atomic_read(&of->sd->s_attr.open->event);
116
117 return of->sd->s_attr.ops->seq_show(sf, v);
118}
119
120static const struct seq_operations kernfs_seq_ops = {
121 .start = kernfs_seq_start,
122 .next = kernfs_seq_next,
123 .stop = kernfs_seq_stop,
124 .show = kernfs_seq_show,
125};
126
127/*
128 * As reading a bin file can have side-effects, the exact offset and bytes
129 * specified in read(2) call should be passed to the read callback making
130 * it difficult to use seq_file. Implement simplistic custom buffering for
131 * bin files.
132 */
133static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
134 char __user *user_buf, size_t count,
135 loff_t *ppos)
136{
137 ssize_t len = min_t(size_t, count, PAGE_SIZE);
138 const struct kernfs_ops *ops;
139 char *buf;
140
141 buf = kmalloc(len, GFP_KERNEL);
142 if (!buf)
143 return -ENOMEM;
144
145 /*
146 * @of->mutex nests outside active ref and is just to ensure that
147 * the ops aren't called concurrently for the same open file.
148 */
149 mutex_lock(&of->mutex);
150 if (!sysfs_get_active(of->sd)) {
151 len = -ENODEV;
152 mutex_unlock(&of->mutex);
153 goto out_free;
154 }
155
156 ops = kernfs_ops(of->sd);
157 if (ops->read)
158 len = ops->read(of, buf, len, *ppos);
159 else
160 len = -EINVAL;
161
162 sysfs_put_active(of->sd);
163 mutex_unlock(&of->mutex);
164
165 if (len < 0)
166 goto out_free;
167
168 if (copy_to_user(user_buf, buf, len)) {
169 len = -EFAULT;
170 goto out_free;
171 }
172
173 *ppos += len;
174
175 out_free:
176 kfree(buf);
177 return len;
178}
179
180/**
181 * kernfs_file_read - kernfs vfs read callback
182 * @file: file pointer
183 * @user_buf: data to write
184 * @count: number of bytes
185 * @ppos: starting offset
186 */
187static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
188 size_t count, loff_t *ppos)
189{
190 struct sysfs_open_file *of = sysfs_of(file);
191
192 if (of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW)
193 return seq_read(file, user_buf, count, ppos);
194 else
195 return kernfs_file_direct_read(of, user_buf, count, ppos);
196}
197
198/**
199 * kernfs_file_write - kernfs vfs write callback
200 * @file: file pointer
201 * @user_buf: data to write
202 * @count: number of bytes
203 * @ppos: starting offset
204 *
205 * Copy data in from userland and pass it to the matching kernfs write
206 * operation.
207 *
208 * There is no easy way for us to know if userspace is only doing a partial
209 * write, so we don't support them. We expect the entire buffer to come on
210 * the first write. Hint: if you're writing a value, first read the file,
211 * modify only the the value you're changing, then write entire buffer
212 * back.
213 */
214static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
215 size_t count, loff_t *ppos)
216{
217 struct sysfs_open_file *of = sysfs_of(file);
218 ssize_t len = min_t(size_t, count, PAGE_SIZE);
219 const struct kernfs_ops *ops;
220 char *buf;
221
222 buf = kmalloc(len + 1, GFP_KERNEL);
223 if (!buf)
224 return -ENOMEM;
225
226 if (copy_from_user(buf, user_buf, len)) {
227 len = -EFAULT;
228 goto out_free;
229 }
230 buf[len] = '\0'; /* guarantee string termination */
231
232 /*
233 * @of->mutex nests outside active ref and is just to ensure that
234 * the ops aren't called concurrently for the same open file.
235 */
236 mutex_lock(&of->mutex);
237 if (!sysfs_get_active(of->sd)) {
238 mutex_unlock(&of->mutex);
239 len = -ENODEV;
240 goto out_free;
241 }
242
243 ops = kernfs_ops(of->sd);
244 if (ops->write)
245 len = ops->write(of, buf, len, *ppos);
246 else
247 len = -EINVAL;
248
249 sysfs_put_active(of->sd);
250 mutex_unlock(&of->mutex);
251
252 if (len > 0)
253 *ppos += len;
254out_free:
255 kfree(buf);
256 return len;
257}
258
259static void kernfs_vma_open(struct vm_area_struct *vma)
260{
261 struct file *file = vma->vm_file;
262 struct sysfs_open_file *of = sysfs_of(file);
263
264 if (!of->vm_ops)
265 return;
266
267 if (!sysfs_get_active(of->sd))
268 return;
269
270 if (of->vm_ops->open)
271 of->vm_ops->open(vma);
272
273 sysfs_put_active(of->sd);
274}
275
276static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
277{
278 struct file *file = vma->vm_file;
279 struct sysfs_open_file *of = sysfs_of(file);
280 int ret;
281
282 if (!of->vm_ops)
283 return VM_FAULT_SIGBUS;
284
285 if (!sysfs_get_active(of->sd))
286 return VM_FAULT_SIGBUS;
287
288 ret = VM_FAULT_SIGBUS;
289 if (of->vm_ops->fault)
290 ret = of->vm_ops->fault(vma, vmf);
291
292 sysfs_put_active(of->sd);
293 return ret;
294}
295
296static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
297 struct vm_fault *vmf)
298{
299 struct file *file = vma->vm_file;
300 struct sysfs_open_file *of = sysfs_of(file);
301 int ret;
302
303 if (!of->vm_ops)
304 return VM_FAULT_SIGBUS;
305
306 if (!sysfs_get_active(of->sd))
307 return VM_FAULT_SIGBUS;
308
309 ret = 0;
310 if (of->vm_ops->page_mkwrite)
311 ret = of->vm_ops->page_mkwrite(vma, vmf);
312 else
313 file_update_time(file);
314
315 sysfs_put_active(of->sd);
316 return ret;
317}
318
319static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
320 void *buf, int len, int write)
321{
322 struct file *file = vma->vm_file;
323 struct sysfs_open_file *of = sysfs_of(file);
324 int ret;
325
326 if (!of->vm_ops)
327 return -EINVAL;
328
329 if (!sysfs_get_active(of->sd))
330 return -EINVAL;
331
332 ret = -EINVAL;
333 if (of->vm_ops->access)
334 ret = of->vm_ops->access(vma, addr, buf, len, write);
335
336 sysfs_put_active(of->sd);
337 return ret;
338}
339
340#ifdef CONFIG_NUMA
341static int kernfs_vma_set_policy(struct vm_area_struct *vma,
342 struct mempolicy *new)
343{
344 struct file *file = vma->vm_file;
345 struct sysfs_open_file *of = sysfs_of(file);
346 int ret;
347
348 if (!of->vm_ops)
349 return 0;
350
351 if (!sysfs_get_active(of->sd))
352 return -EINVAL;
353
354 ret = 0;
355 if (of->vm_ops->set_policy)
356 ret = of->vm_ops->set_policy(vma, new);
357
358 sysfs_put_active(of->sd);
359 return ret;
360}
361
362static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
363 unsigned long addr)
364{
365 struct file *file = vma->vm_file;
366 struct sysfs_open_file *of = sysfs_of(file);
367 struct mempolicy *pol;
368
369 if (!of->vm_ops)
370 return vma->vm_policy;
371
372 if (!sysfs_get_active(of->sd))
373 return vma->vm_policy;
374
375 pol = vma->vm_policy;
376 if (of->vm_ops->get_policy)
377 pol = of->vm_ops->get_policy(vma, addr);
378
379 sysfs_put_active(of->sd);
380 return pol;
381}
382
383static int kernfs_vma_migrate(struct vm_area_struct *vma,
384 const nodemask_t *from, const nodemask_t *to,
385 unsigned long flags)
386{
387 struct file *file = vma->vm_file;
388 struct sysfs_open_file *of = sysfs_of(file);
389 int ret;
390
391 if (!of->vm_ops)
392 return 0;
393
394 if (!sysfs_get_active(of->sd))
395 return 0;
396
397 ret = 0;
398 if (of->vm_ops->migrate)
399 ret = of->vm_ops->migrate(vma, from, to, flags);
400
401 sysfs_put_active(of->sd);
402 return ret;
403}
404#endif
405
406static const struct vm_operations_struct kernfs_vm_ops = {
407 .open = kernfs_vma_open,
408 .fault = kernfs_vma_fault,
409 .page_mkwrite = kernfs_vma_page_mkwrite,
410 .access = kernfs_vma_access,
411#ifdef CONFIG_NUMA
412 .set_policy = kernfs_vma_set_policy,
413 .get_policy = kernfs_vma_get_policy,
414 .migrate = kernfs_vma_migrate,
415#endif
416};
417
418static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
419{
420 struct sysfs_open_file *of = sysfs_of(file);
421 const struct kernfs_ops *ops;
422 int rc;
423
424 mutex_lock(&of->mutex);
425
426 rc = -ENODEV;
427 if (!sysfs_get_active(of->sd))
428 goto out_unlock;
429
430 ops = kernfs_ops(of->sd);
431 if (ops->mmap)
432 rc = ops->mmap(of, vma);
433 if (rc)
434 goto out_put;
435
436 /*
437 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
438 * to satisfy versions of X which crash if the mmap fails: that
439 * substitutes a new vm_file, and we don't then want bin_vm_ops.
440 */
441 if (vma->vm_file != file)
442 goto out_put;
443
444 rc = -EINVAL;
445 if (of->mmapped && of->vm_ops != vma->vm_ops)
446 goto out_put;
447
448 /*
449 * It is not possible to successfully wrap close.
450 * So error if someone is trying to use close.
451 */
452 rc = -EINVAL;
453 if (vma->vm_ops && vma->vm_ops->close)
454 goto out_put;
455
456 rc = 0;
457 of->mmapped = 1;
458 of->vm_ops = vma->vm_ops;
459 vma->vm_ops = &kernfs_vm_ops;
460out_put:
461 sysfs_put_active(of->sd);
462out_unlock:
463 mutex_unlock(&of->mutex);
464
465 return rc;
466}
467
468/**
469 * sysfs_get_open_dirent - get or create sysfs_open_dirent
470 * @sd: target sysfs_dirent
471 * @of: sysfs_open_file for this instance of open
472 *
473 * If @sd->s_attr.open exists, increment its reference count;
474 * otherwise, create one. @of is chained to the files list.
475 *
476 * LOCKING:
477 * Kernel thread context (may sleep).
478 *
479 * RETURNS:
480 * 0 on success, -errno on failure.
481 */
482static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
483 struct sysfs_open_file *of)
484{
485 struct sysfs_open_dirent *od, *new_od = NULL;
486
487 retry:
488 mutex_lock(&sysfs_open_file_mutex);
489 spin_lock_irq(&sysfs_open_dirent_lock);
490
491 if (!sd->s_attr.open && new_od) {
492 sd->s_attr.open = new_od;
493 new_od = NULL;
494 }
495
496 od = sd->s_attr.open;
497 if (od) {
498 atomic_inc(&od->refcnt);
499 list_add_tail(&of->list, &od->files);
500 }
501
502 spin_unlock_irq(&sysfs_open_dirent_lock);
503 mutex_unlock(&sysfs_open_file_mutex);
504
505 if (od) {
506 kfree(new_od);
507 return 0;
508 }
509
510 /* not there, initialize a new one and retry */
511 new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
512 if (!new_od)
513 return -ENOMEM;
514
515 atomic_set(&new_od->refcnt, 0);
516 atomic_set(&new_od->event, 1);
517 init_waitqueue_head(&new_od->poll);
518 INIT_LIST_HEAD(&new_od->files);
519 goto retry;
520}
521
522/**
523 * sysfs_put_open_dirent - put sysfs_open_dirent
524 * @sd: target sysfs_dirent
525 * @of: associated sysfs_open_file
526 *
527 * Put @sd->s_attr.open and unlink @of from the files list. If
528 * reference count reaches zero, disassociate and free it.
529 *
530 * LOCKING:
531 * None.
532 */
533static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
534 struct sysfs_open_file *of)
535{
536 struct sysfs_open_dirent *od = sd->s_attr.open;
537 unsigned long flags;
538
539 mutex_lock(&sysfs_open_file_mutex);
540 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
541
542 if (of)
543 list_del(&of->list);
544
545 if (atomic_dec_and_test(&od->refcnt))
546 sd->s_attr.open = NULL;
547 else
548 od = NULL;
549
550 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
551 mutex_unlock(&sysfs_open_file_mutex);
552
553 kfree(od);
554}
555
556static int kernfs_file_open(struct inode *inode, struct file *file)
557{
558 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
559 const struct kernfs_ops *ops;
560 struct sysfs_open_file *of;
561 bool has_read, has_write, has_mmap;
562 int error = -EACCES;
563
564 if (!sysfs_get_active(attr_sd))
565 return -ENODEV;
566
567 ops = kernfs_ops(attr_sd);
568
569 has_read = ops->seq_show || ops->read || ops->mmap;
570 has_write = ops->write || ops->mmap;
571 has_mmap = ops->mmap;
572
573 /* check perms and supported operations */
574 if ((file->f_mode & FMODE_WRITE) &&
575 (!(inode->i_mode & S_IWUGO) || !has_write))
576 goto err_out;
577
578 if ((file->f_mode & FMODE_READ) &&
579 (!(inode->i_mode & S_IRUGO) || !has_read))
580 goto err_out;
581
582 /* allocate a sysfs_open_file for the file */
583 error = -ENOMEM;
584 of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
585 if (!of)
586 goto err_out;
587
588 /*
589 * The following is done to give a different lockdep key to
590 * @of->mutex for files which implement mmap. This is a rather
591 * crude way to avoid false positive lockdep warning around
592 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
593 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
594 * which mm->mmap_sem nests, while holding @of->mutex. As each
595 * open file has a separate mutex, it's okay as long as those don't
596 * happen on the same file. At this point, we can't easily give
597 * each file a separate locking class. Let's differentiate on
598 * whether the file has mmap or not for now.
599 */
600 if (has_mmap)
601 mutex_init(&of->mutex);
602 else
603 mutex_init(&of->mutex);
604
605 of->sd = attr_sd;
606 of->file = file;
607
608 /*
609 * Always instantiate seq_file even if read access doesn't use
610 * seq_file or is not requested. This unifies private data access
611 * and readable regular files are the vast majority anyway.
612 */
613 if (ops->seq_show)
614 error = seq_open(file, &kernfs_seq_ops);
615 else
616 error = seq_open(file, NULL);
617 if (error)
618 goto err_free;
619
620 ((struct seq_file *)file->private_data)->private = of;
621
622 /* seq_file clears PWRITE unconditionally, restore it if WRITE */
623 if (file->f_mode & FMODE_WRITE)
624 file->f_mode |= FMODE_PWRITE;
625
626 /* make sure we have open dirent struct */
627 error = sysfs_get_open_dirent(attr_sd, of);
628 if (error)
629 goto err_close;
630
631 /* open succeeded, put active references */
632 sysfs_put_active(attr_sd);
633 return 0;
634
635err_close:
636 seq_release(inode, file);
637err_free:
638 kfree(of);
639err_out:
640 sysfs_put_active(attr_sd);
641 return error;
642}
643
644static int kernfs_file_release(struct inode *inode, struct file *filp)
645{
646 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
647 struct sysfs_open_file *of = sysfs_of(filp);
648
649 sysfs_put_open_dirent(sd, of);
650 seq_release(inode, filp);
651 kfree(of);
652
653 return 0;
654}
655
656void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
657{
658 struct sysfs_open_dirent *od;
659 struct sysfs_open_file *of;
660
661 if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
662 return;
663
664 spin_lock_irq(&sysfs_open_dirent_lock);
665 od = sd->s_attr.open;
666 if (od)
667 atomic_inc(&od->refcnt);
668 spin_unlock_irq(&sysfs_open_dirent_lock);
669 if (!od)
670 return;
671
672 mutex_lock(&sysfs_open_file_mutex);
673 list_for_each_entry(of, &od->files, list) {
674 struct inode *inode = file_inode(of->file);
675 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
676 }
677 mutex_unlock(&sysfs_open_file_mutex);
678
679 sysfs_put_open_dirent(sd, NULL);
680}
681
682/* Sysfs attribute files are pollable. The idea is that you read
683 * the content and then you use 'poll' or 'select' to wait for
684 * the content to change. When the content changes (assuming the
685 * manager for the kobject supports notification), poll will
686 * return POLLERR|POLLPRI, and select will return the fd whether
687 * it is waiting for read, write, or exceptions.
688 * Once poll/select indicates that the value has changed, you
689 * need to close and re-open the file, or seek to 0 and read again.
690 * Reminder: this only works for attributes which actively support
691 * it, and it is not possible to test an attribute from userspace
692 * to see if it supports poll (Neither 'poll' nor 'select' return
693 * an appropriate error code). When in doubt, set a suitable timeout value.
694 */
695static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
696{
697 struct sysfs_open_file *of = sysfs_of(filp);
698 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
699 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
700
701 /* need parent for the kobj, grab both */
702 if (!sysfs_get_active(attr_sd))
703 goto trigger;
704
705 poll_wait(filp, &od->poll, wait);
706
707 sysfs_put_active(attr_sd);
708
709 if (of->event != atomic_read(&od->event))
710 goto trigger;
711
712 return DEFAULT_POLLMASK;
713
714 trigger:
715 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
716}
717
718/**
719 * kernfs_notify - notify a kernfs file
720 * @sd: file to notify
721 *
722 * Notify @sd such that poll(2) on @sd wakes up.
723 */
724void kernfs_notify(struct sysfs_dirent *sd)
725{
726 struct sysfs_open_dirent *od;
727 unsigned long flags;
728
729 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
730
731 if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
732 od = sd->s_attr.open;
733 if (od) {
734 atomic_inc(&od->event);
735 wake_up_interruptible(&od->poll);
736 }
737 }
738
739 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
740}
741EXPORT_SYMBOL_GPL(kernfs_notify);
742
743const struct file_operations kernfs_file_operations = {
744 .read = kernfs_file_read,
745 .write = kernfs_file_write,
746 .llseek = generic_file_llseek,
747 .mmap = kernfs_file_mmap,
748 .open = kernfs_file_open,
749 .release = kernfs_file_release,
750 .poll = kernfs_file_poll,
751};
752
753/**
754 * kernfs_create_file_ns_key - create a file
755 * @parent: directory to create the file in
756 * @name: name of the file
757 * @mode: mode of the file
758 * @size: size of the file
759 * @ops: kernfs operations for the file
760 * @priv: private data for the file
761 * @ns: optional namespace tag of the file
762 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
763 *
764 * Returns the created node on success, ERR_PTR() value on error.
765 */
766struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
767 const char *name,
768 umode_t mode, loff_t size,
769 const struct kernfs_ops *ops,
770 void *priv, const void *ns,
771 struct lock_class_key *key)
772{
773 struct sysfs_addrm_cxt acxt;
774 struct sysfs_dirent *sd;
775 int rc;
776
777 sd = sysfs_new_dirent(kernfs_root(parent), name,
778 (mode & S_IALLUGO) | S_IFREG, SYSFS_KOBJ_ATTR);
779 if (!sd)
780 return ERR_PTR(-ENOMEM);
781
782 sd->s_attr.ops = ops;
783 sd->s_attr.size = size;
784 sd->s_ns = ns;
785 sd->priv = priv;
786
787#ifdef CONFIG_DEBUG_LOCK_ALLOC
788 if (key) {
789 lockdep_init_map(&sd->dep_map, "s_active", key, 0);
790 sd->s_flags |= SYSFS_FLAG_LOCKDEP;
791 }
792#endif
793
794 /*
795 * sd->s_attr.ops is accesible only while holding active ref. We
796 * need to know whether some ops are implemented outside active
797 * ref. Cache their existence in flags.
798 */
799 if (ops->seq_show)
800 sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
801 if (ops->mmap)
802 sd->s_flags |= SYSFS_FLAG_HAS_MMAP;
803
804 sysfs_addrm_start(&acxt);
805 rc = sysfs_add_one(&acxt, sd, parent);
806 sysfs_addrm_finish(&acxt);
807
808 if (rc) {
809 kernfs_put(sd);
810 return ERR_PTR(rc);
811 }
812 return sd;
813}
diff --git a/fs/sysfs/inode.c b/fs/kernfs/inode.c
index 1750f790af3b..18ad431e8c2a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -1,28 +1,22 @@
1/* 1/*
2 * fs/sysfs/inode.c - basic sysfs inode and dentry operations 2 * fs/kernfs/inode.c - kernfs inode implementation
3 * 3 *
4 * Copyright (c) 2001-3 Patrick Mochel 4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH 5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 * 7 *
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 9 */
12 10
13#undef DEBUG
14
15#include <linux/pagemap.h> 11#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/backing-dev.h> 12#include <linux/backing-dev.h>
18#include <linux/capability.h> 13#include <linux/capability.h>
19#include <linux/errno.h> 14#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/slab.h> 15#include <linux/slab.h>
22#include <linux/sysfs.h>
23#include <linux/xattr.h> 16#include <linux/xattr.h>
24#include <linux/security.h> 17#include <linux/security.h>
25#include "sysfs.h" 18
19#include "kernfs-internal.h"
26 20
27static const struct address_space_operations sysfs_aops = { 21static const struct address_space_operations sysfs_aops = {
28 .readpage = simple_readpage, 22 .readpage = simple_readpage,
@@ -41,22 +35,28 @@ static const struct inode_operations sysfs_inode_operations = {
41 .setattr = sysfs_setattr, 35 .setattr = sysfs_setattr,
42 .getattr = sysfs_getattr, 36 .getattr = sysfs_getattr,
43 .setxattr = sysfs_setxattr, 37 .setxattr = sysfs_setxattr,
38 .removexattr = sysfs_removexattr,
39 .getxattr = sysfs_getxattr,
40 .listxattr = sysfs_listxattr,
44}; 41};
45 42
46int __init sysfs_inode_init(void) 43void __init sysfs_inode_init(void)
47{ 44{
48 return bdi_init(&sysfs_backing_dev_info); 45 if (bdi_init(&sysfs_backing_dev_info))
46 panic("failed to init sysfs_backing_dev_info");
49} 47}
50 48
51static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) 49static struct sysfs_inode_attrs *sysfs_inode_attrs(struct sysfs_dirent *sd)
52{ 50{
53 struct sysfs_inode_attrs *attrs;
54 struct iattr *iattrs; 51 struct iattr *iattrs;
55 52
56 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); 53 if (sd->s_iattr)
57 if (!attrs) 54 return sd->s_iattr;
55
56 sd->s_iattr = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
57 if (!sd->s_iattr)
58 return NULL; 58 return NULL;
59 iattrs = &attrs->ia_iattr; 59 iattrs = &sd->s_iattr->ia_iattr;
60 60
61 /* assign default attributes */ 61 /* assign default attributes */
62 iattrs->ia_mode = sd->s_mode; 62 iattrs->ia_mode = sd->s_mode;
@@ -64,26 +64,22 @@ static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
64 iattrs->ia_gid = GLOBAL_ROOT_GID; 64 iattrs->ia_gid = GLOBAL_ROOT_GID;
65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; 65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
66 66
67 return attrs; 67 simple_xattrs_init(&sd->s_iattr->xattrs);
68
69 return sd->s_iattr;
68} 70}
69 71
70int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr) 72static int __kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr)
71{ 73{
72 struct sysfs_inode_attrs *sd_attrs; 74 struct sysfs_inode_attrs *attrs;
73 struct iattr *iattrs; 75 struct iattr *iattrs;
74 unsigned int ia_valid = iattr->ia_valid; 76 unsigned int ia_valid = iattr->ia_valid;
75 77
76 sd_attrs = sd->s_iattr; 78 attrs = sysfs_inode_attrs(sd);
79 if (!attrs)
80 return -ENOMEM;
77 81
78 if (!sd_attrs) { 82 iattrs = &attrs->ia_iattr;
79 /* setting attributes for the first time, allocate now */
80 sd_attrs = sysfs_init_inode_attrs(sd);
81 if (!sd_attrs)
82 return -ENOMEM;
83 sd->s_iattr = sd_attrs;
84 }
85 /* attributes were changed at least once in past */
86 iattrs = &sd_attrs->ia_iattr;
87 83
88 if (ia_valid & ATTR_UID) 84 if (ia_valid & ATTR_UID)
89 iattrs->ia_uid = iattr->ia_uid; 85 iattrs->ia_uid = iattr->ia_uid;
@@ -102,6 +98,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
102 return 0; 98 return 0;
103} 99}
104 100
101/**
102 * kernfs_setattr - set iattr on a node
103 * @sd: target node
104 * @iattr: iattr to set
105 *
106 * Returns 0 on success, -errno on failure.
107 */
108int kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr)
109{
110 int ret;
111
112 mutex_lock(&sysfs_mutex);
113 ret = __kernfs_setattr(sd, iattr);
114 mutex_unlock(&sysfs_mutex);
115 return ret;
116}
117
105int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) 118int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
106{ 119{
107 struct inode *inode = dentry->d_inode; 120 struct inode *inode = dentry->d_inode;
@@ -116,7 +129,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
116 if (error) 129 if (error)
117 goto out; 130 goto out;
118 131
119 error = sysfs_sd_setattr(sd, iattr); 132 error = __kernfs_setattr(sd, iattr);
120 if (error) 133 if (error)
121 goto out; 134 goto out;
122 135
@@ -131,22 +144,19 @@ out:
131static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, 144static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata,
132 u32 *secdata_len) 145 u32 *secdata_len)
133{ 146{
134 struct sysfs_inode_attrs *iattrs; 147 struct sysfs_inode_attrs *attrs;
135 void *old_secdata; 148 void *old_secdata;
136 size_t old_secdata_len; 149 size_t old_secdata_len;
137 150
138 if (!sd->s_iattr) { 151 attrs = sysfs_inode_attrs(sd);
139 sd->s_iattr = sysfs_init_inode_attrs(sd); 152 if (!attrs)
140 if (!sd->s_iattr) 153 return -ENOMEM;
141 return -ENOMEM;
142 }
143 154
144 iattrs = sd->s_iattr; 155 old_secdata = attrs->ia_secdata;
145 old_secdata = iattrs->ia_secdata; 156 old_secdata_len = attrs->ia_secdata_len;
146 old_secdata_len = iattrs->ia_secdata_len;
147 157
148 iattrs->ia_secdata = *secdata; 158 attrs->ia_secdata = *secdata;
149 iattrs->ia_secdata_len = *secdata_len; 159 attrs->ia_secdata_len = *secdata_len;
150 160
151 *secdata = old_secdata; 161 *secdata = old_secdata;
152 *secdata_len = old_secdata_len; 162 *secdata_len = old_secdata_len;
@@ -157,23 +167,25 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
157 size_t size, int flags) 167 size_t size, int flags)
158{ 168{
159 struct sysfs_dirent *sd = dentry->d_fsdata; 169 struct sysfs_dirent *sd = dentry->d_fsdata;
170 struct sysfs_inode_attrs *attrs;
160 void *secdata; 171 void *secdata;
161 int error; 172 int error;
162 u32 secdata_len = 0; 173 u32 secdata_len = 0;
163 174
164 if (!sd) 175 attrs = sysfs_inode_attrs(sd);
165 return -EINVAL; 176 if (!attrs)
177 return -ENOMEM;
166 178
167 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { 179 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
168 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; 180 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
169 error = security_inode_setsecurity(dentry->d_inode, suffix, 181 error = security_inode_setsecurity(dentry->d_inode, suffix,
170 value, size, flags); 182 value, size, flags);
171 if (error) 183 if (error)
172 goto out; 184 return error;
173 error = security_inode_getsecctx(dentry->d_inode, 185 error = security_inode_getsecctx(dentry->d_inode,
174 &secdata, &secdata_len); 186 &secdata, &secdata_len);
175 if (error) 187 if (error)
176 goto out; 188 return error;
177 189
178 mutex_lock(&sysfs_mutex); 190 mutex_lock(&sysfs_mutex);
179 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len); 191 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
@@ -181,10 +193,50 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
181 193
182 if (secdata) 194 if (secdata)
183 security_release_secctx(secdata, secdata_len); 195 security_release_secctx(secdata, secdata_len);
184 } else 196 return error;
185 return -EINVAL; 197 } else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
186out: 198 return simple_xattr_set(&attrs->xattrs, name, value, size,
187 return error; 199 flags);
200 }
201
202 return -EINVAL;
203}
204
205int sysfs_removexattr(struct dentry *dentry, const char *name)
206{
207 struct sysfs_dirent *sd = dentry->d_fsdata;
208 struct sysfs_inode_attrs *attrs;
209
210 attrs = sysfs_inode_attrs(sd);
211 if (!attrs)
212 return -ENOMEM;
213
214 return simple_xattr_remove(&attrs->xattrs, name);
215}
216
217ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf,
218 size_t size)
219{
220 struct sysfs_dirent *sd = dentry->d_fsdata;
221 struct sysfs_inode_attrs *attrs;
222
223 attrs = sysfs_inode_attrs(sd);
224 if (!attrs)
225 return -ENOMEM;
226
227 return simple_xattr_get(&attrs->xattrs, name, buf, size);
228}
229
230ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size)
231{
232 struct sysfs_dirent *sd = dentry->d_fsdata;
233 struct sysfs_inode_attrs *attrs;
234
235 attrs = sysfs_inode_attrs(sd);
236 if (!attrs)
237 return -ENOMEM;
238
239 return simple_xattr_list(&attrs->xattrs, buf, size);
188} 240}
189 241
190static inline void set_default_inode_attr(struct inode *inode, umode_t mode) 242static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -204,17 +256,16 @@ static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
204 256
205static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) 257static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
206{ 258{
207 struct sysfs_inode_attrs *iattrs = sd->s_iattr; 259 struct sysfs_inode_attrs *attrs = sd->s_iattr;
208 260
209 inode->i_mode = sd->s_mode; 261 inode->i_mode = sd->s_mode;
210 if (iattrs) { 262 if (attrs) {
211 /* sysfs_dirent has non-default attributes 263 /* sysfs_dirent has non-default attributes
212 * get them from persistent copy in sysfs_dirent 264 * get them from persistent copy in sysfs_dirent
213 */ 265 */
214 set_inode_attr(inode, &iattrs->ia_iattr); 266 set_inode_attr(inode, &attrs->ia_iattr);
215 security_inode_notifysecctx(inode, 267 security_inode_notifysecctx(inode, attrs->ia_secdata,
216 iattrs->ia_secdata, 268 attrs->ia_secdata_len);
217 iattrs->ia_secdata_len);
218 } 269 }
219 270
220 if (sysfs_type(sd) == SYSFS_DIR) 271 if (sysfs_type(sd) == SYSFS_DIR)
@@ -237,9 +288,8 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
237 288
238static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 289static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
239{ 290{
240 struct bin_attribute *bin_attr; 291 kernfs_get(sd);
241 292 inode->i_private = sd;
242 inode->i_private = sysfs_get(sd);
243 inode->i_mapping->a_ops = &sysfs_aops; 293 inode->i_mapping->a_ops = &sysfs_aops;
244 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 294 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
245 inode->i_op = &sysfs_inode_operations; 295 inode->i_op = &sysfs_inode_operations;
@@ -254,13 +304,8 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
254 inode->i_fop = &sysfs_dir_operations; 304 inode->i_fop = &sysfs_dir_operations;
255 break; 305 break;
256 case SYSFS_KOBJ_ATTR: 306 case SYSFS_KOBJ_ATTR:
257 inode->i_size = PAGE_SIZE; 307 inode->i_size = sd->s_attr.size;
258 inode->i_fop = &sysfs_file_operations; 308 inode->i_fop = &kernfs_file_operations;
259 break;
260 case SYSFS_KOBJ_BIN_ATTR:
261 bin_attr = sd->s_attr.bin_attr;
262 inode->i_size = bin_attr->size;
263 inode->i_fop = &sysfs_bin_operations;
264 break; 309 break;
265 case SYSFS_KOBJ_LINK: 310 case SYSFS_KOBJ_LINK:
266 inode->i_op = &sysfs_symlink_inode_operations; 311 inode->i_op = &sysfs_symlink_inode_operations;
@@ -311,7 +356,7 @@ void sysfs_evict_inode(struct inode *inode)
311 356
312 truncate_inode_pages(&inode->i_data, 0); 357 truncate_inode_pages(&inode->i_data, 0);
313 clear_inode(inode); 358 clear_inode(inode);
314 sysfs_put(sd); 359 kernfs_put(sd);
315} 360}
316 361
317int sysfs_permission(struct inode *inode, int mask) 362int sysfs_permission(struct inode *inode, int mask)
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
new file mode 100644
index 000000000000..910e485b7333
--- /dev/null
+++ b/fs/kernfs/kernfs-internal.h
@@ -0,0 +1,122 @@
1/*
2 * fs/kernfs/kernfs-internal.h - kernfs internal header file
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#ifndef __KERNFS_INTERNAL_H
12#define __KERNFS_INTERNAL_H
13
14#include <linux/lockdep.h>
15#include <linux/fs.h>
16#include <linux/mutex.h>
17#include <linux/xattr.h>
18
19#include <linux/kernfs.h>
20
21struct sysfs_inode_attrs {
22 struct iattr ia_iattr;
23 void *ia_secdata;
24 u32 ia_secdata_len;
25
26 struct simple_xattrs xattrs;
27};
28
29#define SD_DEACTIVATED_BIAS INT_MIN
30
31/* SYSFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
32
33/**
34 * kernfs_root - find out the kernfs_root a sysfs_dirent belongs to
35 * @sd: sysfs_dirent of interest
36 *
37 * Return the kernfs_root @sd belongs to.
38 */
39static inline struct kernfs_root *kernfs_root(struct sysfs_dirent *sd)
40{
41 /* if parent exists, it's always a dir; otherwise, @sd is a dir */
42 if (sd->s_parent)
43 sd = sd->s_parent;
44 return sd->s_dir.root;
45}
46
47/*
48 * Context structure to be used while adding/removing nodes.
49 */
50struct sysfs_addrm_cxt {
51 struct sysfs_dirent *removed;
52};
53
54/*
55 * mount.c
56 */
57struct sysfs_super_info {
58 /*
59 * The root associated with this super_block. Each super_block is
60 * identified by the root and ns it's associated with.
61 */
62 struct kernfs_root *root;
63
64 /*
65 * Each sb is associated with one namespace tag, currently the network
66 * namespace of the task which mounted this sysfs instance. If multiple
67 * tags become necessary, make the following an array and compare
68 * sysfs_dirent tag against every entry.
69 */
70 const void *ns;
71};
72#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
73
74extern struct kmem_cache *sysfs_dir_cachep;
75
76/*
77 * inode.c
78 */
79struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
80void sysfs_evict_inode(struct inode *inode);
81int sysfs_permission(struct inode *inode, int mask);
82int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
83int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
84 struct kstat *stat);
85int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
86 size_t size, int flags);
87int sysfs_removexattr(struct dentry *dentry, const char *name);
88ssize_t sysfs_getxattr(struct dentry *dentry, const char *name, void *buf,
89 size_t size);
90ssize_t sysfs_listxattr(struct dentry *dentry, char *buf, size_t size);
91void sysfs_inode_init(void);
92
93/*
94 * dir.c
95 */
96extern struct mutex sysfs_mutex;
97extern const struct dentry_operations sysfs_dentry_ops;
98extern const struct file_operations sysfs_dir_operations;
99extern const struct inode_operations sysfs_dir_inode_operations;
100
101struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
102void sysfs_put_active(struct sysfs_dirent *sd);
103void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
104int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
105 struct sysfs_dirent *parent_sd);
106void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
107struct sysfs_dirent *sysfs_new_dirent(struct kernfs_root *root,
108 const char *name, umode_t mode, int type);
109
110/*
111 * file.c
112 */
113extern const struct file_operations kernfs_file_operations;
114
115void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
116
117/*
118 * symlink.c
119 */
120extern const struct inode_operations sysfs_symlink_inode_operations;
121
122#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
new file mode 100644
index 000000000000..84c83e24bf25
--- /dev/null
+++ b/fs/kernfs/mount.c
@@ -0,0 +1,165 @@
1/*
2 * fs/kernfs/mount.c - kernfs mount implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/mount.h>
13#include <linux/init.h>
14#include <linux/magic.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17
18#include "kernfs-internal.h"
19
20struct kmem_cache *sysfs_dir_cachep;
21
22static const struct super_operations sysfs_ops = {
23 .statfs = simple_statfs,
24 .drop_inode = generic_delete_inode,
25 .evict_inode = sysfs_evict_inode,
26};
27
28static int sysfs_fill_super(struct super_block *sb)
29{
30 struct sysfs_super_info *info = sysfs_info(sb);
31 struct inode *inode;
32 struct dentry *root;
33
34 sb->s_blocksize = PAGE_CACHE_SIZE;
35 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
36 sb->s_magic = SYSFS_MAGIC;
37 sb->s_op = &sysfs_ops;
38 sb->s_time_gran = 1;
39
40 /* get root inode, initialize and unlock it */
41 mutex_lock(&sysfs_mutex);
42 inode = sysfs_get_inode(sb, info->root->sd);
43 mutex_unlock(&sysfs_mutex);
44 if (!inode) {
45 pr_debug("sysfs: could not get root inode\n");
46 return -ENOMEM;
47 }
48
49 /* instantiate and link root dentry */
50 root = d_make_root(inode);
51 if (!root) {
52 pr_debug("%s: could not get root dentry!\n", __func__);
53 return -ENOMEM;
54 }
55 kernfs_get(info->root->sd);
56 root->d_fsdata = info->root->sd;
57 sb->s_root = root;
58 sb->s_d_op = &sysfs_dentry_ops;
59 return 0;
60}
61
62static int sysfs_test_super(struct super_block *sb, void *data)
63{
64 struct sysfs_super_info *sb_info = sysfs_info(sb);
65 struct sysfs_super_info *info = data;
66
67 return sb_info->root == info->root && sb_info->ns == info->ns;
68}
69
70static int sysfs_set_super(struct super_block *sb, void *data)
71{
72 int error;
73 error = set_anon_super(sb, data);
74 if (!error)
75 sb->s_fs_info = data;
76 return error;
77}
78
79/**
80 * kernfs_super_ns - determine the namespace tag of a kernfs super_block
81 * @sb: super_block of interest
82 *
83 * Return the namespace tag associated with kernfs super_block @sb.
84 */
85const void *kernfs_super_ns(struct super_block *sb)
86{
87 struct sysfs_super_info *info = sysfs_info(sb);
88
89 return info->ns;
90}
91
92/**
93 * kernfs_mount_ns - kernfs mount helper
94 * @fs_type: file_system_type of the fs being mounted
95 * @flags: mount flags specified for the mount
96 * @root: kernfs_root of the hierarchy being mounted
97 * @ns: optional namespace tag of the mount
98 *
99 * This is to be called from each kernfs user's file_system_type->mount()
100 * implementation, which should pass through the specified @fs_type and
101 * @flags, and specify the hierarchy and namespace tag to mount via @root
102 * and @ns, respectively.
103 *
104 * The return value can be passed to the vfs layer verbatim.
105 */
106struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
107 struct kernfs_root *root, const void *ns)
108{
109 struct super_block *sb;
110 struct sysfs_super_info *info;
111 int error;
112
113 info = kzalloc(sizeof(*info), GFP_KERNEL);
114 if (!info)
115 return ERR_PTR(-ENOMEM);
116
117 info->root = root;
118 info->ns = ns;
119
120 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
121 if (IS_ERR(sb) || sb->s_fs_info != info)
122 kfree(info);
123 if (IS_ERR(sb))
124 return ERR_CAST(sb);
125 if (!sb->s_root) {
126 error = sysfs_fill_super(sb);
127 if (error) {
128 deactivate_locked_super(sb);
129 return ERR_PTR(error);
130 }
131 sb->s_flags |= MS_ACTIVE;
132 }
133
134 return dget(sb->s_root);
135}
136
137/**
138 * kernfs_kill_sb - kill_sb for kernfs
139 * @sb: super_block being killed
140 *
141 * This can be used directly for file_system_type->kill_sb(). If a kernfs
142 * user needs extra cleanup, it can implement its own kill_sb() and call
143 * this function at the end.
144 */
145void kernfs_kill_sb(struct super_block *sb)
146{
147 struct sysfs_super_info *info = sysfs_info(sb);
148 struct sysfs_dirent *root_sd = sb->s_root->d_fsdata;
149
150 /*
151 * Remove the superblock from fs_supers/s_instances
152 * so we can't find it, before freeing sysfs_super_info.
153 */
154 kill_anon_super(sb);
155 kfree(info);
156 kernfs_put(root_sd);
157}
158
159void __init kernfs_init(void)
160{
161 sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
162 sizeof(struct sysfs_dirent),
163 0, SLAB_PANIC, NULL);
164 sysfs_inode_init();
165}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
new file mode 100644
index 000000000000..adf28755b0ee
--- /dev/null
+++ b/fs/kernfs/symlink.c
@@ -0,0 +1,152 @@
1/*
2 * fs/kernfs/symlink.c - kernfs symlink implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/gfp.h>
13#include <linux/namei.h>
14
15#include "kernfs-internal.h"
16
17/**
18 * kernfs_create_link - create a symlink
19 * @parent: directory to create the symlink in
20 * @name: name of the symlink
21 * @target: target node for the symlink to point to
22 *
23 * Returns the created node on success, ERR_PTR() value on error.
24 */
25struct sysfs_dirent *kernfs_create_link(struct sysfs_dirent *parent,
26 const char *name,
27 struct sysfs_dirent *target)
28{
29 struct sysfs_dirent *sd;
30 struct sysfs_addrm_cxt acxt;
31 int error;
32
33 sd = sysfs_new_dirent(kernfs_root(parent), name, S_IFLNK|S_IRWXUGO,
34 SYSFS_KOBJ_LINK);
35 if (!sd)
36 return ERR_PTR(-ENOMEM);
37
38 if (kernfs_ns_enabled(parent))
39 sd->s_ns = target->s_ns;
40 sd->s_symlink.target_sd = target;
41 kernfs_get(target); /* ref owned by symlink */
42
43 sysfs_addrm_start(&acxt);
44 error = sysfs_add_one(&acxt, sd, parent);
45 sysfs_addrm_finish(&acxt);
46
47 if (!error)
48 return sd;
49
50 kernfs_put(sd);
51 return ERR_PTR(error);
52}
53
54static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
55 struct sysfs_dirent *target_sd, char *path)
56{
57 struct sysfs_dirent *base, *sd;
58 char *s = path;
59 int len = 0;
60
61 /* go up to the root, stop at the base */
62 base = parent_sd;
63 while (base->s_parent) {
64 sd = target_sd->s_parent;
65 while (sd->s_parent && base != sd)
66 sd = sd->s_parent;
67
68 if (base == sd)
69 break;
70
71 strcpy(s, "../");
72 s += 3;
73 base = base->s_parent;
74 }
75
76 /* determine end of target string for reverse fillup */
77 sd = target_sd;
78 while (sd->s_parent && sd != base) {
79 len += strlen(sd->s_name) + 1;
80 sd = sd->s_parent;
81 }
82
83 /* check limits */
84 if (len < 2)
85 return -EINVAL;
86 len--;
87 if ((s - path) + len > PATH_MAX)
88 return -ENAMETOOLONG;
89
90 /* reverse fillup of target string from target to base */
91 sd = target_sd;
92 while (sd->s_parent && sd != base) {
93 int slen = strlen(sd->s_name);
94
95 len -= slen;
96 strncpy(s + len, sd->s_name, slen);
97 if (len)
98 s[--len] = '/';
99
100 sd = sd->s_parent;
101 }
102
103 return 0;
104}
105
106static int sysfs_getlink(struct dentry *dentry, char *path)
107{
108 struct sysfs_dirent *sd = dentry->d_fsdata;
109 struct sysfs_dirent *parent_sd = sd->s_parent;
110 struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
111 int error;
112
113 mutex_lock(&sysfs_mutex);
114 error = sysfs_get_target_path(parent_sd, target_sd, path);
115 mutex_unlock(&sysfs_mutex);
116
117 return error;
118}
119
120static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
121{
122 int error = -ENOMEM;
123 unsigned long page = get_zeroed_page(GFP_KERNEL);
124 if (page) {
125 error = sysfs_getlink(dentry, (char *) page);
126 if (error < 0)
127 free_page((unsigned long)page);
128 }
129 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
130 return NULL;
131}
132
133static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
134 void *cookie)
135{
136 char *page = nd_get_link(nd);
137 if (!IS_ERR(page))
138 free_page((unsigned long)page);
139}
140
141const struct inode_operations sysfs_symlink_inode_operations = {
142 .setxattr = sysfs_setxattr,
143 .removexattr = sysfs_removexattr,
144 .getxattr = sysfs_getxattr,
145 .listxattr = sysfs_listxattr,
146 .readlink = generic_readlink,
147 .follow_link = sysfs_follow_link,
148 .put_link = sysfs_put_link,
149 .setattr = sysfs_setattr,
150 .getattr = sysfs_getattr,
151 .permission = sysfs_permission,
152};
diff --git a/fs/namespace.c b/fs/namespace.c
index ac2ce8a766e1..a511ea003f89 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2790,6 +2790,8 @@ void __init mnt_init(void)
2790 for (u = 0; u < HASH_SIZE; u++) 2790 for (u = 0; u < HASH_SIZE; u++)
2791 INIT_LIST_HEAD(&mountpoint_hashtable[u]); 2791 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2792 2792
2793 kernfs_init();
2794
2793 err = sysfs_init(); 2795 err = sysfs_init();
2794 if (err) 2796 if (err)
2795 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2797 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac183373..6eff6e1205a5 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,4 @@
2# Makefile for the sysfs virtual filesystem 2# Makefile for the sysfs virtual filesystem
3# 3#
4 4
5obj-y := inode.o file.o dir.o symlink.o mount.o group.o 5obj-y := file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d6626e50..2fea501889e7 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -13,446 +13,12 @@
13#undef DEBUG 13#undef DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h>
17#include <linux/module.h>
18#include <linux/kobject.h> 16#include <linux/kobject.h>
19#include <linux/namei.h>
20#include <linux/idr.h>
21#include <linux/completion.h>
22#include <linux/mutex.h>
23#include <linux/slab.h> 17#include <linux/slab.h>
24#include <linux/security.h>
25#include <linux/hash.h>
26#include "sysfs.h" 18#include "sysfs.h"
27 19
28DEFINE_MUTEX(sysfs_mutex);
29DEFINE_SPINLOCK(sysfs_symlink_target_lock); 20DEFINE_SPINLOCK(sysfs_symlink_target_lock);
30 21
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
32
33static DEFINE_SPINLOCK(sysfs_ino_lock);
34static DEFINE_IDA(sysfs_ino_ida);
35
36/**
37 * sysfs_name_hash
38 * @name: Null terminated string to hash
39 * @ns: Namespace tag to hash
40 *
41 * Returns 31 bit hash of ns + name (so it fits in an off_t )
42 */
43static unsigned int sysfs_name_hash(const char *name, const void *ns)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const char *name,
60 const void *ns, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
73 right);
74}
75
76/**
77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
78 * @sd: sysfs_dirent of interest
79 *
80 * Link @sd into its sibling rbtree which starts from
81 * sd->s_parent->s_dir.children.
82 *
83 * Locking:
84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on success, -EEXIST on failure.
88 */
89static int sysfs_link_sibling(struct sysfs_dirent *sd)
90{
91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
92 struct rb_node *parent = NULL;
93
94 if (sysfs_type(sd) == SYSFS_DIR)
95 sd->s_parent->s_dir.subdirs++;
96
97 while (*node) {
98 struct sysfs_dirent *pos;
99 int result;
100
101 pos = to_sysfs_dirent(*node);
102 parent = *node;
103 result = sysfs_sd_compare(sd, pos);
104 if (result < 0)
105 node = &pos->s_rb.rb_left;
106 else if (result > 0)
107 node = &pos->s_rb.rb_right;
108 else
109 return -EEXIST;
110 }
111 /* add new node and rebalance the tree */
112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
115}
116
117/**
118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
119 * @sd: sysfs_dirent of interest
120 *
121 * Unlink @sd from its sibling rbtree which starts from
122 * sd->s_parent->s_dir.children.
123 *
124 * Locking:
125 * mutex_lock(sysfs_mutex)
126 */
127static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
128{
129 if (sysfs_type(sd) == SYSFS_DIR)
130 sd->s_parent->s_dir.subdirs--;
131
132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
133}
134
135/**
136 * sysfs_get_active - get an active reference to sysfs_dirent
137 * @sd: sysfs_dirent to get an active reference to
138 *
139 * Get an active reference of @sd. This function is noop if @sd
140 * is NULL.
141 *
142 * RETURNS:
143 * Pointer to @sd on success, NULL on failure.
144 */
145struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
146{
147 if (unlikely(!sd))
148 return NULL;
149
150 if (!atomic_inc_unless_negative(&sd->s_active))
151 return NULL;
152
153 if (likely(!sysfs_ignore_lockdep(sd)))
154 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
155 return sd;
156}
157
158/**
159 * sysfs_put_active - put an active reference to sysfs_dirent
160 * @sd: sysfs_dirent to put an active reference to
161 *
162 * Put an active reference to @sd. This function is noop if @sd
163 * is NULL.
164 */
165void sysfs_put_active(struct sysfs_dirent *sd)
166{
167 int v;
168
169 if (unlikely(!sd))
170 return;
171
172 if (likely(!sysfs_ignore_lockdep(sd)))
173 rwsem_release(&sd->dep_map, 1, _RET_IP_);
174 v = atomic_dec_return(&sd->s_active);
175 if (likely(v != SD_DEACTIVATED_BIAS))
176 return;
177
178 /* atomic_dec_return() is a mb(), we'll always see the updated
179 * sd->u.completion.
180 */
181 complete(sd->u.completion);
182}
183
184/**
185 * sysfs_deactivate - deactivate sysfs_dirent
186 * @sd: sysfs_dirent to deactivate
187 *
188 * Deny new active references and drain existing ones.
189 */
190static void sysfs_deactivate(struct sysfs_dirent *sd)
191{
192 DECLARE_COMPLETION_ONSTACK(wait);
193 int v;
194
195 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
196
197 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
198 return;
199
200 sd->u.completion = (void *)&wait;
201
202 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
203 /* atomic_add_return() is a mb(), put_active() will always see
204 * the updated sd->u.completion.
205 */
206 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
207
208 if (v != SD_DEACTIVATED_BIAS) {
209 lock_contended(&sd->dep_map, _RET_IP_);
210 wait_for_completion(&wait);
211 }
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
215}
216
217static int sysfs_alloc_ino(unsigned int *pino)
218{
219 int ino, rc;
220
221 retry:
222 spin_lock(&sysfs_ino_lock);
223 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
224 spin_unlock(&sysfs_ino_lock);
225
226 if (rc == -EAGAIN) {
227 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
228 goto retry;
229 rc = -ENOMEM;
230 }
231
232 *pino = ino;
233 return rc;
234}
235
236static void sysfs_free_ino(unsigned int ino)
237{
238 spin_lock(&sysfs_ino_lock);
239 ida_remove(&sysfs_ino_ida, ino);
240 spin_unlock(&sysfs_ino_lock);
241}
242
243void release_sysfs_dirent(struct sysfs_dirent *sd)
244{
245 struct sysfs_dirent *parent_sd;
246
247 repeat:
248 /* Moving/renaming is always done while holding reference.
249 * sd->s_parent won't change beneath us.
250 */
251 parent_sd = sd->s_parent;
252
253 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
254 "sysfs: free using entry: %s/%s\n",
255 parent_sd ? parent_sd->s_name : "", sd->s_name);
256
257 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
258 sysfs_put(sd->s_symlink.target_sd);
259 if (sysfs_type(sd) & SYSFS_COPY_NAME)
260 kfree(sd->s_name);
261 if (sd->s_iattr && sd->s_iattr->ia_secdata)
262 security_release_secctx(sd->s_iattr->ia_secdata,
263 sd->s_iattr->ia_secdata_len);
264 kfree(sd->s_iattr);
265 sysfs_free_ino(sd->s_ino);
266 kmem_cache_free(sysfs_dir_cachep, sd);
267
268 sd = parent_sd;
269 if (sd && atomic_dec_and_test(&sd->s_count))
270 goto repeat;
271}
272
273static int sysfs_dentry_delete(const struct dentry *dentry)
274{
275 struct sysfs_dirent *sd = dentry->d_fsdata;
276 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
277}
278
279static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
280{
281 struct sysfs_dirent *sd;
282 int type;
283
284 if (flags & LOOKUP_RCU)
285 return -ECHILD;
286
287 sd = dentry->d_fsdata;
288 mutex_lock(&sysfs_mutex);
289
290 /* The sysfs dirent has been deleted */
291 if (sd->s_flags & SYSFS_FLAG_REMOVED)
292 goto out_bad;
293
294 /* The sysfs dirent has been moved? */
295 if (dentry->d_parent->d_fsdata != sd->s_parent)
296 goto out_bad;
297
298 /* The sysfs dirent has been renamed */
299 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
300 goto out_bad;
301
302 /* The sysfs dirent has been moved to a different namespace */
303 type = KOBJ_NS_TYPE_NONE;
304 if (sd->s_parent) {
305 type = sysfs_ns_type(sd->s_parent);
306 if (type != KOBJ_NS_TYPE_NONE &&
307 sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
308 goto out_bad;
309 }
310
311 mutex_unlock(&sysfs_mutex);
312out_valid:
313 return 1;
314out_bad:
315 /* Remove the dentry from the dcache hashes.
316 * If this is a deleted dentry we use d_drop instead of d_delete
317 * so sysfs doesn't need to cope with negative dentries.
318 *
319 * If this is a dentry that has simply been renamed we
320 * use d_drop to remove it from the dcache lookup on its
321 * old parent. If this dentry persists later when a lookup
322 * is performed at its new name the dentry will be readded
323 * to the dcache hashes.
324 */
325 mutex_unlock(&sysfs_mutex);
326
327 /* If we have submounts we must allow the vfs caches
328 * to lie about the state of the filesystem to prevent
329 * leaks and other nasty things.
330 */
331 if (check_submounts_and_drop(dentry) != 0)
332 goto out_valid;
333
334 return 0;
335}
336
337static void sysfs_dentry_release(struct dentry *dentry)
338{
339 sysfs_put(dentry->d_fsdata);
340}
341
342const struct dentry_operations sysfs_dentry_ops = {
343 .d_revalidate = sysfs_dentry_revalidate,
344 .d_delete = sysfs_dentry_delete,
345 .d_release = sysfs_dentry_release,
346};
347
348struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
349{
350 char *dup_name = NULL;
351 struct sysfs_dirent *sd;
352
353 if (type & SYSFS_COPY_NAME) {
354 name = dup_name = kstrdup(name, GFP_KERNEL);
355 if (!name)
356 return NULL;
357 }
358
359 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
360 if (!sd)
361 goto err_out1;
362
363 if (sysfs_alloc_ino(&sd->s_ino))
364 goto err_out2;
365
366 atomic_set(&sd->s_count, 1);
367 atomic_set(&sd->s_active, 0);
368
369 sd->s_name = name;
370 sd->s_mode = mode;
371 sd->s_flags = type | SYSFS_FLAG_REMOVED;
372
373 return sd;
374
375 err_out2:
376 kmem_cache_free(sysfs_dir_cachep, sd);
377 err_out1:
378 kfree(dup_name);
379 return NULL;
380}
381
382/**
383 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
384 * @acxt: pointer to sysfs_addrm_cxt to be used
385 *
386 * This function is called when the caller is about to add or remove
387 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
388 * to keep and pass context to other addrm functions.
389 *
390 * LOCKING:
391 * Kernel thread context (may sleep). sysfs_mutex is locked on
392 * return.
393 */
394void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
395 __acquires(sysfs_mutex)
396{
397 memset(acxt, 0, sizeof(*acxt));
398
399 mutex_lock(&sysfs_mutex);
400}
401
402/**
403 * __sysfs_add_one - add sysfs_dirent to parent without warning
404 * @acxt: addrm context to use
405 * @sd: sysfs_dirent to be added
406 * @parent_sd: the parent sysfs_dirent to add @sd to
407 *
408 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
409 * the parent inode if @sd is a directory and link into the children
410 * list of the parent.
411 *
412 * This function should be called between calls to
413 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
414 * passed the same @acxt as passed to sysfs_addrm_start().
415 *
416 * LOCKING:
417 * Determined by sysfs_addrm_start().
418 *
419 * RETURNS:
420 * 0 on success, -EEXIST if entry with the given name already
421 * exists.
422 */
423int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
424 struct sysfs_dirent *parent_sd)
425{
426 struct sysfs_inode_attrs *ps_iattr;
427 int ret;
428
429 if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
430 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
431 sysfs_ns_type(parent_sd) ? "required" : "invalid",
432 parent_sd->s_name, sd->s_name);
433 return -EINVAL;
434 }
435
436 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
437 sd->s_parent = sysfs_get(parent_sd);
438
439 ret = sysfs_link_sibling(sd);
440 if (ret)
441 return ret;
442
443 /* Update timestamps on the parent */
444 ps_iattr = parent_sd->s_iattr;
445 if (ps_iattr) {
446 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
447 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
448 }
449
450 /* Mark the entry added into directory tree */
451 sd->s_flags &= ~SYSFS_FLAG_REMOVED;
452
453 return 0;
454}
455
456/** 22/**
457 * sysfs_pathname - return full path to sysfs dirent 23 * sysfs_pathname - return full path to sysfs dirent
458 * @sd: sysfs_dirent whose path we want 24 * @sd: sysfs_dirent whose path we want
@@ -489,445 +55,33 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
489} 55}
490 56
491/** 57/**
492 * sysfs_add_one - add sysfs_dirent to parent
493 * @acxt: addrm context to use
494 * @sd: sysfs_dirent to be added
495 * @parent_sd: the parent sysfs_dirent to add @sd to
496 *
497 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
498 * the parent inode if @sd is a directory and link into the children
499 * list of the parent.
500 *
501 * This function should be called between calls to
502 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
503 * passed the same @acxt as passed to sysfs_addrm_start().
504 *
505 * LOCKING:
506 * Determined by sysfs_addrm_start().
507 *
508 * RETURNS:
509 * 0 on success, -EEXIST if entry with the given name already
510 * exists.
511 */
512int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
513 struct sysfs_dirent *parent_sd)
514{
515 int ret;
516
517 ret = __sysfs_add_one(acxt, sd, parent_sd);
518
519 if (ret == -EEXIST)
520 sysfs_warn_dup(parent_sd, sd->s_name);
521 return ret;
522}
523
524/**
525 * sysfs_remove_one - remove sysfs_dirent from parent
526 * @acxt: addrm context to use
527 * @sd: sysfs_dirent to be removed
528 *
529 * Mark @sd removed and drop nlink of parent inode if @sd is a
530 * directory. @sd is unlinked from the children list.
531 *
532 * This function should be called between calls to
533 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
534 * passed the same @acxt as passed to sysfs_addrm_start().
535 *
536 * LOCKING:
537 * Determined by sysfs_addrm_start().
538 */
539static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
540 struct sysfs_dirent *sd)
541{
542 struct sysfs_inode_attrs *ps_iattr;
543
544 /*
545 * Removal can be called multiple times on the same node. Only the
546 * first invocation is effective and puts the base ref.
547 */
548 if (sd->s_flags & SYSFS_FLAG_REMOVED)
549 return;
550
551 sysfs_unlink_sibling(sd);
552
553 /* Update timestamps on the parent */
554 ps_iattr = sd->s_parent->s_iattr;
555 if (ps_iattr) {
556 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
557 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
558 }
559
560 sd->s_flags |= SYSFS_FLAG_REMOVED;
561 sd->u.removed_list = acxt->removed;
562 acxt->removed = sd;
563}
564
565/**
566 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
567 * @acxt: addrm context to finish up
568 *
569 * Finish up sysfs_dirent add/remove. Resources acquired by
570 * sysfs_addrm_start() are released and removed sysfs_dirents are
571 * cleaned up.
572 *
573 * LOCKING:
574 * sysfs_mutex is released.
575 */
576void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
577 __releases(sysfs_mutex)
578{
579 /* release resources acquired by sysfs_addrm_start() */
580 mutex_unlock(&sysfs_mutex);
581
582 /* kill removed sysfs_dirents */
583 while (acxt->removed) {
584 struct sysfs_dirent *sd = acxt->removed;
585
586 acxt->removed = sd->u.removed_list;
587
588 sysfs_deactivate(sd);
589 sysfs_unmap_bin_file(sd);
590 sysfs_put(sd);
591 }
592}
593
594/**
595 * sysfs_find_dirent - find sysfs_dirent with the given name
596 * @parent_sd: sysfs_dirent to search under
597 * @name: name to look for
598 * @ns: the namespace tag to use
599 *
600 * Look for sysfs_dirent with name @name under @parent_sd.
601 *
602 * LOCKING:
603 * mutex_lock(sysfs_mutex)
604 *
605 * RETURNS:
606 * Pointer to sysfs_dirent if found, NULL if not.
607 */
608struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
609 const unsigned char *name,
610 const void *ns)
611{
612 struct rb_node *node = parent_sd->s_dir.children.rb_node;
613 unsigned int hash;
614
615 if (!!sysfs_ns_type(parent_sd) != !!ns) {
616 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
617 sysfs_ns_type(parent_sd) ? "required" : "invalid",
618 parent_sd->s_name, name);
619 return NULL;
620 }
621
622 hash = sysfs_name_hash(name, ns);
623 while (node) {
624 struct sysfs_dirent *sd;
625 int result;
626
627 sd = to_sysfs_dirent(node);
628 result = sysfs_name_compare(hash, name, ns, sd);
629 if (result < 0)
630 node = node->rb_left;
631 else if (result > 0)
632 node = node->rb_right;
633 else
634 return sd;
635 }
636 return NULL;
637}
638
639/**
640 * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
641 * @parent_sd: sysfs_dirent to search under
642 * @name: name to look for
643 * @ns: the namespace tag to use
644 *
645 * Look for sysfs_dirent with name @name under @parent_sd and get
646 * it if found.
647 *
648 * LOCKING:
649 * Kernel thread context (may sleep). Grabs sysfs_mutex.
650 *
651 * RETURNS:
652 * Pointer to sysfs_dirent if found, NULL if not.
653 */
654struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
655 const unsigned char *name,
656 const void *ns)
657{
658 struct sysfs_dirent *sd;
659
660 mutex_lock(&sysfs_mutex);
661 sd = sysfs_find_dirent(parent_sd, name, ns);
662 sysfs_get(sd);
663 mutex_unlock(&sysfs_mutex);
664
665 return sd;
666}
667EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
668
669static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
670 enum kobj_ns_type type,
671 const char *name, const void *ns,
672 struct sysfs_dirent **p_sd)
673{
674 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
675 struct sysfs_addrm_cxt acxt;
676 struct sysfs_dirent *sd;
677 int rc;
678
679 /* allocate */
680 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
681 if (!sd)
682 return -ENOMEM;
683
684 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
685 sd->s_ns = ns;
686 sd->s_dir.kobj = kobj;
687
688 /* link in */
689 sysfs_addrm_start(&acxt);
690 rc = sysfs_add_one(&acxt, sd, parent_sd);
691 sysfs_addrm_finish(&acxt);
692
693 if (rc == 0)
694 *p_sd = sd;
695 else
696 sysfs_put(sd);
697
698 return rc;
699}
700
701int sysfs_create_subdir(struct kobject *kobj, const char *name,
702 struct sysfs_dirent **p_sd)
703{
704 return create_dir(kobj, kobj->sd,
705 KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
706}
707
708/**
709 * sysfs_read_ns_type: return associated ns_type
710 * @kobj: the kobject being queried
711 *
712 * Each kobject can be tagged with exactly one namespace type
713 * (i.e. network or user). Return the ns_type associated with
714 * this object if any
715 */
716static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
717{
718 const struct kobj_ns_type_operations *ops;
719 enum kobj_ns_type type;
720
721 ops = kobj_child_ns_ops(kobj);
722 if (!ops)
723 return KOBJ_NS_TYPE_NONE;
724
725 type = ops->type;
726 BUG_ON(type <= KOBJ_NS_TYPE_NONE);
727 BUG_ON(type >= KOBJ_NS_TYPES);
728 BUG_ON(!kobj_ns_type_registered(type));
729
730 return type;
731}
732
733/**
734 * sysfs_create_dir_ns - create a directory for an object with a namespace tag 58 * sysfs_create_dir_ns - create a directory for an object with a namespace tag
735 * @kobj: object we're creating directory for 59 * @kobj: object we're creating directory for
736 * @ns: the namespace tag to use 60 * @ns: the namespace tag to use
737 */ 61 */
738int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) 62int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
739{ 63{
740 enum kobj_ns_type type;
741 struct sysfs_dirent *parent_sd, *sd; 64 struct sysfs_dirent *parent_sd, *sd;
742 int error = 0;
743 65
744 BUG_ON(!kobj); 66 BUG_ON(!kobj);
745 67
746 if (kobj->parent) 68 if (kobj->parent)
747 parent_sd = kobj->parent->sd; 69 parent_sd = kobj->parent->sd;
748 else 70 else
749 parent_sd = &sysfs_root; 71 parent_sd = sysfs_root_sd;
750 72
751 if (!parent_sd) 73 if (!parent_sd)
752 return -ENOENT; 74 return -ENOENT;
753 75
754 type = sysfs_read_ns_type(kobj); 76 sd = kernfs_create_dir_ns(parent_sd, kobject_name(kobj), kobj, ns);
755 77 if (IS_ERR(sd)) {
756 error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); 78 if (PTR_ERR(sd) == -EEXIST)
757 if (!error) 79 sysfs_warn_dup(parent_sd, kobject_name(kobj));
758 kobj->sd = sd; 80 return PTR_ERR(sd);
759 return error;
760}
761
762static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 unsigned int flags)
764{
765 struct dentry *ret = NULL;
766 struct dentry *parent = dentry->d_parent;
767 struct sysfs_dirent *parent_sd = parent->d_fsdata;
768 struct sysfs_dirent *sd;
769 struct inode *inode;
770 enum kobj_ns_type type;
771 const void *ns;
772
773 mutex_lock(&sysfs_mutex);
774
775 type = sysfs_ns_type(parent_sd);
776 ns = sysfs_info(dir->i_sb)->ns[type];
777
778 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
779
780 /* no such entry */
781 if (!sd) {
782 ret = ERR_PTR(-ENOENT);
783 goto out_unlock;
784 }
785 dentry->d_fsdata = sysfs_get(sd);
786
787 /* attach dentry and inode */
788 inode = sysfs_get_inode(dir->i_sb, sd);
789 if (!inode) {
790 ret = ERR_PTR(-ENOMEM);
791 goto out_unlock;
792 } 81 }
793 82
794 /* instantiate and hash dentry */ 83 kobj->sd = sd;
795 ret = d_materialise_unique(dentry, inode); 84 return 0;
796 out_unlock:
797 mutex_unlock(&sysfs_mutex);
798 return ret;
799}
800
801const struct inode_operations sysfs_dir_inode_operations = {
802 .lookup = sysfs_lookup,
803 .permission = sysfs_permission,
804 .setattr = sysfs_setattr,
805 .getattr = sysfs_getattr,
806 .setxattr = sysfs_setxattr,
807};
808
809static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
810{
811 struct sysfs_dirent *last;
812
813 while (true) {
814 struct rb_node *rbn;
815
816 last = pos;
817
818 if (sysfs_type(pos) != SYSFS_DIR)
819 break;
820
821 rbn = rb_first(&pos->s_dir.children);
822 if (!rbn)
823 break;
824
825 pos = to_sysfs_dirent(rbn);
826 }
827
828 return last;
829}
830
831/**
832 * sysfs_next_descendant_post - find the next descendant for post-order walk
833 * @pos: the current position (%NULL to initiate traversal)
834 * @root: sysfs_dirent whose descendants to walk
835 *
836 * Find the next descendant to visit for post-order traversal of @root's
837 * descendants. @root is included in the iteration and the last node to be
838 * visited.
839 */
840static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
841 struct sysfs_dirent *root)
842{
843 struct rb_node *rbn;
844
845 lockdep_assert_held(&sysfs_mutex);
846
847 /* if first iteration, visit leftmost descendant which may be root */
848 if (!pos)
849 return sysfs_leftmost_descendant(root);
850
851 /* if we visited @root, we're done */
852 if (pos == root)
853 return NULL;
854
855 /* if there's an unvisited sibling, visit its leftmost descendant */
856 rbn = rb_next(&pos->s_rb);
857 if (rbn)
858 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
859
860 /* no sibling left, visit parent */
861 return pos->s_parent;
862}
863
864static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
865 struct sysfs_dirent *sd)
866{
867 struct sysfs_dirent *pos, *next;
868
869 if (!sd)
870 return;
871
872 pr_debug("sysfs %s: removing\n", sd->s_name);
873
874 next = NULL;
875 do {
876 pos = next;
877 next = sysfs_next_descendant_post(pos, sd);
878 if (pos)
879 sysfs_remove_one(acxt, pos);
880 } while (next);
881}
882
883/**
884 * sysfs_remove - remove a sysfs_dirent recursively
885 * @sd: the sysfs_dirent to remove
886 *
887 * Remove @sd along with all its subdirectories and files.
888 */
889void sysfs_remove(struct sysfs_dirent *sd)
890{
891 struct sysfs_addrm_cxt acxt;
892
893 sysfs_addrm_start(&acxt);
894 __sysfs_remove(&acxt, sd);
895 sysfs_addrm_finish(&acxt);
896}
897
898/**
899 * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
900 * @dir_sd: parent of the target
901 * @name: name of the sysfs_dirent to remove
902 * @ns: namespace tag of the sysfs_dirent to remove
903 *
904 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
905 * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
906 */
907int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
908 const void *ns)
909{
910 struct sysfs_addrm_cxt acxt;
911 struct sysfs_dirent *sd;
912
913 if (!dir_sd) {
914 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
915 name);
916 return -ENOENT;
917 }
918
919 sysfs_addrm_start(&acxt);
920
921 sd = sysfs_find_dirent(dir_sd, name, ns);
922 if (sd)
923 __sysfs_remove(&acxt, sd);
924
925 sysfs_addrm_finish(&acxt);
926
927 if (sd)
928 return 0;
929 else
930 return -ENOENT;
931} 85}
932 86
933/** 87/**
@@ -960,60 +114,16 @@ void sysfs_remove_dir(struct kobject *kobj)
960 114
961 if (sd) { 115 if (sd) {
962 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 116 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
963 sysfs_remove(sd); 117 kernfs_remove(sd);
964 } 118 }
965} 119}
966 120
967int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
968 const char *new_name, const void *new_ns)
969{
970 int error;
971
972 mutex_lock(&sysfs_mutex);
973
974 error = 0;
975 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
976 (strcmp(sd->s_name, new_name) == 0))
977 goto out; /* nothing to rename */
978
979 error = -EEXIST;
980 if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
981 goto out;
982
983 /* rename sysfs_dirent */
984 if (strcmp(sd->s_name, new_name) != 0) {
985 error = -ENOMEM;
986 new_name = kstrdup(new_name, GFP_KERNEL);
987 if (!new_name)
988 goto out;
989
990 kfree(sd->s_name);
991 sd->s_name = new_name;
992 }
993
994 /*
995 * Move to the appropriate place in the appropriate directories rbtree.
996 */
997 sysfs_unlink_sibling(sd);
998 sysfs_get(new_parent_sd);
999 sysfs_put(sd->s_parent);
1000 sd->s_ns = new_ns;
1001 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
1002 sd->s_parent = new_parent_sd;
1003 sysfs_link_sibling(sd);
1004
1005 error = 0;
1006 out:
1007 mutex_unlock(&sysfs_mutex);
1008 return error;
1009}
1010
1011int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, 121int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
1012 const void *new_ns) 122 const void *new_ns)
1013{ 123{
1014 struct sysfs_dirent *parent_sd = kobj->sd->s_parent; 124 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
1015 125
1016 return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); 126 return kernfs_rename_ns(kobj->sd, parent_sd, new_name, new_ns);
1017} 127}
1018 128
1019int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, 129int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
@@ -1024,123 +134,7 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
1024 134
1025 BUG_ON(!sd->s_parent); 135 BUG_ON(!sd->s_parent);
1026 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 136 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
1027 new_parent_kobj->sd : &sysfs_root; 137 new_parent_kobj->sd : sysfs_root_sd;
1028 138
1029 return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); 139 return kernfs_rename_ns(sd, new_parent_sd, sd->s_name, new_ns);
1030} 140}
1031
1032/* Relationship between s_mode and the DT_xxx types */
1033static inline unsigned char dt_type(struct sysfs_dirent *sd)
1034{
1035 return (sd->s_mode >> 12) & 15;
1036}
1037
1038static int sysfs_dir_release(struct inode *inode, struct file *filp)
1039{
1040 sysfs_put(filp->private_data);
1041 return 0;
1042}
1043
1044static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
1045 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
1046{
1047 if (pos) {
1048 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
1049 pos->s_parent == parent_sd &&
1050 hash == pos->s_hash;
1051 sysfs_put(pos);
1052 if (!valid)
1053 pos = NULL;
1054 }
1055 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1056 struct rb_node *node = parent_sd->s_dir.children.rb_node;
1057 while (node) {
1058 pos = to_sysfs_dirent(node);
1059
1060 if (hash < pos->s_hash)
1061 node = node->rb_left;
1062 else if (hash > pos->s_hash)
1063 node = node->rb_right;
1064 else
1065 break;
1066 }
1067 }
1068 /* Skip over entries in the wrong namespace */
1069 while (pos && pos->s_ns != ns) {
1070 struct rb_node *node = rb_next(&pos->s_rb);
1071 if (!node)
1072 pos = NULL;
1073 else
1074 pos = to_sysfs_dirent(node);
1075 }
1076 return pos;
1077}
1078
1079static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
1080 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
1081{
1082 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
1083 if (pos)
1084 do {
1085 struct rb_node *node = rb_next(&pos->s_rb);
1086 if (!node)
1087 pos = NULL;
1088 else
1089 pos = to_sysfs_dirent(node);
1090 } while (pos && pos->s_ns != ns);
1091 return pos;
1092}
1093
1094static int sysfs_readdir(struct file *file, struct dir_context *ctx)
1095{
1096 struct dentry *dentry = file->f_path.dentry;
1097 struct sysfs_dirent *parent_sd = dentry->d_fsdata;
1098 struct sysfs_dirent *pos = file->private_data;
1099 enum kobj_ns_type type;
1100 const void *ns;
1101
1102 type = sysfs_ns_type(parent_sd);
1103 ns = sysfs_info(dentry->d_sb)->ns[type];
1104
1105 if (!dir_emit_dots(file, ctx))
1106 return 0;
1107 mutex_lock(&sysfs_mutex);
1108 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
1109 pos;
1110 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
1111 const char *name = pos->s_name;
1112 unsigned int type = dt_type(pos);
1113 int len = strlen(name);
1114 ino_t ino = pos->s_ino;
1115 ctx->pos = pos->s_hash;
1116 file->private_data = sysfs_get(pos);
1117
1118 mutex_unlock(&sysfs_mutex);
1119 if (!dir_emit(ctx, name, len, ino, type))
1120 return 0;
1121 mutex_lock(&sysfs_mutex);
1122 }
1123 mutex_unlock(&sysfs_mutex);
1124 file->private_data = NULL;
1125 ctx->pos = INT_MAX;
1126 return 0;
1127}
1128
1129static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1130{
1131 struct inode *inode = file_inode(file);
1132 loff_t ret;
1133
1134 mutex_lock(&inode->i_mutex);
1135 ret = generic_file_llseek(file, offset, whence);
1136 mutex_unlock(&inode->i_mutex);
1137
1138 return ret;
1139}
1140
1141const struct file_operations sysfs_dir_operations = {
1142 .read = generic_read_dir,
1143 .iterate = sysfs_readdir,
1144 .release = sysfs_dir_release,
1145 .llseek = sysfs_dir_llseek,
1146};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 35e7d08fe629..ac77d2be3c31 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,59 +14,12 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
18#include <linux/namei.h>
19#include <linux/poll.h>
20#include <linux/list.h> 17#include <linux/list.h>
21#include <linux/mutex.h> 18#include <linux/mutex.h>
22#include <linux/limits.h>
23#include <linux/uaccess.h>
24#include <linux/seq_file.h> 19#include <linux/seq_file.h>
25#include <linux/mm.h>
26 20
27#include "sysfs.h" 21#include "sysfs.h"
28 22#include "../kernfs/kernfs-internal.h"
29/*
30 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
31 * for each sysfs_dirent with one or more open files.
32 *
33 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
34 * protected by sysfs_open_dirent_lock.
35 *
36 * filp->private_data points to seq_file whose ->private points to
37 * sysfs_open_file. sysfs_open_files are chained at
38 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
39 */
40static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
41static DEFINE_MUTEX(sysfs_open_file_mutex);
42
43struct sysfs_open_dirent {
44 atomic_t refcnt;
45 atomic_t event;
46 wait_queue_head_t poll;
47 struct list_head files; /* goes through sysfs_open_file.list */
48};
49
50struct sysfs_open_file {
51 struct sysfs_dirent *sd;
52 struct file *file;
53 struct mutex mutex;
54 int event;
55 struct list_head list;
56
57 bool mmapped;
58 const struct vm_operations_struct *vm_ops;
59};
60
61static bool sysfs_is_bin(struct sysfs_dirent *sd)
62{
63 return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
64}
65
66static struct sysfs_open_file *sysfs_of(struct file *file)
67{
68 return ((struct seq_file *)file->private_data)->private;
69}
70 23
71/* 24/*
72 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function 25 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
@@ -74,9 +27,9 @@ static struct sysfs_open_file *sysfs_of(struct file *file)
74 */ 27 */
75static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) 28static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
76{ 29{
77 struct kobject *kobj = sd->s_parent->s_dir.kobj; 30 struct kobject *kobj = sd->s_parent->priv;
78 31
79 if (!sysfs_ignore_lockdep(sd)) 32 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
80 lockdep_assert_held(sd); 33 lockdep_assert_held(sd);
81 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; 34 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
82} 35}
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
86 * details like buffering and seeking. The following function pipes 39 * details like buffering and seeking. The following function pipes
87 * sysfs_ops->show() result through seq_file. 40 * sysfs_ops->show() result through seq_file.
88 */ 41 */
89static int sysfs_seq_show(struct seq_file *sf, void *v) 42static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
90{ 43{
91 struct sysfs_open_file *of = sf->private; 44 struct sysfs_open_file *of = sf->private;
92 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 45 struct kobject *kobj = of->sd->s_parent->priv;
93 const struct sysfs_ops *ops; 46 const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
94 char *buf;
95 ssize_t count; 47 ssize_t count;
48 char *buf;
96 49
97 /* acquire buffer and ensure that it's >= PAGE_SIZE */ 50 /* acquire buffer and ensure that it's >= PAGE_SIZE */
98 count = seq_get_buf(sf, &buf); 51 count = seq_get_buf(sf, &buf);
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
102 } 55 }
103 56
104 /* 57 /*
105 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex 58 * Invoke show(). Control may reach here via seq file lseek even
106 * nests outside active ref and is just to ensure that the ops 59 * if @ops->show() isn't implemented.
107 * aren't called concurrently for the same open file.
108 */ 60 */
109 mutex_lock(&of->mutex); 61 if (ops->show) {
110 if (!sysfs_get_active(of->sd)) { 62 count = ops->show(kobj, of->sd->priv, buf);
111 mutex_unlock(&of->mutex); 63 if (count < 0)
112 return -ENODEV; 64 return count;
113 } 65 }
114 66
115 of->event = atomic_read(&of->sd->s_attr.open->event);
116
117 /*
118 * Lookup @ops and invoke show(). Control may reach here via seq
119 * file lseek even if @ops->show() isn't implemented.
120 */
121 ops = sysfs_file_ops(of->sd);
122 if (ops->show)
123 count = ops->show(kobj, of->sd->s_attr.attr, buf);
124 else
125 count = 0;
126
127 sysfs_put_active(of->sd);
128 mutex_unlock(&of->mutex);
129
130 if (count < 0)
131 return count;
132
133 /* 67 /*
134 * The code works fine with PAGE_SIZE return but it's likely to 68 * The code works fine with PAGE_SIZE return but it's likely to
135 * indicate truncated result or overflow in normal use cases. 69 * indicate truncated result or overflow in normal use cases.
@@ -144,726 +78,190 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
144 return 0; 78 return 0;
145} 79}
146 80
147/* 81static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf,
148 * Read method for bin files. As reading a bin file can have side-effects, 82 size_t count, loff_t pos)
149 * the exact offset and bytes specified in read(2) call should be passed to
150 * the read callback making it difficult to use seq_file. Implement
151 * simplistic custom buffering for bin files.
152 */
153static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
154 size_t bytes, loff_t *off)
155{ 83{
156 struct sysfs_open_file *of = sysfs_of(file); 84 struct bin_attribute *battr = of->sd->priv;
157 struct bin_attribute *battr = of->sd->s_attr.bin_attr; 85 struct kobject *kobj = of->sd->s_parent->priv;
158 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 86 loff_t size = file_inode(of->file)->i_size;
159 loff_t size = file_inode(file)->i_size;
160 int count = min_t(size_t, bytes, PAGE_SIZE);
161 loff_t offs = *off;
162 char *buf;
163 87
164 if (!bytes) 88 if (!count)
165 return 0; 89 return 0;
166 90
167 if (size) { 91 if (size) {
168 if (offs > size) 92 if (pos > size)
169 return 0; 93 return 0;
170 if (offs + count > size) 94 if (pos + count > size)
171 count = size - offs; 95 count = size - pos;
172 }
173
174 buf = kmalloc(count, GFP_KERNEL);
175 if (!buf)
176 return -ENOMEM;
177
178 /* need of->sd for battr, its parent for kobj */
179 mutex_lock(&of->mutex);
180 if (!sysfs_get_active(of->sd)) {
181 count = -ENODEV;
182 mutex_unlock(&of->mutex);
183 goto out_free;
184 } 96 }
185 97
186 if (battr->read) 98 if (!battr->read)
187 count = battr->read(file, kobj, battr, buf, offs, count); 99 return -EIO;
188 else
189 count = -EIO;
190
191 sysfs_put_active(of->sd);
192 mutex_unlock(&of->mutex);
193
194 if (count < 0)
195 goto out_free;
196
197 if (copy_to_user(userbuf, buf, count)) {
198 count = -EFAULT;
199 goto out_free;
200 }
201
202 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
203 100
204 *off = offs + count; 101 return battr->read(of->file, kobj, battr, buf, pos, count);
205
206 out_free:
207 kfree(buf);
208 return count;
209} 102}
210 103
211/** 104/* kernfs write callback for regular sysfs files */
212 * flush_write_buffer - push buffer to kobject 105static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf,
213 * @of: open file 106 size_t count, loff_t pos)
214 * @buf: data buffer for file
215 * @off: file offset to write to
216 * @count: number of bytes
217 *
218 * Get the correct pointers for the kobject and the attribute we're dealing
219 * with, then call the store() method for it with @buf.
220 */
221static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
222 size_t count)
223{ 107{
224 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 108 const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
225 int rc = 0; 109 struct kobject *kobj = of->sd->s_parent->priv;
226
227 /*
228 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
229 * nests outside active ref and is just to ensure that the ops
230 * aren't called concurrently for the same open file.
231 */
232 mutex_lock(&of->mutex);
233 if (!sysfs_get_active(of->sd)) {
234 mutex_unlock(&of->mutex);
235 return -ENODEV;
236 }
237
238 if (sysfs_is_bin(of->sd)) {
239 struct bin_attribute *battr = of->sd->s_attr.bin_attr;
240 110
241 rc = -EIO; 111 if (!count)
242 if (battr->write) 112 return 0;
243 rc = battr->write(of->file, kobj, battr, buf, off,
244 count);
245 } else {
246 const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
247
248 rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
249 }
250
251 sysfs_put_active(of->sd);
252 mutex_unlock(&of->mutex);
253 113
254 return rc; 114 return ops->store(kobj, of->sd->priv, buf, count);
255} 115}
256 116
257/** 117/* kernfs write callback for bin sysfs files */
258 * sysfs_write_file - write an attribute 118static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf,
259 * @file: file pointer 119 size_t count, loff_t pos)
260 * @user_buf: data to write
261 * @count: number of bytes
262 * @ppos: starting offset
263 *
264 * Copy data in from userland and pass it to the matching
265 * sysfs_ops->store() by invoking flush_write_buffer().
266 *
267 * There is no easy way for us to know if userspace is only doing a partial
268 * write, so we don't support them. We expect the entire buffer to come on
269 * the first write. Hint: if you're writing a value, first read the file,
270 * modify only the the value you're changing, then write entire buffer
271 * back.
272 */
273static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
274 size_t count, loff_t *ppos)
275{ 120{
276 struct sysfs_open_file *of = sysfs_of(file); 121 struct bin_attribute *battr = of->sd->priv;
277 ssize_t len = min_t(size_t, count, PAGE_SIZE); 122 struct kobject *kobj = of->sd->s_parent->priv;
278 loff_t size = file_inode(file)->i_size; 123 loff_t size = file_inode(of->file)->i_size;
279 char *buf;
280 124
281 if (sysfs_is_bin(of->sd) && size) { 125 if (size) {
282 if (size <= *ppos) 126 if (size <= pos)
283 return 0; 127 return 0;
284 len = min_t(ssize_t, len, size - *ppos); 128 count = min_t(ssize_t, count, size - pos);
285 } 129 }
286 130 if (!count)
287 if (!len)
288 return 0; 131 return 0;
289 132
290 buf = kmalloc(len + 1, GFP_KERNEL); 133 if (!battr->write)
291 if (!buf) 134 return -EIO;
292 return -ENOMEM;
293 135
294 if (copy_from_user(buf, user_buf, len)) { 136 return battr->write(of->file, kobj, battr, buf, pos, count);
295 len = -EFAULT;
296 goto out_free;
297 }
298 buf[len] = '\0'; /* guarantee string termination */
299
300 len = flush_write_buffer(of, buf, *ppos, len);
301 if (len > 0)
302 *ppos += len;
303out_free:
304 kfree(buf);
305 return len;
306} 137}
307 138
308static void sysfs_bin_vma_open(struct vm_area_struct *vma) 139static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
140 struct vm_area_struct *vma)
309{ 141{
310 struct file *file = vma->vm_file; 142 struct bin_attribute *battr = of->sd->priv;
311 struct sysfs_open_file *of = sysfs_of(file); 143 struct kobject *kobj = of->sd->s_parent->priv;
312
313 if (!of->vm_ops)
314 return;
315
316 if (!sysfs_get_active(of->sd))
317 return;
318
319 if (of->vm_ops->open)
320 of->vm_ops->open(vma);
321
322 sysfs_put_active(of->sd);
323}
324
325static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
326{
327 struct file *file = vma->vm_file;
328 struct sysfs_open_file *of = sysfs_of(file);
329 int ret;
330
331 if (!of->vm_ops)
332 return VM_FAULT_SIGBUS;
333
334 if (!sysfs_get_active(of->sd))
335 return VM_FAULT_SIGBUS;
336
337 ret = VM_FAULT_SIGBUS;
338 if (of->vm_ops->fault)
339 ret = of->vm_ops->fault(vma, vmf);
340
341 sysfs_put_active(of->sd);
342 return ret;
343}
344
345static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma,
346 struct vm_fault *vmf)
347{
348 struct file *file = vma->vm_file;
349 struct sysfs_open_file *of = sysfs_of(file);
350 int ret;
351
352 if (!of->vm_ops)
353 return VM_FAULT_SIGBUS;
354
355 if (!sysfs_get_active(of->sd))
356 return VM_FAULT_SIGBUS;
357
358 ret = 0;
359 if (of->vm_ops->page_mkwrite)
360 ret = of->vm_ops->page_mkwrite(vma, vmf);
361 else
362 file_update_time(file);
363
364 sysfs_put_active(of->sd);
365 return ret;
366}
367
368static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
369 void *buf, int len, int write)
370{
371 struct file *file = vma->vm_file;
372 struct sysfs_open_file *of = sysfs_of(file);
373 int ret;
374
375 if (!of->vm_ops)
376 return -EINVAL;
377
378 if (!sysfs_get_active(of->sd))
379 return -EINVAL;
380
381 ret = -EINVAL;
382 if (of->vm_ops->access)
383 ret = of->vm_ops->access(vma, addr, buf, len, write);
384
385 sysfs_put_active(of->sd);
386 return ret;
387}
388
389#ifdef CONFIG_NUMA
390static int sysfs_bin_set_policy(struct vm_area_struct *vma,
391 struct mempolicy *new)
392{
393 struct file *file = vma->vm_file;
394 struct sysfs_open_file *of = sysfs_of(file);
395 int ret;
396
397 if (!of->vm_ops)
398 return 0;
399
400 if (!sysfs_get_active(of->sd))
401 return -EINVAL;
402
403 ret = 0;
404 if (of->vm_ops->set_policy)
405 ret = of->vm_ops->set_policy(vma, new);
406
407 sysfs_put_active(of->sd);
408 return ret;
409}
410
411static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
412 unsigned long addr)
413{
414 struct file *file = vma->vm_file;
415 struct sysfs_open_file *of = sysfs_of(file);
416 struct mempolicy *pol;
417
418 if (!of->vm_ops)
419 return vma->vm_policy;
420
421 if (!sysfs_get_active(of->sd))
422 return vma->vm_policy;
423
424 pol = vma->vm_policy;
425 if (of->vm_ops->get_policy)
426 pol = of->vm_ops->get_policy(vma, addr);
427
428 sysfs_put_active(of->sd);
429 return pol;
430}
431
432static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
433 const nodemask_t *to, unsigned long flags)
434{
435 struct file *file = vma->vm_file;
436 struct sysfs_open_file *of = sysfs_of(file);
437 int ret;
438
439 if (!of->vm_ops)
440 return 0;
441
442 if (!sysfs_get_active(of->sd))
443 return 0;
444
445 ret = 0;
446 if (of->vm_ops->migrate)
447 ret = of->vm_ops->migrate(vma, from, to, flags);
448
449 sysfs_put_active(of->sd);
450 return ret;
451}
452#endif
453
454static const struct vm_operations_struct sysfs_bin_vm_ops = {
455 .open = sysfs_bin_vma_open,
456 .fault = sysfs_bin_fault,
457 .page_mkwrite = sysfs_bin_page_mkwrite,
458 .access = sysfs_bin_access,
459#ifdef CONFIG_NUMA
460 .set_policy = sysfs_bin_set_policy,
461 .get_policy = sysfs_bin_get_policy,
462 .migrate = sysfs_bin_migrate,
463#endif
464};
465
466static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
467{
468 struct sysfs_open_file *of = sysfs_of(file);
469 struct bin_attribute *battr = of->sd->s_attr.bin_attr;
470 struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
471 int rc;
472
473 mutex_lock(&of->mutex);
474
475 /* need of->sd for battr, its parent for kobj */
476 rc = -ENODEV;
477 if (!sysfs_get_active(of->sd))
478 goto out_unlock;
479 144
480 if (!battr->mmap) 145 if (!battr->mmap)
481 goto out_put; 146 return -ENODEV;
482
483 rc = battr->mmap(file, kobj, battr, vma);
484 if (rc)
485 goto out_put;
486
487 /*
488 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
489 * to satisfy versions of X which crash if the mmap fails: that
490 * substitutes a new vm_file, and we don't then want bin_vm_ops.
491 */
492 if (vma->vm_file != file)
493 goto out_put;
494
495 rc = -EINVAL;
496 if (of->mmapped && of->vm_ops != vma->vm_ops)
497 goto out_put;
498
499 /*
500 * It is not possible to successfully wrap close.
501 * So error if someone is trying to use close.
502 */
503 rc = -EINVAL;
504 if (vma->vm_ops && vma->vm_ops->close)
505 goto out_put;
506
507 rc = 0;
508 of->mmapped = 1;
509 of->vm_ops = vma->vm_ops;
510 vma->vm_ops = &sysfs_bin_vm_ops;
511out_put:
512 sysfs_put_active(of->sd);
513out_unlock:
514 mutex_unlock(&of->mutex);
515
516 return rc;
517}
518
519/**
520 * sysfs_get_open_dirent - get or create sysfs_open_dirent
521 * @sd: target sysfs_dirent
522 * @of: sysfs_open_file for this instance of open
523 *
524 * If @sd->s_attr.open exists, increment its reference count;
525 * otherwise, create one. @of is chained to the files list.
526 *
527 * LOCKING:
528 * Kernel thread context (may sleep).
529 *
530 * RETURNS:
531 * 0 on success, -errno on failure.
532 */
533static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
534 struct sysfs_open_file *of)
535{
536 struct sysfs_open_dirent *od, *new_od = NULL;
537
538 retry:
539 mutex_lock(&sysfs_open_file_mutex);
540 spin_lock_irq(&sysfs_open_dirent_lock);
541
542 if (!sd->s_attr.open && new_od) {
543 sd->s_attr.open = new_od;
544 new_od = NULL;
545 }
546
547 od = sd->s_attr.open;
548 if (od) {
549 atomic_inc(&od->refcnt);
550 list_add_tail(&of->list, &od->files);
551 }
552
553 spin_unlock_irq(&sysfs_open_dirent_lock);
554 mutex_unlock(&sysfs_open_file_mutex);
555
556 if (od) {
557 kfree(new_od);
558 return 0;
559 }
560
561 /* not there, initialize a new one and retry */
562 new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
563 if (!new_od)
564 return -ENOMEM;
565 147
566 atomic_set(&new_od->refcnt, 0); 148 return battr->mmap(of->file, kobj, battr, vma);
567 atomic_set(&new_od->event, 1);
568 init_waitqueue_head(&new_od->poll);
569 INIT_LIST_HEAD(&new_od->files);
570 goto retry;
571} 149}
572 150
573/** 151void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
574 * sysfs_put_open_dirent - put sysfs_open_dirent
575 * @sd: target sysfs_dirent
576 * @of: associated sysfs_open_file
577 *
578 * Put @sd->s_attr.open and unlink @of from the files list. If
579 * reference count reaches zero, disassociate and free it.
580 *
581 * LOCKING:
582 * None.
583 */
584static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
585 struct sysfs_open_file *of)
586{ 152{
587 struct sysfs_open_dirent *od = sd->s_attr.open; 153 struct sysfs_dirent *sd = k->sd, *tmp;
588 unsigned long flags;
589
590 mutex_lock(&sysfs_open_file_mutex);
591 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
592 154
593 if (of) 155 if (sd && dir)
594 list_del(&of->list); 156 sd = kernfs_find_and_get(sd, dir);
595
596 if (atomic_dec_and_test(&od->refcnt))
597 sd->s_attr.open = NULL;
598 else 157 else
599 od = NULL; 158 kernfs_get(sd);
600
601 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
602 mutex_unlock(&sysfs_open_file_mutex);
603
604 kfree(od);
605}
606
607static int sysfs_open_file(struct inode *inode, struct file *file)
608{
609 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
610 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
611 struct sysfs_open_file *of;
612 bool has_read, has_write;
613 int error = -EACCES;
614
615 /* need attr_sd for attr and ops, its parent for kobj */
616 if (!sysfs_get_active(attr_sd))
617 return -ENODEV;
618 159
619 if (sysfs_is_bin(attr_sd)) { 160 if (sd && attr) {
620 struct bin_attribute *battr = attr_sd->s_attr.bin_attr; 161 tmp = kernfs_find_and_get(sd, attr);
621 162 kernfs_put(sd);
622 has_read = battr->read || battr->mmap; 163 sd = tmp;
623 has_write = battr->write || battr->mmap;
624 } else {
625 const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
626
627 /* every kobject with an attribute needs a ktype assigned */
628 if (WARN(!ops, KERN_ERR
629 "missing sysfs attribute operations for kobject: %s\n",
630 kobject_name(kobj)))
631 goto err_out;
632
633 has_read = ops->show;
634 has_write = ops->store;
635 } 164 }
636 165
637 /* check perms and supported operations */ 166 if (sd) {
638 if ((file->f_mode & FMODE_WRITE) && 167 kernfs_notify(sd);
639 (!(inode->i_mode & S_IWUGO) || !has_write)) 168 kernfs_put(sd);
640 goto err_out;
641
642 if ((file->f_mode & FMODE_READ) &&
643 (!(inode->i_mode & S_IRUGO) || !has_read))
644 goto err_out;
645
646 /* allocate a sysfs_open_file for the file */
647 error = -ENOMEM;
648 of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
649 if (!of)
650 goto err_out;
651
652 /*
653 * The following is done to give a different lockdep key to
654 * @of->mutex for files which implement mmap. This is a rather
655 * crude way to avoid false positive lockdep warning around
656 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
657 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
658 * which mm->mmap_sem nests, while holding @of->mutex. As each
659 * open file has a separate mutex, it's okay as long as those don't
660 * happen on the same file. At this point, we can't easily give
661 * each file a separate locking class. Let's differentiate on
662 * whether the file is bin or not for now.
663 */
664 if (sysfs_is_bin(attr_sd))
665 mutex_init(&of->mutex);
666 else
667 mutex_init(&of->mutex);
668
669 of->sd = attr_sd;
670 of->file = file;
671
672 /*
673 * Always instantiate seq_file even if read access doesn't use
674 * seq_file or is not requested. This unifies private data access
675 * and readable regular files are the vast majority anyway.
676 */
677 if (sysfs_is_bin(attr_sd))
678 error = single_open(file, NULL, of);
679 else
680 error = single_open(file, sysfs_seq_show, of);
681 if (error)
682 goto err_free;
683
684 /* seq_file clears PWRITE unconditionally, restore it if WRITE */
685 if (file->f_mode & FMODE_WRITE)
686 file->f_mode |= FMODE_PWRITE;
687
688 /* make sure we have open dirent struct */
689 error = sysfs_get_open_dirent(attr_sd, of);
690 if (error)
691 goto err_close;
692
693 /* open succeeded, put active references */
694 sysfs_put_active(attr_sd);
695 return 0;
696
697err_close:
698 single_release(inode, file);
699err_free:
700 kfree(of);
701err_out:
702 sysfs_put_active(attr_sd);
703 return error;
704}
705
706static int sysfs_release(struct inode *inode, struct file *filp)
707{
708 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
709 struct sysfs_open_file *of = sysfs_of(filp);
710
711 sysfs_put_open_dirent(sd, of);
712 single_release(inode, filp);
713 kfree(of);
714
715 return 0;
716}
717
718void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
719{
720 struct sysfs_open_dirent *od;
721 struct sysfs_open_file *of;
722
723 if (!sysfs_is_bin(sd))
724 return;
725
726 spin_lock_irq(&sysfs_open_dirent_lock);
727 od = sd->s_attr.open;
728 if (od)
729 atomic_inc(&od->refcnt);
730 spin_unlock_irq(&sysfs_open_dirent_lock);
731 if (!od)
732 return;
733
734 mutex_lock(&sysfs_open_file_mutex);
735 list_for_each_entry(of, &od->files, list) {
736 struct inode *inode = file_inode(of->file);
737 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
738 } 169 }
739 mutex_unlock(&sysfs_open_file_mutex);
740
741 sysfs_put_open_dirent(sd, NULL);
742}
743
744/* Sysfs attribute files are pollable. The idea is that you read
745 * the content and then you use 'poll' or 'select' to wait for
746 * the content to change. When the content changes (assuming the
747 * manager for the kobject supports notification), poll will
748 * return POLLERR|POLLPRI, and select will return the fd whether
749 * it is waiting for read, write, or exceptions.
750 * Once poll/select indicates that the value has changed, you
751 * need to close and re-open the file, or seek to 0 and read again.
752 * Reminder: this only works for attributes which actively support
753 * it, and it is not possible to test an attribute from userspace
754 * to see if it supports poll (Neither 'poll' nor 'select' return
755 * an appropriate error code). When in doubt, set a suitable timeout value.
756 */
757static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
758{
759 struct sysfs_open_file *of = sysfs_of(filp);
760 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
761 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
762
763 /* need parent for the kobj, grab both */
764 if (!sysfs_get_active(attr_sd))
765 goto trigger;
766
767 poll_wait(filp, &od->poll, wait);
768
769 sysfs_put_active(attr_sd);
770
771 if (of->event != atomic_read(&od->event))
772 goto trigger;
773
774 return DEFAULT_POLLMASK;
775
776 trigger:
777 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
778} 170}
171EXPORT_SYMBOL_GPL(sysfs_notify);
779 172
780void sysfs_notify_dirent(struct sysfs_dirent *sd) 173static const struct kernfs_ops sysfs_file_kfops_empty = {
781{ 174};
782 struct sysfs_open_dirent *od;
783 unsigned long flags;
784
785 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
786
787 if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
788 od = sd->s_attr.open;
789 if (od) {
790 atomic_inc(&od->event);
791 wake_up_interruptible(&od->poll);
792 }
793 }
794
795 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
796}
797EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
798 175
799void sysfs_notify(struct kobject *k, const char *dir, const char *attr) 176static const struct kernfs_ops sysfs_file_kfops_ro = {
800{ 177 .seq_show = sysfs_kf_seq_show,
801 struct sysfs_dirent *sd = k->sd; 178};
802 179
803 mutex_lock(&sysfs_mutex); 180static const struct kernfs_ops sysfs_file_kfops_wo = {
181 .write = sysfs_kf_write,
182};
804 183
805 if (sd && dir) 184static const struct kernfs_ops sysfs_file_kfops_rw = {
806 sd = sysfs_find_dirent(sd, dir, NULL); 185 .seq_show = sysfs_kf_seq_show,
807 if (sd && attr) 186 .write = sysfs_kf_write,
808 sd = sysfs_find_dirent(sd, attr, NULL); 187};
809 if (sd)
810 sysfs_notify_dirent(sd);
811 188
812 mutex_unlock(&sysfs_mutex); 189static const struct kernfs_ops sysfs_bin_kfops_ro = {
813} 190 .read = sysfs_kf_bin_read,
814EXPORT_SYMBOL_GPL(sysfs_notify); 191};
815 192
816const struct file_operations sysfs_file_operations = { 193static const struct kernfs_ops sysfs_bin_kfops_wo = {
817 .read = seq_read, 194 .write = sysfs_kf_bin_write,
818 .write = sysfs_write_file,
819 .llseek = generic_file_llseek,
820 .open = sysfs_open_file,
821 .release = sysfs_release,
822 .poll = sysfs_poll,
823}; 195};
824 196
825const struct file_operations sysfs_bin_operations = { 197static const struct kernfs_ops sysfs_bin_kfops_rw = {
826 .read = sysfs_bin_read, 198 .read = sysfs_kf_bin_read,
827 .write = sysfs_write_file, 199 .write = sysfs_kf_bin_write,
828 .llseek = generic_file_llseek, 200 .mmap = sysfs_kf_bin_mmap,
829 .mmap = sysfs_bin_mmap,
830 .open = sysfs_open_file,
831 .release = sysfs_release,
832 .poll = sysfs_poll,
833}; 201};
834 202
835int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, 203int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
836 const struct attribute *attr, int type, 204 const struct attribute *attr, bool is_bin,
837 umode_t amode, const void *ns) 205 umode_t mode, const void *ns)
838{ 206{
839 umode_t mode = (amode & S_IALLUGO) | S_IFREG; 207 struct lock_class_key *key = NULL;
840 struct sysfs_addrm_cxt acxt; 208 const struct kernfs_ops *ops;
841 struct sysfs_dirent *sd; 209 struct sysfs_dirent *sd;
842 int rc; 210 loff_t size;
843
844 sd = sysfs_new_dirent(attr->name, mode, type);
845 if (!sd)
846 return -ENOMEM;
847 211
848 sd->s_ns = ns; 212 if (!is_bin) {
849 sd->s_attr.attr = (void *)attr; 213 struct kobject *kobj = dir_sd->priv;
850 sysfs_dirent_init_lockdep(sd); 214 const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
851 215
852 sysfs_addrm_start(&acxt); 216 /* every kobject with an attribute needs a ktype assigned */
853 rc = sysfs_add_one(&acxt, sd, dir_sd); 217 if (WARN(!sysfs_ops, KERN_ERR
854 sysfs_addrm_finish(&acxt); 218 "missing sysfs attribute operations for kobject: %s\n",
855 219 kobject_name(kobj)))
856 if (rc) 220 return -EINVAL;
857 sysfs_put(sd); 221
222 if (sysfs_ops->show && sysfs_ops->store)
223 ops = &sysfs_file_kfops_rw;
224 else if (sysfs_ops->show)
225 ops = &sysfs_file_kfops_ro;
226 else if (sysfs_ops->store)
227 ops = &sysfs_file_kfops_wo;
228 else
229 ops = &sysfs_file_kfops_empty;
230
231 size = PAGE_SIZE;
232 } else {
233 struct bin_attribute *battr = (void *)attr;
234
235 if ((battr->read && battr->write) || battr->mmap)
236 ops = &sysfs_bin_kfops_rw;
237 else if (battr->read)
238 ops = &sysfs_bin_kfops_ro;
239 else if (battr->write)
240 ops = &sysfs_bin_kfops_wo;
241 else
242 ops = &sysfs_file_kfops_empty;
243
244 size = battr->size;
245 }
858 246
859 return rc; 247#ifdef CONFIG_DEBUG_LOCK_ALLOC
248 if (!attr->ignore_lockdep)
249 key = attr->key ?: (struct lock_class_key *)&attr->skey;
250#endif
251 sd = kernfs_create_file_ns_key(dir_sd, attr->name, mode, size,
252 ops, (void *)attr, ns, key);
253 if (IS_ERR(sd)) {
254 if (PTR_ERR(sd) == -EEXIST)
255 sysfs_warn_dup(dir_sd, attr->name);
256 return PTR_ERR(sd);
257 }
258 return 0;
860} 259}
861 260
862
863int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 261int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
864 int type) 262 bool is_bin)
865{ 263{
866 return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); 264 return sysfs_add_file_mode_ns(dir_sd, attr, is_bin, attr->mode, NULL);
867} 265}
868 266
869/** 267/**
@@ -877,8 +275,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
877{ 275{
878 BUG_ON(!kobj || !kobj->sd || !attr); 276 BUG_ON(!kobj || !kobj->sd || !attr);
879 277
880 return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, 278 return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
881 attr->mode, ns);
882 279
883} 280}
884EXPORT_SYMBOL_GPL(sysfs_create_file_ns); 281EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -909,16 +306,18 @@ int sysfs_add_file_to_group(struct kobject *kobj,
909 struct sysfs_dirent *dir_sd; 306 struct sysfs_dirent *dir_sd;
910 int error; 307 int error;
911 308
912 if (group) 309 if (group) {
913 dir_sd = sysfs_get_dirent(kobj->sd, group); 310 dir_sd = kernfs_find_and_get(kobj->sd, group);
914 else 311 } else {
915 dir_sd = sysfs_get(kobj->sd); 312 dir_sd = kobj->sd;
313 kernfs_get(dir_sd);
314 }
916 315
917 if (!dir_sd) 316 if (!dir_sd)
918 return -ENOENT; 317 return -ENOENT;
919 318
920 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); 319 error = sysfs_add_file(dir_sd, attr, false);
921 sysfs_put(dir_sd); 320 kernfs_put(dir_sd);
922 321
923 return error; 322 return error;
924} 323}
@@ -938,19 +337,16 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
938 struct iattr newattrs; 337 struct iattr newattrs;
939 int rc; 338 int rc;
940 339
941 mutex_lock(&sysfs_mutex); 340 sd = kernfs_find_and_get(kobj->sd, attr->name);
942
943 rc = -ENOENT;
944 sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
945 if (!sd) 341 if (!sd)
946 goto out; 342 return -ENOENT;
947 343
948 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO); 344 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
949 newattrs.ia_valid = ATTR_MODE; 345 newattrs.ia_valid = ATTR_MODE;
950 rc = sysfs_sd_setattr(sd, &newattrs);
951 346
952 out: 347 rc = kernfs_setattr(sd, &newattrs);
953 mutex_unlock(&sysfs_mutex); 348
349 kernfs_put(sd);
954 return rc; 350 return rc;
955} 351}
956EXPORT_SYMBOL_GPL(sysfs_chmod_file); 352EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -968,7 +364,7 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
968{ 364{
969 struct sysfs_dirent *dir_sd = kobj->sd; 365 struct sysfs_dirent *dir_sd = kobj->sd;
970 366
971 sysfs_hash_and_remove(dir_sd, attr->name, ns); 367 kernfs_remove_by_name_ns(dir_sd, attr->name, ns);
972} 368}
973EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); 369EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
974 370
@@ -991,13 +387,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
991{ 387{
992 struct sysfs_dirent *dir_sd; 388 struct sysfs_dirent *dir_sd;
993 389
994 if (group) 390 if (group) {
995 dir_sd = sysfs_get_dirent(kobj->sd, group); 391 dir_sd = kernfs_find_and_get(kobj->sd, group);
996 else 392 } else {
997 dir_sd = sysfs_get(kobj->sd); 393 dir_sd = kobj->sd;
394 kernfs_get(dir_sd);
395 }
396
998 if (dir_sd) { 397 if (dir_sd) {
999 sysfs_hash_and_remove(dir_sd, attr->name, NULL); 398 kernfs_remove_by_name(dir_sd, attr->name);
1000 sysfs_put(dir_sd); 399 kernfs_put(dir_sd);
1001 } 400 }
1002} 401}
1003EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 402EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
@@ -1012,7 +411,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
1012{ 411{
1013 BUG_ON(!kobj || !kobj->sd || !attr); 412 BUG_ON(!kobj || !kobj->sd || !attr);
1014 413
1015 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 414 return sysfs_add_file(kobj->sd, &attr->attr, true);
1016} 415}
1017EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 416EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1018 417
@@ -1024,7 +423,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1024void sysfs_remove_bin_file(struct kobject *kobj, 423void sysfs_remove_bin_file(struct kobject *kobj,
1025 const struct bin_attribute *attr) 424 const struct bin_attribute *attr)
1026{ 425{
1027 sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); 426 kernfs_remove_by_name(kobj->sd, attr->attr.name);
1028} 427}
1029EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); 428EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
1030 429
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10e38ce..7177532b8f7b 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
26 26
27 if (grp->attrs) 27 if (grp->attrs)
28 for (attr = grp->attrs; *attr; attr++) 28 for (attr = grp->attrs; *attr; attr++)
29 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 29 kernfs_remove_by_name(dir_sd, (*attr)->name);
30 if (grp->bin_attrs) 30 if (grp->bin_attrs)
31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) 31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
32 sysfs_remove_bin_file(kobj, *bin_attr); 32 sysfs_remove_bin_file(kobj, *bin_attr);
@@ -49,15 +49,13 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
49 * re-adding (if required) the file. 49 * re-adding (if required) the file.
50 */ 50 */
51 if (update) 51 if (update)
52 sysfs_hash_and_remove(dir_sd, (*attr)->name, 52 kernfs_remove_by_name(dir_sd, (*attr)->name);
53 NULL);
54 if (grp->is_visible) { 53 if (grp->is_visible) {
55 mode = grp->is_visible(kobj, *attr, i); 54 mode = grp->is_visible(kobj, *attr, i);
56 if (!mode) 55 if (!mode)
57 continue; 56 continue;
58 } 57 }
59 error = sysfs_add_file_mode_ns(dir_sd, *attr, 58 error = sysfs_add_file_mode_ns(dir_sd, *attr, false,
60 SYSFS_KOBJ_ATTR,
61 (*attr)->mode | mode, 59 (*attr)->mode | mode,
62 NULL); 60 NULL);
63 if (unlikely(error)) 61 if (unlikely(error))
@@ -102,18 +100,21 @@ static int internal_create_group(struct kobject *kobj, int update,
102 return -EINVAL; 100 return -EINVAL;
103 } 101 }
104 if (grp->name) { 102 if (grp->name) {
105 error = sysfs_create_subdir(kobj, grp->name, &sd); 103 sd = kernfs_create_dir(kobj->sd, grp->name, kobj);
106 if (error) 104 if (IS_ERR(sd)) {
107 return error; 105 if (PTR_ERR(sd) == -EEXIST)
106 sysfs_warn_dup(kobj->sd, grp->name);
107 return PTR_ERR(sd);
108 }
108 } else 109 } else
109 sd = kobj->sd; 110 sd = kobj->sd;
110 sysfs_get(sd); 111 kernfs_get(sd);
111 error = create_files(sd, kobj, grp, update); 112 error = create_files(sd, kobj, grp, update);
112 if (error) { 113 if (error) {
113 if (grp->name) 114 if (grp->name)
114 sysfs_remove(sd); 115 kernfs_remove(sd);
115 } 116 }
116 sysfs_put(sd); 117 kernfs_put(sd);
117 return error; 118 return error;
118} 119}
119 120
@@ -207,21 +208,23 @@ void sysfs_remove_group(struct kobject *kobj,
207 struct sysfs_dirent *sd; 208 struct sysfs_dirent *sd;
208 209
209 if (grp->name) { 210 if (grp->name) {
210 sd = sysfs_get_dirent(dir_sd, grp->name); 211 sd = kernfs_find_and_get(dir_sd, grp->name);
211 if (!sd) { 212 if (!sd) {
212 WARN(!sd, KERN_WARNING 213 WARN(!sd, KERN_WARNING
213 "sysfs group %p not found for kobject '%s'\n", 214 "sysfs group %p not found for kobject '%s'\n",
214 grp, kobject_name(kobj)); 215 grp, kobject_name(kobj));
215 return; 216 return;
216 } 217 }
217 } else 218 } else {
218 sd = sysfs_get(dir_sd); 219 sd = dir_sd;
220 kernfs_get(sd);
221 }
219 222
220 remove_files(sd, kobj, grp); 223 remove_files(sd, kobj, grp);
221 if (grp->name) 224 if (grp->name)
222 sysfs_remove(sd); 225 kernfs_remove(sd);
223 226
224 sysfs_put(sd); 227 kernfs_put(sd);
225} 228}
226EXPORT_SYMBOL_GPL(sysfs_remove_group); 229EXPORT_SYMBOL_GPL(sysfs_remove_group);
227 230
@@ -262,17 +265,17 @@ int sysfs_merge_group(struct kobject *kobj,
262 struct attribute *const *attr; 265 struct attribute *const *attr;
263 int i; 266 int i;
264 267
265 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 268 dir_sd = kernfs_find_and_get(kobj->sd, grp->name);
266 if (!dir_sd) 269 if (!dir_sd)
267 return -ENOENT; 270 return -ENOENT;
268 271
269 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) 272 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
270 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); 273 error = sysfs_add_file(dir_sd, *attr, false);
271 if (error) { 274 if (error) {
272 while (--i >= 0) 275 while (--i >= 0)
273 sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); 276 kernfs_remove_by_name(dir_sd, (*--attr)->name);
274 } 277 }
275 sysfs_put(dir_sd); 278 kernfs_put(dir_sd);
276 279
277 return error; 280 return error;
278} 281}
@@ -289,11 +292,11 @@ void sysfs_unmerge_group(struct kobject *kobj,
289 struct sysfs_dirent *dir_sd; 292 struct sysfs_dirent *dir_sd;
290 struct attribute *const *attr; 293 struct attribute *const *attr;
291 294
292 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 295 dir_sd = kernfs_find_and_get(kobj->sd, grp->name);
293 if (dir_sd) { 296 if (dir_sd) {
294 for (attr = grp->attrs; *attr; ++attr) 297 for (attr = grp->attrs; *attr; ++attr)
295 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 298 kernfs_remove_by_name(dir_sd, (*attr)->name);
296 sysfs_put(dir_sd); 299 kernfs_put(dir_sd);
297 } 300 }
298} 301}
299EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 302EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
@@ -311,12 +314,12 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
311 struct sysfs_dirent *dir_sd; 314 struct sysfs_dirent *dir_sd;
312 int error = 0; 315 int error = 0;
313 316
314 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 317 dir_sd = kernfs_find_and_get(kobj->sd, group_name);
315 if (!dir_sd) 318 if (!dir_sd)
316 return -ENOENT; 319 return -ENOENT;
317 320
318 error = sysfs_create_link_sd(dir_sd, target, link_name); 321 error = sysfs_create_link_sd(dir_sd, target, link_name);
319 sysfs_put(dir_sd); 322 kernfs_put(dir_sd);
320 323
321 return error; 324 return error;
322} 325}
@@ -333,10 +336,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
333{ 336{
334 struct sysfs_dirent *dir_sd; 337 struct sysfs_dirent *dir_sd;
335 338
336 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 339 dir_sd = kernfs_find_and_get(kobj->sd, group_name);
337 if (dir_sd) { 340 if (dir_sd) {
338 sysfs_hash_and_remove(dir_sd, link_name, NULL); 341 kernfs_remove_by_name(dir_sd, link_name);
339 sysfs_put(dir_sd); 342 kernfs_put(dir_sd);
340 } 343 }
341} 344}
342EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); 345EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 834ec2cdb7a3..e7e3aa8e7b78 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -14,146 +14,39 @@
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/pagemap.h>
18#include <linux/init.h> 17#include <linux/init.h>
19#include <linux/module.h>
20#include <linux/magic.h>
21#include <linux/slab.h>
22#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
23 19
24#include "sysfs.h" 20#include "sysfs.h"
25 21
26 22static struct kernfs_root *sysfs_root;
27static struct vfsmount *sysfs_mnt; 23struct sysfs_dirent *sysfs_root_sd;
28struct kmem_cache *sysfs_dir_cachep;
29
30static const struct super_operations sysfs_ops = {
31 .statfs = simple_statfs,
32 .drop_inode = generic_delete_inode,
33 .evict_inode = sysfs_evict_inode,
34};
35
36struct sysfs_dirent sysfs_root = {
37 .s_name = "",
38 .s_count = ATOMIC_INIT(1),
39 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
40 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
41 .s_ino = 1,
42};
43
44static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
45{
46 struct inode *inode;
47 struct dentry *root;
48
49 sb->s_blocksize = PAGE_CACHE_SIZE;
50 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
51 sb->s_magic = SYSFS_MAGIC;
52 sb->s_op = &sysfs_ops;
53 sb->s_time_gran = 1;
54
55 /* get root inode, initialize and unlock it */
56 mutex_lock(&sysfs_mutex);
57 inode = sysfs_get_inode(sb, &sysfs_root);
58 mutex_unlock(&sysfs_mutex);
59 if (!inode) {
60 pr_debug("sysfs: could not get root inode\n");
61 return -ENOMEM;
62 }
63
64 /* instantiate and link root dentry */
65 root = d_make_root(inode);
66 if (!root) {
67 pr_debug("%s: could not get root dentry!\n", __func__);
68 return -ENOMEM;
69 }
70 root->d_fsdata = &sysfs_root;
71 sb->s_root = root;
72 sb->s_d_op = &sysfs_dentry_ops;
73 return 0;
74}
75
76static int sysfs_test_super(struct super_block *sb, void *data)
77{
78 struct sysfs_super_info *sb_info = sysfs_info(sb);
79 struct sysfs_super_info *info = data;
80 enum kobj_ns_type type;
81 int found = 1;
82
83 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
84 if (sb_info->ns[type] != info->ns[type])
85 found = 0;
86 }
87 return found;
88}
89
90static int sysfs_set_super(struct super_block *sb, void *data)
91{
92 int error;
93 error = set_anon_super(sb, data);
94 if (!error)
95 sb->s_fs_info = data;
96 return error;
97}
98
99static void free_sysfs_super_info(struct sysfs_super_info *info)
100{
101 int type;
102 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
103 kobj_ns_drop(type, info->ns[type]);
104 kfree(info);
105}
106 24
107static struct dentry *sysfs_mount(struct file_system_type *fs_type, 25static struct dentry *sysfs_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data) 26 int flags, const char *dev_name, void *data)
109{ 27{
110 struct sysfs_super_info *info; 28 struct dentry *root;
111 enum kobj_ns_type type; 29 void *ns;
112 struct super_block *sb;
113 int error;
114 30
115 if (!(flags & MS_KERNMOUNT)) { 31 if (!(flags & MS_KERNMOUNT)) {
116 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 32 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
117 return ERR_PTR(-EPERM); 33 return ERR_PTR(-EPERM);
118 34
119 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { 35 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
120 if (!kobj_ns_current_may_mount(type)) 36 return ERR_PTR(-EPERM);
121 return ERR_PTR(-EPERM);
122 }
123 }
124
125 info = kzalloc(sizeof(*info), GFP_KERNEL);
126 if (!info)
127 return ERR_PTR(-ENOMEM);
128
129 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
130 info->ns[type] = kobj_ns_grab_current(type);
131
132 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
133 if (IS_ERR(sb) || sb->s_fs_info != info)
134 free_sysfs_super_info(info);
135 if (IS_ERR(sb))
136 return ERR_CAST(sb);
137 if (!sb->s_root) {
138 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
139 if (error) {
140 deactivate_locked_super(sb);
141 return ERR_PTR(error);
142 }
143 sb->s_flags |= MS_ACTIVE;
144 } 37 }
145 38
146 return dget(sb->s_root); 39 ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
40 root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns);
41 if (IS_ERR(root))
42 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
43 return root;
147} 44}
148 45
149static void sysfs_kill_sb(struct super_block *sb) 46static void sysfs_kill_sb(struct super_block *sb)
150{ 47{
151 struct sysfs_super_info *info = sysfs_info(sb); 48 kernfs_kill_sb(sb);
152 /* Remove the superblock from fs_supers/s_instances 49 kobj_ns_drop(KOBJ_NS_TYPE_NET, (void *)kernfs_super_ns(sb));
153 * so we can't find it, before freeing sysfs_super_info.
154 */
155 kill_anon_super(sb);
156 free_sysfs_super_info(info);
157} 50}
158 51
159static struct file_system_type sysfs_fs_type = { 52static struct file_system_type sysfs_fs_type = {
@@ -165,48 +58,19 @@ static struct file_system_type sysfs_fs_type = {
165 58
166int __init sysfs_init(void) 59int __init sysfs_init(void)
167{ 60{
168 int err = -ENOMEM; 61 int err;
169 62
170 sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", 63 sysfs_root = kernfs_create_root(NULL);
171 sizeof(struct sysfs_dirent), 64 if (IS_ERR(sysfs_root))
172 0, 0, NULL); 65 return PTR_ERR(sysfs_root);
173 if (!sysfs_dir_cachep)
174 goto out;
175 66
176 err = sysfs_inode_init(); 67 sysfs_root_sd = sysfs_root->sd;
177 if (err)
178 goto out_err;
179 68
180 err = register_filesystem(&sysfs_fs_type); 69 err = register_filesystem(&sysfs_fs_type);
181 if (!err) { 70 if (err) {
182 sysfs_mnt = kern_mount(&sysfs_fs_type); 71 kernfs_destroy_root(sysfs_root);
183 if (IS_ERR(sysfs_mnt)) { 72 return err;
184 printk(KERN_ERR "sysfs: could not mount!\n"); 73 }
185 err = PTR_ERR(sysfs_mnt);
186 sysfs_mnt = NULL;
187 unregister_filesystem(&sysfs_fs_type);
188 goto out_err;
189 }
190 } else
191 goto out_err;
192out:
193 return err;
194out_err:
195 kmem_cache_destroy(sysfs_dir_cachep);
196 sysfs_dir_cachep = NULL;
197 goto out;
198}
199
200#undef sysfs_get
201struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
202{
203 return __sysfs_get(sd);
204}
205EXPORT_SYMBOL_GPL(sysfs_get);
206 74
207#undef sysfs_put 75 return 0;
208void sysfs_put(struct sysfs_dirent *sd)
209{
210 __sysfs_put(sd);
211} 76}
212EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1bf1a09..1b8c9ed8511a 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,11 +11,8 @@
11 */ 11 */
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/gfp.h>
15#include <linux/mount.h>
16#include <linux/module.h> 14#include <linux/module.h>
17#include <linux/kobject.h> 15#include <linux/kobject.h>
18#include <linux/namei.h>
19#include <linux/mutex.h> 16#include <linux/mutex.h>
20#include <linux/security.h> 17#include <linux/security.h>
21 18
@@ -25,11 +22,7 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 struct kobject *target, 22 struct kobject *target,
26 const char *name, int warn) 23 const char *name, int warn)
27{ 24{
28 struct sysfs_dirent *target_sd = NULL; 25 struct sysfs_dirent *sd, *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type;
32 int error;
33 26
34 BUG_ON(!name || !parent_sd); 27 BUG_ON(!name || !parent_sd);
35 28
@@ -39,53 +32,24 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
39 * sysfs_remove_dir() for details. 32 * sysfs_remove_dir() for details.
40 */ 33 */
41 spin_lock(&sysfs_symlink_target_lock); 34 spin_lock(&sysfs_symlink_target_lock);
42 if (target->sd) 35 if (target->sd) {
43 target_sd = sysfs_get(target->sd); 36 target_sd = target->sd;
37 kernfs_get(target_sd);
38 }
44 spin_unlock(&sysfs_symlink_target_lock); 39 spin_unlock(&sysfs_symlink_target_lock);
45 40
46 error = -ENOENT;
47 if (!target_sd) 41 if (!target_sd)
48 goto out_put; 42 return -ENOENT;
49
50 error = -ENOMEM;
51 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
52 if (!sd)
53 goto out_put;
54 43
55 ns_type = sysfs_ns_type(parent_sd); 44 sd = kernfs_create_link(parent_sd, name, target_sd);
56 if (ns_type) 45 kernfs_put(target_sd);
57 sd->s_ns = target_sd->s_ns;
58 sd->s_symlink.target_sd = target_sd;
59 target_sd = NULL; /* reference is now owned by the symlink */
60
61 sysfs_addrm_start(&acxt);
62 /* Symlinks must be between directories with the same ns_type */
63 if (!ns_type ||
64 (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
65 if (warn)
66 error = sysfs_add_one(&acxt, sd, parent_sd);
67 else
68 error = __sysfs_add_one(&acxt, sd, parent_sd);
69 } else {
70 error = -EINVAL;
71 WARN(1, KERN_WARNING
72 "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
73 parent_sd->s_name,
74 sd->s_name,
75 sd->s_symlink.target_sd->s_parent->s_name,
76 sd->s_symlink.target_sd->s_name);
77 }
78 sysfs_addrm_finish(&acxt);
79 46
80 if (error) 47 if (!IS_ERR(sd))
81 goto out_put; 48 return 0;
82 49
83 return 0; 50 if (warn && PTR_ERR(sd) == -EEXIST)
84 51 sysfs_warn_dup(parent_sd, name);
85 out_put: 52 return PTR_ERR(sd);
86 sysfs_put(target_sd);
87 sysfs_put(sd);
88 return error;
89} 53}
90 54
91/** 55/**
@@ -106,7 +70,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
106 struct sysfs_dirent *parent_sd = NULL; 70 struct sysfs_dirent *parent_sd = NULL;
107 71
108 if (!kobj) 72 if (!kobj)
109 parent_sd = &sysfs_root; 73 parent_sd = sysfs_root_sd;
110 else 74 else
111 parent_sd = kobj->sd; 75 parent_sd = kobj->sd;
112 76
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
164 * sysfs_remove_dir() for details. 128 * sysfs_remove_dir() for details.
165 */ 129 */
166 spin_lock(&sysfs_symlink_target_lock); 130 spin_lock(&sysfs_symlink_target_lock);
167 if (targ->sd && sysfs_ns_type(kobj->sd)) 131 if (targ->sd && kernfs_ns_enabled(kobj->sd))
168 ns = targ->sd->s_ns; 132 ns = targ->sd->s_ns;
169 spin_unlock(&sysfs_symlink_target_lock); 133 spin_unlock(&sysfs_symlink_target_lock);
170 sysfs_hash_and_remove(kobj->sd, name, ns); 134 kernfs_remove_by_name_ns(kobj->sd, name, ns);
171} 135}
172 136
173/** 137/**
@@ -180,11 +144,11 @@ void sysfs_remove_link(struct kobject *kobj, const char *name)
180 struct sysfs_dirent *parent_sd = NULL; 144 struct sysfs_dirent *parent_sd = NULL;
181 145
182 if (!kobj) 146 if (!kobj)
183 parent_sd = &sysfs_root; 147 parent_sd = sysfs_root_sd;
184 else 148 else
185 parent_sd = kobj->sd; 149 parent_sd = kobj->sd;
186 150
187 sysfs_hash_and_remove(parent_sd, name, NULL); 151 kernfs_remove_by_name(parent_sd, name);
188} 152}
189EXPORT_SYMBOL_GPL(sysfs_remove_link); 153EXPORT_SYMBOL_GPL(sysfs_remove_link);
190 154
@@ -206,7 +170,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
206 int result; 170 int result;
207 171
208 if (!kobj) 172 if (!kobj)
209 parent_sd = &sysfs_root; 173 parent_sd = sysfs_root_sd;
210 else 174 else
211 parent_sd = kobj->sd; 175 parent_sd = kobj->sd;
212 176
@@ -214,117 +178,20 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
214 old_ns = targ->sd->s_ns; 178 old_ns = targ->sd->s_ns;
215 179
216 result = -ENOENT; 180 result = -ENOENT;
217 sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); 181 sd = kernfs_find_and_get_ns(parent_sd, old, old_ns);
218 if (!sd) 182 if (!sd)
219 goto out; 183 goto out;
220 184
221 result = -EINVAL; 185 result = -EINVAL;
222 if (sysfs_type(sd) != SYSFS_KOBJ_LINK) 186 if (sysfs_type(sd) != SYSFS_KOBJ_LINK)
223 goto out; 187 goto out;
224 if (sd->s_symlink.target_sd->s_dir.kobj != targ) 188 if (sd->s_symlink.target_sd->priv != targ)
225 goto out; 189 goto out;
226 190
227 result = sysfs_rename(sd, parent_sd, new, new_ns); 191 result = kernfs_rename_ns(sd, parent_sd, new, new_ns);
228 192
229out: 193out:
230 sysfs_put(sd); 194 kernfs_put(sd);
231 return result; 195 return result;
232} 196}
233EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); 197EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
234
235static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
236 struct sysfs_dirent *target_sd, char *path)
237{
238 struct sysfs_dirent *base, *sd;
239 char *s = path;
240 int len = 0;
241
242 /* go up to the root, stop at the base */
243 base = parent_sd;
244 while (base->s_parent) {
245 sd = target_sd->s_parent;
246 while (sd->s_parent && base != sd)
247 sd = sd->s_parent;
248
249 if (base == sd)
250 break;
251
252 strcpy(s, "../");
253 s += 3;
254 base = base->s_parent;
255 }
256
257 /* determine end of target string for reverse fillup */
258 sd = target_sd;
259 while (sd->s_parent && sd != base) {
260 len += strlen(sd->s_name) + 1;
261 sd = sd->s_parent;
262 }
263
264 /* check limits */
265 if (len < 2)
266 return -EINVAL;
267 len--;
268 if ((s - path) + len > PATH_MAX)
269 return -ENAMETOOLONG;
270
271 /* reverse fillup of target string from target to base */
272 sd = target_sd;
273 while (sd->s_parent && sd != base) {
274 int slen = strlen(sd->s_name);
275
276 len -= slen;
277 strncpy(s + len, sd->s_name, slen);
278 if (len)
279 s[--len] = '/';
280
281 sd = sd->s_parent;
282 }
283
284 return 0;
285}
286
287static int sysfs_getlink(struct dentry *dentry, char *path)
288{
289 struct sysfs_dirent *sd = dentry->d_fsdata;
290 struct sysfs_dirent *parent_sd = sd->s_parent;
291 struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
292 int error;
293
294 mutex_lock(&sysfs_mutex);
295 error = sysfs_get_target_path(parent_sd, target_sd, path);
296 mutex_unlock(&sysfs_mutex);
297
298 return error;
299}
300
301static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
302{
303 int error = -ENOMEM;
304 unsigned long page = get_zeroed_page(GFP_KERNEL);
305 if (page) {
306 error = sysfs_getlink(dentry, (char *) page);
307 if (error < 0)
308 free_page((unsigned long)page);
309 }
310 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
311 return NULL;
312}
313
314static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
315 void *cookie)
316{
317 char *page = nd_get_link(nd);
318 if (!IS_ERR(page))
319 free_page((unsigned long)page);
320}
321
322const struct inode_operations sysfs_symlink_inode_operations = {
323 .setxattr = sysfs_setxattr,
324 .readlink = generic_readlink,
325 .follow_link = sysfs_follow_link,
326 .put_link = sysfs_put_link,
327 .setattr = sysfs_setattr,
328 .getattr = sysfs_getattr,
329 .permission = sysfs_permission,
330};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fbfb3f6..c8e395b49330 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,248 +8,36 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h> 11#ifndef __SYSFS_INTERNAL_H
12#include <linux/kobject_ns.h> 12#define __SYSFS_INTERNAL_H
13#include <linux/fs.h>
14#include <linux/rbtree.h>
15 13
16struct sysfs_open_dirent; 14#include <linux/sysfs.h>
17
18/* type-specific structures for sysfs_dirent->s_* union members */
19struct sysfs_elem_dir {
20 struct kobject *kobj;
21
22 unsigned long subdirs;
23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root children;
25};
26
27struct sysfs_elem_symlink {
28 struct sysfs_dirent *target_sd;
29};
30
31struct sysfs_elem_attr {
32 union {
33 struct attribute *attr;
34 struct bin_attribute *bin_attr;
35 };
36 struct sysfs_open_dirent *open;
37};
38
39struct sysfs_inode_attrs {
40 struct iattr ia_iattr;
41 void *ia_secdata;
42 u32 ia_secdata_len;
43};
44
45/*
46 * sysfs_dirent - the building block of sysfs hierarchy. Each and
47 * every sysfs node is represented by single sysfs_dirent.
48 *
49 * As long as s_count reference is held, the sysfs_dirent itself is
50 * accessible. Dereferencing s_elem or any other outer entity
51 * requires s_active reference.
52 */
53struct sysfs_dirent {
54 atomic_t s_count;
55 atomic_t s_active;
56#ifdef CONFIG_DEBUG_LOCK_ALLOC
57 struct lockdep_map dep_map;
58#endif
59 struct sysfs_dirent *s_parent;
60 const char *s_name;
61
62 struct rb_node s_rb;
63
64 union {
65 struct completion *completion;
66 struct sysfs_dirent *removed_list;
67 } u;
68
69 const void *s_ns; /* namespace tag */
70 unsigned int s_hash; /* ns + name hash */
71 union {
72 struct sysfs_elem_dir s_dir;
73 struct sysfs_elem_symlink s_symlink;
74 struct sysfs_elem_attr s_attr;
75 };
76
77 unsigned short s_flags;
78 umode_t s_mode;
79 unsigned int s_ino;
80 struct sysfs_inode_attrs *s_iattr;
81};
82
83#define SD_DEACTIVATED_BIAS INT_MIN
84
85#define SYSFS_TYPE_MASK 0x00ff
86#define SYSFS_DIR 0x0001
87#define SYSFS_KOBJ_ATTR 0x0002
88#define SYSFS_KOBJ_BIN_ATTR 0x0004
89#define SYSFS_KOBJ_LINK 0x0008
90#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
91#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
92
93/* identify any namespace tag on sysfs_dirents */
94#define SYSFS_NS_TYPE_MASK 0xf00
95#define SYSFS_NS_TYPE_SHIFT 8
96
97#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
98#define SYSFS_FLAG_REMOVED 0x02000
99
100static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
101{
102 return sd->s_flags & SYSFS_TYPE_MASK;
103}
104
105/*
106 * Return any namespace tags on this dirent.
107 * enum kobj_ns_type is defined in linux/kobject.h
108 */
109static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
110{
111 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
112}
113
114#ifdef CONFIG_DEBUG_LOCK_ALLOC
115
116#define sysfs_dirent_init_lockdep(sd) \
117do { \
118 struct attribute *attr = sd->s_attr.attr; \
119 struct lock_class_key *key = attr->key; \
120 if (!key) \
121 key = &attr->skey; \
122 \
123 lockdep_init_map(&sd->dep_map, "s_active", key, 0); \
124} while (0)
125
126/* Test for attributes that want to ignore lockdep for read-locking */
127static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
128{
129 int type = sysfs_type(sd);
130
131 return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
132 sd->s_attr.attr->ignore_lockdep;
133}
134
135#else
136
137#define sysfs_dirent_init_lockdep(sd) do {} while (0)
138
139static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
140{
141 return true;
142}
143
144#endif
145
146/*
147 * Context structure to be used while adding/removing nodes.
148 */
149struct sysfs_addrm_cxt {
150 struct sysfs_dirent *removed;
151};
152 15
153/* 16/*
154 * mount.c 17 * mount.c
155 */ 18 */
156 19extern struct sysfs_dirent *sysfs_root_sd;
157/*
158 * Each sb is associated with a set of namespace tags (i.e.
159 * the network namespace of the task which mounted this sysfs
160 * instance).
161 */
162struct sysfs_super_info {
163 void *ns[KOBJ_NS_TYPES];
164};
165#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
166extern struct sysfs_dirent sysfs_root;
167extern struct kmem_cache *sysfs_dir_cachep;
168 20
169/* 21/*
170 * dir.c 22 * dir.c
171 */ 23 */
172extern struct mutex sysfs_mutex;
173extern spinlock_t sysfs_symlink_target_lock; 24extern spinlock_t sysfs_symlink_target_lock;
174extern const struct dentry_operations sysfs_dentry_ops;
175
176extern const struct file_operations sysfs_dir_operations;
177extern const struct inode_operations sysfs_dir_inode_operations;
178 25
179struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
180void sysfs_put_active(struct sysfs_dirent *sd);
181void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
182void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); 26void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
183int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
184 struct sysfs_dirent *parent_sd);
185int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
186 struct sysfs_dirent *parent_sd);
187void sysfs_remove(struct sysfs_dirent *sd);
188int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
189 const void *ns);
190void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
191
192struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
193 const unsigned char *name,
194 const void *ns);
195struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
196
197void release_sysfs_dirent(struct sysfs_dirent *sd);
198
199int sysfs_create_subdir(struct kobject *kobj, const char *name,
200 struct sysfs_dirent **p_sd);
201
202int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
203 const char *new_name, const void *new_ns);
204
205static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
206{
207 if (sd) {
208 WARN_ON(!atomic_read(&sd->s_count));
209 atomic_inc(&sd->s_count);
210 }
211 return sd;
212}
213#define sysfs_get(sd) __sysfs_get(sd)
214
215static inline void __sysfs_put(struct sysfs_dirent *sd)
216{
217 if (sd && atomic_dec_and_test(&sd->s_count))
218 release_sysfs_dirent(sd);
219}
220#define sysfs_put(sd) __sysfs_put(sd)
221
222/*
223 * inode.c
224 */
225struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
226void sysfs_evict_inode(struct inode *inode);
227int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
228int sysfs_permission(struct inode *inode, int mask);
229int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
230int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
231 struct kstat *stat);
232int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
233 size_t size, int flags);
234int sysfs_inode_init(void);
235 27
236/* 28/*
237 * file.c 29 * file.c
238 */ 30 */
239extern const struct file_operations sysfs_file_operations;
240extern const struct file_operations sysfs_bin_operations;
241
242int sysfs_add_file(struct sysfs_dirent *dir_sd, 31int sysfs_add_file(struct sysfs_dirent *dir_sd,
243 const struct attribute *attr, int type); 32 const struct attribute *attr, bool is_bin);
244
245int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, 33int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
246 const struct attribute *attr, int type, 34 const struct attribute *attr, bool is_bin,
247 umode_t amode, const void *ns); 35 umode_t amode, const void *ns);
248void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
249 36
250/* 37/*
251 * symlink.c 38 * symlink.c
252 */ 39 */
253extern const struct inode_operations sysfs_symlink_inode_operations;
254int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, 40int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
255 const char *name); 41 const char *name);
42
43#endif /* __SYSFS_INTERNAL_H */
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index e154c1005cd1..59529330efd6 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -68,4 +68,11 @@ static inline void release_firmware(const struct firmware *fw)
68 68
69#endif 69#endif
70 70
71#ifdef CONFIG_FW_LOADER_USER_HELPER
72int request_firmware_direct(const struct firmware **fw, const char *name,
73 struct device *device);
74#else
75#define request_firmware_direct request_firmware
76#endif
77
71#endif 78#endif
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
new file mode 100644
index 000000000000..d65541308419
--- /dev/null
+++ b/include/linux/kernfs.h
@@ -0,0 +1,356 @@
1/*
2 * kernfs.h - pseudo filesystem decoupled from vfs locking
3 *
4 * This file is released under the GPLv2.
5 */
6
7#ifndef __LINUX_KERNFS_H
8#define __LINUX_KERNFS_H
9
10#include <linux/kernel.h>
11#include <linux/err.h>
12#include <linux/list.h>
13#include <linux/mutex.h>
14#include <linux/idr.h>
15#include <linux/lockdep.h>
16#include <linux/rbtree.h>
17#include <linux/atomic.h>
18#include <linux/completion.h>
19
20struct file;
21struct iattr;
22struct seq_file;
23struct vm_area_struct;
24struct super_block;
25struct file_system_type;
26
27struct sysfs_open_dirent;
28struct sysfs_inode_attrs;
29
30enum kernfs_node_type {
31 SYSFS_DIR = 0x0001,
32 SYSFS_KOBJ_ATTR = 0x0002,
33 SYSFS_KOBJ_LINK = 0x0004,
34};
35
36#define SYSFS_TYPE_MASK 0x000f
37#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
38#define SYSFS_ACTIVE_REF SYSFS_KOBJ_ATTR
39#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
40
41enum kernfs_node_flag {
42 SYSFS_FLAG_REMOVED = 0x0010,
43 SYSFS_FLAG_NS = 0x0020,
44 SYSFS_FLAG_HAS_SEQ_SHOW = 0x0040,
45 SYSFS_FLAG_HAS_MMAP = 0x0080,
46 SYSFS_FLAG_LOCKDEP = 0x0100,
47};
48
49/* type-specific structures for sysfs_dirent->s_* union members */
50struct sysfs_elem_dir {
51 unsigned long subdirs;
52 /* children rbtree starts here and goes through sd->s_rb */
53 struct rb_root children;
54
55 /*
56 * The kernfs hierarchy this directory belongs to. This fits
57 * better directly in sysfs_dirent but is here to save space.
58 */
59 struct kernfs_root *root;
60};
61
62struct sysfs_elem_symlink {
63 struct sysfs_dirent *target_sd;
64};
65
66struct sysfs_elem_attr {
67 const struct kernfs_ops *ops;
68 struct sysfs_open_dirent *open;
69 loff_t size;
70};
71
72/*
73 * sysfs_dirent - the building block of sysfs hierarchy. Each and every
74 * sysfs node is represented by single sysfs_dirent. Most fields are
75 * private to kernfs and shouldn't be accessed directly by kernfs users.
76 *
77 * As long as s_count reference is held, the sysfs_dirent itself is
78 * accessible. Dereferencing s_elem or any other outer entity
79 * requires s_active reference.
80 */
81struct sysfs_dirent {
82 atomic_t s_count;
83 atomic_t s_active;
84#ifdef CONFIG_DEBUG_LOCK_ALLOC
85 struct lockdep_map dep_map;
86#endif
87 /* the following two fields are published */
88 struct sysfs_dirent *s_parent;
89 const char *s_name;
90
91 struct rb_node s_rb;
92
93 union {
94 struct completion *completion;
95 struct sysfs_dirent *removed_list;
96 } u;
97
98 const void *s_ns; /* namespace tag */
99 unsigned int s_hash; /* ns + name hash */
100 union {
101 struct sysfs_elem_dir s_dir;
102 struct sysfs_elem_symlink s_symlink;
103 struct sysfs_elem_attr s_attr;
104 };
105
106 void *priv;
107
108 unsigned short s_flags;
109 umode_t s_mode;
110 unsigned int s_ino;
111 struct sysfs_inode_attrs *s_iattr;
112};
113
114struct kernfs_root {
115 /* published fields */
116 struct sysfs_dirent *sd;
117
118 /* private fields, do not use outside kernfs proper */
119 struct ida ino_ida;
120};
121
122struct sysfs_open_file {
123 /* published fields */
124 struct sysfs_dirent *sd;
125 struct file *file;
126
127 /* private fields, do not use outside kernfs proper */
128 struct mutex mutex;
129 int event;
130 struct list_head list;
131
132 bool mmapped;
133 const struct vm_operations_struct *vm_ops;
134};
135
136struct kernfs_ops {
137 /*
138 * Read is handled by either seq_file or raw_read().
139 *
140 * If seq_show() is present, seq_file path is active. Other seq
141 * operations are optional and if not implemented, the behavior is
142 * equivalent to single_open(). @sf->private points to the
143 * associated sysfs_open_file.
144 *
145 * read() is bounced through kernel buffer and a read larger than
146 * PAGE_SIZE results in partial operation of PAGE_SIZE.
147 */
148 int (*seq_show)(struct seq_file *sf, void *v);
149
150 void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
151 void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
152 void (*seq_stop)(struct seq_file *sf, void *v);
153
154 ssize_t (*read)(struct sysfs_open_file *of, char *buf, size_t bytes,
155 loff_t off);
156
157 /*
158 * write() is bounced through kernel buffer and a write larger than
159 * PAGE_SIZE results in partial operation of PAGE_SIZE.
160 */
161 ssize_t (*write)(struct sysfs_open_file *of, char *buf, size_t bytes,
162 loff_t off);
163
164 int (*mmap)(struct sysfs_open_file *of, struct vm_area_struct *vma);
165
166#ifdef CONFIG_DEBUG_LOCK_ALLOC
167 struct lock_class_key lockdep_key;
168#endif
169};
170
171#ifdef CONFIG_SYSFS
172
173static inline enum kernfs_node_type sysfs_type(struct sysfs_dirent *sd)
174{
175 return sd->s_flags & SYSFS_TYPE_MASK;
176}
177
178/**
179 * kernfs_enable_ns - enable namespace under a directory
180 * @sd: directory of interest, should be empty
181 *
182 * This is to be called right after @sd is created to enable namespace
183 * under it. All children of @sd must have non-NULL namespace tags and
184 * only the ones which match the super_block's tag will be visible.
185 */
186static inline void kernfs_enable_ns(struct sysfs_dirent *sd)
187{
188 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
189 WARN_ON_ONCE(!RB_EMPTY_ROOT(&sd->s_dir.children));
190 sd->s_flags |= SYSFS_FLAG_NS;
191}
192
193/**
194 * kernfs_ns_enabled - test whether namespace is enabled
195 * @sd: the node to test
196 *
197 * Test whether namespace filtering is enabled for the children of @ns.
198 */
199static inline bool kernfs_ns_enabled(struct sysfs_dirent *sd)
200{
201 return sd->s_flags & SYSFS_FLAG_NS;
202}
203
204struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent,
205 const char *name, const void *ns);
206void kernfs_get(struct sysfs_dirent *sd);
207void kernfs_put(struct sysfs_dirent *sd);
208
209struct kernfs_root *kernfs_create_root(void *priv);
210void kernfs_destroy_root(struct kernfs_root *root);
211
212struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent,
213 const char *name, void *priv,
214 const void *ns);
215struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
216 const char *name,
217 umode_t mode, loff_t size,
218 const struct kernfs_ops *ops,
219 void *priv, const void *ns,
220 struct lock_class_key *key);
221struct sysfs_dirent *kernfs_create_link(struct sysfs_dirent *parent,
222 const char *name,
223 struct sysfs_dirent *target);
224void kernfs_remove(struct sysfs_dirent *sd);
225int kernfs_remove_by_name_ns(struct sysfs_dirent *parent, const char *name,
226 const void *ns);
227int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent,
228 const char *new_name, const void *new_ns);
229int kernfs_setattr(struct sysfs_dirent *sd, const struct iattr *iattr);
230void kernfs_notify(struct sysfs_dirent *sd);
231
232const void *kernfs_super_ns(struct super_block *sb);
233struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
234 struct kernfs_root *root, const void *ns);
235void kernfs_kill_sb(struct super_block *sb);
236
237void kernfs_init(void);
238
239#else /* CONFIG_SYSFS */
240
241static inline enum kernfs_node_type sysfs_type(struct sysfs_dirent *sd)
242{ return 0; } /* whatever */
243
244static inline void kernfs_enable_ns(struct sysfs_dirent *sd) { }
245
246static inline bool kernfs_ns_enabled(struct sysfs_dirent *sd)
247{ return false; }
248
249static inline struct sysfs_dirent *
250kernfs_find_and_get_ns(struct sysfs_dirent *parent, const char *name,
251 const void *ns)
252{ return NULL; }
253
254static inline void kernfs_get(struct sysfs_dirent *sd) { }
255static inline void kernfs_put(struct sysfs_dirent *sd) { }
256
257static inline struct kernfs_root *kernfs_create_root(void *priv)
258{ return ERR_PTR(-ENOSYS); }
259
260static inline void kernfs_destroy_root(struct kernfs_root *root) { }
261
262static inline struct sysfs_dirent *
263kernfs_create_dir_ns(struct sysfs_dirent *parent, const char *name, void *priv,
264 const void *ns)
265{ return ERR_PTR(-ENOSYS); }
266
267static inline struct sysfs_dirent *
268kernfs_create_file_ns_key(struct sysfs_dirent *parent, const char *name,
269 umode_t mode, loff_t size,
270 const struct kernfs_ops *ops, void *priv,
271 const void *ns, struct lock_class_key *key)
272{ return ERR_PTR(-ENOSYS); }
273
274static inline struct sysfs_dirent *
275kernfs_create_link(struct sysfs_dirent *parent, const char *name,
276 struct sysfs_dirent *target)
277{ return ERR_PTR(-ENOSYS); }
278
279static inline void kernfs_remove(struct sysfs_dirent *sd) { }
280
281static inline int kernfs_remove_by_name_ns(struct sysfs_dirent *parent,
282 const char *name, const void *ns)
283{ return -ENOSYS; }
284
285static inline int kernfs_rename_ns(struct sysfs_dirent *sd,
286 struct sysfs_dirent *new_parent,
287 const char *new_name, const void *new_ns)
288{ return -ENOSYS; }
289
290static inline int kernfs_setattr(struct sysfs_dirent *sd,
291 const struct iattr *iattr)
292{ return -ENOSYS; }
293
294static inline void kernfs_notify(struct sysfs_dirent *sd) { }
295
296static inline const void *kernfs_super_ns(struct super_block *sb)
297{ return NULL; }
298
299static inline struct dentry *
300kernfs_mount_ns(struct file_system_type *fs_type, int flags,
301 struct kernfs_root *root, const void *ns)
302{ return ERR_PTR(-ENOSYS); }
303
304static inline void kernfs_kill_sb(struct super_block *sb) { }
305
306static inline void kernfs_init(void) { }
307
308#endif /* CONFIG_SYSFS */
309
310static inline struct sysfs_dirent *
311kernfs_find_and_get(struct sysfs_dirent *sd, const char *name)
312{
313 return kernfs_find_and_get_ns(sd, name, NULL);
314}
315
316static inline struct sysfs_dirent *
317kernfs_create_dir(struct sysfs_dirent *parent, const char *name, void *priv)
318{
319 return kernfs_create_dir_ns(parent, name, priv, NULL);
320}
321
322static inline struct sysfs_dirent *
323kernfs_create_file_ns(struct sysfs_dirent *parent, const char *name,
324 umode_t mode, loff_t size, const struct kernfs_ops *ops,
325 void *priv, const void *ns)
326{
327 struct lock_class_key *key = NULL;
328
329#ifdef CONFIG_DEBUG_LOCK_ALLOC
330 key = (struct lock_class_key *)&ops->lockdep_key;
331#endif
332 return kernfs_create_file_ns_key(parent, name, mode, size, ops, priv,
333 ns, key);
334}
335
336static inline struct sysfs_dirent *
337kernfs_create_file(struct sysfs_dirent *parent, const char *name, umode_t mode,
338 loff_t size, const struct kernfs_ops *ops, void *priv)
339{
340 return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
341}
342
343static inline int kernfs_remove_by_name(struct sysfs_dirent *parent,
344 const char *name)
345{
346 return kernfs_remove_by_name_ns(parent, name, NULL);
347}
348
349static inline struct dentry *
350kernfs_mount(struct file_system_type *fs_type, int flags,
351 struct kernfs_root *root)
352{
353 return kernfs_mount_ns(fs_type, flags, root, NULL);
354}
355
356#endif /* __LINUX_KERNFS_H */
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 6695040a0317..cd8f90bf51a7 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -12,6 +12,7 @@
12#ifndef _SYSFS_H_ 12#ifndef _SYSFS_H_
13#define _SYSFS_H_ 13#define _SYSFS_H_
14 14
15#include <linux/kernfs.h>
15#include <linux/compiler.h> 16#include <linux/compiler.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/list.h> 18#include <linux/list.h>
@@ -175,8 +176,6 @@ struct sysfs_ops {
175 ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); 176 ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
176}; 177};
177 178
178struct sysfs_dirent;
179
180#ifdef CONFIG_SYSFS 179#ifdef CONFIG_SYSFS
181 180
182int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), 181int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -244,12 +243,6 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
244 const char *link_name); 243 const char *link_name);
245 244
246void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); 245void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
247void sysfs_notify_dirent(struct sysfs_dirent *sd);
248struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
249 const unsigned char *name,
250 const void *ns);
251struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
252void sysfs_put(struct sysfs_dirent *sd);
253 246
254int __must_check sysfs_init(void); 247int __must_check sysfs_init(void);
255 248
@@ -419,22 +412,6 @@ static inline void sysfs_notify(struct kobject *kobj, const char *dir,
419 const char *attr) 412 const char *attr)
420{ 413{
421} 414}
422static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
423{
424}
425static inline struct sysfs_dirent *
426sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, const unsigned char *name,
427 const void *ns)
428{
429 return NULL;
430}
431static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
432{
433 return NULL;
434}
435static inline void sysfs_put(struct sysfs_dirent *sd)
436{
437}
438 415
439static inline int __must_check sysfs_init(void) 416static inline int __must_check sysfs_init(void)
440{ 417{
@@ -461,10 +438,26 @@ static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target
461 return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL); 438 return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL);
462} 439}
463 440
441static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
442{
443 kernfs_notify(sd);
444}
445
464static inline struct sysfs_dirent * 446static inline struct sysfs_dirent *
465sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name) 447sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name)
466{ 448{
467 return sysfs_get_dirent_ns(parent_sd, name, NULL); 449 return kernfs_find_and_get(parent_sd, name);
450}
451
452static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
453{
454 kernfs_get(sd);
455 return sd;
456}
457
458static inline void sysfs_put(struct sysfs_dirent *sd)
459{
460 kernfs_put(sd);
468} 461}
469 462
470#endif /* _SYSFS_H_ */ 463#endif /* _SYSFS_H_ */
diff --git a/lib/kobject.c b/lib/kobject.c
index 5b4b8886435e..94b321f4ac67 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -18,6 +18,7 @@
18#include <linux/export.h> 18#include <linux/export.h>
19#include <linux/stat.h> 19#include <linux/stat.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/random.h>
21 22
22/** 23/**
23 * kobject_namespace - return @kobj's namespace tag 24 * kobject_namespace - return @kobj's namespace tag
@@ -65,13 +66,17 @@ static int populate_dir(struct kobject *kobj)
65 66
66static int create_dir(struct kobject *kobj) 67static int create_dir(struct kobject *kobj)
67{ 68{
69 const struct kobj_ns_type_operations *ops;
68 int error; 70 int error;
69 71
70 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); 72 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
71 if (!error) { 73 if (error)
72 error = populate_dir(kobj); 74 return error;
73 if (error) 75
74 sysfs_remove_dir(kobj); 76 error = populate_dir(kobj);
77 if (error) {
78 sysfs_remove_dir(kobj);
79 return error;
75 } 80 }
76 81
77 /* 82 /*
@@ -80,7 +85,20 @@ static int create_dir(struct kobject *kobj)
80 */ 85 */
81 sysfs_get(kobj->sd); 86 sysfs_get(kobj->sd);
82 87
83 return error; 88 /*
89 * If @kobj has ns_ops, its children need to be filtered based on
90 * their namespace tags. Enable namespace support on @kobj->sd.
91 */
92 ops = kobj_child_ns_ops(kobj);
93 if (ops) {
94 BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE);
95 BUG_ON(ops->type >= KOBJ_NS_TYPES);
96 BUG_ON(!kobj_ns_type_registered(ops->type));
97
98 kernfs_enable_ns(kobj->sd);
99 }
100
101 return 0;
84} 102}
85 103
86static int get_kobj_path_length(struct kobject *kobj) 104static int get_kobj_path_length(struct kobject *kobj)
@@ -247,8 +265,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
247 return 0; 265 return 0;
248 266
249 kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); 267 kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
250 if (!kobj->name) 268 if (!kobj->name) {
269 kobj->name = old_name;
251 return -ENOMEM; 270 return -ENOMEM;
271 }
252 272
253 /* ewww... some of these buggers have '/' in the name ... */ 273 /* ewww... some of these buggers have '/' in the name ... */
254 while ((s = strchr(kobj->name, '/'))) 274 while ((s = strchr(kobj->name, '/')))
@@ -625,10 +645,12 @@ static void kobject_release(struct kref *kref)
625{ 645{
626 struct kobject *kobj = container_of(kref, struct kobject, kref); 646 struct kobject *kobj = container_of(kref, struct kobject, kref);
627#ifdef CONFIG_DEBUG_KOBJECT_RELEASE 647#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
628 pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", 648 unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
629 kobject_name(kobj), kobj, __func__, kobj->parent); 649 pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
650 kobject_name(kobj), kobj, __func__, kobj->parent, delay);
630 INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); 651 INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
631 schedule_delayed_work(&kobj->release, HZ); 652
653 schedule_delayed_work(&kobj->release, delay);
632#else 654#else
633 kobject_cleanup(kobj); 655 kobject_cleanup(kobj);
634#endif 656#endif
@@ -835,6 +857,7 @@ void kset_unregister(struct kset *k)
835{ 857{
836 if (!k) 858 if (!k)
837 return; 859 return;
860 kobject_del(&k->kobj);
838 kobject_put(&k->kobj); 861 kobject_put(&k->kobj);
839} 862}
840 863
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index d0c687fd9802..5dce351f131f 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -262,6 +262,7 @@ baz_error:
262bar_error: 262bar_error:
263 destroy_foo_obj(foo_obj); 263 destroy_foo_obj(foo_obj);
264foo_error: 264foo_error:
265 kset_unregister(example_kset);
265 return -EINVAL; 266 return -EINVAL;
266} 267}
267 268