aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 18:49:44 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 18:49:44 -0500
commitd3bad75a6d57416cf7478ca2a1e42f699bc17ec5 (patch)
treed79e9403e17aef5fee028fc550eec583dda38e0c
parent9f67627a0fea99b080a190d2d24cc1e2634aa2f7 (diff)
parentdb4aad209bc9aefd91f0a9aeb9e37364088b39ad (diff)
Merge tag 'driver-core-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
Pull driver core / sysfs patches from Greg KH: "Here's the big driver core and sysfs patch set for 3.14-rc1. There's a lot of work here moving sysfs logic out into a "kernfs" to allow other subsystems to also have a virtual filesystem with the same attributes of sysfs (handle device disconnect, dynamic creation / removal as needed / unneeded, etc) This is primarily being done for the cgroups filesystem, but the goal is to also move debugfs to it when it is ready, solving all of the known issues in that filesystem as well. The code isn't completed yet, but all should be stable now (there is a big section that was reverted due to problems found when testing) There's also some other smaller fixes, and a driver core addition that allows for a "collection" of objects, that the DRM people will be using soon (it's in this tree to make merges after -rc1 easier) All of this has been in linux-next with no reported issues" * tag 'driver-core-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core: (113 commits) kernfs: associate a new kernfs_node with its parent on creation kernfs: add struct dentry declaration in kernfs.h kernfs: fix get_active failure handling in kernfs_seq_*() Revert "kernfs: fix get_active failure handling in kernfs_seq_*()" Revert "kernfs: replace kernfs_node->u.completion with kernfs_root->deactivate_waitq" Revert "kernfs: remove KERNFS_ACTIVE_REF and add kernfs_lockdep()" Revert "kernfs: remove KERNFS_REMOVED" Revert "kernfs: restructure removal path to fix possible premature return" Revert "kernfs: invoke kernfs_unmap_bin_file() directly from __kernfs_remove()" Revert "kernfs: remove kernfs_addrm_cxt" Revert "kernfs: make kernfs_get_active() block if the node is deactivated but not removed" Revert "kernfs: implement kernfs_{de|re}activate[_self]()" Revert "kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers" Revert "pci: use device_remove_file_self() instead of device_schedule_callback()" Revert "scsi: use 
device_remove_file_self() instead of device_schedule_callback()" Revert "s390: use device_remove_file_self() instead of device_schedule_callback()" Revert "sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner()" Revert "kernfs: remove unnecessary NULL check in __kernfs_remove()" kernfs: remove unnecessary NULL check in __kernfs_remove() drivers/base: provide an infrastructure for componentised subsystems ...
-rw-r--r--Documentation/driver-model/design-patterns.txt116
-rw-r--r--Documentation/kobject.txt5
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--drivers/base/Makefile2
-rw-r--r--drivers/base/bus.c13
-rw-r--r--drivers/base/component.c382
-rw-r--r--drivers/base/core.c7
-rw-r--r--drivers/base/devtmpfs.c2
-rw-r--r--drivers/base/firmware_class.c93
-rw-r--r--drivers/firmware/dmi-sysfs.c3
-rw-r--r--drivers/gpio/gpiolib.c4
-rw-r--r--drivers/md/bitmap.c2
-rw-r--r--drivers/md/bitmap.h2
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/misc/mic/host/mic_device.h2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/kernfs/Makefile5
-rw-r--r--fs/kernfs/dir.c1073
-rw-r--r--fs/kernfs/file.c867
-rw-r--r--fs/kernfs/inode.c377
-rw-r--r--fs/kernfs/kernfs-internal.h122
-rw-r--r--fs/kernfs/mount.c165
-rw-r--r--fs/kernfs/symlink.c151
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/sysfs/Makefile2
-rw-r--r--fs/sysfs/dir.c1075
-rw-r--r--fs/sysfs/file.c961
-rw-r--r--fs/sysfs/group.c102
-rw-r--r--fs/sysfs/inode.c331
-rw-r--r--fs/sysfs/mount.c184
-rw-r--r--fs/sysfs/symlink.c219
-rw-r--r--fs/sysfs/sysfs.h236
-rw-r--r--include/linux/component.h32
-rw-r--r--include/linux/firmware.h7
-rw-r--r--include/linux/kernfs.h376
-rw-r--r--include/linux/kobj_completion.h18
-rw-r--r--include/linux/kobject.h2
-rw-r--r--include/linux/memory.h1
-rw-r--r--include/linux/sysfs.h47
-rw-r--r--lib/kobject.c95
-rw-r--r--samples/kobject/kset-example.c1
42 files changed, 4186 insertions, 2914 deletions
diff --git a/Documentation/driver-model/design-patterns.txt b/Documentation/driver-model/design-patterns.txt
new file mode 100644
index 000000000000..ba7b2df64904
--- /dev/null
+++ b/Documentation/driver-model/design-patterns.txt
@@ -0,0 +1,116 @@
1
2Device Driver Design Patterns
3~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5This document describes a few common design patterns found in device drivers.
6It is likely that subsystem maintainers will ask driver developers to
7conform to these design patterns.
8
91. State Container
102. container_of()
11
12
131. State Container
14~~~~~~~~~~~~~~~~~~
15
16While the kernel contains a few device drivers that assume that they will
17only be probed() once on a certain system (singletons), it is customary to assume
18that the device the driver binds to will appear in several instances. This
19means that the probe() function and all callbacks need to be reentrant.
20
21The most common way to achieve this is to use the state container design
22pattern. It usually has this form:
23
24struct foo {
25 spinlock_t lock; /* Example member */
26 (...)
27};
28
29static int foo_probe(...)
30{
31 struct foo *foo;
32
33 foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
34 if (!foo)
35 return -ENOMEM;
36 spin_lock_init(&foo->lock);
37 (...)
38}
39
40This will create an instance of struct foo in memory every time probe() is
41called. This is our state container for this instance of the device driver.
42Of course it is then necessary to always pass this instance of the
43state around to all functions that need access to the state and its members.
44
45For example, if the driver is registering an interrupt handler, you would
46pass around a pointer to struct foo like this:
47
48static irqreturn_t foo_handler(int irq, void *arg)
49{
50 struct foo *foo = arg;
51 (...)
52}
53
54static int foo_probe(...)
55{
56 struct foo *foo;
57
58 (...)
59 ret = request_irq(irq, foo_handler, 0, "foo", foo);
60}
61
62This way you always get a pointer back to the correct instance of foo in
63your interrupt handler.
64
65
662. container_of()
67~~~~~~~~~~~~~~~~~
68
69Continuing with the above example, we add an offloaded work item:
70
71struct foo {
72 spinlock_t lock;
73 struct workqueue_struct *wq;
74 struct work_struct offload;
75 (...)
76};
77
78static void foo_work(struct work_struct *work)
79{
80 struct foo *foo = container_of(work, struct foo, offload);
81
82 (...)
83}
84
85static irqreturn_t foo_handler(int irq, void *arg)
86{
87 struct foo *foo = arg;
88
89 queue_work(foo->wq, &foo->offload);
90 (...)
91}
92
93static int foo_probe(...)
94{
95 struct foo *foo;
96 (...)
97 foo->wq = create_singlethread_workqueue("foo-wq");
98 INIT_WORK(&foo->offload, foo_work);
99 (...)
100}
101
102The design pattern is the same for an hrtimer or something similar that will
103pass a single argument, which is a pointer to a struct member, to the
104callback.
105
106container_of() is a macro defined in <linux/kernel.h>
107
108What container_of() does is to obtain a pointer to the containing struct from
109a pointer to a member by a simple subtraction using the offsetof() macro from
110standard C, which allows something similar to object-oriented behaviours.
111Notice that the contained member must not be a pointer, but an actual member
112for this to work.
113
114We can see here that we avoid having global pointers to our struct foo *
115instance this way, while still keeping the number of parameters passed to the
116work function to a single pointer.
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index c5182bb2c16c..f87241dfed87 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -342,7 +342,10 @@ kset use:
342 342
343When you are finished with the kset, call: 343When you are finished with the kset, call:
344 void kset_unregister(struct kset *kset); 344 void kset_unregister(struct kset *kset);
345to destroy it. 345to destroy it. This removes the kset from sysfs and decrements its reference
346count. When the reference count goes to zero, the kset will be released.
347Because other references to the kset may still exist, the release may happen
348after kset_unregister() returns.
346 349
347An example of using a kset can be seen in the 350An example of using a kset can be seen in the
348samples/kobject/kset-example.c file in the kernel tree. 351samples/kobject/kset-example.c file in the kernel tree.
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 4a6ff747aaad..8fffd845e22b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -433,7 +433,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
433 if (c->x86 >= 0x15) 433 if (c->x86 >= 0x15)
434 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); 434 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
435 435
436 if (request_firmware(&fw, (const char *)fw_name, device)) { 436 if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
437 pr_debug("failed to load file %s\n", fw_name); 437 pr_debug("failed to load file %s\n", fw_name);
438 goto out; 438 goto out;
439 } 439 }
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 5fb2cebf556b..a276fa75d9b5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
278 sprintf(name, "intel-ucode/%02x-%02x-%02x", 278 sprintf(name, "intel-ucode/%02x-%02x-%02x",
279 c->x86, c->x86_model, c->x86_mask); 279 c->x86, c->x86_model, c->x86_mask);
280 280
281 if (request_firmware(&firmware, name, device)) { 281 if (request_firmware_direct(&firmware, name, device)) {
282 pr_debug("data file %s load failed\n", name); 282 pr_debug("data file %s load failed\n", name);
283 return UCODE_NFOUND; 283 return UCODE_NFOUND;
284 } 284 }
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 94e8a80e87f8..870ecfd503af 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
1# Makefile for the Linux device tree 1# Makefile for the Linux device tree
2 2
3obj-y := core.o bus.o dd.o syscore.o \ 3obj-y := component.o core.o bus.o dd.o syscore.o \
4 driver.o class.o platform.o \ 4 driver.o class.o platform.o \
5 cpu.o firmware.o init.o map.o devres.o \ 5 cpu.o firmware.o init.o map.o devres.o \
6 attribute_container.o transport_class.o \ 6 attribute_container.o transport_class.o \
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 73f6c2925281..59dc8086e4fa 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -146,8 +146,19 @@ void bus_remove_file(struct bus_type *bus, struct bus_attribute *attr)
146} 146}
147EXPORT_SYMBOL_GPL(bus_remove_file); 147EXPORT_SYMBOL_GPL(bus_remove_file);
148 148
149static void bus_release(struct kobject *kobj)
150{
151 struct subsys_private *priv =
152 container_of(kobj, typeof(*priv), subsys.kobj);
153 struct bus_type *bus = priv->bus;
154
155 kfree(priv);
156 bus->p = NULL;
157}
158
149static struct kobj_type bus_ktype = { 159static struct kobj_type bus_ktype = {
150 .sysfs_ops = &bus_sysfs_ops, 160 .sysfs_ops = &bus_sysfs_ops,
161 .release = bus_release,
151}; 162};
152 163
153static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) 164static int bus_uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -953,8 +964,6 @@ void bus_unregister(struct bus_type *bus)
953 kset_unregister(bus->p->devices_kset); 964 kset_unregister(bus->p->devices_kset);
954 bus_remove_file(bus, &bus_attr_uevent); 965 bus_remove_file(bus, &bus_attr_uevent);
955 kset_unregister(&bus->p->subsys); 966 kset_unregister(&bus->p->subsys);
956 kfree(bus->p);
957 bus->p = NULL;
958} 967}
959EXPORT_SYMBOL_GPL(bus_unregister); 968EXPORT_SYMBOL_GPL(bus_unregister);
960 969
diff --git a/drivers/base/component.c b/drivers/base/component.c
new file mode 100644
index 000000000000..c53efe6c6d8e
--- /dev/null
+++ b/drivers/base/component.c
@@ -0,0 +1,382 @@
1/*
2 * Componentized device handling.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This is work in progress. We gather up the component devices into a list,
9 * and bind them when instructed. At the moment, we're specific to the DRM
10 * subsystem, and only handle one master device, but this doesn't have to be
11 * the case.
12 */
13#include <linux/component.h>
14#include <linux/device.h>
15#include <linux/kref.h>
16#include <linux/list.h>
17#include <linux/module.h>
18#include <linux/mutex.h>
19#include <linux/slab.h>
20
21struct master {
22 struct list_head node;
23 struct list_head components;
24 bool bound;
25
26 const struct component_master_ops *ops;
27 struct device *dev;
28};
29
30struct component {
31 struct list_head node;
32 struct list_head master_node;
33 struct master *master;
34 bool bound;
35
36 const struct component_ops *ops;
37 struct device *dev;
38};
39
40static DEFINE_MUTEX(component_mutex);
41static LIST_HEAD(component_list);
42static LIST_HEAD(masters);
43
44static struct master *__master_find(struct device *dev,
45 const struct component_master_ops *ops)
46{
47 struct master *m;
48
49 list_for_each_entry(m, &masters, node)
50 if (m->dev == dev && (!ops || m->ops == ops))
51 return m;
52
53 return NULL;
54}
55
56/* Attach an unattached component to a master. */
57static void component_attach_master(struct master *master, struct component *c)
58{
59 c->master = master;
60
61 list_add_tail(&c->master_node, &master->components);
62}
63
64/* Detach a component from a master. */
65static void component_detach_master(struct master *master, struct component *c)
66{
67 list_del(&c->master_node);
68
69 c->master = NULL;
70}
71
72int component_master_add_child(struct master *master,
73 int (*compare)(struct device *, void *), void *compare_data)
74{
75 struct component *c;
76 int ret = -ENXIO;
77
78 list_for_each_entry(c, &component_list, node) {
79 if (c->master)
80 continue;
81
82 if (compare(c->dev, compare_data)) {
83 component_attach_master(master, c);
84 ret = 0;
85 break;
86 }
87 }
88
89 return ret;
90}
91EXPORT_SYMBOL_GPL(component_master_add_child);
92
93/* Detach all attached components from this master */
94static void master_remove_components(struct master *master)
95{
96 while (!list_empty(&master->components)) {
97 struct component *c = list_first_entry(&master->components,
98 struct component, master_node);
99
100 WARN_ON(c->master != master);
101
102 component_detach_master(master, c);
103 }
104}
105
106/*
107 * Try to bring up a master. If component is NULL, we're interested in
108 * this master, otherwise it's a component which must be present to try
109 * and bring up the master.
110 *
111 * Returns 1 for successful bringup, 0 if not ready, or -ve errno.
112 */
113static int try_to_bring_up_master(struct master *master,
114 struct component *component)
115{
116 int ret = 0;
117
118 if (!master->bound) {
119 /*
120 * Search the list of components, looking for components that
121 * belong to this master, and attach them to the master.
122 */
123 if (master->ops->add_components(master->dev, master)) {
124 /* Failed to find all components */
125 master_remove_components(master);
126 ret = 0;
127 goto out;
128 }
129
130 if (component && component->master != master) {
131 master_remove_components(master);
132 ret = 0;
133 goto out;
134 }
135
136 /* Found all components */
137 ret = master->ops->bind(master->dev);
138 if (ret < 0) {
139 master_remove_components(master);
140 goto out;
141 }
142
143 master->bound = true;
144 ret = 1;
145 }
146out:
147
148 return ret;
149}
150
151static int try_to_bring_up_masters(struct component *component)
152{
153 struct master *m;
154 int ret = 0;
155
156 list_for_each_entry(m, &masters, node) {
157 ret = try_to_bring_up_master(m, component);
158 if (ret != 0)
159 break;
160 }
161
162 return ret;
163}
164
165static void take_down_master(struct master *master)
166{
167 if (master->bound) {
168 master->ops->unbind(master->dev);
169 master->bound = false;
170 }
171
172 master_remove_components(master);
173}
174
175int component_master_add(struct device *dev,
176 const struct component_master_ops *ops)
177{
178 struct master *master;
179 int ret;
180
181 master = kzalloc(sizeof(*master), GFP_KERNEL);
182 if (!master)
183 return -ENOMEM;
184
185 master->dev = dev;
186 master->ops = ops;
187 INIT_LIST_HEAD(&master->components);
188
189 /* Add to the list of available masters. */
190 mutex_lock(&component_mutex);
191 list_add(&master->node, &masters);
192
193 ret = try_to_bring_up_master(master, NULL);
194
195 if (ret < 0) {
196 /* Delete off the list if we weren't successful */
197 list_del(&master->node);
198 kfree(master);
199 }
200 mutex_unlock(&component_mutex);
201
202 return ret < 0 ? ret : 0;
203}
204EXPORT_SYMBOL_GPL(component_master_add);
205
206void component_master_del(struct device *dev,
207 const struct component_master_ops *ops)
208{
209 struct master *master;
210
211 mutex_lock(&component_mutex);
212 master = __master_find(dev, ops);
213 if (master) {
214 take_down_master(master);
215
216 list_del(&master->node);
217 kfree(master);
218 }
219 mutex_unlock(&component_mutex);
220}
221EXPORT_SYMBOL_GPL(component_master_del);
222
223static void component_unbind(struct component *component,
224 struct master *master, void *data)
225{
226 WARN_ON(!component->bound);
227
228 component->ops->unbind(component->dev, master->dev, data);
229 component->bound = false;
230
231 /* Release all resources claimed in the binding of this component */
232 devres_release_group(component->dev, component);
233}
234
235void component_unbind_all(struct device *master_dev, void *data)
236{
237 struct master *master;
238 struct component *c;
239
240 WARN_ON(!mutex_is_locked(&component_mutex));
241
242 master = __master_find(master_dev, NULL);
243 if (!master)
244 return;
245
246 list_for_each_entry_reverse(c, &master->components, master_node)
247 component_unbind(c, master, data);
248}
249EXPORT_SYMBOL_GPL(component_unbind_all);
250
251static int component_bind(struct component *component, struct master *master,
252 void *data)
253{
254 int ret;
255
256 /*
257 * Each component initialises inside its own devres group.
258 * This allows us to roll-back a failed component without
259 * affecting anything else.
260 */
261 if (!devres_open_group(master->dev, NULL, GFP_KERNEL))
262 return -ENOMEM;
263
264 /*
265 * Also open a group for the device itself: this allows us
266 * to release the resources claimed against the sub-device
267 * at the appropriate moment.
268 */
269 if (!devres_open_group(component->dev, component, GFP_KERNEL)) {
270 devres_release_group(master->dev, NULL);
271 return -ENOMEM;
272 }
273
274 dev_dbg(master->dev, "binding %s (ops %ps)\n",
275 dev_name(component->dev), component->ops);
276
277 ret = component->ops->bind(component->dev, master->dev, data);
278 if (!ret) {
279 component->bound = true;
280
281 /*
282 * Close the component device's group so that resources
283 * allocated in the binding are encapsulated for removal
284 * at unbind. Remove the group on the DRM device as we
285 * can clean those resources up independently.
286 */
287 devres_close_group(component->dev, NULL);
288 devres_remove_group(master->dev, NULL);
289
290 dev_info(master->dev, "bound %s (ops %ps)\n",
291 dev_name(component->dev), component->ops);
292 } else {
293 devres_release_group(component->dev, NULL);
294 devres_release_group(master->dev, NULL);
295
296 dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
297 dev_name(component->dev), component->ops, ret);
298 }
299
300 return ret;
301}
302
303int component_bind_all(struct device *master_dev, void *data)
304{
305 struct master *master;
306 struct component *c;
307 int ret = 0;
308
309 WARN_ON(!mutex_is_locked(&component_mutex));
310
311 master = __master_find(master_dev, NULL);
312 if (!master)
313 return -EINVAL;
314
315 list_for_each_entry(c, &master->components, master_node) {
316 ret = component_bind(c, master, data);
317 if (ret)
318 break;
319 }
320
321 if (ret != 0) {
322 list_for_each_entry_continue_reverse(c, &master->components,
323 master_node)
324 component_unbind(c, master, data);
325 }
326
327 return ret;
328}
329EXPORT_SYMBOL_GPL(component_bind_all);
330
331int component_add(struct device *dev, const struct component_ops *ops)
332{
333 struct component *component;
334 int ret;
335
336 component = kzalloc(sizeof(*component), GFP_KERNEL);
337 if (!component)
338 return -ENOMEM;
339
340 component->ops = ops;
341 component->dev = dev;
342
343 dev_dbg(dev, "adding component (ops %ps)\n", ops);
344
345 mutex_lock(&component_mutex);
346 list_add_tail(&component->node, &component_list);
347
348 ret = try_to_bring_up_masters(component);
349 if (ret < 0) {
350 list_del(&component->node);
351
352 kfree(component);
353 }
354 mutex_unlock(&component_mutex);
355
356 return ret < 0 ? ret : 0;
357}
358EXPORT_SYMBOL_GPL(component_add);
359
360void component_del(struct device *dev, const struct component_ops *ops)
361{
362 struct component *c, *component = NULL;
363
364 mutex_lock(&component_mutex);
365 list_for_each_entry(c, &component_list, node)
366 if (c->dev == dev && c->ops == ops) {
367 list_del(&c->node);
368 component = c;
369 break;
370 }
371
372 if (component && component->master)
373 take_down_master(component->master);
374
375 mutex_unlock(&component_mutex);
376
377 WARN_ON(!component);
378 kfree(component);
379}
380EXPORT_SYMBOL_GPL(component_del);
381
382MODULE_LICENSE("GPL v2");
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 67b180d855b2..2b567177ef78 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -491,11 +491,13 @@ static int device_add_attrs(struct device *dev)
491 if (device_supports_offline(dev) && !dev->offline_disabled) { 491 if (device_supports_offline(dev) && !dev->offline_disabled) {
492 error = device_create_file(dev, &dev_attr_online); 492 error = device_create_file(dev, &dev_attr_online);
493 if (error) 493 if (error)
494 goto err_remove_type_groups; 494 goto err_remove_dev_groups;
495 } 495 }
496 496
497 return 0; 497 return 0;
498 498
499 err_remove_dev_groups:
500 device_remove_groups(dev, dev->groups);
499 err_remove_type_groups: 501 err_remove_type_groups:
500 if (type) 502 if (type)
501 device_remove_groups(dev, type->groups); 503 device_remove_groups(dev, type->groups);
@@ -1603,6 +1605,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
1603 goto error; 1605 goto error;
1604 } 1606 }
1605 1607
1608 device_initialize(dev);
1606 dev->devt = devt; 1609 dev->devt = devt;
1607 dev->class = class; 1610 dev->class = class;
1608 dev->parent = parent; 1611 dev->parent = parent;
@@ -1614,7 +1617,7 @@ device_create_groups_vargs(struct class *class, struct device *parent,
1614 if (retval) 1617 if (retval)
1615 goto error; 1618 goto error;
1616 1619
1617 retval = device_register(dev); 1620 retval = device_add(dev);
1618 if (retval) 1621 if (retval)
1619 goto error; 1622 goto error;
1620 1623
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 0f3820121e02..25798db14553 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -299,7 +299,7 @@ static int handle_remove(const char *nodename, struct device *dev)
299{ 299{
300 struct path parent; 300 struct path parent;
301 struct dentry *dentry; 301 struct dentry *dentry;
302 int deleted = 1; 302 int deleted = 0;
303 int err; 303 int err;
304 304
305 dentry = kern_path_locked(nodename, &parent); 305 dentry = kern_path_locked(nodename, &parent);
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index eb8fb94ae2c5..8a97ddfa6122 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -96,6 +96,15 @@ static inline long firmware_loading_timeout(void)
96 return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT; 96 return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT;
97} 97}
98 98
99/* firmware behavior options */
100#define FW_OPT_UEVENT (1U << 0)
101#define FW_OPT_NOWAIT (1U << 1)
102#ifdef CONFIG_FW_LOADER_USER_HELPER
103#define FW_OPT_FALLBACK (1U << 2)
104#else
105#define FW_OPT_FALLBACK 0
106#endif
107
99struct firmware_cache { 108struct firmware_cache {
100 /* firmware_buf instance will be added into the below list */ 109 /* firmware_buf instance will be added into the below list */
101 spinlock_t lock; 110 spinlock_t lock;
@@ -219,6 +228,7 @@ static int fw_lookup_and_allocate_buf(const char *fw_name,
219} 228}
220 229
221static void __fw_free_buf(struct kref *ref) 230static void __fw_free_buf(struct kref *ref)
231 __releases(&fwc->lock)
222{ 232{
223 struct firmware_buf *buf = to_fwbuf(ref); 233 struct firmware_buf *buf = to_fwbuf(ref);
224 struct firmware_cache *fwc = buf->fwc; 234 struct firmware_cache *fwc = buf->fwc;
@@ -270,21 +280,21 @@ module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644);
270MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path"); 280MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path");
271 281
272/* Don't inline this: 'struct kstat' is biggish */ 282/* Don't inline this: 'struct kstat' is biggish */
273static noinline_for_stack long fw_file_size(struct file *file) 283static noinline_for_stack int fw_file_size(struct file *file)
274{ 284{
275 struct kstat st; 285 struct kstat st;
276 if (vfs_getattr(&file->f_path, &st)) 286 if (vfs_getattr(&file->f_path, &st))
277 return -1; 287 return -1;
278 if (!S_ISREG(st.mode)) 288 if (!S_ISREG(st.mode))
279 return -1; 289 return -1;
280 if (st.size != (long)st.size) 290 if (st.size != (int)st.size)
281 return -1; 291 return -1;
282 return st.size; 292 return st.size;
283} 293}
284 294
285static int fw_read_file_contents(struct file *file, struct firmware_buf *fw_buf) 295static int fw_read_file_contents(struct file *file, struct firmware_buf *fw_buf)
286{ 296{
287 long size; 297 int size;
288 char *buf; 298 char *buf;
289 int rc; 299 int rc;
290 300
@@ -820,7 +830,7 @@ static void firmware_class_timeout_work(struct work_struct *work)
820 830
821static struct firmware_priv * 831static struct firmware_priv *
822fw_create_instance(struct firmware *firmware, const char *fw_name, 832fw_create_instance(struct firmware *firmware, const char *fw_name,
823 struct device *device, bool uevent, bool nowait) 833 struct device *device, unsigned int opt_flags)
824{ 834{
825 struct firmware_priv *fw_priv; 835 struct firmware_priv *fw_priv;
826 struct device *f_dev; 836 struct device *f_dev;
@@ -832,7 +842,7 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
832 goto exit; 842 goto exit;
833 } 843 }
834 844
835 fw_priv->nowait = nowait; 845 fw_priv->nowait = !!(opt_flags & FW_OPT_NOWAIT);
836 fw_priv->fw = firmware; 846 fw_priv->fw = firmware;
837 INIT_DELAYED_WORK(&fw_priv->timeout_work, 847 INIT_DELAYED_WORK(&fw_priv->timeout_work,
838 firmware_class_timeout_work); 848 firmware_class_timeout_work);
@@ -848,8 +858,8 @@ exit:
848} 858}
849 859
850/* load a firmware via user helper */ 860/* load a firmware via user helper */
851static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, 861static int _request_firmware_load(struct firmware_priv *fw_priv,
852 long timeout) 862 unsigned int opt_flags, long timeout)
853{ 863{
854 int retval = 0; 864 int retval = 0;
855 struct device *f_dev = &fw_priv->dev; 865 struct device *f_dev = &fw_priv->dev;
@@ -885,7 +895,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
885 goto err_del_bin_attr; 895 goto err_del_bin_attr;
886 } 896 }
887 897
888 if (uevent) { 898 if (opt_flags & FW_OPT_UEVENT) {
889 buf->need_uevent = true; 899 buf->need_uevent = true;
890 dev_set_uevent_suppress(f_dev, false); 900 dev_set_uevent_suppress(f_dev, false);
891 dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id); 901 dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
@@ -911,16 +921,16 @@ err_put_dev:
911 921
912static int fw_load_from_user_helper(struct firmware *firmware, 922static int fw_load_from_user_helper(struct firmware *firmware,
913 const char *name, struct device *device, 923 const char *name, struct device *device,
914 bool uevent, bool nowait, long timeout) 924 unsigned int opt_flags, long timeout)
915{ 925{
916 struct firmware_priv *fw_priv; 926 struct firmware_priv *fw_priv;
917 927
918 fw_priv = fw_create_instance(firmware, name, device, uevent, nowait); 928 fw_priv = fw_create_instance(firmware, name, device, opt_flags);
919 if (IS_ERR(fw_priv)) 929 if (IS_ERR(fw_priv))
920 return PTR_ERR(fw_priv); 930 return PTR_ERR(fw_priv);
921 931
922 fw_priv->buf = firmware->priv; 932 fw_priv->buf = firmware->priv;
923 return _request_firmware_load(fw_priv, uevent, timeout); 933 return _request_firmware_load(fw_priv, opt_flags, timeout);
924} 934}
925 935
926#ifdef CONFIG_PM_SLEEP 936#ifdef CONFIG_PM_SLEEP
@@ -942,7 +952,7 @@ static void kill_requests_without_uevent(void)
942#else /* CONFIG_FW_LOADER_USER_HELPER */ 952#else /* CONFIG_FW_LOADER_USER_HELPER */
943static inline int 953static inline int
944fw_load_from_user_helper(struct firmware *firmware, const char *name, 954fw_load_from_user_helper(struct firmware *firmware, const char *name,
945 struct device *device, bool uevent, bool nowait, 955 struct device *device, unsigned int opt_flags,
946 long timeout) 956 long timeout)
947{ 957{
948 return -ENOENT; 958 return -ENOENT;
@@ -1023,7 +1033,7 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name,
1023} 1033}
1024 1034
1025static int assign_firmware_buf(struct firmware *fw, struct device *device, 1035static int assign_firmware_buf(struct firmware *fw, struct device *device,
1026 bool skip_cache) 1036 unsigned int opt_flags)
1027{ 1037{
1028 struct firmware_buf *buf = fw->priv; 1038 struct firmware_buf *buf = fw->priv;
1029 1039
@@ -1040,7 +1050,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
1040 * device may have been deleted already, but the problem 1050 * device may have been deleted already, but the problem
1041 * should be fixed in devres or driver core. 1051 * should be fixed in devres or driver core.
1042 */ 1052 */
1043 if (device && !skip_cache) 1053 /* don't cache firmware handled without uevent */
1054 if (device && (opt_flags & FW_OPT_UEVENT))
1044 fw_add_devm_name(device, buf->fw_id); 1055 fw_add_devm_name(device, buf->fw_id);
1045 1056
1046 /* 1057 /*
@@ -1061,7 +1072,7 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device,
1061/* called from request_firmware() and request_firmware_work_func() */ 1072/* called from request_firmware() and request_firmware_work_func() */
1062static int 1073static int
1063_request_firmware(const struct firmware **firmware_p, const char *name, 1074_request_firmware(const struct firmware **firmware_p, const char *name,
1064 struct device *device, bool uevent, bool nowait) 1075 struct device *device, unsigned int opt_flags)
1065{ 1076{
1066 struct firmware *fw; 1077 struct firmware *fw;
1067 long timeout; 1078 long timeout;
@@ -1076,7 +1087,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
1076 1087
1077 ret = 0; 1088 ret = 0;
1078 timeout = firmware_loading_timeout(); 1089 timeout = firmware_loading_timeout();
1079 if (nowait) { 1090 if (opt_flags & FW_OPT_NOWAIT) {
1080 timeout = usermodehelper_read_lock_wait(timeout); 1091 timeout = usermodehelper_read_lock_wait(timeout);
1081 if (!timeout) { 1092 if (!timeout) {
1082 dev_dbg(device, "firmware: %s loading timed out\n", 1093 dev_dbg(device, "firmware: %s loading timed out\n",
@@ -1095,16 +1106,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
1095 1106
1096 ret = fw_get_filesystem_firmware(device, fw->priv); 1107 ret = fw_get_filesystem_firmware(device, fw->priv);
1097 if (ret) { 1108 if (ret) {
1098 dev_warn(device, "Direct firmware load failed with error %d\n", 1109 if (opt_flags & FW_OPT_FALLBACK) {
1099 ret); 1110 dev_warn(device,
1100 dev_warn(device, "Falling back to user helper\n"); 1111 "Direct firmware load failed with error %d\n",
1101 ret = fw_load_from_user_helper(fw, name, device, 1112 ret);
1102 uevent, nowait, timeout); 1113 dev_warn(device, "Falling back to user helper\n");
1114 ret = fw_load_from_user_helper(fw, name, device,
1115 opt_flags, timeout);
1116 }
1103 } 1117 }
1104 1118
1105 /* don't cache firmware handled without uevent */
1106 if (!ret) 1119 if (!ret)
1107 ret = assign_firmware_buf(fw, device, !uevent); 1120 ret = assign_firmware_buf(fw, device, opt_flags);
1108 1121
1109 usermodehelper_read_unlock(); 1122 usermodehelper_read_unlock();
1110 1123
@@ -1146,12 +1159,37 @@ request_firmware(const struct firmware **firmware_p, const char *name,
1146 1159
1147 /* Need to pin this module until return */ 1160 /* Need to pin this module until return */
1148 __module_get(THIS_MODULE); 1161 __module_get(THIS_MODULE);
1149 ret = _request_firmware(firmware_p, name, device, true, false); 1162 ret = _request_firmware(firmware_p, name, device,
1163 FW_OPT_UEVENT | FW_OPT_FALLBACK);
1150 module_put(THIS_MODULE); 1164 module_put(THIS_MODULE);
1151 return ret; 1165 return ret;
1152} 1166}
1153EXPORT_SYMBOL(request_firmware); 1167EXPORT_SYMBOL(request_firmware);
1154 1168
1169#ifdef CONFIG_FW_LOADER_USER_HELPER
1170/**
1171 * request_firmware_direct: - load firmware directly without usermode helper
1172 * @firmware_p: pointer to firmware image
1173 * @name: name of firmware file
1174 * @device: device for which firmware is being loaded
1175 *
1176 * This function works pretty much like request_firmware(), but this doesn't
1177 * fall back to usermode helper even if the firmware couldn't be loaded
1178 * directly from fs. Hence it's useful for loading optional firmwares, which
1179 * aren't always present, without extra long timeouts of udev.
1180 **/
1181int request_firmware_direct(const struct firmware **firmware_p,
1182 const char *name, struct device *device)
1183{
1184 int ret;
1185 __module_get(THIS_MODULE);
1186 ret = _request_firmware(firmware_p, name, device, FW_OPT_UEVENT);
1187 module_put(THIS_MODULE);
1188 return ret;
1189}
1190EXPORT_SYMBOL_GPL(request_firmware_direct);
1191#endif
1192
1155/** 1193/**
1156 * release_firmware: - release the resource associated with a firmware image 1194 * release_firmware: - release the resource associated with a firmware image
1157 * @fw: firmware resource to release 1195 * @fw: firmware resource to release
@@ -1174,7 +1212,7 @@ struct firmware_work {
1174 struct device *device; 1212 struct device *device;
1175 void *context; 1213 void *context;
1176 void (*cont)(const struct firmware *fw, void *context); 1214 void (*cont)(const struct firmware *fw, void *context);
1177 bool uevent; 1215 unsigned int opt_flags;
1178}; 1216};
1179 1217
1180static void request_firmware_work_func(struct work_struct *work) 1218static void request_firmware_work_func(struct work_struct *work)
@@ -1185,7 +1223,7 @@ static void request_firmware_work_func(struct work_struct *work)
1185 fw_work = container_of(work, struct firmware_work, work); 1223 fw_work = container_of(work, struct firmware_work, work);
1186 1224
1187 _request_firmware(&fw, fw_work->name, fw_work->device, 1225 _request_firmware(&fw, fw_work->name, fw_work->device,
1188 fw_work->uevent, true); 1226 fw_work->opt_flags);
1189 fw_work->cont(fw, fw_work->context); 1227 fw_work->cont(fw, fw_work->context);
1190 put_device(fw_work->device); /* taken in request_firmware_nowait() */ 1228 put_device(fw_work->device); /* taken in request_firmware_nowait() */
1191 1229
@@ -1233,7 +1271,8 @@ request_firmware_nowait(
1233 fw_work->device = device; 1271 fw_work->device = device;
1234 fw_work->context = context; 1272 fw_work->context = context;
1235 fw_work->cont = cont; 1273 fw_work->cont = cont;
1236 fw_work->uevent = uevent; 1274 fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
1275 (uevent ? FW_OPT_UEVENT : 0);
1237 1276
1238 if (!try_module_get(module)) { 1277 if (!try_module_get(module)) {
1239 kfree(fw_work); 1278 kfree(fw_work);
diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
index eb26d62e5188..e0f1cb3d3598 100644
--- a/drivers/firmware/dmi-sysfs.c
+++ b/drivers/firmware/dmi-sysfs.c
@@ -553,7 +553,7 @@ static const struct bin_attribute dmi_entry_raw_attr = {
553static void dmi_sysfs_entry_release(struct kobject *kobj) 553static void dmi_sysfs_entry_release(struct kobject *kobj)
554{ 554{
555 struct dmi_sysfs_entry *entry = to_entry(kobj); 555 struct dmi_sysfs_entry *entry = to_entry(kobj);
556 sysfs_remove_bin_file(&entry->kobj, &dmi_entry_raw_attr); 556
557 spin_lock(&entry_list_lock); 557 spin_lock(&entry_list_lock);
558 list_del(&entry->list); 558 list_del(&entry->list);
559 spin_unlock(&entry_list_lock); 559 spin_unlock(&entry_list_lock);
@@ -685,6 +685,7 @@ static void __exit dmi_sysfs_exit(void)
685 pr_debug("dmi-sysfs: unloading.\n"); 685 pr_debug("dmi-sysfs: unloading.\n");
686 cleanup_entry_list(); 686 cleanup_entry_list();
687 kset_unregister(dmi_kset); 687 kset_unregister(dmi_kset);
688 kobject_del(dmi_kobj);
688 kobject_put(dmi_kobj); 689 kobject_put(dmi_kobj);
689} 690}
690 691
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 85f772c0b26a..c8a7c810bade 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -393,7 +393,7 @@ static const DEVICE_ATTR(value, 0644,
393 393
394static irqreturn_t gpio_sysfs_irq(int irq, void *priv) 394static irqreturn_t gpio_sysfs_irq(int irq, void *priv)
395{ 395{
396 struct sysfs_dirent *value_sd = priv; 396 struct kernfs_node *value_sd = priv;
397 397
398 sysfs_notify_dirent(value_sd); 398 sysfs_notify_dirent(value_sd);
399 return IRQ_HANDLED; 399 return IRQ_HANDLED;
@@ -402,7 +402,7 @@ static irqreturn_t gpio_sysfs_irq(int irq, void *priv)
402static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, 402static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
403 unsigned long gpio_flags) 403 unsigned long gpio_flags)
404{ 404{
405 struct sysfs_dirent *value_sd; 405 struct kernfs_node *value_sd;
406 unsigned long irq_flags; 406 unsigned long irq_flags;
407 int ret, irq, id; 407 int ret, irq, id;
408 408
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 12dc29ba7399..4195a01b1535 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1635,7 +1635,7 @@ int bitmap_create(struct mddev *mddev)
1635 sector_t blocks = mddev->resync_max_sectors; 1635 sector_t blocks = mddev->resync_max_sectors;
1636 struct file *file = mddev->bitmap_info.file; 1636 struct file *file = mddev->bitmap_info.file;
1637 int err; 1637 int err;
1638 struct sysfs_dirent *bm = NULL; 1638 struct kernfs_node *bm = NULL;
1639 1639
1640 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1640 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1641 1641
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index df4aeb6ac6f0..30210b9c4ef9 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -225,7 +225,7 @@ struct bitmap {
225 wait_queue_head_t overflow_wait; 225 wait_queue_head_t overflow_wait;
226 wait_queue_head_t behind_wait; 226 wait_queue_head_t behind_wait;
227 227
228 struct sysfs_dirent *sysfs_can_clear; 228 struct kernfs_node *sysfs_can_clear;
229}; 229};
230 230
231/* the bitmap API */ 231/* the bitmap API */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 0095ec84ffc7..07bba96de260 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -106,7 +106,7 @@ struct md_rdev {
106 */ 106 */
107 struct work_struct del_work; /* used for delayed sysfs removal */ 107 struct work_struct del_work; /* used for delayed sysfs removal */
108 108
109 struct sysfs_dirent *sysfs_state; /* handle for 'state' 109 struct kernfs_node *sysfs_state; /* handle for 'state'
110 * sysfs entry */ 110 * sysfs entry */
111 111
112 struct badblocks { 112 struct badblocks {
@@ -379,10 +379,10 @@ struct mddev {
379 sector_t resync_max; /* resync should pause 379 sector_t resync_max; /* resync should pause
380 * when it gets here */ 380 * when it gets here */
381 381
382 struct sysfs_dirent *sysfs_state; /* handle for 'array_state' 382 struct kernfs_node *sysfs_state; /* handle for 'array_state'
383 * file in sysfs. 383 * file in sysfs.
384 */ 384 */
385 struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ 385 struct kernfs_node *sysfs_action; /* handle for 'sync_action' */
386 386
387 struct work_struct del_work; /* used for delayed sysfs removal */ 387 struct work_struct del_work; /* used for delayed sysfs removal */
388 388
@@ -501,13 +501,13 @@ struct md_sysfs_entry {
501}; 501};
502extern struct attribute_group md_bitmap_group; 502extern struct attribute_group md_bitmap_group;
503 503
504static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) 504static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
505{ 505{
506 if (sd) 506 if (sd)
507 return sysfs_get_dirent(sd, name); 507 return sysfs_get_dirent(sd, name);
508 return sd; 508 return sd;
509} 509}
510static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) 510static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
511{ 511{
512 if (sd) 512 if (sd)
513 sysfs_notify_dirent(sd); 513 sysfs_notify_dirent(sd);
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
index b2da289320c9..1a6edce2ecde 100644
--- a/drivers/misc/mic/host/mic_device.h
+++ b/drivers/misc/mic/host/mic_device.h
@@ -112,7 +112,7 @@ struct mic_device {
112 struct work_struct shutdown_work; 112 struct work_struct shutdown_work;
113 u8 state; 113 u8 state;
114 u8 shutdown_status; 114 u8 shutdown_status;
115 struct sysfs_dirent *state_sysfs; 115 struct kernfs_node *state_sysfs;
116 struct completion reset_wait; 116 struct completion reset_wait;
117 void *log_buf_addr; 117 void *log_buf_addr;
118 int *log_buf_len; 118 int *log_buf_len;
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3ec28f..39a824f44e7c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
53obj-y += quota/ 53obj-y += quota/
54 54
55obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
56obj-$(CONFIG_SYSFS) += sysfs/ 56obj-$(CONFIG_SYSFS) += sysfs/ kernfs/
57obj-$(CONFIG_CONFIGFS_FS) += configfs/ 57obj-$(CONFIG_CONFIGFS_FS) += configfs/
58obj-y += devpts/ 58obj-y += devpts/
59 59
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile
new file mode 100644
index 000000000000..674337c76673
--- /dev/null
+++ b/fs/kernfs/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the kernfs pseudo filesystem
3#
4
5obj-y := mount.o inode.o dir.o file.o symlink.o
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
new file mode 100644
index 000000000000..5104cf5d25c5
--- /dev/null
+++ b/fs/kernfs/dir.c
@@ -0,0 +1,1073 @@
1/*
2 * fs/kernfs/dir.c - kernfs directory implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/idr.h>
14#include <linux/slab.h>
15#include <linux/security.h>
16#include <linux/hash.h>
17
18#include "kernfs-internal.h"
19
20DEFINE_MUTEX(kernfs_mutex);
21
22#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
23
24/**
25 * kernfs_name_hash
26 * @name: Null terminated string to hash
27 * @ns: Namespace tag to hash
28 *
29 * Returns a 31-bit hash of ns + name (so it fits in an off_t)
30 */
31static unsigned int kernfs_name_hash(const char *name, const void *ns)
32{
33 unsigned long hash = init_name_hash();
34 unsigned int len = strlen(name);
35 while (len--)
36 hash = partial_name_hash(*name++, hash);
37 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
38 hash &= 0x7fffffffU;
39 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
40 if (hash < 1)
41 hash += 2;
42 if (hash >= INT_MAX)
43 hash = INT_MAX - 1;
44 return hash;
45}
46
47static int kernfs_name_compare(unsigned int hash, const char *name,
48 const void *ns, const struct kernfs_node *kn)
49{
50 if (hash != kn->hash)
51 return hash - kn->hash;
52 if (ns != kn->ns)
53 return ns - kn->ns;
54 return strcmp(name, kn->name);
55}
56
57static int kernfs_sd_compare(const struct kernfs_node *left,
58 const struct kernfs_node *right)
59{
60 return kernfs_name_compare(left->hash, left->name, left->ns, right);
61}
62
63/**
64 * kernfs_link_sibling - link kernfs_node into sibling rbtree
65 * @kn: kernfs_node of interest
66 *
67 * Link @kn into its sibling rbtree which starts from
68 * @kn->parent->dir.children.
69 *
70 * Locking:
71 * mutex_lock(kernfs_mutex)
72 *
73 * RETURNS:
74 * 0 on success, -EEXIST on failure.
75 */
76static int kernfs_link_sibling(struct kernfs_node *kn)
77{
78 struct rb_node **node = &kn->parent->dir.children.rb_node;
79 struct rb_node *parent = NULL;
80
81 if (kernfs_type(kn) == KERNFS_DIR)
82 kn->parent->dir.subdirs++;
83
84 while (*node) {
85 struct kernfs_node *pos;
86 int result;
87
88 pos = rb_to_kn(*node);
89 parent = *node;
90 result = kernfs_sd_compare(kn, pos);
91 if (result < 0)
92 node = &pos->rb.rb_left;
93 else if (result > 0)
94 node = &pos->rb.rb_right;
95 else
96 return -EEXIST;
97 }
98 /* add new node and rebalance the tree */
99 rb_link_node(&kn->rb, parent, node);
100 rb_insert_color(&kn->rb, &kn->parent->dir.children);
101 return 0;
102}
103
104/**
105 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
106 * @kn: kernfs_node of interest
107 *
108 * Unlink @kn from its sibling rbtree which starts from
109 * kn->parent->dir.children.
110 *
111 * Locking:
112 * mutex_lock(kernfs_mutex)
113 */
114static void kernfs_unlink_sibling(struct kernfs_node *kn)
115{
116 if (kernfs_type(kn) == KERNFS_DIR)
117 kn->parent->dir.subdirs--;
118
119 rb_erase(&kn->rb, &kn->parent->dir.children);
120}
121
122/**
123 * kernfs_get_active - get an active reference to kernfs_node
124 * @kn: kernfs_node to get an active reference to
125 *
126 * Get an active reference of @kn. This function is noop if @kn
127 * is NULL.
128 *
129 * RETURNS:
130 * Pointer to @kn on success, NULL on failure.
131 */
132struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
133{
134 if (unlikely(!kn))
135 return NULL;
136
137 if (!atomic_inc_unless_negative(&kn->active))
138 return NULL;
139
140 if (kn->flags & KERNFS_LOCKDEP)
141 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
142 return kn;
143}
144
145/**
146 * kernfs_put_active - put an active reference to kernfs_node
147 * @kn: kernfs_node to put an active reference to
148 *
149 * Put an active reference to @kn. This function is noop if @kn
150 * is NULL.
151 */
152void kernfs_put_active(struct kernfs_node *kn)
153{
154 int v;
155
156 if (unlikely(!kn))
157 return;
158
159 if (kn->flags & KERNFS_LOCKDEP)
160 rwsem_release(&kn->dep_map, 1, _RET_IP_);
161 v = atomic_dec_return(&kn->active);
162 if (likely(v != KN_DEACTIVATED_BIAS))
163 return;
164
165 /*
166 * atomic_dec_return() is a mb(), we'll always see the updated
167 * kn->u.completion.
168 */
169 complete(kn->u.completion);
170}
171
172/**
173 * kernfs_deactivate - deactivate kernfs_node
174 * @kn: kernfs_node to deactivate
175 *
176 * Deny new active references and drain existing ones.
177 */
178static void kernfs_deactivate(struct kernfs_node *kn)
179{
180 DECLARE_COMPLETION_ONSTACK(wait);
181 int v;
182
183 BUG_ON(!(kn->flags & KERNFS_REMOVED));
184
185 if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
186 return;
187
188 kn->u.completion = (void *)&wait;
189
190 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
191 /* atomic_add_return() is a mb(), put_active() will always see
192 * the updated kn->u.completion.
193 */
194 v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
195
196 if (v != KN_DEACTIVATED_BIAS) {
197 lock_contended(&kn->dep_map, _RET_IP_);
198 wait_for_completion(&wait);
199 }
200
201 lock_acquired(&kn->dep_map, _RET_IP_);
202 rwsem_release(&kn->dep_map, 1, _RET_IP_);
203}
204
205/**
206 * kernfs_get - get a reference count on a kernfs_node
207 * @kn: the target kernfs_node
208 */
209void kernfs_get(struct kernfs_node *kn)
210{
211 if (kn) {
212 WARN_ON(!atomic_read(&kn->count));
213 atomic_inc(&kn->count);
214 }
215}
216EXPORT_SYMBOL_GPL(kernfs_get);
217
218/**
219 * kernfs_put - put a reference count on a kernfs_node
220 * @kn: the target kernfs_node
221 *
222 * Put a reference count of @kn and destroy it if it reached zero.
223 */
224void kernfs_put(struct kernfs_node *kn)
225{
226 struct kernfs_node *parent;
227 struct kernfs_root *root;
228
229 if (!kn || !atomic_dec_and_test(&kn->count))
230 return;
231 root = kernfs_root(kn);
232 repeat:
233 /* Moving/renaming is always done while holding reference.
234 * kn->parent won't change beneath us.
235 */
236 parent = kn->parent;
237
238 WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n",
239 parent ? parent->name : "", kn->name);
240
241 if (kernfs_type(kn) == KERNFS_LINK)
242 kernfs_put(kn->symlink.target_kn);
243 if (!(kn->flags & KERNFS_STATIC_NAME))
244 kfree(kn->name);
245 if (kn->iattr) {
246 if (kn->iattr->ia_secdata)
247 security_release_secctx(kn->iattr->ia_secdata,
248 kn->iattr->ia_secdata_len);
249 simple_xattrs_free(&kn->iattr->xattrs);
250 }
251 kfree(kn->iattr);
252 ida_simple_remove(&root->ino_ida, kn->ino);
253 kmem_cache_free(kernfs_node_cache, kn);
254
255 kn = parent;
256 if (kn) {
257 if (atomic_dec_and_test(&kn->count))
258 goto repeat;
259 } else {
260 /* just released the root kn, free @root too */
261 ida_destroy(&root->ino_ida);
262 kfree(root);
263 }
264}
265EXPORT_SYMBOL_GPL(kernfs_put);
266
267static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
268{
269 struct kernfs_node *kn;
270
271 if (flags & LOOKUP_RCU)
272 return -ECHILD;
273
274 /* Always perform fresh lookup for negatives */
275 if (!dentry->d_inode)
276 goto out_bad_unlocked;
277
278 kn = dentry->d_fsdata;
279 mutex_lock(&kernfs_mutex);
280
281 /* The kernfs node has been deleted */
282 if (kn->flags & KERNFS_REMOVED)
283 goto out_bad;
284
285 /* The kernfs node has been moved? */
286 if (dentry->d_parent->d_fsdata != kn->parent)
287 goto out_bad;
288
289 /* The kernfs node has been renamed */
290 if (strcmp(dentry->d_name.name, kn->name) != 0)
291 goto out_bad;
292
293 /* The kernfs node has been moved to a different namespace */
294 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
295 kernfs_info(dentry->d_sb)->ns != kn->ns)
296 goto out_bad;
297
298 mutex_unlock(&kernfs_mutex);
299out_valid:
300 return 1;
301out_bad:
302 mutex_unlock(&kernfs_mutex);
303out_bad_unlocked:
304 /*
305 * @dentry doesn't match the underlying kernfs node, drop the
306 * dentry and force lookup. If we have submounts we must allow the
307 * vfs caches to lie about the state of the filesystem to prevent
308 * leaks and other nasty things, so use check_submounts_and_drop()
309 * instead of d_drop().
310 */
311 if (check_submounts_and_drop(dentry) != 0)
312 goto out_valid;
313
314 return 0;
315}
316
317static void kernfs_dop_release(struct dentry *dentry)
318{
319 kernfs_put(dentry->d_fsdata);
320}
321
322const struct dentry_operations kernfs_dops = {
323 .d_revalidate = kernfs_dop_revalidate,
324 .d_release = kernfs_dop_release,
325};
326
327static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
328 const char *name, umode_t mode,
329 unsigned flags)
330{
331 char *dup_name = NULL;
332 struct kernfs_node *kn;
333 int ret;
334
335 if (!(flags & KERNFS_STATIC_NAME)) {
336 name = dup_name = kstrdup(name, GFP_KERNEL);
337 if (!name)
338 return NULL;
339 }
340
341 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
342 if (!kn)
343 goto err_out1;
344
345 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
346 if (ret < 0)
347 goto err_out2;
348 kn->ino = ret;
349
350 atomic_set(&kn->count, 1);
351 atomic_set(&kn->active, 0);
352
353 kn->name = name;
354 kn->mode = mode;
355 kn->flags = flags | KERNFS_REMOVED;
356
357 return kn;
358
359 err_out2:
360 kmem_cache_free(kernfs_node_cache, kn);
361 err_out1:
362 kfree(dup_name);
363 return NULL;
364}
365
366struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
367 const char *name, umode_t mode,
368 unsigned flags)
369{
370 struct kernfs_node *kn;
371
372 kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
373 if (kn) {
374 kernfs_get(parent);
375 kn->parent = parent;
376 }
377 return kn;
378}
379
380/**
381 * kernfs_addrm_start - prepare for kernfs_node add/remove
382 * @acxt: pointer to kernfs_addrm_cxt to be used
383 *
384 * This function is called when the caller is about to add or remove
385 * kernfs_node. This function acquires kernfs_mutex. @acxt is used
386 * to keep and pass context to other addrm functions.
387 *
388 * LOCKING:
389 * Kernel thread context (may sleep). kernfs_mutex is locked on
390 * return.
391 */
392void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
393 __acquires(kernfs_mutex)
394{
395 memset(acxt, 0, sizeof(*acxt));
396
397 mutex_lock(&kernfs_mutex);
398}
399
400/**
401 * kernfs_add_one - add kernfs_node to parent without warning
402 * @acxt: addrm context to use
403 * @kn: kernfs_node to be added
404 *
405 * The caller must already have initialized @kn->parent. This
406 * function increments nlink of the parent's inode if @kn is a
407 * directory and links it into the children list of the parent.
408 *
409 * This function should be called between calls to
410 * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
411 * the same @acxt as passed to kernfs_addrm_start().
412 *
413 * LOCKING:
414 * Determined by kernfs_addrm_start().
415 *
416 * RETURNS:
417 * 0 on success, -EEXIST if entry with the given name already
418 * exists.
419 */
420int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
421{
422 struct kernfs_node *parent = kn->parent;
423 bool has_ns = kernfs_ns_enabled(parent);
424 struct kernfs_iattrs *ps_iattr;
425 int ret;
426
427 if (has_ns != (bool)kn->ns) {
428 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
429 has_ns ? "required" : "invalid", parent->name, kn->name);
430 return -EINVAL;
431 }
432
433 if (kernfs_type(parent) != KERNFS_DIR)
434 return -EINVAL;
435
436 if (parent->flags & KERNFS_REMOVED)
437 return -ENOENT;
438
439 kn->hash = kernfs_name_hash(kn->name, kn->ns);
440
441 ret = kernfs_link_sibling(kn);
442 if (ret)
443 return ret;
444
445 /* Update timestamps on the parent */
446 ps_iattr = parent->iattr;
447 if (ps_iattr) {
448 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
449 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
450 }
451
452 /* Mark the entry added into directory tree */
453 kn->flags &= ~KERNFS_REMOVED;
454
455 return 0;
456}
457
458/**
459 * kernfs_remove_one - remove kernfs_node from parent
460 * @acxt: addrm context to use
461 * @kn: kernfs_node to be removed
462 *
463 * Mark @kn removed and drop nlink of parent inode if @kn is a
464 * directory. @kn is unlinked from the children list.
465 *
466 * This function should be called between calls to
467 * kernfs_addrm_start() and kernfs_addrm_finish() and should be
468 * passed the same @acxt as passed to kernfs_addrm_start().
469 *
470 * LOCKING:
471 * Determined by kernfs_addrm_start().
472 */
473static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
474 struct kernfs_node *kn)
475{
476 struct kernfs_iattrs *ps_iattr;
477
478 /*
479 * Removal can be called multiple times on the same node. Only the
480 * first invocation is effective and puts the base ref.
481 */
482 if (kn->flags & KERNFS_REMOVED)
483 return;
484
485 if (kn->parent) {
486 kernfs_unlink_sibling(kn);
487
488 /* Update timestamps on the parent */
489 ps_iattr = kn->parent->iattr;
490 if (ps_iattr) {
491 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
492 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
493 }
494 }
495
496 kn->flags |= KERNFS_REMOVED;
497 kn->u.removed_list = acxt->removed;
498 acxt->removed = kn;
499}
500
501/**
502 * kernfs_addrm_finish - finish up kernfs_node add/remove
503 * @acxt: addrm context to finish up
504 *
505 * Finish up kernfs_node add/remove. Resources acquired by
506 * kernfs_addrm_start() are released and removed kernfs_nodes are
507 * cleaned up.
508 *
509 * LOCKING:
510 * kernfs_mutex is released.
511 */
512void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
513 __releases(kernfs_mutex)
514{
515 /* release resources acquired by kernfs_addrm_start() */
516 mutex_unlock(&kernfs_mutex);
517
518 /* kill removed kernfs_nodes */
519 while (acxt->removed) {
520 struct kernfs_node *kn = acxt->removed;
521
522 acxt->removed = kn->u.removed_list;
523
524 kernfs_deactivate(kn);
525 kernfs_unmap_bin_file(kn);
526 kernfs_put(kn);
527 }
528}
529
530/**
531 * kernfs_find_ns - find kernfs_node with the given name
532 * @parent: kernfs_node to search under
533 * @name: name to look for
534 * @ns: the namespace tag to use
535 *
536 * Look for kernfs_node with name @name under @parent. Returns pointer to
537 * the found kernfs_node on success, %NULL on failure.
538 */
539static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
540 const unsigned char *name,
541 const void *ns)
542{
543 struct rb_node *node = parent->dir.children.rb_node;
544 bool has_ns = kernfs_ns_enabled(parent);
545 unsigned int hash;
546
547 lockdep_assert_held(&kernfs_mutex);
548
549 if (has_ns != (bool)ns) {
550 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
551 has_ns ? "required" : "invalid", parent->name, name);
552 return NULL;
553 }
554
555 hash = kernfs_name_hash(name, ns);
556 while (node) {
557 struct kernfs_node *kn;
558 int result;
559
560 kn = rb_to_kn(node);
561 result = kernfs_name_compare(hash, name, ns, kn);
562 if (result < 0)
563 node = node->rb_left;
564 else if (result > 0)
565 node = node->rb_right;
566 else
567 return kn;
568 }
569 return NULL;
570}
571
572/**
573 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
574 * @parent: kernfs_node to search under
575 * @name: name to look for
576 * @ns: the namespace tag to use
577 *
578 * Look for kernfs_node with name @name under @parent and get a reference
579 * if found. This function may sleep and returns pointer to the found
580 * kernfs_node on success, %NULL on failure.
581 */
582struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
583 const char *name, const void *ns)
584{
585 struct kernfs_node *kn;
586
587 mutex_lock(&kernfs_mutex);
588 kn = kernfs_find_ns(parent, name, ns);
589 kernfs_get(kn);
590 mutex_unlock(&kernfs_mutex);
591
592 return kn;
593}
594EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
595
596/**
597 * kernfs_create_root - create a new kernfs hierarchy
598 * @kdops: optional directory syscall operations for the hierarchy
599 * @priv: opaque data associated with the new directory
600 *
601 * Returns the root of the new hierarchy on success, ERR_PTR() value on
602 * failure.
603 */
604struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
605{
606 struct kernfs_root *root;
607 struct kernfs_node *kn;
608
609 root = kzalloc(sizeof(*root), GFP_KERNEL);
610 if (!root)
611 return ERR_PTR(-ENOMEM);
612
613 ida_init(&root->ino_ida);
614
615 kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
616 KERNFS_DIR);
617 if (!kn) {
618 ida_destroy(&root->ino_ida);
619 kfree(root);
620 return ERR_PTR(-ENOMEM);
621 }
622
623 kn->flags &= ~KERNFS_REMOVED;
624 kn->priv = priv;
625 kn->dir.root = root;
626
627 root->dir_ops = kdops;
628 root->kn = kn;
629
630 return root;
631}
632
633/**
634 * kernfs_destroy_root - destroy a kernfs hierarchy
635 * @root: root of the hierarchy to destroy
636 *
637 * Destroy the hierarchy anchored at @root by removing all existing
638 * directories and destroying @root.
639 */
640void kernfs_destroy_root(struct kernfs_root *root)
641{
642 kernfs_remove(root->kn); /* will also free @root */
643}
644
645/**
646 * kernfs_create_dir_ns - create a directory
647 * @parent: parent in which to create a new directory
648 * @name: name of the new directory
649 * @mode: mode of the new directory
650 * @priv: opaque data associated with the new directory
651 * @ns: optional namespace tag of the directory
652 *
653 * Returns the created node on success, ERR_PTR() value on failure.
654 */
655struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
656 const char *name, umode_t mode,
657 void *priv, const void *ns)
658{
659 struct kernfs_addrm_cxt acxt;
660 struct kernfs_node *kn;
661 int rc;
662
663 /* allocate */
664 kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
665 if (!kn)
666 return ERR_PTR(-ENOMEM);
667
668 kn->dir.root = parent->dir.root;
669 kn->ns = ns;
670 kn->priv = priv;
671
672 /* link in */
673 kernfs_addrm_start(&acxt);
674 rc = kernfs_add_one(&acxt, kn);
675 kernfs_addrm_finish(&acxt);
676
677 if (!rc)
678 return kn;
679
680 kernfs_put(kn);
681 return ERR_PTR(rc);
682}
683
684static struct dentry *kernfs_iop_lookup(struct inode *dir,
685 struct dentry *dentry,
686 unsigned int flags)
687{
688 struct dentry *ret;
689 struct kernfs_node *parent = dentry->d_parent->d_fsdata;
690 struct kernfs_node *kn;
691 struct inode *inode;
692 const void *ns = NULL;
693
694 mutex_lock(&kernfs_mutex);
695
696 if (kernfs_ns_enabled(parent))
697 ns = kernfs_info(dir->i_sb)->ns;
698
699 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
700
701 /* no such entry */
702 if (!kn) {
703 ret = NULL;
704 goto out_unlock;
705 }
706 kernfs_get(kn);
707 dentry->d_fsdata = kn;
708
709 /* attach dentry and inode */
710 inode = kernfs_get_inode(dir->i_sb, kn);
711 if (!inode) {
712 ret = ERR_PTR(-ENOMEM);
713 goto out_unlock;
714 }
715
716 /* instantiate and hash dentry */
717 ret = d_materialise_unique(dentry, inode);
718 out_unlock:
719 mutex_unlock(&kernfs_mutex);
720 return ret;
721}
722
723static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
724 umode_t mode)
725{
726 struct kernfs_node *parent = dir->i_private;
727 struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;
728
729 if (!kdops || !kdops->mkdir)
730 return -EPERM;
731
732 return kdops->mkdir(parent, dentry->d_name.name, mode);
733}
734
735static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
736{
737 struct kernfs_node *kn = dentry->d_fsdata;
738 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
739
740 if (!kdops || !kdops->rmdir)
741 return -EPERM;
742
743 return kdops->rmdir(kn);
744}
745
746static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
747 struct inode *new_dir, struct dentry *new_dentry)
748{
749 struct kernfs_node *kn = old_dentry->d_fsdata;
750 struct kernfs_node *new_parent = new_dir->i_private;
751 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
752
753 if (!kdops || !kdops->rename)
754 return -EPERM;
755
756 return kdops->rename(kn, new_parent, new_dentry->d_name.name);
757}
758
759const struct inode_operations kernfs_dir_iops = {
760 .lookup = kernfs_iop_lookup,
761 .permission = kernfs_iop_permission,
762 .setattr = kernfs_iop_setattr,
763 .getattr = kernfs_iop_getattr,
764 .setxattr = kernfs_iop_setxattr,
765 .removexattr = kernfs_iop_removexattr,
766 .getxattr = kernfs_iop_getxattr,
767 .listxattr = kernfs_iop_listxattr,
768
769 .mkdir = kernfs_iop_mkdir,
770 .rmdir = kernfs_iop_rmdir,
771 .rename = kernfs_iop_rename,
772};
773
774static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
775{
776 struct kernfs_node *last;
777
778 while (true) {
779 struct rb_node *rbn;
780
781 last = pos;
782
783 if (kernfs_type(pos) != KERNFS_DIR)
784 break;
785
786 rbn = rb_first(&pos->dir.children);
787 if (!rbn)
788 break;
789
790 pos = rb_to_kn(rbn);
791 }
792
793 return last;
794}
795
796/**
797 * kernfs_next_descendant_post - find the next descendant for post-order walk
798 * @pos: the current position (%NULL to initiate traversal)
799 * @root: kernfs_node whose descendants to walk
800 *
801 * Find the next descendant to visit for post-order traversal of @root's
802 * descendants. @root is included in the iteration and the last node to be
803 * visited.
804 */
805static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
806 struct kernfs_node *root)
807{
808 struct rb_node *rbn;
809
810 lockdep_assert_held(&kernfs_mutex);
811
812 /* if first iteration, visit leftmost descendant which may be root */
813 if (!pos)
814 return kernfs_leftmost_descendant(root);
815
816 /* if we visited @root, we're done */
817 if (pos == root)
818 return NULL;
819
820 /* if there's an unvisited sibling, visit its leftmost descendant */
821 rbn = rb_next(&pos->rb);
822 if (rbn)
823 return kernfs_leftmost_descendant(rb_to_kn(rbn));
824
825 /* no sibling left, visit parent */
826 return pos->parent;
827}
828
829static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
830 struct kernfs_node *kn)
831{
832 struct kernfs_node *pos, *next;
833
834 if (!kn)
835 return;
836
837 pr_debug("kernfs %s: removing\n", kn->name);
838
839 next = NULL;
840 do {
841 pos = next;
842 next = kernfs_next_descendant_post(pos, kn);
843 if (pos)
844 kernfs_remove_one(acxt, pos);
845 } while (next);
846}
847
848/**
849 * kernfs_remove - remove a kernfs_node recursively
850 * @kn: the kernfs_node to remove
851 *
852 * Remove @kn along with all its subdirectories and files.
853 */
854void kernfs_remove(struct kernfs_node *kn)
855{
856 struct kernfs_addrm_cxt acxt;
857
858 kernfs_addrm_start(&acxt);
859 __kernfs_remove(&acxt, kn);
860 kernfs_addrm_finish(&acxt);
861}
862
863/**
864 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
865 * @parent: parent of the target
866 * @name: name of the kernfs_node to remove
867 * @ns: namespace tag of the kernfs_node to remove
868 *
869 * Look for the kernfs_node with @name and @ns under @parent and remove it.
870 * Returns 0 on success, -ENOENT if such entry doesn't exist.
871 */
872int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
873 const void *ns)
874{
875 struct kernfs_addrm_cxt acxt;
876 struct kernfs_node *kn;
877
878 if (!parent) {
879 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
880 name);
881 return -ENOENT;
882 }
883
884 kernfs_addrm_start(&acxt);
885
886 kn = kernfs_find_ns(parent, name, ns);
887 if (kn)
888 __kernfs_remove(&acxt, kn);
889
890 kernfs_addrm_finish(&acxt);
891
892 if (kn)
893 return 0;
894 else
895 return -ENOENT;
896}
897
898/**
899 * kernfs_rename_ns - move and rename a kernfs_node
900 * @kn: target node
901 * @new_parent: new parent to put @sd under
902 * @new_name: new name
903 * @new_ns: new namespace tag
904 */
905int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
906 const char *new_name, const void *new_ns)
907{
908 int error;
909
910 mutex_lock(&kernfs_mutex);
911
912 error = -ENOENT;
913 if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
914 goto out;
915
916 error = 0;
917 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
918 (strcmp(kn->name, new_name) == 0))
919 goto out; /* nothing to rename */
920
921 error = -EEXIST;
922 if (kernfs_find_ns(new_parent, new_name, new_ns))
923 goto out;
924
925 /* rename kernfs_node */
926 if (strcmp(kn->name, new_name) != 0) {
927 error = -ENOMEM;
928 new_name = kstrdup(new_name, GFP_KERNEL);
929 if (!new_name)
930 goto out;
931
932 if (kn->flags & KERNFS_STATIC_NAME)
933 kn->flags &= ~KERNFS_STATIC_NAME;
934 else
935 kfree(kn->name);
936
937 kn->name = new_name;
938 }
939
940 /*
941 * Move to the appropriate place in the appropriate directories rbtree.
942 */
943 kernfs_unlink_sibling(kn);
944 kernfs_get(new_parent);
945 kernfs_put(kn->parent);
946 kn->ns = new_ns;
947 kn->hash = kernfs_name_hash(kn->name, kn->ns);
948 kn->parent = new_parent;
949 kernfs_link_sibling(kn);
950
951 error = 0;
952 out:
953 mutex_unlock(&kernfs_mutex);
954 return error;
955}
956
957/* Relationship between s_mode and the DT_xxx types */
958static inline unsigned char dt_type(struct kernfs_node *kn)
959{
960 return (kn->mode >> 12) & 15;
961}
962
963static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
964{
965 kernfs_put(filp->private_data);
966 return 0;
967}
968
969static struct kernfs_node *kernfs_dir_pos(const void *ns,
970 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
971{
972 if (pos) {
973 int valid = !(pos->flags & KERNFS_REMOVED) &&
974 pos->parent == parent && hash == pos->hash;
975 kernfs_put(pos);
976 if (!valid)
977 pos = NULL;
978 }
979 if (!pos && (hash > 1) && (hash < INT_MAX)) {
980 struct rb_node *node = parent->dir.children.rb_node;
981 while (node) {
982 pos = rb_to_kn(node);
983
984 if (hash < pos->hash)
985 node = node->rb_left;
986 else if (hash > pos->hash)
987 node = node->rb_right;
988 else
989 break;
990 }
991 }
992 /* Skip over entries in the wrong namespace */
993 while (pos && pos->ns != ns) {
994 struct rb_node *node = rb_next(&pos->rb);
995 if (!node)
996 pos = NULL;
997 else
998 pos = rb_to_kn(node);
999 }
1000 return pos;
1001}
1002
1003static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1004 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1005{
1006 pos = kernfs_dir_pos(ns, parent, ino, pos);
1007 if (pos)
1008 do {
1009 struct rb_node *node = rb_next(&pos->rb);
1010 if (!node)
1011 pos = NULL;
1012 else
1013 pos = rb_to_kn(node);
1014 } while (pos && pos->ns != ns);
1015 return pos;
1016}
1017
1018static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1019{
1020 struct dentry *dentry = file->f_path.dentry;
1021 struct kernfs_node *parent = dentry->d_fsdata;
1022 struct kernfs_node *pos = file->private_data;
1023 const void *ns = NULL;
1024
1025 if (!dir_emit_dots(file, ctx))
1026 return 0;
1027 mutex_lock(&kernfs_mutex);
1028
1029 if (kernfs_ns_enabled(parent))
1030 ns = kernfs_info(dentry->d_sb)->ns;
1031
1032 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1033 pos;
1034 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1035 const char *name = pos->name;
1036 unsigned int type = dt_type(pos);
1037 int len = strlen(name);
1038 ino_t ino = pos->ino;
1039
1040 ctx->pos = pos->hash;
1041 file->private_data = pos;
1042 kernfs_get(pos);
1043
1044 mutex_unlock(&kernfs_mutex);
1045 if (!dir_emit(ctx, name, len, ino, type))
1046 return 0;
1047 mutex_lock(&kernfs_mutex);
1048 }
1049 mutex_unlock(&kernfs_mutex);
1050 file->private_data = NULL;
1051 ctx->pos = INT_MAX;
1052 return 0;
1053}
1054
1055static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1056 int whence)
1057{
1058 struct inode *inode = file_inode(file);
1059 loff_t ret;
1060
1061 mutex_lock(&inode->i_mutex);
1062 ret = generic_file_llseek(file, offset, whence);
1063 mutex_unlock(&inode->i_mutex);
1064
1065 return ret;
1066}
1067
1068const struct file_operations kernfs_dir_fops = {
1069 .read = generic_read_dir,
1070 .iterate = kernfs_fop_readdir,
1071 .release = kernfs_dir_fop_release,
1072 .llseek = kernfs_dir_fop_llseek,
1073};
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
new file mode 100644
index 000000000000..dbf397bfdff2
--- /dev/null
+++ b/fs/kernfs/file.c
@@ -0,0 +1,867 @@
1/*
2 * fs/kernfs/file.c - kernfs file implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <linux/slab.h>
14#include <linux/poll.h>
15#include <linux/pagemap.h>
16#include <linux/sched.h>
17
18#include "kernfs-internal.h"
19
20/*
21 * There's one kernfs_open_file for each open file and one kernfs_open_node
22 * for each kernfs_node with one or more open files.
23 *
24 * kernfs_node->attr.open points to kernfs_open_node. attr.open is
25 * protected by kernfs_open_node_lock.
26 *
27 * filp->private_data points to seq_file whose ->private points to
28 * kernfs_open_file. kernfs_open_files are chained at
29 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
30 */
31static DEFINE_SPINLOCK(kernfs_open_node_lock);
32static DEFINE_MUTEX(kernfs_open_file_mutex);
33
34struct kernfs_open_node {
35 atomic_t refcnt;
36 atomic_t event;
37 wait_queue_head_t poll;
38 struct list_head files; /* goes through kernfs_open_file.list */
39};
40
41static struct kernfs_open_file *kernfs_of(struct file *file)
42{
43 return ((struct seq_file *)file->private_data)->private;
44}
45
46/*
47 * Determine the kernfs_ops for the given kernfs_node. This function must
48 * be called while holding an active reference.
49 */
50static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
51{
52 if (kn->flags & KERNFS_LOCKDEP)
53 lockdep_assert_held(kn);
54 return kn->attr.ops;
55}
56
57/*
58 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
59 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
60 * a seq_file iteration which is fully initialized with an active reference
61 * or an aborted kernfs_seq_start() due to get_active failure. The
62 * position pointer is the only context for each seq_file iteration and
63 * thus the stop condition should be encoded in it. As the return value is
64 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
65 * choice to indicate get_active failure.
66 *
67 * Unfortunately, this is complicated due to the optional custom seq_file
68 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
69 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
70 * custom seq_file operations and thus can't decide whether put_active
71 * should be performed or not only on ERR_PTR(-ENODEV).
72 *
73 * This is worked around by factoring out the custom seq_stop() and
74 * put_active part into kernfs_seq_stop_active(), skipping it from
75 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
76 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
77 * that kernfs_seq_stop_active() is skipped only after get_active failure.
78 */
79static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
80{
81 struct kernfs_open_file *of = sf->private;
82 const struct kernfs_ops *ops = kernfs_ops(of->kn);
83
84 if (ops->seq_stop)
85 ops->seq_stop(sf, v);
86 kernfs_put_active(of->kn);
87}
88
89static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
90{
91 struct kernfs_open_file *of = sf->private;
92 const struct kernfs_ops *ops;
93
94 /*
95 * @of->mutex nests outside active ref and is just to ensure that
96 * the ops aren't called concurrently for the same open file.
97 */
98 mutex_lock(&of->mutex);
99 if (!kernfs_get_active(of->kn))
100 return ERR_PTR(-ENODEV);
101
102 ops = kernfs_ops(of->kn);
103 if (ops->seq_start) {
104 void *next = ops->seq_start(sf, ppos);
105 /* see the comment above kernfs_seq_stop_active() */
106 if (next == ERR_PTR(-ENODEV))
107 kernfs_seq_stop_active(sf, next);
108 return next;
109 } else {
110 /*
111 * The same behavior and code as single_open(). Returns
112 * !NULL if pos is at the beginning; otherwise, NULL.
113 */
114 return NULL + !*ppos;
115 }
116}
117
118static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
119{
120 struct kernfs_open_file *of = sf->private;
121 const struct kernfs_ops *ops = kernfs_ops(of->kn);
122
123 if (ops->seq_next) {
124 void *next = ops->seq_next(sf, v, ppos);
125 /* see the comment above kernfs_seq_stop_active() */
126 if (next == ERR_PTR(-ENODEV))
127 kernfs_seq_stop_active(sf, next);
128 return next;
129 } else {
130 /*
131 * The same behavior and code as single_open(), always
132 * terminate after the initial read.
133 */
134 ++*ppos;
135 return NULL;
136 }
137}
138
139static void kernfs_seq_stop(struct seq_file *sf, void *v)
140{
141 struct kernfs_open_file *of = sf->private;
142
143 if (v != ERR_PTR(-ENODEV))
144 kernfs_seq_stop_active(sf, v);
145 mutex_unlock(&of->mutex);
146}
147
148static int kernfs_seq_show(struct seq_file *sf, void *v)
149{
150 struct kernfs_open_file *of = sf->private;
151
152 of->event = atomic_read(&of->kn->attr.open->event);
153
154 return of->kn->attr.ops->seq_show(sf, v);
155}
156
157static const struct seq_operations kernfs_seq_ops = {
158 .start = kernfs_seq_start,
159 .next = kernfs_seq_next,
160 .stop = kernfs_seq_stop,
161 .show = kernfs_seq_show,
162};
163
164/*
165 * As reading a bin file can have side-effects, the exact offset and bytes
166 * specified in read(2) call should be passed to the read callback making
167 * it difficult to use seq_file. Implement simplistic custom buffering for
168 * bin files.
169 */
170static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
171 char __user *user_buf, size_t count,
172 loff_t *ppos)
173{
174 ssize_t len = min_t(size_t, count, PAGE_SIZE);
175 const struct kernfs_ops *ops;
176 char *buf;
177
178 buf = kmalloc(len, GFP_KERNEL);
179 if (!buf)
180 return -ENOMEM;
181
182 /*
183 * @of->mutex nests outside active ref and is just to ensure that
184 * the ops aren't called concurrently for the same open file.
185 */
186 mutex_lock(&of->mutex);
187 if (!kernfs_get_active(of->kn)) {
188 len = -ENODEV;
189 mutex_unlock(&of->mutex);
190 goto out_free;
191 }
192
193 ops = kernfs_ops(of->kn);
194 if (ops->read)
195 len = ops->read(of, buf, len, *ppos);
196 else
197 len = -EINVAL;
198
199 kernfs_put_active(of->kn);
200 mutex_unlock(&of->mutex);
201
202 if (len < 0)
203 goto out_free;
204
205 if (copy_to_user(user_buf, buf, len)) {
206 len = -EFAULT;
207 goto out_free;
208 }
209
210 *ppos += len;
211
212 out_free:
213 kfree(buf);
214 return len;
215}
216
217/**
218 * kernfs_fop_read - kernfs vfs read callback
219 * @file: file pointer
220 * @user_buf: data to write
221 * @count: number of bytes
222 * @ppos: starting offset
223 */
224static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf,
225 size_t count, loff_t *ppos)
226{
227 struct kernfs_open_file *of = kernfs_of(file);
228
229 if (of->kn->flags & KERNFS_HAS_SEQ_SHOW)
230 return seq_read(file, user_buf, count, ppos);
231 else
232 return kernfs_file_direct_read(of, user_buf, count, ppos);
233}
234
235/**
236 * kernfs_fop_write - kernfs vfs write callback
237 * @file: file pointer
238 * @user_buf: data to write
239 * @count: number of bytes
240 * @ppos: starting offset
241 *
242 * Copy data in from userland and pass it to the matching kernfs write
243 * operation.
244 *
245 * There is no easy way for us to know if userspace is only doing a partial
246 * write, so we don't support them. We expect the entire buffer to come on
247 * the first write. Hint: if you're writing a value, first read the file,
248 * modify only the the value you're changing, then write entire buffer
249 * back.
250 */
251static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
252 size_t count, loff_t *ppos)
253{
254 struct kernfs_open_file *of = kernfs_of(file);
255 ssize_t len = min_t(size_t, count, PAGE_SIZE);
256 const struct kernfs_ops *ops;
257 char *buf;
258
259 buf = kmalloc(len + 1, GFP_KERNEL);
260 if (!buf)
261 return -ENOMEM;
262
263 if (copy_from_user(buf, user_buf, len)) {
264 len = -EFAULT;
265 goto out_free;
266 }
267 buf[len] = '\0'; /* guarantee string termination */
268
269 /*
270 * @of->mutex nests outside active ref and is just to ensure that
271 * the ops aren't called concurrently for the same open file.
272 */
273 mutex_lock(&of->mutex);
274 if (!kernfs_get_active(of->kn)) {
275 mutex_unlock(&of->mutex);
276 len = -ENODEV;
277 goto out_free;
278 }
279
280 ops = kernfs_ops(of->kn);
281 if (ops->write)
282 len = ops->write(of, buf, len, *ppos);
283 else
284 len = -EINVAL;
285
286 kernfs_put_active(of->kn);
287 mutex_unlock(&of->mutex);
288
289 if (len > 0)
290 *ppos += len;
291out_free:
292 kfree(buf);
293 return len;
294}
295
296static void kernfs_vma_open(struct vm_area_struct *vma)
297{
298 struct file *file = vma->vm_file;
299 struct kernfs_open_file *of = kernfs_of(file);
300
301 if (!of->vm_ops)
302 return;
303
304 if (!kernfs_get_active(of->kn))
305 return;
306
307 if (of->vm_ops->open)
308 of->vm_ops->open(vma);
309
310 kernfs_put_active(of->kn);
311}
312
313static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
314{
315 struct file *file = vma->vm_file;
316 struct kernfs_open_file *of = kernfs_of(file);
317 int ret;
318
319 if (!of->vm_ops)
320 return VM_FAULT_SIGBUS;
321
322 if (!kernfs_get_active(of->kn))
323 return VM_FAULT_SIGBUS;
324
325 ret = VM_FAULT_SIGBUS;
326 if (of->vm_ops->fault)
327 ret = of->vm_ops->fault(vma, vmf);
328
329 kernfs_put_active(of->kn);
330 return ret;
331}
332
333static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
334 struct vm_fault *vmf)
335{
336 struct file *file = vma->vm_file;
337 struct kernfs_open_file *of = kernfs_of(file);
338 int ret;
339
340 if (!of->vm_ops)
341 return VM_FAULT_SIGBUS;
342
343 if (!kernfs_get_active(of->kn))
344 return VM_FAULT_SIGBUS;
345
346 ret = 0;
347 if (of->vm_ops->page_mkwrite)
348 ret = of->vm_ops->page_mkwrite(vma, vmf);
349 else
350 file_update_time(file);
351
352 kernfs_put_active(of->kn);
353 return ret;
354}
355
356static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
357 void *buf, int len, int write)
358{
359 struct file *file = vma->vm_file;
360 struct kernfs_open_file *of = kernfs_of(file);
361 int ret;
362
363 if (!of->vm_ops)
364 return -EINVAL;
365
366 if (!kernfs_get_active(of->kn))
367 return -EINVAL;
368
369 ret = -EINVAL;
370 if (of->vm_ops->access)
371 ret = of->vm_ops->access(vma, addr, buf, len, write);
372
373 kernfs_put_active(of->kn);
374 return ret;
375}
376
377#ifdef CONFIG_NUMA
378static int kernfs_vma_set_policy(struct vm_area_struct *vma,
379 struct mempolicy *new)
380{
381 struct file *file = vma->vm_file;
382 struct kernfs_open_file *of = kernfs_of(file);
383 int ret;
384
385 if (!of->vm_ops)
386 return 0;
387
388 if (!kernfs_get_active(of->kn))
389 return -EINVAL;
390
391 ret = 0;
392 if (of->vm_ops->set_policy)
393 ret = of->vm_ops->set_policy(vma, new);
394
395 kernfs_put_active(of->kn);
396 return ret;
397}
398
399static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
400 unsigned long addr)
401{
402 struct file *file = vma->vm_file;
403 struct kernfs_open_file *of = kernfs_of(file);
404 struct mempolicy *pol;
405
406 if (!of->vm_ops)
407 return vma->vm_policy;
408
409 if (!kernfs_get_active(of->kn))
410 return vma->vm_policy;
411
412 pol = vma->vm_policy;
413 if (of->vm_ops->get_policy)
414 pol = of->vm_ops->get_policy(vma, addr);
415
416 kernfs_put_active(of->kn);
417 return pol;
418}
419
420static int kernfs_vma_migrate(struct vm_area_struct *vma,
421 const nodemask_t *from, const nodemask_t *to,
422 unsigned long flags)
423{
424 struct file *file = vma->vm_file;
425 struct kernfs_open_file *of = kernfs_of(file);
426 int ret;
427
428 if (!of->vm_ops)
429 return 0;
430
431 if (!kernfs_get_active(of->kn))
432 return 0;
433
434 ret = 0;
435 if (of->vm_ops->migrate)
436 ret = of->vm_ops->migrate(vma, from, to, flags);
437
438 kernfs_put_active(of->kn);
439 return ret;
440}
441#endif
442
443static const struct vm_operations_struct kernfs_vm_ops = {
444 .open = kernfs_vma_open,
445 .fault = kernfs_vma_fault,
446 .page_mkwrite = kernfs_vma_page_mkwrite,
447 .access = kernfs_vma_access,
448#ifdef CONFIG_NUMA
449 .set_policy = kernfs_vma_set_policy,
450 .get_policy = kernfs_vma_get_policy,
451 .migrate = kernfs_vma_migrate,
452#endif
453};
454
455static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
456{
457 struct kernfs_open_file *of = kernfs_of(file);
458 const struct kernfs_ops *ops;
459 int rc;
460
461 /*
462 * mmap path and of->mutex are prone to triggering spurious lockdep
463 * warnings and we don't want to add spurious locking dependency
464 * between the two. Check whether mmap is actually implemented
465 * without grabbing @of->mutex by testing HAS_MMAP flag. See the
466 * comment in kernfs_file_open() for more details.
467 */
468 if (!(of->kn->flags & KERNFS_HAS_MMAP))
469 return -ENODEV;
470
471 mutex_lock(&of->mutex);
472
473 rc = -ENODEV;
474 if (!kernfs_get_active(of->kn))
475 goto out_unlock;
476
477 ops = kernfs_ops(of->kn);
478 rc = ops->mmap(of, vma);
479
480 /*
481 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
482 * to satisfy versions of X which crash if the mmap fails: that
483 * substitutes a new vm_file, and we don't then want bin_vm_ops.
484 */
485 if (vma->vm_file != file)
486 goto out_put;
487
488 rc = -EINVAL;
489 if (of->mmapped && of->vm_ops != vma->vm_ops)
490 goto out_put;
491
492 /*
493 * It is not possible to successfully wrap close.
494 * So error if someone is trying to use close.
495 */
496 rc = -EINVAL;
497 if (vma->vm_ops && vma->vm_ops->close)
498 goto out_put;
499
500 rc = 0;
501 of->mmapped = 1;
502 of->vm_ops = vma->vm_ops;
503 vma->vm_ops = &kernfs_vm_ops;
504out_put:
505 kernfs_put_active(of->kn);
506out_unlock:
507 mutex_unlock(&of->mutex);
508
509 return rc;
510}
511
512/**
513 * kernfs_get_open_node - get or create kernfs_open_node
514 * @kn: target kernfs_node
515 * @of: kernfs_open_file for this instance of open
516 *
517 * If @kn->attr.open exists, increment its reference count; otherwise,
518 * create one. @of is chained to the files list.
519 *
520 * LOCKING:
521 * Kernel thread context (may sleep).
522 *
523 * RETURNS:
524 * 0 on success, -errno on failure.
525 */
526static int kernfs_get_open_node(struct kernfs_node *kn,
527 struct kernfs_open_file *of)
528{
529 struct kernfs_open_node *on, *new_on = NULL;
530
531 retry:
532 mutex_lock(&kernfs_open_file_mutex);
533 spin_lock_irq(&kernfs_open_node_lock);
534
535 if (!kn->attr.open && new_on) {
536 kn->attr.open = new_on;
537 new_on = NULL;
538 }
539
540 on = kn->attr.open;
541 if (on) {
542 atomic_inc(&on->refcnt);
543 list_add_tail(&of->list, &on->files);
544 }
545
546 spin_unlock_irq(&kernfs_open_node_lock);
547 mutex_unlock(&kernfs_open_file_mutex);
548
549 if (on) {
550 kfree(new_on);
551 return 0;
552 }
553
554 /* not there, initialize a new one and retry */
555 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
556 if (!new_on)
557 return -ENOMEM;
558
559 atomic_set(&new_on->refcnt, 0);
560 atomic_set(&new_on->event, 1);
561 init_waitqueue_head(&new_on->poll);
562 INIT_LIST_HEAD(&new_on->files);
563 goto retry;
564}
565
566/**
567 * kernfs_put_open_node - put kernfs_open_node
568 * @kn: target kernfs_nodet
569 * @of: associated kernfs_open_file
570 *
571 * Put @kn->attr.open and unlink @of from the files list. If
572 * reference count reaches zero, disassociate and free it.
573 *
574 * LOCKING:
575 * None.
576 */
577static void kernfs_put_open_node(struct kernfs_node *kn,
578 struct kernfs_open_file *of)
579{
580 struct kernfs_open_node *on = kn->attr.open;
581 unsigned long flags;
582
583 mutex_lock(&kernfs_open_file_mutex);
584 spin_lock_irqsave(&kernfs_open_node_lock, flags);
585
586 if (of)
587 list_del(&of->list);
588
589 if (atomic_dec_and_test(&on->refcnt))
590 kn->attr.open = NULL;
591 else
592 on = NULL;
593
594 spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
595 mutex_unlock(&kernfs_open_file_mutex);
596
597 kfree(on);
598}
599
600static int kernfs_fop_open(struct inode *inode, struct file *file)
601{
602 struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
603 const struct kernfs_ops *ops;
604 struct kernfs_open_file *of;
605 bool has_read, has_write, has_mmap;
606 int error = -EACCES;
607
608 if (!kernfs_get_active(kn))
609 return -ENODEV;
610
611 ops = kernfs_ops(kn);
612
613 has_read = ops->seq_show || ops->read || ops->mmap;
614 has_write = ops->write || ops->mmap;
615 has_mmap = ops->mmap;
616
617 /* check perms and supported operations */
618 if ((file->f_mode & FMODE_WRITE) &&
619 (!(inode->i_mode & S_IWUGO) || !has_write))
620 goto err_out;
621
622 if ((file->f_mode & FMODE_READ) &&
623 (!(inode->i_mode & S_IRUGO) || !has_read))
624 goto err_out;
625
626 /* allocate a kernfs_open_file for the file */
627 error = -ENOMEM;
628 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
629 if (!of)
630 goto err_out;
631
632 /*
633 * The following is done to give a different lockdep key to
634 * @of->mutex for files which implement mmap. This is a rather
635 * crude way to avoid false positive lockdep warning around
636 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
637 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
638 * which mm->mmap_sem nests, while holding @of->mutex. As each
639 * open file has a separate mutex, it's okay as long as those don't
640 * happen on the same file. At this point, we can't easily give
641 * each file a separate locking class. Let's differentiate on
642 * whether the file has mmap or not for now.
643 *
644 * Both paths of the branch look the same. They're supposed to
645 * look that way and give @of->mutex different static lockdep keys.
646 */
647 if (has_mmap)
648 mutex_init(&of->mutex);
649 else
650 mutex_init(&of->mutex);
651
652 of->kn = kn;
653 of->file = file;
654
655 /*
656 * Always instantiate seq_file even if read access doesn't use
657 * seq_file or is not requested. This unifies private data access
658 * and readable regular files are the vast majority anyway.
659 */
660 if (ops->seq_show)
661 error = seq_open(file, &kernfs_seq_ops);
662 else
663 error = seq_open(file, NULL);
664 if (error)
665 goto err_free;
666
667 ((struct seq_file *)file->private_data)->private = of;
668
669 /* seq_file clears PWRITE unconditionally, restore it if WRITE */
670 if (file->f_mode & FMODE_WRITE)
671 file->f_mode |= FMODE_PWRITE;
672
673 /* make sure we have open node struct */
674 error = kernfs_get_open_node(kn, of);
675 if (error)
676 goto err_close;
677
678 /* open succeeded, put active references */
679 kernfs_put_active(kn);
680 return 0;
681
682err_close:
683 seq_release(inode, file);
684err_free:
685 kfree(of);
686err_out:
687 kernfs_put_active(kn);
688 return error;
689}
690
691static int kernfs_fop_release(struct inode *inode, struct file *filp)
692{
693 struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
694 struct kernfs_open_file *of = kernfs_of(filp);
695
696 kernfs_put_open_node(kn, of);
697 seq_release(inode, filp);
698 kfree(of);
699
700 return 0;
701}
702
703void kernfs_unmap_bin_file(struct kernfs_node *kn)
704{
705 struct kernfs_open_node *on;
706 struct kernfs_open_file *of;
707
708 if (!(kn->flags & KERNFS_HAS_MMAP))
709 return;
710
711 spin_lock_irq(&kernfs_open_node_lock);
712 on = kn->attr.open;
713 if (on)
714 atomic_inc(&on->refcnt);
715 spin_unlock_irq(&kernfs_open_node_lock);
716 if (!on)
717 return;
718
719 mutex_lock(&kernfs_open_file_mutex);
720 list_for_each_entry(of, &on->files, list) {
721 struct inode *inode = file_inode(of->file);
722 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
723 }
724 mutex_unlock(&kernfs_open_file_mutex);
725
726 kernfs_put_open_node(kn, NULL);
727}
728
729/*
730 * Kernfs attribute files are pollable. The idea is that you read
731 * the content and then you use 'poll' or 'select' to wait for
732 * the content to change. When the content changes (assuming the
733 * manager for the kobject supports notification), poll will
734 * return POLLERR|POLLPRI, and select will return the fd whether
735 * it is waiting for read, write, or exceptions.
736 * Once poll/select indicates that the value has changed, you
737 * need to close and re-open the file, or seek to 0 and read again.
738 * Reminder: this only works for attributes which actively support
739 * it, and it is not possible to test an attribute from userspace
740 * to see if it supports poll (Neither 'poll' nor 'select' return
741 * an appropriate error code). When in doubt, set a suitable timeout value.
742 */
743static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
744{
745 struct kernfs_open_file *of = kernfs_of(filp);
746 struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
747 struct kernfs_open_node *on = kn->attr.open;
748
749 /* need parent for the kobj, grab both */
750 if (!kernfs_get_active(kn))
751 goto trigger;
752
753 poll_wait(filp, &on->poll, wait);
754
755 kernfs_put_active(kn);
756
757 if (of->event != atomic_read(&on->event))
758 goto trigger;
759
760 return DEFAULT_POLLMASK;
761
762 trigger:
763 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
764}
765
766/**
767 * kernfs_notify - notify a kernfs file
768 * @kn: file to notify
769 *
770 * Notify @kn such that poll(2) on @kn wakes up.
771 */
772void kernfs_notify(struct kernfs_node *kn)
773{
774 struct kernfs_open_node *on;
775 unsigned long flags;
776
777 spin_lock_irqsave(&kernfs_open_node_lock, flags);
778
779 if (!WARN_ON(kernfs_type(kn) != KERNFS_FILE)) {
780 on = kn->attr.open;
781 if (on) {
782 atomic_inc(&on->event);
783 wake_up_interruptible(&on->poll);
784 }
785 }
786
787 spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
788}
789EXPORT_SYMBOL_GPL(kernfs_notify);
790
791const struct file_operations kernfs_file_fops = {
792 .read = kernfs_fop_read,
793 .write = kernfs_fop_write,
794 .llseek = generic_file_llseek,
795 .mmap = kernfs_fop_mmap,
796 .open = kernfs_fop_open,
797 .release = kernfs_fop_release,
798 .poll = kernfs_fop_poll,
799};
800
801/**
802 * __kernfs_create_file - kernfs internal function to create a file
803 * @parent: directory to create the file in
804 * @name: name of the file
805 * @mode: mode of the file
806 * @size: size of the file
807 * @ops: kernfs operations for the file
808 * @priv: private data for the file
809 * @ns: optional namespace tag of the file
810 * @static_name: don't copy file name
811 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
812 *
813 * Returns the created node on success, ERR_PTR() value on error.
814 */
815struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
816 const char *name,
817 umode_t mode, loff_t size,
818 const struct kernfs_ops *ops,
819 void *priv, const void *ns,
820 bool name_is_static,
821 struct lock_class_key *key)
822{
823 struct kernfs_addrm_cxt acxt;
824 struct kernfs_node *kn;
825 unsigned flags;
826 int rc;
827
828 flags = KERNFS_FILE;
829 if (name_is_static)
830 flags |= KERNFS_STATIC_NAME;
831
832 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
833 if (!kn)
834 return ERR_PTR(-ENOMEM);
835
836 kn->attr.ops = ops;
837 kn->attr.size = size;
838 kn->ns = ns;
839 kn->priv = priv;
840
841#ifdef CONFIG_DEBUG_LOCK_ALLOC
842 if (key) {
843 lockdep_init_map(&kn->dep_map, "s_active", key, 0);
844 kn->flags |= KERNFS_LOCKDEP;
845 }
846#endif
847
848 /*
849 * kn->attr.ops is accesible only while holding active ref. We
850 * need to know whether some ops are implemented outside active
851 * ref. Cache their existence in flags.
852 */
853 if (ops->seq_show)
854 kn->flags |= KERNFS_HAS_SEQ_SHOW;
855 if (ops->mmap)
856 kn->flags |= KERNFS_HAS_MMAP;
857
858 kernfs_addrm_start(&acxt);
859 rc = kernfs_add_one(&acxt, kn);
860 kernfs_addrm_finish(&acxt);
861
862 if (rc) {
863 kernfs_put(kn);
864 return ERR_PTR(rc);
865 }
866 return kn;
867}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
new file mode 100644
index 000000000000..e55126f85bd2
--- /dev/null
+++ b/fs/kernfs/inode.c
@@ -0,0 +1,377 @@
1/*
2 * fs/kernfs/inode.c - kernfs inode implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/pagemap.h>
12#include <linux/backing-dev.h>
13#include <linux/capability.h>
14#include <linux/errno.h>
15#include <linux/slab.h>
16#include <linux/xattr.h>
17#include <linux/security.h>
18
19#include "kernfs-internal.h"
20
21static const struct address_space_operations kernfs_aops = {
22 .readpage = simple_readpage,
23 .write_begin = simple_write_begin,
24 .write_end = simple_write_end,
25};
26
27static struct backing_dev_info kernfs_bdi = {
28 .name = "kernfs",
29 .ra_pages = 0, /* No readahead */
30 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
31};
32
33static const struct inode_operations kernfs_iops = {
34 .permission = kernfs_iop_permission,
35 .setattr = kernfs_iop_setattr,
36 .getattr = kernfs_iop_getattr,
37 .setxattr = kernfs_iop_setxattr,
38 .removexattr = kernfs_iop_removexattr,
39 .getxattr = kernfs_iop_getxattr,
40 .listxattr = kernfs_iop_listxattr,
41};
42
43void __init kernfs_inode_init(void)
44{
45 if (bdi_init(&kernfs_bdi))
46 panic("failed to init kernfs_bdi");
47}
48
49static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
50{
51 struct iattr *iattrs;
52
53 if (kn->iattr)
54 return kn->iattr;
55
56 kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
57 if (!kn->iattr)
58 return NULL;
59 iattrs = &kn->iattr->ia_iattr;
60
61 /* assign default attributes */
62 iattrs->ia_mode = kn->mode;
63 iattrs->ia_uid = GLOBAL_ROOT_UID;
64 iattrs->ia_gid = GLOBAL_ROOT_GID;
65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
66
67 simple_xattrs_init(&kn->iattr->xattrs);
68
69 return kn->iattr;
70}
71
72static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
73{
74 struct kernfs_iattrs *attrs;
75 struct iattr *iattrs;
76 unsigned int ia_valid = iattr->ia_valid;
77
78 attrs = kernfs_iattrs(kn);
79 if (!attrs)
80 return -ENOMEM;
81
82 iattrs = &attrs->ia_iattr;
83
84 if (ia_valid & ATTR_UID)
85 iattrs->ia_uid = iattr->ia_uid;
86 if (ia_valid & ATTR_GID)
87 iattrs->ia_gid = iattr->ia_gid;
88 if (ia_valid & ATTR_ATIME)
89 iattrs->ia_atime = iattr->ia_atime;
90 if (ia_valid & ATTR_MTIME)
91 iattrs->ia_mtime = iattr->ia_mtime;
92 if (ia_valid & ATTR_CTIME)
93 iattrs->ia_ctime = iattr->ia_ctime;
94 if (ia_valid & ATTR_MODE) {
95 umode_t mode = iattr->ia_mode;
96 iattrs->ia_mode = kn->mode = mode;
97 }
98 return 0;
99}
100
101/**
102 * kernfs_setattr - set iattr on a node
103 * @kn: target node
104 * @iattr: iattr to set
105 *
106 * Returns 0 on success, -errno on failure.
107 */
108int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
109{
110 int ret;
111
112 mutex_lock(&kernfs_mutex);
113 ret = __kernfs_setattr(kn, iattr);
114 mutex_unlock(&kernfs_mutex);
115 return ret;
116}
117
118int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
119{
120 struct inode *inode = dentry->d_inode;
121 struct kernfs_node *kn = dentry->d_fsdata;
122 int error;
123
124 if (!kn)
125 return -EINVAL;
126
127 mutex_lock(&kernfs_mutex);
128 error = inode_change_ok(inode, iattr);
129 if (error)
130 goto out;
131
132 error = __kernfs_setattr(kn, iattr);
133 if (error)
134 goto out;
135
136 /* this ignores size changes */
137 setattr_copy(inode, iattr);
138
139out:
140 mutex_unlock(&kernfs_mutex);
141 return error;
142}
143
144static int kernfs_node_setsecdata(struct kernfs_node *kn, void **secdata,
145 u32 *secdata_len)
146{
147 struct kernfs_iattrs *attrs;
148 void *old_secdata;
149 size_t old_secdata_len;
150
151 attrs = kernfs_iattrs(kn);
152 if (!attrs)
153 return -ENOMEM;
154
155 old_secdata = attrs->ia_secdata;
156 old_secdata_len = attrs->ia_secdata_len;
157
158 attrs->ia_secdata = *secdata;
159 attrs->ia_secdata_len = *secdata_len;
160
161 *secdata = old_secdata;
162 *secdata_len = old_secdata_len;
163 return 0;
164}
165
166int kernfs_iop_setxattr(struct dentry *dentry, const char *name,
167 const void *value, size_t size, int flags)
168{
169 struct kernfs_node *kn = dentry->d_fsdata;
170 struct kernfs_iattrs *attrs;
171 void *secdata;
172 int error;
173 u32 secdata_len = 0;
174
175 attrs = kernfs_iattrs(kn);
176 if (!attrs)
177 return -ENOMEM;
178
179 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
180 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
181 error = security_inode_setsecurity(dentry->d_inode, suffix,
182 value, size, flags);
183 if (error)
184 return error;
185 error = security_inode_getsecctx(dentry->d_inode,
186 &secdata, &secdata_len);
187 if (error)
188 return error;
189
190 mutex_lock(&kernfs_mutex);
191 error = kernfs_node_setsecdata(kn, &secdata, &secdata_len);
192 mutex_unlock(&kernfs_mutex);
193
194 if (secdata)
195 security_release_secctx(secdata, secdata_len);
196 return error;
197 } else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
198 return simple_xattr_set(&attrs->xattrs, name, value, size,
199 flags);
200 }
201
202 return -EINVAL;
203}
204
205int kernfs_iop_removexattr(struct dentry *dentry, const char *name)
206{
207 struct kernfs_node *kn = dentry->d_fsdata;
208 struct kernfs_iattrs *attrs;
209
210 attrs = kernfs_iattrs(kn);
211 if (!attrs)
212 return -ENOMEM;
213
214 return simple_xattr_remove(&attrs->xattrs, name);
215}
216
217ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
218 size_t size)
219{
220 struct kernfs_node *kn = dentry->d_fsdata;
221 struct kernfs_iattrs *attrs;
222
223 attrs = kernfs_iattrs(kn);
224 if (!attrs)
225 return -ENOMEM;
226
227 return simple_xattr_get(&attrs->xattrs, name, buf, size);
228}
229
230ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
231{
232 struct kernfs_node *kn = dentry->d_fsdata;
233 struct kernfs_iattrs *attrs;
234
235 attrs = kernfs_iattrs(kn);
236 if (!attrs)
237 return -ENOMEM;
238
239 return simple_xattr_list(&attrs->xattrs, buf, size);
240}
241
242static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
243{
244 inode->i_mode = mode;
245 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
246}
247
248static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
249{
250 inode->i_uid = iattr->ia_uid;
251 inode->i_gid = iattr->ia_gid;
252 inode->i_atime = iattr->ia_atime;
253 inode->i_mtime = iattr->ia_mtime;
254 inode->i_ctime = iattr->ia_ctime;
255}
256
257static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
258{
259 struct kernfs_iattrs *attrs = kn->iattr;
260
261 inode->i_mode = kn->mode;
262 if (attrs) {
263 /*
264 * kernfs_node has non-default attributes get them from
265 * persistent copy in kernfs_node.
266 */
267 set_inode_attr(inode, &attrs->ia_iattr);
268 security_inode_notifysecctx(inode, attrs->ia_secdata,
269 attrs->ia_secdata_len);
270 }
271
272 if (kernfs_type(kn) == KERNFS_DIR)
273 set_nlink(inode, kn->dir.subdirs + 2);
274}
275
276int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
277 struct kstat *stat)
278{
279 struct kernfs_node *kn = dentry->d_fsdata;
280 struct inode *inode = dentry->d_inode;
281
282 mutex_lock(&kernfs_mutex);
283 kernfs_refresh_inode(kn, inode);
284 mutex_unlock(&kernfs_mutex);
285
286 generic_fillattr(inode, stat);
287 return 0;
288}
289
290static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
291{
292 kernfs_get(kn);
293 inode->i_private = kn;
294 inode->i_mapping->a_ops = &kernfs_aops;
295 inode->i_mapping->backing_dev_info = &kernfs_bdi;
296 inode->i_op = &kernfs_iops;
297
298 set_default_inode_attr(inode, kn->mode);
299 kernfs_refresh_inode(kn, inode);
300
301 /* initialize inode according to type */
302 switch (kernfs_type(kn)) {
303 case KERNFS_DIR:
304 inode->i_op = &kernfs_dir_iops;
305 inode->i_fop = &kernfs_dir_fops;
306 break;
307 case KERNFS_FILE:
308 inode->i_size = kn->attr.size;
309 inode->i_fop = &kernfs_file_fops;
310 break;
311 case KERNFS_LINK:
312 inode->i_op = &kernfs_symlink_iops;
313 break;
314 default:
315 BUG();
316 }
317
318 unlock_new_inode(inode);
319}
320
321/**
322 * kernfs_get_inode - get inode for kernfs_node
323 * @sb: super block
324 * @kn: kernfs_node to allocate inode for
325 *
326 * Get inode for @kn. If such inode doesn't exist, a new inode is
327 * allocated and basics are initialized. New inode is returned
328 * locked.
329 *
330 * LOCKING:
331 * Kernel thread context (may sleep).
332 *
333 * RETURNS:
334 * Pointer to allocated inode on success, NULL on failure.
335 */
336struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
337{
338 struct inode *inode;
339
340 inode = iget_locked(sb, kn->ino);
341 if (inode && (inode->i_state & I_NEW))
342 kernfs_init_inode(kn, inode);
343
344 return inode;
345}
346
347/*
348 * The kernfs_node serves as both an inode and a directory entry for
349 * kernfs. To prevent the kernfs inode numbers from being freed
350 * prematurely we take a reference to kernfs_node from the kernfs inode. A
351 * super_operations.evict_inode() implementation is needed to drop that
352 * reference upon inode destruction.
353 */
354void kernfs_evict_inode(struct inode *inode)
355{
356 struct kernfs_node *kn = inode->i_private;
357
358 truncate_inode_pages(&inode->i_data, 0);
359 clear_inode(inode);
360 kernfs_put(kn);
361}
362
363int kernfs_iop_permission(struct inode *inode, int mask)
364{
365 struct kernfs_node *kn;
366
367 if (mask & MAY_NOT_BLOCK)
368 return -ECHILD;
369
370 kn = inode->i_private;
371
372 mutex_lock(&kernfs_mutex);
373 kernfs_refresh_inode(kn, inode);
374 mutex_unlock(&kernfs_mutex);
375
376 return generic_permission(inode, mask);
377}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
new file mode 100644
index 000000000000..eb536b76374a
--- /dev/null
+++ b/fs/kernfs/kernfs-internal.h
@@ -0,0 +1,122 @@
1/*
2 * fs/kernfs/kernfs-internal.h - kernfs internal header file
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#ifndef __KERNFS_INTERNAL_H
12#define __KERNFS_INTERNAL_H
13
14#include <linux/lockdep.h>
15#include <linux/fs.h>
16#include <linux/mutex.h>
17#include <linux/xattr.h>
18
19#include <linux/kernfs.h>
20
21struct kernfs_iattrs {
22 struct iattr ia_iattr;
23 void *ia_secdata;
24 u32 ia_secdata_len;
25
26 struct simple_xattrs xattrs;
27};
28
29#define KN_DEACTIVATED_BIAS INT_MIN
30
31/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
32
33/**
34 * kernfs_root - find out the kernfs_root a kernfs_node belongs to
35 * @kn: kernfs_node of interest
36 *
37 * Return the kernfs_root @kn belongs to.
38 */
39static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
40{
41 /* if parent exists, it's always a dir; otherwise, @sd is a dir */
42 if (kn->parent)
43 kn = kn->parent;
44 return kn->dir.root;
45}
46
/*
 * Context passed between kernfs_addrm_start(), kernfs_add_one() and
 * kernfs_addrm_finish() while nodes are being added or removed.
 */
struct kernfs_addrm_cxt {
	/* NOTE(review): presumably nodes collected for removal; confirm in dir.c */
	struct kernfs_node	*removed;
};
53
54/*
55 * mount.c
56 */
struct kernfs_super_info {
	/*
	 * Root this super_block belongs to.  A super_block is identified
	 * by the pair of its root and its namespace tag.
	 */
	struct kernfs_root	*root;

	/*
	 * Namespace tag of this sb; currently the network namespace of
	 * the task which mounted this kernfs instance.  Should several
	 * tags ever be needed, turn this into an array and compare the
	 * kernfs_node tag against every entry.
	 */
	const void		*ns;
};
/* Fetch the kernfs_super_info stashed in a super_block's s_fs_info. */
#define kernfs_info(SB) ((struct kernfs_super_info *)((SB)->s_fs_info))
73
74extern struct kmem_cache *kernfs_node_cache;
75
76/*
77 * inode.c
78 */
79struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
80void kernfs_evict_inode(struct inode *inode);
81int kernfs_iop_permission(struct inode *inode, int mask);
82int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
83int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
84 struct kstat *stat);
85int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value,
86 size_t size, int flags);
87int kernfs_iop_removexattr(struct dentry *dentry, const char *name);
88ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
89 size_t size);
90ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
91void kernfs_inode_init(void);
92
93/*
94 * dir.c
95 */
96extern struct mutex kernfs_mutex;
97extern const struct dentry_operations kernfs_dops;
98extern const struct file_operations kernfs_dir_fops;
99extern const struct inode_operations kernfs_dir_iops;
100
101struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
102void kernfs_put_active(struct kernfs_node *kn);
103void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt);
104int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn);
105void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt);
106struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
107 const char *name, umode_t mode,
108 unsigned flags);
109
110/*
111 * file.c
112 */
113extern const struct file_operations kernfs_file_fops;
114
115void kernfs_unmap_bin_file(struct kernfs_node *kn);
116
117/*
118 * symlink.c
119 */
120extern const struct inode_operations kernfs_symlink_iops;
121
122#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
new file mode 100644
index 000000000000..0d6ce895a9ee
--- /dev/null
+++ b/fs/kernfs/mount.c
@@ -0,0 +1,165 @@
1/*
2 * fs/kernfs/mount.c - kernfs mount implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/mount.h>
13#include <linux/init.h>
14#include <linux/magic.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17
18#include "kernfs-internal.h"
19
20struct kmem_cache *kernfs_node_cache;
21
22static const struct super_operations kernfs_sops = {
23 .statfs = simple_statfs,
24 .drop_inode = generic_delete_inode,
25 .evict_inode = kernfs_evict_inode,
26};
27
28static int kernfs_fill_super(struct super_block *sb)
29{
30 struct kernfs_super_info *info = kernfs_info(sb);
31 struct inode *inode;
32 struct dentry *root;
33
34 sb->s_blocksize = PAGE_CACHE_SIZE;
35 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
36 sb->s_magic = SYSFS_MAGIC;
37 sb->s_op = &kernfs_sops;
38 sb->s_time_gran = 1;
39
40 /* get root inode, initialize and unlock it */
41 mutex_lock(&kernfs_mutex);
42 inode = kernfs_get_inode(sb, info->root->kn);
43 mutex_unlock(&kernfs_mutex);
44 if (!inode) {
45 pr_debug("kernfs: could not get root inode\n");
46 return -ENOMEM;
47 }
48
49 /* instantiate and link root dentry */
50 root = d_make_root(inode);
51 if (!root) {
52 pr_debug("%s: could not get root dentry!\n", __func__);
53 return -ENOMEM;
54 }
55 kernfs_get(info->root->kn);
56 root->d_fsdata = info->root->kn;
57 sb->s_root = root;
58 sb->s_d_op = &kernfs_dops;
59 return 0;
60}
61
62static int kernfs_test_super(struct super_block *sb, void *data)
63{
64 struct kernfs_super_info *sb_info = kernfs_info(sb);
65 struct kernfs_super_info *info = data;
66
67 return sb_info->root == info->root && sb_info->ns == info->ns;
68}
69
70static int kernfs_set_super(struct super_block *sb, void *data)
71{
72 int error;
73 error = set_anon_super(sb, data);
74 if (!error)
75 sb->s_fs_info = data;
76 return error;
77}
78
79/**
80 * kernfs_super_ns - determine the namespace tag of a kernfs super_block
81 * @sb: super_block of interest
82 *
83 * Return the namespace tag associated with kernfs super_block @sb.
84 */
85const void *kernfs_super_ns(struct super_block *sb)
86{
87 struct kernfs_super_info *info = kernfs_info(sb);
88
89 return info->ns;
90}
91
92/**
93 * kernfs_mount_ns - kernfs mount helper
94 * @fs_type: file_system_type of the fs being mounted
95 * @flags: mount flags specified for the mount
96 * @root: kernfs_root of the hierarchy being mounted
97 * @ns: optional namespace tag of the mount
98 *
99 * This is to be called from each kernfs user's file_system_type->mount()
100 * implementation, which should pass through the specified @fs_type and
101 * @flags, and specify the hierarchy and namespace tag to mount via @root
102 * and @ns, respectively.
103 *
104 * The return value can be passed to the vfs layer verbatim.
105 */
106struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
107 struct kernfs_root *root, const void *ns)
108{
109 struct super_block *sb;
110 struct kernfs_super_info *info;
111 int error;
112
113 info = kzalloc(sizeof(*info), GFP_KERNEL);
114 if (!info)
115 return ERR_PTR(-ENOMEM);
116
117 info->root = root;
118 info->ns = ns;
119
120 sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
121 if (IS_ERR(sb) || sb->s_fs_info != info)
122 kfree(info);
123 if (IS_ERR(sb))
124 return ERR_CAST(sb);
125 if (!sb->s_root) {
126 error = kernfs_fill_super(sb);
127 if (error) {
128 deactivate_locked_super(sb);
129 return ERR_PTR(error);
130 }
131 sb->s_flags |= MS_ACTIVE;
132 }
133
134 return dget(sb->s_root);
135}
136
137/**
138 * kernfs_kill_sb - kill_sb for kernfs
139 * @sb: super_block being killed
140 *
141 * This can be used directly for file_system_type->kill_sb(). If a kernfs
142 * user needs extra cleanup, it can implement its own kill_sb() and call
143 * this function at the end.
144 */
145void kernfs_kill_sb(struct super_block *sb)
146{
147 struct kernfs_super_info *info = kernfs_info(sb);
148 struct kernfs_node *root_kn = sb->s_root->d_fsdata;
149
150 /*
151 * Remove the superblock from fs_supers/s_instances
152 * so we can't find it, before freeing kernfs_super_info.
153 */
154 kill_anon_super(sb);
155 kfree(info);
156 kernfs_put(root_kn);
157}
158
159void __init kernfs_init(void)
160{
161 kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
162 sizeof(struct kernfs_node),
163 0, SLAB_PANIC, NULL);
164 kernfs_inode_init();
165}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
new file mode 100644
index 000000000000..4d457055acb9
--- /dev/null
+++ b/fs/kernfs/symlink.c
@@ -0,0 +1,151 @@
1/*
2 * fs/kernfs/symlink.c - kernfs symlink implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/fs.h>
12#include <linux/gfp.h>
13#include <linux/namei.h>
14
15#include "kernfs-internal.h"
16
17/**
18 * kernfs_create_link - create a symlink
19 * @parent: directory to create the symlink in
20 * @name: name of the symlink
21 * @target: target node for the symlink to point to
22 *
23 * Returns the created node on success, ERR_PTR() value on error.
24 */
25struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
26 const char *name,
27 struct kernfs_node *target)
28{
29 struct kernfs_node *kn;
30 struct kernfs_addrm_cxt acxt;
31 int error;
32
33 kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
34 if (!kn)
35 return ERR_PTR(-ENOMEM);
36
37 if (kernfs_ns_enabled(parent))
38 kn->ns = target->ns;
39 kn->symlink.target_kn = target;
40 kernfs_get(target); /* ref owned by symlink */
41
42 kernfs_addrm_start(&acxt);
43 error = kernfs_add_one(&acxt, kn);
44 kernfs_addrm_finish(&acxt);
45
46 if (!error)
47 return kn;
48
49 kernfs_put(kn);
50 return ERR_PTR(error);
51}
52
53static int kernfs_get_target_path(struct kernfs_node *parent,
54 struct kernfs_node *target, char *path)
55{
56 struct kernfs_node *base, *kn;
57 char *s = path;
58 int len = 0;
59
60 /* go up to the root, stop at the base */
61 base = parent;
62 while (base->parent) {
63 kn = target->parent;
64 while (kn->parent && base != kn)
65 kn = kn->parent;
66
67 if (base == kn)
68 break;
69
70 strcpy(s, "../");
71 s += 3;
72 base = base->parent;
73 }
74
75 /* determine end of target string for reverse fillup */
76 kn = target;
77 while (kn->parent && kn != base) {
78 len += strlen(kn->name) + 1;
79 kn = kn->parent;
80 }
81
82 /* check limits */
83 if (len < 2)
84 return -EINVAL;
85 len--;
86 if ((s - path) + len > PATH_MAX)
87 return -ENAMETOOLONG;
88
89 /* reverse fillup of target string from target to base */
90 kn = target;
91 while (kn->parent && kn != base) {
92 int slen = strlen(kn->name);
93
94 len -= slen;
95 strncpy(s + len, kn->name, slen);
96 if (len)
97 s[--len] = '/';
98
99 kn = kn->parent;
100 }
101
102 return 0;
103}
104
105static int kernfs_getlink(struct dentry *dentry, char *path)
106{
107 struct kernfs_node *kn = dentry->d_fsdata;
108 struct kernfs_node *parent = kn->parent;
109 struct kernfs_node *target = kn->symlink.target_kn;
110 int error;
111
112 mutex_lock(&kernfs_mutex);
113 error = kernfs_get_target_path(parent, target, path);
114 mutex_unlock(&kernfs_mutex);
115
116 return error;
117}
118
119static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd)
120{
121 int error = -ENOMEM;
122 unsigned long page = get_zeroed_page(GFP_KERNEL);
123 if (page) {
124 error = kernfs_getlink(dentry, (char *) page);
125 if (error < 0)
126 free_page((unsigned long)page);
127 }
128 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
129 return NULL;
130}
131
/* ->put_link() inode operation: free the page set up by kernfs_iop_follow_link(). */
static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd,
				void *cookie)
{
	char *body = nd_get_link(nd);

	/* an ERR_PTR() means follow_link failed and there is no page to free */
	if (IS_ERR(body))
		return;

	free_page((unsigned long)body);
}
139
140const struct inode_operations kernfs_symlink_iops = {
141 .setxattr = kernfs_iop_setxattr,
142 .removexattr = kernfs_iop_removexattr,
143 .getxattr = kernfs_iop_getxattr,
144 .listxattr = kernfs_iop_listxattr,
145 .readlink = generic_readlink,
146 .follow_link = kernfs_iop_follow_link,
147 .put_link = kernfs_iop_put_link,
148 .setattr = kernfs_iop_setattr,
149 .getattr = kernfs_iop_getattr,
150 .permission = kernfs_iop_permission,
151};
diff --git a/fs/namespace.c b/fs/namespace.c
index be32ebccdeb1..22e536705c45 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2790,6 +2790,8 @@ void __init mnt_init(void)
2790 for (u = 0; u < HASH_SIZE; u++) 2790 for (u = 0; u < HASH_SIZE; u++)
2791 INIT_LIST_HEAD(&mountpoint_hashtable[u]); 2791 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2792 2792
2793 kernfs_init();
2794
2793 err = sysfs_init(); 2795 err = sysfs_init();
2794 if (err) 2796 if (err)
2795 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2797 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac183373..6eff6e1205a5 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,4 @@
2# Makefile for the sysfs virtual filesystem 2# Makefile for the sysfs virtual filesystem
3# 3#
4 4
5obj-y := inode.o file.o dir.o symlink.o mount.o group.o 5obj-y := file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d6626e50..ee0d761c3179 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -13,465 +13,31 @@
13#undef DEBUG 13#undef DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h>
17#include <linux/module.h>
18#include <linux/kobject.h> 16#include <linux/kobject.h>
19#include <linux/namei.h>
20#include <linux/idr.h>
21#include <linux/completion.h>
22#include <linux/mutex.h>
23#include <linux/slab.h> 17#include <linux/slab.h>
24#include <linux/security.h>
25#include <linux/hash.h>
26#include "sysfs.h" 18#include "sysfs.h"
27 19
28DEFINE_MUTEX(sysfs_mutex);
29DEFINE_SPINLOCK(sysfs_symlink_target_lock); 20DEFINE_SPINLOCK(sysfs_symlink_target_lock);
30 21
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
32
33static DEFINE_SPINLOCK(sysfs_ino_lock);
34static DEFINE_IDA(sysfs_ino_ida);
35
36/**
37 * sysfs_name_hash
38 * @name: Null terminated string to hash
39 * @ns: Namespace tag to hash
40 *
41 * Returns 31 bit hash of ns + name (so it fits in an off_t )
42 */
43static unsigned int sysfs_name_hash(const char *name, const void *ns)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const char *name,
60 const void *ns, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
73 right);
74}
75
76/**
77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
78 * @sd: sysfs_dirent of interest
79 *
80 * Link @sd into its sibling rbtree which starts from
81 * sd->s_parent->s_dir.children.
82 *
83 * Locking:
84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on susccess -EEXIST on failure.
88 */
89static int sysfs_link_sibling(struct sysfs_dirent *sd)
90{
91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
92 struct rb_node *parent = NULL;
93
94 if (sysfs_type(sd) == SYSFS_DIR)
95 sd->s_parent->s_dir.subdirs++;
96
97 while (*node) {
98 struct sysfs_dirent *pos;
99 int result;
100
101 pos = to_sysfs_dirent(*node);
102 parent = *node;
103 result = sysfs_sd_compare(sd, pos);
104 if (result < 0)
105 node = &pos->s_rb.rb_left;
106 else if (result > 0)
107 node = &pos->s_rb.rb_right;
108 else
109 return -EEXIST;
110 }
111 /* add new node and rebalance the tree */
112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
115}
116
117/**
118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
119 * @sd: sysfs_dirent of interest
120 *
121 * Unlink @sd from its sibling rbtree which starts from
122 * sd->s_parent->s_dir.children.
123 *
124 * Locking:
125 * mutex_lock(sysfs_mutex)
126 */
127static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
128{
129 if (sysfs_type(sd) == SYSFS_DIR)
130 sd->s_parent->s_dir.subdirs--;
131
132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
133}
134
135/**
136 * sysfs_get_active - get an active reference to sysfs_dirent
137 * @sd: sysfs_dirent to get an active reference to
138 *
139 * Get an active reference of @sd. This function is noop if @sd
140 * is NULL.
141 *
142 * RETURNS:
143 * Pointer to @sd on success, NULL on failure.
144 */
145struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
146{
147 if (unlikely(!sd))
148 return NULL;
149
150 if (!atomic_inc_unless_negative(&sd->s_active))
151 return NULL;
152
153 if (likely(!sysfs_ignore_lockdep(sd)))
154 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
155 return sd;
156}
157
158/**
159 * sysfs_put_active - put an active reference to sysfs_dirent
160 * @sd: sysfs_dirent to put an active reference to
161 *
162 * Put an active reference to @sd. This function is noop if @sd
163 * is NULL.
164 */
165void sysfs_put_active(struct sysfs_dirent *sd)
166{
167 int v;
168
169 if (unlikely(!sd))
170 return;
171
172 if (likely(!sysfs_ignore_lockdep(sd)))
173 rwsem_release(&sd->dep_map, 1, _RET_IP_);
174 v = atomic_dec_return(&sd->s_active);
175 if (likely(v != SD_DEACTIVATED_BIAS))
176 return;
177
178 /* atomic_dec_return() is a mb(), we'll always see the updated
179 * sd->u.completion.
180 */
181 complete(sd->u.completion);
182}
183
184/**
185 * sysfs_deactivate - deactivate sysfs_dirent
186 * @sd: sysfs_dirent to deactivate
187 *
188 * Deny new active references and drain existing ones.
189 */
190static void sysfs_deactivate(struct sysfs_dirent *sd)
191{
192 DECLARE_COMPLETION_ONSTACK(wait);
193 int v;
194
195 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
196
197 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
198 return;
199
200 sd->u.completion = (void *)&wait;
201
202 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
203 /* atomic_add_return() is a mb(), put_active() will always see
204 * the updated sd->u.completion.
205 */
206 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
207
208 if (v != SD_DEACTIVATED_BIAS) {
209 lock_contended(&sd->dep_map, _RET_IP_);
210 wait_for_completion(&wait);
211 }
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
215}
216
217static int sysfs_alloc_ino(unsigned int *pino)
218{
219 int ino, rc;
220
221 retry:
222 spin_lock(&sysfs_ino_lock);
223 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
224 spin_unlock(&sysfs_ino_lock);
225
226 if (rc == -EAGAIN) {
227 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
228 goto retry;
229 rc = -ENOMEM;
230 }
231
232 *pino = ino;
233 return rc;
234}
235
236static void sysfs_free_ino(unsigned int ino)
237{
238 spin_lock(&sysfs_ino_lock);
239 ida_remove(&sysfs_ino_ida, ino);
240 spin_unlock(&sysfs_ino_lock);
241}
242
243void release_sysfs_dirent(struct sysfs_dirent *sd)
244{
245 struct sysfs_dirent *parent_sd;
246
247 repeat:
248 /* Moving/renaming is always done while holding reference.
249 * sd->s_parent won't change beneath us.
250 */
251 parent_sd = sd->s_parent;
252
253 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
254 "sysfs: free using entry: %s/%s\n",
255 parent_sd ? parent_sd->s_name : "", sd->s_name);
256
257 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
258 sysfs_put(sd->s_symlink.target_sd);
259 if (sysfs_type(sd) & SYSFS_COPY_NAME)
260 kfree(sd->s_name);
261 if (sd->s_iattr && sd->s_iattr->ia_secdata)
262 security_release_secctx(sd->s_iattr->ia_secdata,
263 sd->s_iattr->ia_secdata_len);
264 kfree(sd->s_iattr);
265 sysfs_free_ino(sd->s_ino);
266 kmem_cache_free(sysfs_dir_cachep, sd);
267
268 sd = parent_sd;
269 if (sd && atomic_dec_and_test(&sd->s_count))
270 goto repeat;
271}
272
273static int sysfs_dentry_delete(const struct dentry *dentry)
274{
275 struct sysfs_dirent *sd = dentry->d_fsdata;
276 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
277}
278
279static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
280{
281 struct sysfs_dirent *sd;
282 int type;
283
284 if (flags & LOOKUP_RCU)
285 return -ECHILD;
286
287 sd = dentry->d_fsdata;
288 mutex_lock(&sysfs_mutex);
289
290 /* The sysfs dirent has been deleted */
291 if (sd->s_flags & SYSFS_FLAG_REMOVED)
292 goto out_bad;
293
294 /* The sysfs dirent has been moved? */
295 if (dentry->d_parent->d_fsdata != sd->s_parent)
296 goto out_bad;
297
298 /* The sysfs dirent has been renamed */
299 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
300 goto out_bad;
301
302 /* The sysfs dirent has been moved to a different namespace */
303 type = KOBJ_NS_TYPE_NONE;
304 if (sd->s_parent) {
305 type = sysfs_ns_type(sd->s_parent);
306 if (type != KOBJ_NS_TYPE_NONE &&
307 sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
308 goto out_bad;
309 }
310
311 mutex_unlock(&sysfs_mutex);
312out_valid:
313 return 1;
314out_bad:
315 /* Remove the dentry from the dcache hashes.
316 * If this is a deleted dentry we use d_drop instead of d_delete
317 * so sysfs doesn't need to cope with negative dentries.
318 *
319 * If this is a dentry that has simply been renamed we
320 * use d_drop to remove it from the dcache lookup on its
321 * old parent. If this dentry persists later when a lookup
322 * is performed at its new name the dentry will be readded
323 * to the dcache hashes.
324 */
325 mutex_unlock(&sysfs_mutex);
326
327 /* If we have submounts we must allow the vfs caches
328 * to lie about the state of the filesystem to prevent
329 * leaks and other nasty things.
330 */
331 if (check_submounts_and_drop(dentry) != 0)
332 goto out_valid;
333
334 return 0;
335}
336
337static void sysfs_dentry_release(struct dentry *dentry)
338{
339 sysfs_put(dentry->d_fsdata);
340}
341
342const struct dentry_operations sysfs_dentry_ops = {
343 .d_revalidate = sysfs_dentry_revalidate,
344 .d_delete = sysfs_dentry_delete,
345 .d_release = sysfs_dentry_release,
346};
347
348struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
349{
350 char *dup_name = NULL;
351 struct sysfs_dirent *sd;
352
353 if (type & SYSFS_COPY_NAME) {
354 name = dup_name = kstrdup(name, GFP_KERNEL);
355 if (!name)
356 return NULL;
357 }
358
359 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
360 if (!sd)
361 goto err_out1;
362
363 if (sysfs_alloc_ino(&sd->s_ino))
364 goto err_out2;
365
366 atomic_set(&sd->s_count, 1);
367 atomic_set(&sd->s_active, 0);
368
369 sd->s_name = name;
370 sd->s_mode = mode;
371 sd->s_flags = type | SYSFS_FLAG_REMOVED;
372
373 return sd;
374
375 err_out2:
376 kmem_cache_free(sysfs_dir_cachep, sd);
377 err_out1:
378 kfree(dup_name);
379 return NULL;
380}
381
382/**
383 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
384 * @acxt: pointer to sysfs_addrm_cxt to be used
385 *
386 * This function is called when the caller is about to add or remove
387 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
388 * to keep and pass context to other addrm functions.
389 *
390 * LOCKING:
391 * Kernel thread context (may sleep). sysfs_mutex is locked on
392 * return.
393 */
394void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
395 __acquires(sysfs_mutex)
396{
397 memset(acxt, 0, sizeof(*acxt));
398
399 mutex_lock(&sysfs_mutex);
400}
401
402/**
403 * __sysfs_add_one - add sysfs_dirent to parent without warning
404 * @acxt: addrm context to use
405 * @sd: sysfs_dirent to be added
406 * @parent_sd: the parent sysfs_dirent to add @sd to
407 *
408 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
409 * the parent inode if @sd is a directory and link into the children
410 * list of the parent.
411 *
412 * This function should be called between calls to
413 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
414 * passed the same @acxt as passed to sysfs_addrm_start().
415 *
416 * LOCKING:
417 * Determined by sysfs_addrm_start().
418 *
419 * RETURNS:
420 * 0 on success, -EEXIST if entry with the given name already
421 * exists.
422 */
423int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
424 struct sysfs_dirent *parent_sd)
425{
426 struct sysfs_inode_attrs *ps_iattr;
427 int ret;
428
429 if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
430 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
431 sysfs_ns_type(parent_sd) ? "required" : "invalid",
432 parent_sd->s_name, sd->s_name);
433 return -EINVAL;
434 }
435
436 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
437 sd->s_parent = sysfs_get(parent_sd);
438
439 ret = sysfs_link_sibling(sd);
440 if (ret)
441 return ret;
442
443 /* Update timestamps on the parent */
444 ps_iattr = parent_sd->s_iattr;
445 if (ps_iattr) {
446 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
447 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
448 }
449
450 /* Mark the entry added into directory tree */
451 sd->s_flags &= ~SYSFS_FLAG_REMOVED;
452
453 return 0;
454}
455
456/** 22/**
457 * sysfs_pathname - return full path to sysfs dirent 23 * sysfs_pathname - return full path to sysfs dirent
458 * @sd: sysfs_dirent whose path we want 24 * @kn: kernfs_node whose path we want
459 * @path: caller allocated buffer of size PATH_MAX 25 * @path: caller allocated buffer of size PATH_MAX
460 * 26 *
461 * Gives the name "/" to the sysfs_root entry; any path returned 27 * Gives the name "/" to the sysfs_root entry; any path returned
462 * is relative to wherever sysfs is mounted. 28 * is relative to wherever sysfs is mounted.
463 */ 29 */
464static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) 30static char *sysfs_pathname(struct kernfs_node *kn, char *path)
465{ 31{
466 if (sd->s_parent) { 32 if (kn->parent) {
467 sysfs_pathname(sd->s_parent, path); 33 sysfs_pathname(kn->parent, path);
468 strlcat(path, "/", PATH_MAX); 34 strlcat(path, "/", PATH_MAX);
469 } 35 }
470 strlcat(path, sd->s_name, PATH_MAX); 36 strlcat(path, kn->name, PATH_MAX);
471 return path; 37 return path;
472} 38}
473 39
474void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) 40void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
475{ 41{
476 char *path; 42 char *path;
477 43
@@ -489,445 +55,34 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
489} 55}
490 56
491/** 57/**
492 * sysfs_add_one - add sysfs_dirent to parent
493 * @acxt: addrm context to use
494 * @sd: sysfs_dirent to be added
495 * @parent_sd: the parent sysfs_dirent to add @sd to
496 *
497 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
498 * the parent inode if @sd is a directory and link into the children
499 * list of the parent.
500 *
501 * This function should be called between calls to
502 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
503 * passed the same @acxt as passed to sysfs_addrm_start().
504 *
505 * LOCKING:
506 * Determined by sysfs_addrm_start().
507 *
508 * RETURNS:
509 * 0 on success, -EEXIST if entry with the given name already
510 * exists.
511 */
512int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
513 struct sysfs_dirent *parent_sd)
514{
515 int ret;
516
517 ret = __sysfs_add_one(acxt, sd, parent_sd);
518
519 if (ret == -EEXIST)
520 sysfs_warn_dup(parent_sd, sd->s_name);
521 return ret;
522}
523
524/**
525 * sysfs_remove_one - remove sysfs_dirent from parent
526 * @acxt: addrm context to use
527 * @sd: sysfs_dirent to be removed
528 *
529 * Mark @sd removed and drop nlink of parent inode if @sd is a
530 * directory. @sd is unlinked from the children list.
531 *
532 * This function should be called between calls to
533 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
534 * passed the same @acxt as passed to sysfs_addrm_start().
535 *
536 * LOCKING:
537 * Determined by sysfs_addrm_start().
538 */
539static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
540 struct sysfs_dirent *sd)
541{
542 struct sysfs_inode_attrs *ps_iattr;
543
544 /*
545 * Removal can be called multiple times on the same node. Only the
546 * first invocation is effective and puts the base ref.
547 */
548 if (sd->s_flags & SYSFS_FLAG_REMOVED)
549 return;
550
551 sysfs_unlink_sibling(sd);
552
553 /* Update timestamps on the parent */
554 ps_iattr = sd->s_parent->s_iattr;
555 if (ps_iattr) {
556 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
557 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
558 }
559
560 sd->s_flags |= SYSFS_FLAG_REMOVED;
561 sd->u.removed_list = acxt->removed;
562 acxt->removed = sd;
563}
564
565/**
566 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
567 * @acxt: addrm context to finish up
568 *
569 * Finish up sysfs_dirent add/remove. Resources acquired by
570 * sysfs_addrm_start() are released and removed sysfs_dirents are
571 * cleaned up.
572 *
573 * LOCKING:
574 * sysfs_mutex is released.
575 */
576void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
577 __releases(sysfs_mutex)
578{
579 /* release resources acquired by sysfs_addrm_start() */
580 mutex_unlock(&sysfs_mutex);
581
582 /* kill removed sysfs_dirents */
583 while (acxt->removed) {
584 struct sysfs_dirent *sd = acxt->removed;
585
586 acxt->removed = sd->u.removed_list;
587
588 sysfs_deactivate(sd);
589 sysfs_unmap_bin_file(sd);
590 sysfs_put(sd);
591 }
592}
593
594/**
595 * sysfs_find_dirent - find sysfs_dirent with the given name
596 * @parent_sd: sysfs_dirent to search under
597 * @name: name to look for
598 * @ns: the namespace tag to use
599 *
600 * Look for sysfs_dirent with name @name under @parent_sd.
601 *
602 * LOCKING:
603 * mutex_lock(sysfs_mutex)
604 *
605 * RETURNS:
606 * Pointer to sysfs_dirent if found, NULL if not.
607 */
608struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
609 const unsigned char *name,
610 const void *ns)
611{
612 struct rb_node *node = parent_sd->s_dir.children.rb_node;
613 unsigned int hash;
614
615 if (!!sysfs_ns_type(parent_sd) != !!ns) {
616 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
617 sysfs_ns_type(parent_sd) ? "required" : "invalid",
618 parent_sd->s_name, name);
619 return NULL;
620 }
621
622 hash = sysfs_name_hash(name, ns);
623 while (node) {
624 struct sysfs_dirent *sd;
625 int result;
626
627 sd = to_sysfs_dirent(node);
628 result = sysfs_name_compare(hash, name, ns, sd);
629 if (result < 0)
630 node = node->rb_left;
631 else if (result > 0)
632 node = node->rb_right;
633 else
634 return sd;
635 }
636 return NULL;
637}
638
639/**
640 * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
641 * @parent_sd: sysfs_dirent to search under
642 * @name: name to look for
643 * @ns: the namespace tag to use
644 *
645 * Look for sysfs_dirent with name @name under @parent_sd and get
646 * it if found.
647 *
648 * LOCKING:
649 * Kernel thread context (may sleep). Grabs sysfs_mutex.
650 *
651 * RETURNS:
652 * Pointer to sysfs_dirent if found, NULL if not.
653 */
654struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
655 const unsigned char *name,
656 const void *ns)
657{
658 struct sysfs_dirent *sd;
659
660 mutex_lock(&sysfs_mutex);
661 sd = sysfs_find_dirent(parent_sd, name, ns);
662 sysfs_get(sd);
663 mutex_unlock(&sysfs_mutex);
664
665 return sd;
666}
667EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
668
669static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
670 enum kobj_ns_type type,
671 const char *name, const void *ns,
672 struct sysfs_dirent **p_sd)
673{
674 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
675 struct sysfs_addrm_cxt acxt;
676 struct sysfs_dirent *sd;
677 int rc;
678
679 /* allocate */
680 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
681 if (!sd)
682 return -ENOMEM;
683
684 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
685 sd->s_ns = ns;
686 sd->s_dir.kobj = kobj;
687
688 /* link in */
689 sysfs_addrm_start(&acxt);
690 rc = sysfs_add_one(&acxt, sd, parent_sd);
691 sysfs_addrm_finish(&acxt);
692
693 if (rc == 0)
694 *p_sd = sd;
695 else
696 sysfs_put(sd);
697
698 return rc;
699}
700
701int sysfs_create_subdir(struct kobject *kobj, const char *name,
702 struct sysfs_dirent **p_sd)
703{
704 return create_dir(kobj, kobj->sd,
705 KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
706}
707
708/**
709 * sysfs_read_ns_type: return associated ns_type
710 * @kobj: the kobject being queried
711 *
712 * Each kobject can be tagged with exactly one namespace type
713 * (i.e. network or user). Return the ns_type associated with
714 * this object if any
715 */
716static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
717{
718 const struct kobj_ns_type_operations *ops;
719 enum kobj_ns_type type;
720
721 ops = kobj_child_ns_ops(kobj);
722 if (!ops)
723 return KOBJ_NS_TYPE_NONE;
724
725 type = ops->type;
726 BUG_ON(type <= KOBJ_NS_TYPE_NONE);
727 BUG_ON(type >= KOBJ_NS_TYPES);
728 BUG_ON(!kobj_ns_type_registered(type));
729
730 return type;
731}
732
733/**
734 * sysfs_create_dir_ns - create a directory for an object with a namespace tag 58 * sysfs_create_dir_ns - create a directory for an object with a namespace tag
735 * @kobj: object we're creating directory for 59 * @kobj: object we're creating directory for
736 * @ns: the namespace tag to use 60 * @ns: the namespace tag to use
737 */ 61 */
738int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) 62int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
739{ 63{
740 enum kobj_ns_type type; 64 struct kernfs_node *parent, *kn;
741 struct sysfs_dirent *parent_sd, *sd;
742 int error = 0;
743 65
744 BUG_ON(!kobj); 66 BUG_ON(!kobj);
745 67
746 if (kobj->parent) 68 if (kobj->parent)
747 parent_sd = kobj->parent->sd; 69 parent = kobj->parent->sd;
748 else 70 else
749 parent_sd = &sysfs_root; 71 parent = sysfs_root_kn;
750 72
751 if (!parent_sd) 73 if (!parent)
752 return -ENOENT; 74 return -ENOENT;
753 75
754 type = sysfs_read_ns_type(kobj); 76 kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
755 77 S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
756 error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); 78 if (IS_ERR(kn)) {
757 if (!error) 79 if (PTR_ERR(kn) == -EEXIST)
758 kobj->sd = sd; 80 sysfs_warn_dup(parent, kobject_name(kobj));
759 return error; 81 return PTR_ERR(kn);
760}
761
762static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 unsigned int flags)
764{
765 struct dentry *ret = NULL;
766 struct dentry *parent = dentry->d_parent;
767 struct sysfs_dirent *parent_sd = parent->d_fsdata;
768 struct sysfs_dirent *sd;
769 struct inode *inode;
770 enum kobj_ns_type type;
771 const void *ns;
772
773 mutex_lock(&sysfs_mutex);
774
775 type = sysfs_ns_type(parent_sd);
776 ns = sysfs_info(dir->i_sb)->ns[type];
777
778 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
779
780 /* no such entry */
781 if (!sd) {
782 ret = ERR_PTR(-ENOENT);
783 goto out_unlock;
784 }
785 dentry->d_fsdata = sysfs_get(sd);
786
787 /* attach dentry and inode */
788 inode = sysfs_get_inode(dir->i_sb, sd);
789 if (!inode) {
790 ret = ERR_PTR(-ENOMEM);
791 goto out_unlock;
792 }
793
794 /* instantiate and hash dentry */
795 ret = d_materialise_unique(dentry, inode);
796 out_unlock:
797 mutex_unlock(&sysfs_mutex);
798 return ret;
799}
800
801const struct inode_operations sysfs_dir_inode_operations = {
802 .lookup = sysfs_lookup,
803 .permission = sysfs_permission,
804 .setattr = sysfs_setattr,
805 .getattr = sysfs_getattr,
806 .setxattr = sysfs_setxattr,
807};
808
809static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
810{
811 struct sysfs_dirent *last;
812
813 while (true) {
814 struct rb_node *rbn;
815
816 last = pos;
817
818 if (sysfs_type(pos) != SYSFS_DIR)
819 break;
820
821 rbn = rb_first(&pos->s_dir.children);
822 if (!rbn)
823 break;
824
825 pos = to_sysfs_dirent(rbn);
826 }
827
828 return last;
829}
830
831/**
832 * sysfs_next_descendant_post - find the next descendant for post-order walk
833 * @pos: the current position (%NULL to initiate traversal)
834 * @root: sysfs_dirent whose descendants to walk
835 *
836 * Find the next descendant to visit for post-order traversal of @root's
837 * descendants. @root is included in the iteration and the last node to be
838 * visited.
839 */
840static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
841 struct sysfs_dirent *root)
842{
843 struct rb_node *rbn;
844
845 lockdep_assert_held(&sysfs_mutex);
846
847 /* if first iteration, visit leftmost descendant which may be root */
848 if (!pos)
849 return sysfs_leftmost_descendant(root);
850
851 /* if we visited @root, we're done */
852 if (pos == root)
853 return NULL;
854
855 /* if there's an unvisited sibling, visit its leftmost descendant */
856 rbn = rb_next(&pos->s_rb);
857 if (rbn)
858 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
859
860 /* no sibling left, visit parent */
861 return pos->s_parent;
862}
863
864static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
865 struct sysfs_dirent *sd)
866{
867 struct sysfs_dirent *pos, *next;
868
869 if (!sd)
870 return;
871
872 pr_debug("sysfs %s: removing\n", sd->s_name);
873
874 next = NULL;
875 do {
876 pos = next;
877 next = sysfs_next_descendant_post(pos, sd);
878 if (pos)
879 sysfs_remove_one(acxt, pos);
880 } while (next);
881}
882
883/**
884 * sysfs_remove - remove a sysfs_dirent recursively
885 * @sd: the sysfs_dirent to remove
886 *
887 * Remove @sd along with all its subdirectories and files.
888 */
889void sysfs_remove(struct sysfs_dirent *sd)
890{
891 struct sysfs_addrm_cxt acxt;
892
893 sysfs_addrm_start(&acxt);
894 __sysfs_remove(&acxt, sd);
895 sysfs_addrm_finish(&acxt);
896}
897
898/**
899 * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
900 * @dir_sd: parent of the target
901 * @name: name of the sysfs_dirent to remove
902 * @ns: namespace tag of the sysfs_dirent to remove
903 *
904 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
905 * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
906 */
907int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
908 const void *ns)
909{
910 struct sysfs_addrm_cxt acxt;
911 struct sysfs_dirent *sd;
912
913 if (!dir_sd) {
914 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
915 name);
916 return -ENOENT;
917 } 82 }
918 83
919 sysfs_addrm_start(&acxt); 84 kobj->sd = kn;
920 85 return 0;
921 sd = sysfs_find_dirent(dir_sd, name, ns);
922 if (sd)
923 __sysfs_remove(&acxt, sd);
924
925 sysfs_addrm_finish(&acxt);
926
927 if (sd)
928 return 0;
929 else
930 return -ENOENT;
931} 86}
932 87
933/** 88/**
@@ -940,207 +95,47 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
940 */ 95 */
941void sysfs_remove_dir(struct kobject *kobj) 96void sysfs_remove_dir(struct kobject *kobj)
942{ 97{
943 struct sysfs_dirent *sd = kobj->sd; 98 struct kernfs_node *kn = kobj->sd;
944 99
945 /* 100 /*
946 * In general, kboject owner is responsible for ensuring removal 101 * In general, kboject owner is responsible for ensuring removal
947 * doesn't race with other operations and sysfs doesn't provide any 102 * doesn't race with other operations and sysfs doesn't provide any
948 * protection; however, when @kobj is used as a symlink target, the 103 * protection; however, when @kobj is used as a symlink target, the
949 * symlinking entity usually doesn't own @kobj and thus has no 104 * symlinking entity usually doesn't own @kobj and thus has no
950 * control over removal. @kobj->sd may be removed anytime and 105 * control over removal. @kobj->sd may be removed anytime
951 * symlink code may end up dereferencing an already freed sd. 106 * and symlink code may end up dereferencing an already freed node.
952 * 107 *
953 * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation 108 * sysfs_symlink_target_lock synchronizes @kobj->sd
954 * against symlink operations so that symlink code can safely 109 * disassociation against symlink operations so that symlink code
955 * dereference @kobj->sd. 110 * can safely dereference @kobj->sd.
956 */ 111 */
957 spin_lock(&sysfs_symlink_target_lock); 112 spin_lock(&sysfs_symlink_target_lock);
958 kobj->sd = NULL; 113 kobj->sd = NULL;
959 spin_unlock(&sysfs_symlink_target_lock); 114 spin_unlock(&sysfs_symlink_target_lock);
960 115
961 if (sd) { 116 if (kn) {
962 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 117 WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
963 sysfs_remove(sd); 118 kernfs_remove(kn);
964 } 119 }
965} 120}
966 121
967int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
968 const char *new_name, const void *new_ns)
969{
970 int error;
971
972 mutex_lock(&sysfs_mutex);
973
974 error = 0;
975 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
976 (strcmp(sd->s_name, new_name) == 0))
977 goto out; /* nothing to rename */
978
979 error = -EEXIST;
980 if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
981 goto out;
982
983 /* rename sysfs_dirent */
984 if (strcmp(sd->s_name, new_name) != 0) {
985 error = -ENOMEM;
986 new_name = kstrdup(new_name, GFP_KERNEL);
987 if (!new_name)
988 goto out;
989
990 kfree(sd->s_name);
991 sd->s_name = new_name;
992 }
993
994 /*
995 * Move to the appropriate place in the appropriate directories rbtree.
996 */
997 sysfs_unlink_sibling(sd);
998 sysfs_get(new_parent_sd);
999 sysfs_put(sd->s_parent);
1000 sd->s_ns = new_ns;
1001 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
1002 sd->s_parent = new_parent_sd;
1003 sysfs_link_sibling(sd);
1004
1005 error = 0;
1006 out:
1007 mutex_unlock(&sysfs_mutex);
1008 return error;
1009}
1010
1011int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, 122int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
1012 const void *new_ns) 123 const void *new_ns)
1013{ 124{
1014 struct sysfs_dirent *parent_sd = kobj->sd->s_parent; 125 struct kernfs_node *parent = kobj->sd->parent;
1015 126
1016 return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); 127 return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
1017} 128}
1018 129
1019int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, 130int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
1020 const void *new_ns) 131 const void *new_ns)
1021{ 132{
1022 struct sysfs_dirent *sd = kobj->sd; 133 struct kernfs_node *kn = kobj->sd;
1023 struct sysfs_dirent *new_parent_sd; 134 struct kernfs_node *new_parent;
1024 135
1025 BUG_ON(!sd->s_parent); 136 BUG_ON(!kn->parent);
1026 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 137 new_parent = new_parent_kobj && new_parent_kobj->sd ?
1027 new_parent_kobj->sd : &sysfs_root; 138 new_parent_kobj->sd : sysfs_root_kn;
1028 139
1029 return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); 140 return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
1030} 141}
1031
1032/* Relationship between s_mode and the DT_xxx types */
1033static inline unsigned char dt_type(struct sysfs_dirent *sd)
1034{
1035 return (sd->s_mode >> 12) & 15;
1036}
1037
1038static int sysfs_dir_release(struct inode *inode, struct file *filp)
1039{
1040 sysfs_put(filp->private_data);
1041 return 0;
1042}
1043
1044static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
1045 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
1046{
1047 if (pos) {
1048 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
1049 pos->s_parent == parent_sd &&
1050 hash == pos->s_hash;
1051 sysfs_put(pos);
1052 if (!valid)
1053 pos = NULL;
1054 }
1055 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1056 struct rb_node *node = parent_sd->s_dir.children.rb_node;
1057 while (node) {
1058 pos = to_sysfs_dirent(node);
1059
1060 if (hash < pos->s_hash)
1061 node = node->rb_left;
1062 else if (hash > pos->s_hash)
1063 node = node->rb_right;
1064 else
1065 break;
1066 }
1067 }
1068 /* Skip over entries in the wrong namespace */
1069 while (pos && pos->s_ns != ns) {
1070 struct rb_node *node = rb_next(&pos->s_rb);
1071 if (!node)
1072 pos = NULL;
1073 else
1074 pos = to_sysfs_dirent(node);
1075 }
1076 return pos;
1077}
1078
1079static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
1080 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
1081{
1082 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
1083 if (pos)
1084 do {
1085 struct rb_node *node = rb_next(&pos->s_rb);
1086 if (!node)
1087 pos = NULL;
1088 else
1089 pos = to_sysfs_dirent(node);
1090 } while (pos && pos->s_ns != ns);
1091 return pos;
1092}
1093
1094static int sysfs_readdir(struct file *file, struct dir_context *ctx)
1095{
1096 struct dentry *dentry = file->f_path.dentry;
1097 struct sysfs_dirent *parent_sd = dentry->d_fsdata;
1098 struct sysfs_dirent *pos = file->private_data;
1099 enum kobj_ns_type type;
1100 const void *ns;
1101
1102 type = sysfs_ns_type(parent_sd);
1103 ns = sysfs_info(dentry->d_sb)->ns[type];
1104
1105 if (!dir_emit_dots(file, ctx))
1106 return 0;
1107 mutex_lock(&sysfs_mutex);
1108 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
1109 pos;
1110 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
1111 const char *name = pos->s_name;
1112 unsigned int type = dt_type(pos);
1113 int len = strlen(name);
1114 ino_t ino = pos->s_ino;
1115 ctx->pos = pos->s_hash;
1116 file->private_data = sysfs_get(pos);
1117
1118 mutex_unlock(&sysfs_mutex);
1119 if (!dir_emit(ctx, name, len, ino, type))
1120 return 0;
1121 mutex_lock(&sysfs_mutex);
1122 }
1123 mutex_unlock(&sysfs_mutex);
1124 file->private_data = NULL;
1125 ctx->pos = INT_MAX;
1126 return 0;
1127}
1128
1129static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1130{
1131 struct inode *inode = file_inode(file);
1132 loff_t ret;
1133
1134 mutex_lock(&inode->i_mutex);
1135 ret = generic_file_llseek(file, offset, whence);
1136 mutex_unlock(&inode->i_mutex);
1137
1138 return ret;
1139}
1140
1141const struct file_operations sysfs_dir_operations = {
1142 .read = generic_read_dir,
1143 .iterate = sysfs_readdir,
1144 .release = sysfs_dir_release,
1145 .llseek = sysfs_dir_llseek,
1146};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 35e7d08fe629..810cf6e613e5 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,70 +14,23 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
18#include <linux/namei.h>
19#include <linux/poll.h>
20#include <linux/list.h> 17#include <linux/list.h>
21#include <linux/mutex.h> 18#include <linux/mutex.h>
22#include <linux/limits.h>
23#include <linux/uaccess.h>
24#include <linux/seq_file.h> 19#include <linux/seq_file.h>
25#include <linux/mm.h>
26 20
27#include "sysfs.h" 21#include "sysfs.h"
22#include "../kernfs/kernfs-internal.h"
28 23
29/* 24/*
30 * There's one sysfs_open_file for each open file and one sysfs_open_dirent 25 * Determine ktype->sysfs_ops for the given kernfs_node. This function
31 * for each sysfs_dirent with one or more open files.
32 *
33 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
34 * protected by sysfs_open_dirent_lock.
35 *
36 * filp->private_data points to seq_file whose ->private points to
37 * sysfs_open_file. sysfs_open_files are chained at
38 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
39 */
40static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
41static DEFINE_MUTEX(sysfs_open_file_mutex);
42
43struct sysfs_open_dirent {
44 atomic_t refcnt;
45 atomic_t event;
46 wait_queue_head_t poll;
47 struct list_head files; /* goes through sysfs_open_file.list */
48};
49
50struct sysfs_open_file {
51 struct sysfs_dirent *sd;
52 struct file *file;
53 struct mutex mutex;
54 int event;
55 struct list_head list;
56
57 bool mmapped;
58 const struct vm_operations_struct *vm_ops;
59};
60
61static bool sysfs_is_bin(struct sysfs_dirent *sd)
62{
63 return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
64}
65
66static struct sysfs_open_file *sysfs_of(struct file *file)
67{
68 return ((struct seq_file *)file->private_data)->private;
69}
70
71/*
72 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
73 * must be called while holding an active reference. 26 * must be called while holding an active reference.
74 */ 27 */
75static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) 28static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn)
76{ 29{
77 struct kobject *kobj = sd->s_parent->s_dir.kobj; 30 struct kobject *kobj = kn->parent->priv;
78 31
79 if (!sysfs_ignore_lockdep(sd)) 32 if (kn->flags & KERNFS_LOCKDEP)
80 lockdep_assert_held(sd); 33 lockdep_assert_held(kn);
81 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; 34 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
82} 35}
83 36
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
86 * details like buffering and seeking. The following function pipes 39 * details like buffering and seeking. The following function pipes
87 * sysfs_ops->show() result through seq_file. 40 * sysfs_ops->show() result through seq_file.
88 */ 41 */
89static int sysfs_seq_show(struct seq_file *sf, void *v) 42static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
90{ 43{
91 struct sysfs_open_file *of = sf->private; 44 struct kernfs_open_file *of = sf->private;
92 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 45 struct kobject *kobj = of->kn->parent->priv;
93 const struct sysfs_ops *ops; 46 const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
94 char *buf;
95 ssize_t count; 47 ssize_t count;
48 char *buf;
96 49
97 /* acquire buffer and ensure that it's >= PAGE_SIZE */ 50 /* acquire buffer and ensure that it's >= PAGE_SIZE */
98 count = seq_get_buf(sf, &buf); 51 count = seq_get_buf(sf, &buf);
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
102 } 55 }
103 56
104 /* 57 /*
105 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex 58 * Invoke show(). Control may reach here via seq file lseek even
106 * nests outside active ref and is just to ensure that the ops 59 * if @ops->show() isn't implemented.
107 * aren't called concurrently for the same open file.
108 */ 60 */
109 mutex_lock(&of->mutex); 61 if (ops->show) {
110 if (!sysfs_get_active(of->sd)) { 62 count = ops->show(kobj, of->kn->priv, buf);
111 mutex_unlock(&of->mutex); 63 if (count < 0)
112 return -ENODEV; 64 return count;
113 } 65 }
114 66
115 of->event = atomic_read(&of->sd->s_attr.open->event);
116
117 /*
118 * Lookup @ops and invoke show(). Control may reach here via seq
119 * file lseek even if @ops->show() isn't implemented.
120 */
121 ops = sysfs_file_ops(of->sd);
122 if (ops->show)
123 count = ops->show(kobj, of->sd->s_attr.attr, buf);
124 else
125 count = 0;
126
127 sysfs_put_active(of->sd);
128 mutex_unlock(&of->mutex);
129
130 if (count < 0)
131 return count;
132
133 /* 67 /*
134 * The code works fine with PAGE_SIZE return but it's likely to 68 * The code works fine with PAGE_SIZE return but it's likely to
135 * indicate truncated result or overflow in normal use cases. 69 * indicate truncated result or overflow in normal use cases.
@@ -144,726 +78,194 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
144 return 0; 78 return 0;
145} 79}
146 80
147/* 81static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf,
148 * Read method for bin files. As reading a bin file can have side-effects, 82 size_t count, loff_t pos)
149 * the exact offset and bytes specified in read(2) call should be passed to
150 * the read callback making it difficult to use seq_file. Implement
151 * simplistic custom buffering for bin files.
152 */
153static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
154 size_t bytes, loff_t *off)
155{ 83{
156 struct sysfs_open_file *of = sysfs_of(file); 84 struct bin_attribute *battr = of->kn->priv;
157 struct bin_attribute *battr = of->sd->s_attr.bin_attr; 85 struct kobject *kobj = of->kn->parent->priv;
158 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 86 loff_t size = file_inode(of->file)->i_size;
159 loff_t size = file_inode(file)->i_size;
160 int count = min_t(size_t, bytes, PAGE_SIZE);
161 loff_t offs = *off;
162 char *buf;
163 87
164 if (!bytes) 88 if (!count)
165 return 0; 89 return 0;
166 90
167 if (size) { 91 if (size) {
168 if (offs > size) 92 if (pos > size)
169 return 0; 93 return 0;
170 if (offs + count > size) 94 if (pos + count > size)
171 count = size - offs; 95 count = size - pos;
172 }
173
174 buf = kmalloc(count, GFP_KERNEL);
175 if (!buf)
176 return -ENOMEM;
177
178 /* need of->sd for battr, its parent for kobj */
179 mutex_lock(&of->mutex);
180 if (!sysfs_get_active(of->sd)) {
181 count = -ENODEV;
182 mutex_unlock(&of->mutex);
183 goto out_free;
184 }
185
186 if (battr->read)
187 count = battr->read(file, kobj, battr, buf, offs, count);
188 else
189 count = -EIO;
190
191 sysfs_put_active(of->sd);
192 mutex_unlock(&of->mutex);
193
194 if (count < 0)
195 goto out_free;
196
197 if (copy_to_user(userbuf, buf, count)) {
198 count = -EFAULT;
199 goto out_free;
200 } 96 }
201 97
202 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); 98 if (!battr->read)
203 99 return -EIO;
204 *off = offs + count;
205 100
206 out_free: 101 return battr->read(of->file, kobj, battr, buf, pos, count);
207 kfree(buf);
208 return count;
209} 102}
210 103
211/** 104/* kernfs write callback for regular sysfs files */
212 * flush_write_buffer - push buffer to kobject 105static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf,
213 * @of: open file 106 size_t count, loff_t pos)
214 * @buf: data buffer for file
215 * @off: file offset to write to
216 * @count: number of bytes
217 *
218 * Get the correct pointers for the kobject and the attribute we're dealing
219 * with, then call the store() method for it with @buf.
220 */
221static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
222 size_t count)
223{ 107{
224 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 108 const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
225 int rc = 0; 109 struct kobject *kobj = of->kn->parent->priv;
226
227 /*
228 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
229 * nests outside active ref and is just to ensure that the ops
230 * aren't called concurrently for the same open file.
231 */
232 mutex_lock(&of->mutex);
233 if (!sysfs_get_active(of->sd)) {
234 mutex_unlock(&of->mutex);
235 return -ENODEV;
236 }
237 110
238 if (sysfs_is_bin(of->sd)) { 111 if (!count)
239 struct bin_attribute *battr = of->sd->s_attr.bin_attr; 112 return 0;
240
241 rc = -EIO;
242 if (battr->write)
243 rc = battr->write(of->file, kobj, battr, buf, off,
244 count);
245 } else {
246 const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
247
248 rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
249 }
250
251 sysfs_put_active(of->sd);
252 mutex_unlock(&of->mutex);
253 113
254 return rc; 114 return ops->store(kobj, of->kn->priv, buf, count);
255} 115}
256 116
257/** 117/* kernfs write callback for bin sysfs files */
258 * sysfs_write_file - write an attribute 118static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf,
259 * @file: file pointer 119 size_t count, loff_t pos)
260 * @user_buf: data to write
261 * @count: number of bytes
262 * @ppos: starting offset
263 *
264 * Copy data in from userland and pass it to the matching
265 * sysfs_ops->store() by invoking flush_write_buffer().
266 *
267 * There is no easy way for us to know if userspace is only doing a partial
268 * write, so we don't support them. We expect the entire buffer to come on
269 * the first write. Hint: if you're writing a value, first read the file,
270 * modify only the the value you're changing, then write entire buffer
271 * back.
272 */
273static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
274 size_t count, loff_t *ppos)
275{ 120{
276 struct sysfs_open_file *of = sysfs_of(file); 121 struct bin_attribute *battr = of->kn->priv;
277 ssize_t len = min_t(size_t, count, PAGE_SIZE); 122 struct kobject *kobj = of->kn->parent->priv;
278 loff_t size = file_inode(file)->i_size; 123 loff_t size = file_inode(of->file)->i_size;
279 char *buf;
280 124
281 if (sysfs_is_bin(of->sd) && size) { 125 if (size) {
282 if (size <= *ppos) 126 if (size <= pos)
283 return 0; 127 return 0;
284 len = min_t(ssize_t, len, size - *ppos); 128 count = min_t(ssize_t, count, size - pos);
285 } 129 }
286 130 if (!count)
287 if (!len)
288 return 0; 131 return 0;
289 132
290 buf = kmalloc(len + 1, GFP_KERNEL); 133 if (!battr->write)
291 if (!buf) 134 return -EIO;
292 return -ENOMEM;
293 135
294 if (copy_from_user(buf, user_buf, len)) { 136 return battr->write(of->file, kobj, battr, buf, pos, count);
295 len = -EFAULT;
296 goto out_free;
297 }
298 buf[len] = '\0'; /* guarantee string termination */
299
300 len = flush_write_buffer(of, buf, *ppos, len);
301 if (len > 0)
302 *ppos += len;
303out_free:
304 kfree(buf);
305 return len;
306}
307
308static void sysfs_bin_vma_open(struct vm_area_struct *vma)
309{
310 struct file *file = vma->vm_file;
311 struct sysfs_open_file *of = sysfs_of(file);
312
313 if (!of->vm_ops)
314 return;
315
316 if (!sysfs_get_active(of->sd))
317 return;
318
319 if (of->vm_ops->open)
320 of->vm_ops->open(vma);
321
322 sysfs_put_active(of->sd);
323} 137}
324 138
325static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 139static int sysfs_kf_bin_mmap(struct kernfs_open_file *of,
140 struct vm_area_struct *vma)
326{ 141{
327 struct file *file = vma->vm_file; 142 struct bin_attribute *battr = of->kn->priv;
328 struct sysfs_open_file *of = sysfs_of(file); 143 struct kobject *kobj = of->kn->parent->priv;
329 int ret;
330 144
331 if (!of->vm_ops) 145 return battr->mmap(of->file, kobj, battr, vma);
332 return VM_FAULT_SIGBUS;
333
334 if (!sysfs_get_active(of->sd))
335 return VM_FAULT_SIGBUS;
336
337 ret = VM_FAULT_SIGBUS;
338 if (of->vm_ops->fault)
339 ret = of->vm_ops->fault(vma, vmf);
340
341 sysfs_put_active(of->sd);
342 return ret;
343} 146}
344 147
345static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, 148void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr)
346 struct vm_fault *vmf)
347{ 149{
348 struct file *file = vma->vm_file; 150 struct kernfs_node *kn = kobj->sd, *tmp;
349 struct sysfs_open_file *of = sysfs_of(file);
350 int ret;
351
352 if (!of->vm_ops)
353 return VM_FAULT_SIGBUS;
354 151
355 if (!sysfs_get_active(of->sd)) 152 if (kn && dir)
356 return VM_FAULT_SIGBUS; 153 kn = kernfs_find_and_get(kn, dir);
357
358 ret = 0;
359 if (of->vm_ops->page_mkwrite)
360 ret = of->vm_ops->page_mkwrite(vma, vmf);
361 else 154 else
362 file_update_time(file); 155 kernfs_get(kn);
363
364 sysfs_put_active(of->sd);
365 return ret;
366}
367
368static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
369 void *buf, int len, int write)
370{
371 struct file *file = vma->vm_file;
372 struct sysfs_open_file *of = sysfs_of(file);
373 int ret;
374
375 if (!of->vm_ops)
376 return -EINVAL;
377
378 if (!sysfs_get_active(of->sd))
379 return -EINVAL;
380
381 ret = -EINVAL;
382 if (of->vm_ops->access)
383 ret = of->vm_ops->access(vma, addr, buf, len, write);
384
385 sysfs_put_active(of->sd);
386 return ret;
387}
388
389#ifdef CONFIG_NUMA
390static int sysfs_bin_set_policy(struct vm_area_struct *vma,
391 struct mempolicy *new)
392{
393 struct file *file = vma->vm_file;
394 struct sysfs_open_file *of = sysfs_of(file);
395 int ret;
396
397 if (!of->vm_ops)
398 return 0;
399
400 if (!sysfs_get_active(of->sd))
401 return -EINVAL;
402
403 ret = 0;
404 if (of->vm_ops->set_policy)
405 ret = of->vm_ops->set_policy(vma, new);
406
407 sysfs_put_active(of->sd);
408 return ret;
409}
410
411static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
412 unsigned long addr)
413{
414 struct file *file = vma->vm_file;
415 struct sysfs_open_file *of = sysfs_of(file);
416 struct mempolicy *pol;
417
418 if (!of->vm_ops)
419 return vma->vm_policy;
420
421 if (!sysfs_get_active(of->sd))
422 return vma->vm_policy;
423
424 pol = vma->vm_policy;
425 if (of->vm_ops->get_policy)
426 pol = of->vm_ops->get_policy(vma, addr);
427
428 sysfs_put_active(of->sd);
429 return pol;
430}
431
432static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
433 const nodemask_t *to, unsigned long flags)
434{
435 struct file *file = vma->vm_file;
436 struct sysfs_open_file *of = sysfs_of(file);
437 int ret;
438
439 if (!of->vm_ops)
440 return 0;
441
442 if (!sysfs_get_active(of->sd))
443 return 0;
444
445 ret = 0;
446 if (of->vm_ops->migrate)
447 ret = of->vm_ops->migrate(vma, from, to, flags);
448
449 sysfs_put_active(of->sd);
450 return ret;
451}
452#endif
453
454static const struct vm_operations_struct sysfs_bin_vm_ops = {
455 .open = sysfs_bin_vma_open,
456 .fault = sysfs_bin_fault,
457 .page_mkwrite = sysfs_bin_page_mkwrite,
458 .access = sysfs_bin_access,
459#ifdef CONFIG_NUMA
460 .set_policy = sysfs_bin_set_policy,
461 .get_policy = sysfs_bin_get_policy,
462 .migrate = sysfs_bin_migrate,
463#endif
464};
465
466static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
467{
468 struct sysfs_open_file *of = sysfs_of(file);
469 struct bin_attribute *battr = of->sd->s_attr.bin_attr;
470 struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
471 int rc;
472
473 mutex_lock(&of->mutex);
474
475 /* need of->sd for battr, its parent for kobj */
476 rc = -ENODEV;
477 if (!sysfs_get_active(of->sd))
478 goto out_unlock;
479
480 if (!battr->mmap)
481 goto out_put;
482
483 rc = battr->mmap(file, kobj, battr, vma);
484 if (rc)
485 goto out_put;
486
487 /*
488 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
489 * to satisfy versions of X which crash if the mmap fails: that
490 * substitutes a new vm_file, and we don't then want bin_vm_ops.
491 */
492 if (vma->vm_file != file)
493 goto out_put;
494
495 rc = -EINVAL;
496 if (of->mmapped && of->vm_ops != vma->vm_ops)
497 goto out_put;
498 156
499 /* 157 if (kn && attr) {
500 * It is not possible to successfully wrap close. 158 tmp = kernfs_find_and_get(kn, attr);
501 * So error if someone is trying to use close. 159 kernfs_put(kn);
502 */ 160 kn = tmp;
503 rc = -EINVAL;
504 if (vma->vm_ops && vma->vm_ops->close)
505 goto out_put;
506
507 rc = 0;
508 of->mmapped = 1;
509 of->vm_ops = vma->vm_ops;
510 vma->vm_ops = &sysfs_bin_vm_ops;
511out_put:
512 sysfs_put_active(of->sd);
513out_unlock:
514 mutex_unlock(&of->mutex);
515
516 return rc;
517}
518
519/**
520 * sysfs_get_open_dirent - get or create sysfs_open_dirent
521 * @sd: target sysfs_dirent
522 * @of: sysfs_open_file for this instance of open
523 *
524 * If @sd->s_attr.open exists, increment its reference count;
525 * otherwise, create one. @of is chained to the files list.
526 *
527 * LOCKING:
528 * Kernel thread context (may sleep).
529 *
530 * RETURNS:
531 * 0 on success, -errno on failure.
532 */
533static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
534 struct sysfs_open_file *of)
535{
536 struct sysfs_open_dirent *od, *new_od = NULL;
537
538 retry:
539 mutex_lock(&sysfs_open_file_mutex);
540 spin_lock_irq(&sysfs_open_dirent_lock);
541
542 if (!sd->s_attr.open && new_od) {
543 sd->s_attr.open = new_od;
544 new_od = NULL;
545 } 161 }
546 162
547 od = sd->s_attr.open; 163 if (kn) {
548 if (od) { 164 kernfs_notify(kn);
549 atomic_inc(&od->refcnt); 165 kernfs_put(kn);
550 list_add_tail(&of->list, &od->files);
551 }
552
553 spin_unlock_irq(&sysfs_open_dirent_lock);
554 mutex_unlock(&sysfs_open_file_mutex);
555
556 if (od) {
557 kfree(new_od);
558 return 0;
559 } 166 }
167}
168EXPORT_SYMBOL_GPL(sysfs_notify);
560 169
561 /* not there, initialize a new one and retry */ 170static const struct kernfs_ops sysfs_file_kfops_empty = {
562 new_od = kmalloc(sizeof(*new_od), GFP_KERNEL); 171};
563 if (!new_od)
564 return -ENOMEM;
565 172
566 atomic_set(&new_od->refcnt, 0); 173static const struct kernfs_ops sysfs_file_kfops_ro = {
567 atomic_set(&new_od->event, 1); 174 .seq_show = sysfs_kf_seq_show,
568 init_waitqueue_head(&new_od->poll); 175};
569 INIT_LIST_HEAD(&new_od->files);
570 goto retry;
571}
572 176
573/** 177static const struct kernfs_ops sysfs_file_kfops_wo = {
574 * sysfs_put_open_dirent - put sysfs_open_dirent 178 .write = sysfs_kf_write,
575 * @sd: target sysfs_dirent 179};
576 * @of: associated sysfs_open_file
577 *
578 * Put @sd->s_attr.open and unlink @of from the files list. If
579 * reference count reaches zero, disassociate and free it.
580 *
581 * LOCKING:
582 * None.
583 */
584static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
585 struct sysfs_open_file *of)
586{
587 struct sysfs_open_dirent *od = sd->s_attr.open;
588 unsigned long flags;
589 180
590 mutex_lock(&sysfs_open_file_mutex); 181static const struct kernfs_ops sysfs_file_kfops_rw = {
591 spin_lock_irqsave(&sysfs_open_dirent_lock, flags); 182 .seq_show = sysfs_kf_seq_show,
183 .write = sysfs_kf_write,
184};
592 185
593 if (of) 186static const struct kernfs_ops sysfs_bin_kfops_ro = {
594 list_del(&of->list); 187 .read = sysfs_kf_bin_read,
188};
595 189
596 if (atomic_dec_and_test(&od->refcnt)) 190static const struct kernfs_ops sysfs_bin_kfops_wo = {
597 sd->s_attr.open = NULL; 191 .write = sysfs_kf_bin_write,
598 else 192};
599 od = NULL;
600 193
601 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); 194static const struct kernfs_ops sysfs_bin_kfops_rw = {
602 mutex_unlock(&sysfs_open_file_mutex); 195 .read = sysfs_kf_bin_read,
196 .write = sysfs_kf_bin_write,
197};
603 198
604 kfree(od); 199static const struct kernfs_ops sysfs_bin_kfops_mmap = {
605} 200 .read = sysfs_kf_bin_read,
201 .write = sysfs_kf_bin_write,
202 .mmap = sysfs_kf_bin_mmap,
203};
606 204
607static int sysfs_open_file(struct inode *inode, struct file *file) 205int sysfs_add_file_mode_ns(struct kernfs_node *parent,
206 const struct attribute *attr, bool is_bin,
207 umode_t mode, const void *ns)
608{ 208{
609 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 209 struct lock_class_key *key = NULL;
610 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 210 const struct kernfs_ops *ops;
611 struct sysfs_open_file *of; 211 struct kernfs_node *kn;
612 bool has_read, has_write; 212 loff_t size;
613 int error = -EACCES;
614
615 /* need attr_sd for attr and ops, its parent for kobj */
616 if (!sysfs_get_active(attr_sd))
617 return -ENODEV;
618 213
619 if (sysfs_is_bin(attr_sd)) { 214 if (!is_bin) {
620 struct bin_attribute *battr = attr_sd->s_attr.bin_attr; 215 struct kobject *kobj = parent->priv;
621 216 const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
622 has_read = battr->read || battr->mmap;
623 has_write = battr->write || battr->mmap;
624 } else {
625 const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
626 217
627 /* every kobject with an attribute needs a ktype assigned */ 218 /* every kobject with an attribute needs a ktype assigned */
628 if (WARN(!ops, KERN_ERR 219 if (WARN(!sysfs_ops, KERN_ERR
629 "missing sysfs attribute operations for kobject: %s\n", 220 "missing sysfs attribute operations for kobject: %s\n",
630 kobject_name(kobj))) 221 kobject_name(kobj)))
631 goto err_out; 222 return -EINVAL;
632 223
633 has_read = ops->show; 224 if (sysfs_ops->show && sysfs_ops->store)
634 has_write = ops->store; 225 ops = &sysfs_file_kfops_rw;
635 } 226 else if (sysfs_ops->show)
636 227 ops = &sysfs_file_kfops_ro;
637 /* check perms and supported operations */ 228 else if (sysfs_ops->store)
638 if ((file->f_mode & FMODE_WRITE) && 229 ops = &sysfs_file_kfops_wo;
639 (!(inode->i_mode & S_IWUGO) || !has_write)) 230 else
640 goto err_out; 231 ops = &sysfs_file_kfops_empty;
641 232
642 if ((file->f_mode & FMODE_READ) && 233 size = PAGE_SIZE;
643 (!(inode->i_mode & S_IRUGO) || !has_read)) 234 } else {
644 goto err_out; 235 struct bin_attribute *battr = (void *)attr;
645 236
646 /* allocate a sysfs_open_file for the file */ 237 if (battr->mmap)
647 error = -ENOMEM; 238 ops = &sysfs_bin_kfops_mmap;
648 of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); 239 else if (battr->read && battr->write)
649 if (!of) 240 ops = &sysfs_bin_kfops_rw;
650 goto err_out; 241 else if (battr->read)
651 242 ops = &sysfs_bin_kfops_ro;
652 /* 243 else if (battr->write)
653 * The following is done to give a different lockdep key to 244 ops = &sysfs_bin_kfops_wo;
654 * @of->mutex for files which implement mmap. This is a rather 245 else
655 * crude way to avoid false positive lockdep warning around 246 ops = &sysfs_file_kfops_empty;
656 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and 247
657 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 248 size = battr->size;
658 * which mm->mmap_sem nests, while holding @of->mutex. As each
659 * open file has a separate mutex, it's okay as long as those don't
660 * happen on the same file. At this point, we can't easily give
661 * each file a separate locking class. Let's differentiate on
662 * whether the file is bin or not for now.
663 */
664 if (sysfs_is_bin(attr_sd))
665 mutex_init(&of->mutex);
666 else
667 mutex_init(&of->mutex);
668
669 of->sd = attr_sd;
670 of->file = file;
671
672 /*
673 * Always instantiate seq_file even if read access doesn't use
674 * seq_file or is not requested. This unifies private data access
675 * and readable regular files are the vast majority anyway.
676 */
677 if (sysfs_is_bin(attr_sd))
678 error = single_open(file, NULL, of);
679 else
680 error = single_open(file, sysfs_seq_show, of);
681 if (error)
682 goto err_free;
683
684 /* seq_file clears PWRITE unconditionally, restore it if WRITE */
685 if (file->f_mode & FMODE_WRITE)
686 file->f_mode |= FMODE_PWRITE;
687
688 /* make sure we have open dirent struct */
689 error = sysfs_get_open_dirent(attr_sd, of);
690 if (error)
691 goto err_close;
692
693 /* open succeeded, put active references */
694 sysfs_put_active(attr_sd);
695 return 0;
696
697err_close:
698 single_release(inode, file);
699err_free:
700 kfree(of);
701err_out:
702 sysfs_put_active(attr_sd);
703 return error;
704}
705
706static int sysfs_release(struct inode *inode, struct file *filp)
707{
708 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
709 struct sysfs_open_file *of = sysfs_of(filp);
710
711 sysfs_put_open_dirent(sd, of);
712 single_release(inode, filp);
713 kfree(of);
714
715 return 0;
716}
717
718void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
719{
720 struct sysfs_open_dirent *od;
721 struct sysfs_open_file *of;
722
723 if (!sysfs_is_bin(sd))
724 return;
725
726 spin_lock_irq(&sysfs_open_dirent_lock);
727 od = sd->s_attr.open;
728 if (od)
729 atomic_inc(&od->refcnt);
730 spin_unlock_irq(&sysfs_open_dirent_lock);
731 if (!od)
732 return;
733
734 mutex_lock(&sysfs_open_file_mutex);
735 list_for_each_entry(of, &od->files, list) {
736 struct inode *inode = file_inode(of->file);
737 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
738 } 249 }
739 mutex_unlock(&sysfs_open_file_mutex);
740
741 sysfs_put_open_dirent(sd, NULL);
742}
743
744/* Sysfs attribute files are pollable. The idea is that you read
745 * the content and then you use 'poll' or 'select' to wait for
746 * the content to change. When the content changes (assuming the
747 * manager for the kobject supports notification), poll will
748 * return POLLERR|POLLPRI, and select will return the fd whether
749 * it is waiting for read, write, or exceptions.
750 * Once poll/select indicates that the value has changed, you
751 * need to close and re-open the file, or seek to 0 and read again.
752 * Reminder: this only works for attributes which actively support
753 * it, and it is not possible to test an attribute from userspace
754 * to see if it supports poll (Neither 'poll' nor 'select' return
755 * an appropriate error code). When in doubt, set a suitable timeout value.
756 */
757static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
758{
759 struct sysfs_open_file *of = sysfs_of(filp);
760 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
761 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
762
763 /* need parent for the kobj, grab both */
764 if (!sysfs_get_active(attr_sd))
765 goto trigger;
766
767 poll_wait(filp, &od->poll, wait);
768 250
769 sysfs_put_active(attr_sd); 251#ifdef CONFIG_DEBUG_LOCK_ALLOC
770 252 if (!attr->ignore_lockdep)
771 if (of->event != atomic_read(&od->event)) 253 key = attr->key ?: (struct lock_class_key *)&attr->skey;
772 goto trigger; 254#endif
773 255 kn = __kernfs_create_file(parent, attr->name, mode, size, ops,
774 return DEFAULT_POLLMASK; 256 (void *)attr, ns, true, key);
775 257 if (IS_ERR(kn)) {
776 trigger: 258 if (PTR_ERR(kn) == -EEXIST)
777 return DEFAULT_POLLMASK|POLLERR|POLLPRI; 259 sysfs_warn_dup(parent, attr->name);
778} 260 return PTR_ERR(kn);
779
780void sysfs_notify_dirent(struct sysfs_dirent *sd)
781{
782 struct sysfs_open_dirent *od;
783 unsigned long flags;
784
785 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
786
787 if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
788 od = sd->s_attr.open;
789 if (od) {
790 atomic_inc(&od->event);
791 wake_up_interruptible(&od->poll);
792 }
793 } 261 }
794 262 return 0;
795 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
796}
797EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
798
799void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
800{
801 struct sysfs_dirent *sd = k->sd;
802
803 mutex_lock(&sysfs_mutex);
804
805 if (sd && dir)
806 sd = sysfs_find_dirent(sd, dir, NULL);
807 if (sd && attr)
808 sd = sysfs_find_dirent(sd, attr, NULL);
809 if (sd)
810 sysfs_notify_dirent(sd);
811
812 mutex_unlock(&sysfs_mutex);
813}
814EXPORT_SYMBOL_GPL(sysfs_notify);
815
816const struct file_operations sysfs_file_operations = {
817 .read = seq_read,
818 .write = sysfs_write_file,
819 .llseek = generic_file_llseek,
820 .open = sysfs_open_file,
821 .release = sysfs_release,
822 .poll = sysfs_poll,
823};
824
825const struct file_operations sysfs_bin_operations = {
826 .read = sysfs_bin_read,
827 .write = sysfs_write_file,
828 .llseek = generic_file_llseek,
829 .mmap = sysfs_bin_mmap,
830 .open = sysfs_open_file,
831 .release = sysfs_release,
832 .poll = sysfs_poll,
833};
834
835int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
836 const struct attribute *attr, int type,
837 umode_t amode, const void *ns)
838{
839 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
840 struct sysfs_addrm_cxt acxt;
841 struct sysfs_dirent *sd;
842 int rc;
843
844 sd = sysfs_new_dirent(attr->name, mode, type);
845 if (!sd)
846 return -ENOMEM;
847
848 sd->s_ns = ns;
849 sd->s_attr.attr = (void *)attr;
850 sysfs_dirent_init_lockdep(sd);
851
852 sysfs_addrm_start(&acxt);
853 rc = sysfs_add_one(&acxt, sd, dir_sd);
854 sysfs_addrm_finish(&acxt);
855
856 if (rc)
857 sysfs_put(sd);
858
859 return rc;
860} 263}
861 264
862 265int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
863int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 266 bool is_bin)
864 int type)
865{ 267{
866 return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); 268 return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
867} 269}
868 270
869/** 271/**
@@ -877,8 +279,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
877{ 279{
878 BUG_ON(!kobj || !kobj->sd || !attr); 280 BUG_ON(!kobj || !kobj->sd || !attr);
879 281
880 return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, 282 return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
881 attr->mode, ns);
882 283
883} 284}
884EXPORT_SYMBOL_GPL(sysfs_create_file_ns); 285EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -906,19 +307,21 @@ EXPORT_SYMBOL_GPL(sysfs_create_files);
906int sysfs_add_file_to_group(struct kobject *kobj, 307int sysfs_add_file_to_group(struct kobject *kobj,
907 const struct attribute *attr, const char *group) 308 const struct attribute *attr, const char *group)
908{ 309{
909 struct sysfs_dirent *dir_sd; 310 struct kernfs_node *parent;
910 int error; 311 int error;
911 312
912 if (group) 313 if (group) {
913 dir_sd = sysfs_get_dirent(kobj->sd, group); 314 parent = kernfs_find_and_get(kobj->sd, group);
914 else 315 } else {
915 dir_sd = sysfs_get(kobj->sd); 316 parent = kobj->sd;
317 kernfs_get(parent);
318 }
916 319
917 if (!dir_sd) 320 if (!parent)
918 return -ENOENT; 321 return -ENOENT;
919 322
920 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); 323 error = sysfs_add_file(parent, attr, false);
921 sysfs_put(dir_sd); 324 kernfs_put(parent);
922 325
923 return error; 326 return error;
924} 327}
@@ -934,23 +337,20 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
934int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, 337int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
935 umode_t mode) 338 umode_t mode)
936{ 339{
937 struct sysfs_dirent *sd; 340 struct kernfs_node *kn;
938 struct iattr newattrs; 341 struct iattr newattrs;
939 int rc; 342 int rc;
940 343
941 mutex_lock(&sysfs_mutex); 344 kn = kernfs_find_and_get(kobj->sd, attr->name);
942 345 if (!kn)
943 rc = -ENOENT; 346 return -ENOENT;
944 sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
945 if (!sd)
946 goto out;
947 347
948 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO); 348 newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO);
949 newattrs.ia_valid = ATTR_MODE; 349 newattrs.ia_valid = ATTR_MODE;
950 rc = sysfs_sd_setattr(sd, &newattrs);
951 350
952 out: 351 rc = kernfs_setattr(kn, &newattrs);
953 mutex_unlock(&sysfs_mutex); 352
353 kernfs_put(kn);
954 return rc; 354 return rc;
955} 355}
956EXPORT_SYMBOL_GPL(sysfs_chmod_file); 356EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -966,9 +366,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
966void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, 366void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
967 const void *ns) 367 const void *ns)
968{ 368{
969 struct sysfs_dirent *dir_sd = kobj->sd; 369 struct kernfs_node *parent = kobj->sd;
970 370
971 sysfs_hash_and_remove(dir_sd, attr->name, ns); 371 kernfs_remove_by_name_ns(parent, attr->name, ns);
972} 372}
973EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); 373EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
974 374
@@ -989,15 +389,18 @@ EXPORT_SYMBOL_GPL(sysfs_remove_files);
989void sysfs_remove_file_from_group(struct kobject *kobj, 389void sysfs_remove_file_from_group(struct kobject *kobj,
990 const struct attribute *attr, const char *group) 390 const struct attribute *attr, const char *group)
991{ 391{
992 struct sysfs_dirent *dir_sd; 392 struct kernfs_node *parent;
993 393
994 if (group) 394 if (group) {
995 dir_sd = sysfs_get_dirent(kobj->sd, group); 395 parent = kernfs_find_and_get(kobj->sd, group);
996 else 396 } else {
997 dir_sd = sysfs_get(kobj->sd); 397 parent = kobj->sd;
998 if (dir_sd) { 398 kernfs_get(parent);
999 sysfs_hash_and_remove(dir_sd, attr->name, NULL); 399 }
1000 sysfs_put(dir_sd); 400
401 if (parent) {
402 kernfs_remove_by_name(parent, attr->name);
403 kernfs_put(parent);
1001 } 404 }
1002} 405}
1003EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 406EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
@@ -1012,7 +415,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
1012{ 415{
1013 BUG_ON(!kobj || !kobj->sd || !attr); 416 BUG_ON(!kobj || !kobj->sd || !attr);
1014 417
1015 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 418 return sysfs_add_file(kobj->sd, &attr->attr, true);
1016} 419}
1017EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 420EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1018 421
@@ -1024,7 +427,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1024void sysfs_remove_bin_file(struct kobject *kobj, 427void sysfs_remove_bin_file(struct kobject *kobj,
1025 const struct bin_attribute *attr) 428 const struct bin_attribute *attr)
1026{ 429{
1027 sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); 430 kernfs_remove_by_name(kobj->sd, attr->attr.name);
1028} 431}
1029EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); 432EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
1030 433
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10e38ce..6b579387c67a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,7 +18,7 @@
18#include "sysfs.h" 18#include "sysfs.h"
19 19
20 20
21static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 21static void remove_files(struct kernfs_node *parent, struct kobject *kobj,
22 const struct attribute_group *grp) 22 const struct attribute_group *grp)
23{ 23{
24 struct attribute *const *attr; 24 struct attribute *const *attr;
@@ -26,13 +26,13 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
26 26
27 if (grp->attrs) 27 if (grp->attrs)
28 for (attr = grp->attrs; *attr; attr++) 28 for (attr = grp->attrs; *attr; attr++)
29 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 29 kernfs_remove_by_name(parent, (*attr)->name);
30 if (grp->bin_attrs) 30 if (grp->bin_attrs)
31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) 31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
32 sysfs_remove_bin_file(kobj, *bin_attr); 32 sysfs_remove_bin_file(kobj, *bin_attr);
33} 33}
34 34
35static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 35static int create_files(struct kernfs_node *parent, struct kobject *kobj,
36 const struct attribute_group *grp, int update) 36 const struct attribute_group *grp, int update)
37{ 37{
38 struct attribute *const *attr; 38 struct attribute *const *attr;
@@ -49,22 +49,20 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
49 * re-adding (if required) the file. 49 * re-adding (if required) the file.
50 */ 50 */
51 if (update) 51 if (update)
52 sysfs_hash_and_remove(dir_sd, (*attr)->name, 52 kernfs_remove_by_name(parent, (*attr)->name);
53 NULL);
54 if (grp->is_visible) { 53 if (grp->is_visible) {
55 mode = grp->is_visible(kobj, *attr, i); 54 mode = grp->is_visible(kobj, *attr, i);
56 if (!mode) 55 if (!mode)
57 continue; 56 continue;
58 } 57 }
59 error = sysfs_add_file_mode_ns(dir_sd, *attr, 58 error = sysfs_add_file_mode_ns(parent, *attr, false,
60 SYSFS_KOBJ_ATTR,
61 (*attr)->mode | mode, 59 (*attr)->mode | mode,
62 NULL); 60 NULL);
63 if (unlikely(error)) 61 if (unlikely(error))
64 break; 62 break;
65 } 63 }
66 if (error) { 64 if (error) {
67 remove_files(dir_sd, kobj, grp); 65 remove_files(parent, kobj, grp);
68 goto exit; 66 goto exit;
69 } 67 }
70 } 68 }
@@ -78,7 +76,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
78 break; 76 break;
79 } 77 }
80 if (error) 78 if (error)
81 remove_files(dir_sd, kobj, grp); 79 remove_files(parent, kobj, grp);
82 } 80 }
83exit: 81exit:
84 return error; 82 return error;
@@ -88,7 +86,7 @@ exit:
88static int internal_create_group(struct kobject *kobj, int update, 86static int internal_create_group(struct kobject *kobj, int update,
89 const struct attribute_group *grp) 87 const struct attribute_group *grp)
90{ 88{
91 struct sysfs_dirent *sd; 89 struct kernfs_node *kn;
92 int error; 90 int error;
93 91
94 BUG_ON(!kobj || (!update && !kobj->sd)); 92 BUG_ON(!kobj || (!update && !kobj->sd));
@@ -102,18 +100,22 @@ static int internal_create_group(struct kobject *kobj, int update,
102 return -EINVAL; 100 return -EINVAL;
103 } 101 }
104 if (grp->name) { 102 if (grp->name) {
105 error = sysfs_create_subdir(kobj, grp->name, &sd); 103 kn = kernfs_create_dir(kobj->sd, grp->name,
106 if (error) 104 S_IRWXU | S_IRUGO | S_IXUGO, kobj);
107 return error; 105 if (IS_ERR(kn)) {
106 if (PTR_ERR(kn) == -EEXIST)
107 sysfs_warn_dup(kobj->sd, grp->name);
108 return PTR_ERR(kn);
109 }
108 } else 110 } else
109 sd = kobj->sd; 111 kn = kobj->sd;
110 sysfs_get(sd); 112 kernfs_get(kn);
111 error = create_files(sd, kobj, grp, update); 113 error = create_files(kn, kobj, grp, update);
112 if (error) { 114 if (error) {
113 if (grp->name) 115 if (grp->name)
114 sysfs_remove(sd); 116 kernfs_remove(kn);
115 } 117 }
116 sysfs_put(sd); 118 kernfs_put(kn);
117 return error; 119 return error;
118} 120}
119 121
@@ -203,25 +205,27 @@ EXPORT_SYMBOL_GPL(sysfs_update_group);
203void sysfs_remove_group(struct kobject *kobj, 205void sysfs_remove_group(struct kobject *kobj,
204 const struct attribute_group *grp) 206 const struct attribute_group *grp)
205{ 207{
206 struct sysfs_dirent *dir_sd = kobj->sd; 208 struct kernfs_node *parent = kobj->sd;
207 struct sysfs_dirent *sd; 209 struct kernfs_node *kn;
208 210
209 if (grp->name) { 211 if (grp->name) {
210 sd = sysfs_get_dirent(dir_sd, grp->name); 212 kn = kernfs_find_and_get(parent, grp->name);
211 if (!sd) { 213 if (!kn) {
212 WARN(!sd, KERN_WARNING 214 WARN(!kn, KERN_WARNING
213 "sysfs group %p not found for kobject '%s'\n", 215 "sysfs group %p not found for kobject '%s'\n",
214 grp, kobject_name(kobj)); 216 grp, kobject_name(kobj));
215 return; 217 return;
216 } 218 }
217 } else 219 } else {
218 sd = sysfs_get(dir_sd); 220 kn = parent;
221 kernfs_get(kn);
222 }
219 223
220 remove_files(sd, kobj, grp); 224 remove_files(kn, kobj, grp);
221 if (grp->name) 225 if (grp->name)
222 sysfs_remove(sd); 226 kernfs_remove(kn);
223 227
224 sysfs_put(sd); 228 kernfs_put(kn);
225} 229}
226EXPORT_SYMBOL_GPL(sysfs_remove_group); 230EXPORT_SYMBOL_GPL(sysfs_remove_group);
227 231
@@ -257,22 +261,22 @@ EXPORT_SYMBOL_GPL(sysfs_remove_groups);
257int sysfs_merge_group(struct kobject *kobj, 261int sysfs_merge_group(struct kobject *kobj,
258 const struct attribute_group *grp) 262 const struct attribute_group *grp)
259{ 263{
260 struct sysfs_dirent *dir_sd; 264 struct kernfs_node *parent;
261 int error = 0; 265 int error = 0;
262 struct attribute *const *attr; 266 struct attribute *const *attr;
263 int i; 267 int i;
264 268
265 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 269 parent = kernfs_find_and_get(kobj->sd, grp->name);
266 if (!dir_sd) 270 if (!parent)
267 return -ENOENT; 271 return -ENOENT;
268 272
269 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) 273 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
270 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); 274 error = sysfs_add_file(parent, *attr, false);
271 if (error) { 275 if (error) {
272 while (--i >= 0) 276 while (--i >= 0)
273 sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); 277 kernfs_remove_by_name(parent, (*--attr)->name);
274 } 278 }
275 sysfs_put(dir_sd); 279 kernfs_put(parent);
276 280
277 return error; 281 return error;
278} 282}
@@ -286,14 +290,14 @@ EXPORT_SYMBOL_GPL(sysfs_merge_group);
286void sysfs_unmerge_group(struct kobject *kobj, 290void sysfs_unmerge_group(struct kobject *kobj,
287 const struct attribute_group *grp) 291 const struct attribute_group *grp)
288{ 292{
289 struct sysfs_dirent *dir_sd; 293 struct kernfs_node *parent;
290 struct attribute *const *attr; 294 struct attribute *const *attr;
291 295
292 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 296 parent = kernfs_find_and_get(kobj->sd, grp->name);
293 if (dir_sd) { 297 if (parent) {
294 for (attr = grp->attrs; *attr; ++attr) 298 for (attr = grp->attrs; *attr; ++attr)
295 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 299 kernfs_remove_by_name(parent, (*attr)->name);
296 sysfs_put(dir_sd); 300 kernfs_put(parent);
297 } 301 }
298} 302}
299EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 303EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
@@ -308,15 +312,15 @@ EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
308int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, 312int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
309 struct kobject *target, const char *link_name) 313 struct kobject *target, const char *link_name)
310{ 314{
311 struct sysfs_dirent *dir_sd; 315 struct kernfs_node *parent;
312 int error = 0; 316 int error = 0;
313 317
314 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 318 parent = kernfs_find_and_get(kobj->sd, group_name);
315 if (!dir_sd) 319 if (!parent)
316 return -ENOENT; 320 return -ENOENT;
317 321
318 error = sysfs_create_link_sd(dir_sd, target, link_name); 322 error = sysfs_create_link_sd(parent, target, link_name);
319 sysfs_put(dir_sd); 323 kernfs_put(parent);
320 324
321 return error; 325 return error;
322} 326}
@@ -331,12 +335,12 @@ EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
331void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, 335void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
332 const char *link_name) 336 const char *link_name)
333{ 337{
334 struct sysfs_dirent *dir_sd; 338 struct kernfs_node *parent;
335 339
336 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 340 parent = kernfs_find_and_get(kobj->sd, group_name);
337 if (dir_sd) { 341 if (parent) {
338 sysfs_hash_and_remove(dir_sd, link_name, NULL); 342 kernfs_remove_by_name(parent, link_name);
339 sysfs_put(dir_sd); 343 kernfs_put(parent);
340 } 344 }
341} 345}
342EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); 346EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
deleted file mode 100644
index 1750f790af3b..000000000000
--- a/fs/sysfs/inode.c
+++ /dev/null
@@ -1,331 +0,0 @@
1/*
2 * fs/sysfs/inode.c - basic sysfs inode and dentry operations
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */
12
13#undef DEBUG
14
15#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/backing-dev.h>
18#include <linux/capability.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/slab.h>
22#include <linux/sysfs.h>
23#include <linux/xattr.h>
24#include <linux/security.h>
25#include "sysfs.h"
26
27static const struct address_space_operations sysfs_aops = {
28 .readpage = simple_readpage,
29 .write_begin = simple_write_begin,
30 .write_end = simple_write_end,
31};
32
33static struct backing_dev_info sysfs_backing_dev_info = {
34 .name = "sysfs",
35 .ra_pages = 0, /* No readahead */
36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
37};
38
39static const struct inode_operations sysfs_inode_operations = {
40 .permission = sysfs_permission,
41 .setattr = sysfs_setattr,
42 .getattr = sysfs_getattr,
43 .setxattr = sysfs_setxattr,
44};
45
46int __init sysfs_inode_init(void)
47{
48 return bdi_init(&sysfs_backing_dev_info);
49}
50
51static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
52{
53 struct sysfs_inode_attrs *attrs;
54 struct iattr *iattrs;
55
56 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
57 if (!attrs)
58 return NULL;
59 iattrs = &attrs->ia_iattr;
60
61 /* assign default attributes */
62 iattrs->ia_mode = sd->s_mode;
63 iattrs->ia_uid = GLOBAL_ROOT_UID;
64 iattrs->ia_gid = GLOBAL_ROOT_GID;
65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
66
67 return attrs;
68}
69
70int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
71{
72 struct sysfs_inode_attrs *sd_attrs;
73 struct iattr *iattrs;
74 unsigned int ia_valid = iattr->ia_valid;
75
76 sd_attrs = sd->s_iattr;
77
78 if (!sd_attrs) {
79 /* setting attributes for the first time, allocate now */
80 sd_attrs = sysfs_init_inode_attrs(sd);
81 if (!sd_attrs)
82 return -ENOMEM;
83 sd->s_iattr = sd_attrs;
84 }
85 /* attributes were changed at least once in past */
86 iattrs = &sd_attrs->ia_iattr;
87
88 if (ia_valid & ATTR_UID)
89 iattrs->ia_uid = iattr->ia_uid;
90 if (ia_valid & ATTR_GID)
91 iattrs->ia_gid = iattr->ia_gid;
92 if (ia_valid & ATTR_ATIME)
93 iattrs->ia_atime = iattr->ia_atime;
94 if (ia_valid & ATTR_MTIME)
95 iattrs->ia_mtime = iattr->ia_mtime;
96 if (ia_valid & ATTR_CTIME)
97 iattrs->ia_ctime = iattr->ia_ctime;
98 if (ia_valid & ATTR_MODE) {
99 umode_t mode = iattr->ia_mode;
100 iattrs->ia_mode = sd->s_mode = mode;
101 }
102 return 0;
103}
104
105int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
106{
107 struct inode *inode = dentry->d_inode;
108 struct sysfs_dirent *sd = dentry->d_fsdata;
109 int error;
110
111 if (!sd)
112 return -EINVAL;
113
114 mutex_lock(&sysfs_mutex);
115 error = inode_change_ok(inode, iattr);
116 if (error)
117 goto out;
118
119 error = sysfs_sd_setattr(sd, iattr);
120 if (error)
121 goto out;
122
123 /* this ignores size changes */
124 setattr_copy(inode, iattr);
125
126out:
127 mutex_unlock(&sysfs_mutex);
128 return error;
129}
130
131static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata,
132 u32 *secdata_len)
133{
134 struct sysfs_inode_attrs *iattrs;
135 void *old_secdata;
136 size_t old_secdata_len;
137
138 if (!sd->s_iattr) {
139 sd->s_iattr = sysfs_init_inode_attrs(sd);
140 if (!sd->s_iattr)
141 return -ENOMEM;
142 }
143
144 iattrs = sd->s_iattr;
145 old_secdata = iattrs->ia_secdata;
146 old_secdata_len = iattrs->ia_secdata_len;
147
148 iattrs->ia_secdata = *secdata;
149 iattrs->ia_secdata_len = *secdata_len;
150
151 *secdata = old_secdata;
152 *secdata_len = old_secdata_len;
153 return 0;
154}
155
156int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
157 size_t size, int flags)
158{
159 struct sysfs_dirent *sd = dentry->d_fsdata;
160 void *secdata;
161 int error;
162 u32 secdata_len = 0;
163
164 if (!sd)
165 return -EINVAL;
166
167 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
168 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
169 error = security_inode_setsecurity(dentry->d_inode, suffix,
170 value, size, flags);
171 if (error)
172 goto out;
173 error = security_inode_getsecctx(dentry->d_inode,
174 &secdata, &secdata_len);
175 if (error)
176 goto out;
177
178 mutex_lock(&sysfs_mutex);
179 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
180 mutex_unlock(&sysfs_mutex);
181
182 if (secdata)
183 security_release_secctx(secdata, secdata_len);
184 } else
185 return -EINVAL;
186out:
187 return error;
188}
189
190static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
191{
192 inode->i_mode = mode;
193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
194}
195
196static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
197{
198 inode->i_uid = iattr->ia_uid;
199 inode->i_gid = iattr->ia_gid;
200 inode->i_atime = iattr->ia_atime;
201 inode->i_mtime = iattr->ia_mtime;
202 inode->i_ctime = iattr->ia_ctime;
203}
204
205static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
206{
207 struct sysfs_inode_attrs *iattrs = sd->s_iattr;
208
209 inode->i_mode = sd->s_mode;
210 if (iattrs) {
211 /* sysfs_dirent has non-default attributes
212 * get them from persistent copy in sysfs_dirent
213 */
214 set_inode_attr(inode, &iattrs->ia_iattr);
215 security_inode_notifysecctx(inode,
216 iattrs->ia_secdata,
217 iattrs->ia_secdata_len);
218 }
219
220 if (sysfs_type(sd) == SYSFS_DIR)
221 set_nlink(inode, sd->s_dir.subdirs + 2);
222}
223
224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
225 struct kstat *stat)
226{
227 struct sysfs_dirent *sd = dentry->d_fsdata;
228 struct inode *inode = dentry->d_inode;
229
230 mutex_lock(&sysfs_mutex);
231 sysfs_refresh_inode(sd, inode);
232 mutex_unlock(&sysfs_mutex);
233
234 generic_fillattr(inode, stat);
235 return 0;
236}
237
238static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
239{
240 struct bin_attribute *bin_attr;
241
242 inode->i_private = sysfs_get(sd);
243 inode->i_mapping->a_ops = &sysfs_aops;
244 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
245 inode->i_op = &sysfs_inode_operations;
246
247 set_default_inode_attr(inode, sd->s_mode);
248 sysfs_refresh_inode(sd, inode);
249
250 /* initialize inode according to type */
251 switch (sysfs_type(sd)) {
252 case SYSFS_DIR:
253 inode->i_op = &sysfs_dir_inode_operations;
254 inode->i_fop = &sysfs_dir_operations;
255 break;
256 case SYSFS_KOBJ_ATTR:
257 inode->i_size = PAGE_SIZE;
258 inode->i_fop = &sysfs_file_operations;
259 break;
260 case SYSFS_KOBJ_BIN_ATTR:
261 bin_attr = sd->s_attr.bin_attr;
262 inode->i_size = bin_attr->size;
263 inode->i_fop = &sysfs_bin_operations;
264 break;
265 case SYSFS_KOBJ_LINK:
266 inode->i_op = &sysfs_symlink_inode_operations;
267 break;
268 default:
269 BUG();
270 }
271
272 unlock_new_inode(inode);
273}
274
275/**
276 * sysfs_get_inode - get inode for sysfs_dirent
277 * @sb: super block
278 * @sd: sysfs_dirent to allocate inode for
279 *
280 * Get inode for @sd. If such inode doesn't exist, a new inode
281 * is allocated and basics are initialized. New inode is
282 * returned locked.
283 *
284 * LOCKING:
285 * Kernel thread context (may sleep).
286 *
287 * RETURNS:
288 * Pointer to allocated inode on success, NULL on failure.
289 */
290struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
291{
292 struct inode *inode;
293
294 inode = iget_locked(sb, sd->s_ino);
295 if (inode && (inode->i_state & I_NEW))
296 sysfs_init_inode(sd, inode);
297
298 return inode;
299}
300
301/*
302 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
303 * To prevent the sysfs inode numbers from being freed prematurely we take a
304 * reference to sysfs_dirent from the sysfs inode. A
305 * super_operations.evict_inode() implementation is needed to drop that
306 * reference upon inode destruction.
307 */
308void sysfs_evict_inode(struct inode *inode)
309{
310 struct sysfs_dirent *sd = inode->i_private;
311
312 truncate_inode_pages(&inode->i_data, 0);
313 clear_inode(inode);
314 sysfs_put(sd);
315}
316
317int sysfs_permission(struct inode *inode, int mask)
318{
319 struct sysfs_dirent *sd;
320
321 if (mask & MAY_NOT_BLOCK)
322 return -ECHILD;
323
324 sd = inode->i_private;
325
326 mutex_lock(&sysfs_mutex);
327 sysfs_refresh_inode(sd, inode);
328 mutex_unlock(&sysfs_mutex);
329
330 return generic_permission(inode, mask);
331}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 834ec2cdb7a3..6211230814fd 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -14,146 +14,41 @@
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/pagemap.h>
18#include <linux/init.h> 17#include <linux/init.h>
19#include <linux/module.h>
20#include <linux/magic.h>
21#include <linux/slab.h>
22#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
23 19
24#include "sysfs.h" 20#include "sysfs.h"
25 21
26 22static struct kernfs_root *sysfs_root;
27static struct vfsmount *sysfs_mnt; 23struct kernfs_node *sysfs_root_kn;
28struct kmem_cache *sysfs_dir_cachep;
29
30static const struct super_operations sysfs_ops = {
31 .statfs = simple_statfs,
32 .drop_inode = generic_delete_inode,
33 .evict_inode = sysfs_evict_inode,
34};
35
36struct sysfs_dirent sysfs_root = {
37 .s_name = "",
38 .s_count = ATOMIC_INIT(1),
39 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
40 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
41 .s_ino = 1,
42};
43
44static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
45{
46 struct inode *inode;
47 struct dentry *root;
48
49 sb->s_blocksize = PAGE_CACHE_SIZE;
50 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
51 sb->s_magic = SYSFS_MAGIC;
52 sb->s_op = &sysfs_ops;
53 sb->s_time_gran = 1;
54
55 /* get root inode, initialize and unlock it */
56 mutex_lock(&sysfs_mutex);
57 inode = sysfs_get_inode(sb, &sysfs_root);
58 mutex_unlock(&sysfs_mutex);
59 if (!inode) {
60 pr_debug("sysfs: could not get root inode\n");
61 return -ENOMEM;
62 }
63
64 /* instantiate and link root dentry */
65 root = d_make_root(inode);
66 if (!root) {
67 pr_debug("%s: could not get root dentry!\n", __func__);
68 return -ENOMEM;
69 }
70 root->d_fsdata = &sysfs_root;
71 sb->s_root = root;
72 sb->s_d_op = &sysfs_dentry_ops;
73 return 0;
74}
75
76static int sysfs_test_super(struct super_block *sb, void *data)
77{
78 struct sysfs_super_info *sb_info = sysfs_info(sb);
79 struct sysfs_super_info *info = data;
80 enum kobj_ns_type type;
81 int found = 1;
82
83 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
84 if (sb_info->ns[type] != info->ns[type])
85 found = 0;
86 }
87 return found;
88}
89
90static int sysfs_set_super(struct super_block *sb, void *data)
91{
92 int error;
93 error = set_anon_super(sb, data);
94 if (!error)
95 sb->s_fs_info = data;
96 return error;
97}
98
99static void free_sysfs_super_info(struct sysfs_super_info *info)
100{
101 int type;
102 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
103 kobj_ns_drop(type, info->ns[type]);
104 kfree(info);
105}
106 24
107static struct dentry *sysfs_mount(struct file_system_type *fs_type, 25static struct dentry *sysfs_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data) 26 int flags, const char *dev_name, void *data)
109{ 27{
110 struct sysfs_super_info *info; 28 struct dentry *root;
111 enum kobj_ns_type type; 29 void *ns;
112 struct super_block *sb;
113 int error;
114 30
115 if (!(flags & MS_KERNMOUNT)) { 31 if (!(flags & MS_KERNMOUNT)) {
116 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 32 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
117 return ERR_PTR(-EPERM); 33 return ERR_PTR(-EPERM);
118 34
119 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { 35 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
120 if (!kobj_ns_current_may_mount(type)) 36 return ERR_PTR(-EPERM);
121 return ERR_PTR(-EPERM);
122 }
123 }
124
125 info = kzalloc(sizeof(*info), GFP_KERNEL);
126 if (!info)
127 return ERR_PTR(-ENOMEM);
128
129 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
130 info->ns[type] = kobj_ns_grab_current(type);
131
132 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
133 if (IS_ERR(sb) || sb->s_fs_info != info)
134 free_sysfs_super_info(info);
135 if (IS_ERR(sb))
136 return ERR_CAST(sb);
137 if (!sb->s_root) {
138 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
139 if (error) {
140 deactivate_locked_super(sb);
141 return ERR_PTR(error);
142 }
143 sb->s_flags |= MS_ACTIVE;
144 } 37 }
145 38
146 return dget(sb->s_root); 39 ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
40 root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns);
41 if (IS_ERR(root))
42 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
43 return root;
147} 44}
148 45
149static void sysfs_kill_sb(struct super_block *sb) 46static void sysfs_kill_sb(struct super_block *sb)
150{ 47{
151 struct sysfs_super_info *info = sysfs_info(sb); 48 void *ns = (void *)kernfs_super_ns(sb);
152 /* Remove the superblock from fs_supers/s_instances 49
153 * so we can't find it, before freeing sysfs_super_info. 50 kernfs_kill_sb(sb);
154 */ 51 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
155 kill_anon_super(sb);
156 free_sysfs_super_info(info);
157} 52}
158 53
159static struct file_system_type sysfs_fs_type = { 54static struct file_system_type sysfs_fs_type = {
@@ -165,48 +60,19 @@ static struct file_system_type sysfs_fs_type = {
165 60
166int __init sysfs_init(void) 61int __init sysfs_init(void)
167{ 62{
168 int err = -ENOMEM; 63 int err;
169 64
170 sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", 65 sysfs_root = kernfs_create_root(NULL, NULL);
171 sizeof(struct sysfs_dirent), 66 if (IS_ERR(sysfs_root))
172 0, 0, NULL); 67 return PTR_ERR(sysfs_root);
173 if (!sysfs_dir_cachep)
174 goto out;
175 68
176 err = sysfs_inode_init(); 69 sysfs_root_kn = sysfs_root->kn;
177 if (err)
178 goto out_err;
179 70
180 err = register_filesystem(&sysfs_fs_type); 71 err = register_filesystem(&sysfs_fs_type);
181 if (!err) { 72 if (err) {
182 sysfs_mnt = kern_mount(&sysfs_fs_type); 73 kernfs_destroy_root(sysfs_root);
183 if (IS_ERR(sysfs_mnt)) { 74 return err;
184 printk(KERN_ERR "sysfs: could not mount!\n"); 75 }
185 err = PTR_ERR(sysfs_mnt);
186 sysfs_mnt = NULL;
187 unregister_filesystem(&sysfs_fs_type);
188 goto out_err;
189 }
190 } else
191 goto out_err;
192out:
193 return err;
194out_err:
195 kmem_cache_destroy(sysfs_dir_cachep);
196 sysfs_dir_cachep = NULL;
197 goto out;
198}
199
200#undef sysfs_get
201struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
202{
203 return __sysfs_get(sd);
204}
205EXPORT_SYMBOL_GPL(sysfs_get);
206 76
207#undef sysfs_put 77 return 0;
208void sysfs_put(struct sysfs_dirent *sd)
209{
210 __sysfs_put(sd);
211} 78}
212EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1bf1a09..aecb15f84557 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,109 +11,73 @@
11 */ 11 */
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/gfp.h>
15#include <linux/mount.h>
16#include <linux/module.h> 14#include <linux/module.h>
17#include <linux/kobject.h> 15#include <linux/kobject.h>
18#include <linux/namei.h>
19#include <linux/mutex.h> 16#include <linux/mutex.h>
20#include <linux/security.h> 17#include <linux/security.h>
21 18
22#include "sysfs.h" 19#include "sysfs.h"
23 20
24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, 21static int sysfs_do_create_link_sd(struct kernfs_node *parent,
25 struct kobject *target, 22 struct kobject *target_kobj,
26 const char *name, int warn) 23 const char *name, int warn)
27{ 24{
28 struct sysfs_dirent *target_sd = NULL; 25 struct kernfs_node *kn, *target = NULL;
29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type;
32 int error;
33 26
34 BUG_ON(!name || !parent_sd); 27 BUG_ON(!name || !parent);
35 28
36 /* 29 /*
37 * We don't own @target and it may be removed at any time. 30 * We don't own @target_kobj and it may be removed at any time.
38 * Synchronize using sysfs_symlink_target_lock. See 31 * Synchronize using sysfs_symlink_target_lock. See
39 * sysfs_remove_dir() for details. 32 * sysfs_remove_dir() for details.
40 */ 33 */
41 spin_lock(&sysfs_symlink_target_lock); 34 spin_lock(&sysfs_symlink_target_lock);
42 if (target->sd) 35 if (target_kobj->sd) {
43 target_sd = sysfs_get(target->sd); 36 target = target_kobj->sd;
37 kernfs_get(target);
38 }
44 spin_unlock(&sysfs_symlink_target_lock); 39 spin_unlock(&sysfs_symlink_target_lock);
45 40
46 error = -ENOENT; 41 if (!target)
47 if (!target_sd) 42 return -ENOENT;
48 goto out_put;
49
50 error = -ENOMEM;
51 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
52 if (!sd)
53 goto out_put;
54 43
55 ns_type = sysfs_ns_type(parent_sd); 44 kn = kernfs_create_link(parent, name, target);
56 if (ns_type) 45 kernfs_put(target);
57 sd->s_ns = target_sd->s_ns;
58 sd->s_symlink.target_sd = target_sd;
59 target_sd = NULL; /* reference is now owned by the symlink */
60
61 sysfs_addrm_start(&acxt);
62 /* Symlinks must be between directories with the same ns_type */
63 if (!ns_type ||
64 (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
65 if (warn)
66 error = sysfs_add_one(&acxt, sd, parent_sd);
67 else
68 error = __sysfs_add_one(&acxt, sd, parent_sd);
69 } else {
70 error = -EINVAL;
71 WARN(1, KERN_WARNING
72 "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
73 parent_sd->s_name,
74 sd->s_name,
75 sd->s_symlink.target_sd->s_parent->s_name,
76 sd->s_symlink.target_sd->s_name);
77 }
78 sysfs_addrm_finish(&acxt);
79 46
80 if (error) 47 if (!IS_ERR(kn))
81 goto out_put; 48 return 0;
82 49
83 return 0; 50 if (warn && PTR_ERR(kn) == -EEXIST)
84 51 sysfs_warn_dup(parent, name);
85 out_put: 52 return PTR_ERR(kn);
86 sysfs_put(target_sd);
87 sysfs_put(sd);
88 return error;
89} 53}
90 54
91/** 55/**
92 * sysfs_create_link_sd - create symlink to a given object. 56 * sysfs_create_link_sd - create symlink to a given object.
93 * @sd: directory we're creating the link in. 57 * @kn: directory we're creating the link in.
94 * @target: object we're pointing to. 58 * @target: object we're pointing to.
95 * @name: name of the symlink. 59 * @name: name of the symlink.
96 */ 60 */
97int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, 61int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
98 const char *name) 62 const char *name)
99{ 63{
100 return sysfs_do_create_link_sd(sd, target, name, 1); 64 return sysfs_do_create_link_sd(kn, target, name, 1);
101} 65}
102 66
103static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 67static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
104 const char *name, int warn) 68 const char *name, int warn)
105{ 69{
106 struct sysfs_dirent *parent_sd = NULL; 70 struct kernfs_node *parent = NULL;
107 71
108 if (!kobj) 72 if (!kobj)
109 parent_sd = &sysfs_root; 73 parent = sysfs_root_kn;
110 else 74 else
111 parent_sd = kobj->sd; 75 parent = kobj->sd;
112 76
113 if (!parent_sd) 77 if (!parent)
114 return -EFAULT; 78 return -EFAULT;
115 79
116 return sysfs_do_create_link_sd(parent_sd, target, name, warn); 80 return sysfs_do_create_link_sd(parent, target, name, warn);
117} 81}
118 82
119/** 83/**
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
164 * sysfs_remove_dir() for details. 128 * sysfs_remove_dir() for details.
165 */ 129 */
166 spin_lock(&sysfs_symlink_target_lock); 130 spin_lock(&sysfs_symlink_target_lock);
167 if (targ->sd && sysfs_ns_type(kobj->sd)) 131 if (targ->sd && kernfs_ns_enabled(kobj->sd))
168 ns = targ->sd->s_ns; 132 ns = targ->sd->ns;
169 spin_unlock(&sysfs_symlink_target_lock); 133 spin_unlock(&sysfs_symlink_target_lock);
170 sysfs_hash_and_remove(kobj->sd, name, ns); 134 kernfs_remove_by_name_ns(kobj->sd, name, ns);
171} 135}
172 136
173/** 137/**
@@ -177,14 +141,14 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
177 */ 141 */
178void sysfs_remove_link(struct kobject *kobj, const char *name) 142void sysfs_remove_link(struct kobject *kobj, const char *name)
179{ 143{
180 struct sysfs_dirent *parent_sd = NULL; 144 struct kernfs_node *parent = NULL;
181 145
182 if (!kobj) 146 if (!kobj)
183 parent_sd = &sysfs_root; 147 parent = sysfs_root_kn;
184 else 148 else
185 parent_sd = kobj->sd; 149 parent = kobj->sd;
186 150
187 sysfs_hash_and_remove(parent_sd, name, NULL); 151 kernfs_remove_by_name(parent, name);
188} 152}
189EXPORT_SYMBOL_GPL(sysfs_remove_link); 153EXPORT_SYMBOL_GPL(sysfs_remove_link);
190 154
@@ -201,130 +165,33 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link);
201int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, 165int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
202 const char *old, const char *new, const void *new_ns) 166 const char *old, const char *new, const void *new_ns)
203{ 167{
204 struct sysfs_dirent *parent_sd, *sd = NULL; 168 struct kernfs_node *parent, *kn = NULL;
205 const void *old_ns = NULL; 169 const void *old_ns = NULL;
206 int result; 170 int result;
207 171
208 if (!kobj) 172 if (!kobj)
209 parent_sd = &sysfs_root; 173 parent = sysfs_root_kn;
210 else 174 else
211 parent_sd = kobj->sd; 175 parent = kobj->sd;
212 176
213 if (targ->sd) 177 if (targ->sd)
214 old_ns = targ->sd->s_ns; 178 old_ns = targ->sd->ns;
215 179
216 result = -ENOENT; 180 result = -ENOENT;
217 sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); 181 kn = kernfs_find_and_get_ns(parent, old, old_ns);
218 if (!sd) 182 if (!kn)
219 goto out; 183 goto out;
220 184
221 result = -EINVAL; 185 result = -EINVAL;
222 if (sysfs_type(sd) != SYSFS_KOBJ_LINK) 186 if (kernfs_type(kn) != KERNFS_LINK)
223 goto out; 187 goto out;
224 if (sd->s_symlink.target_sd->s_dir.kobj != targ) 188 if (kn->symlink.target_kn->priv != targ)
225 goto out; 189 goto out;
226 190
227 result = sysfs_rename(sd, parent_sd, new, new_ns); 191 result = kernfs_rename_ns(kn, parent, new, new_ns);
228 192
229out: 193out:
230 sysfs_put(sd); 194 kernfs_put(kn);
231 return result; 195 return result;
232} 196}
233EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); 197EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
234
235static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
236 struct sysfs_dirent *target_sd, char *path)
237{
238 struct sysfs_dirent *base, *sd;
239 char *s = path;
240 int len = 0;
241
242 /* go up to the root, stop at the base */
243 base = parent_sd;
244 while (base->s_parent) {
245 sd = target_sd->s_parent;
246 while (sd->s_parent && base != sd)
247 sd = sd->s_parent;
248
249 if (base == sd)
250 break;
251
252 strcpy(s, "../");
253 s += 3;
254 base = base->s_parent;
255 }
256
257 /* determine end of target string for reverse fillup */
258 sd = target_sd;
259 while (sd->s_parent && sd != base) {
260 len += strlen(sd->s_name) + 1;
261 sd = sd->s_parent;
262 }
263
264 /* check limits */
265 if (len < 2)
266 return -EINVAL;
267 len--;
268 if ((s - path) + len > PATH_MAX)
269 return -ENAMETOOLONG;
270
271 /* reverse fillup of target string from target to base */
272 sd = target_sd;
273 while (sd->s_parent && sd != base) {
274 int slen = strlen(sd->s_name);
275
276 len -= slen;
277 strncpy(s + len, sd->s_name, slen);
278 if (len)
279 s[--len] = '/';
280
281 sd = sd->s_parent;
282 }
283
284 return 0;
285}
286
287static int sysfs_getlink(struct dentry *dentry, char *path)
288{
289 struct sysfs_dirent *sd = dentry->d_fsdata;
290 struct sysfs_dirent *parent_sd = sd->s_parent;
291 struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
292 int error;
293
294 mutex_lock(&sysfs_mutex);
295 error = sysfs_get_target_path(parent_sd, target_sd, path);
296 mutex_unlock(&sysfs_mutex);
297
298 return error;
299}
300
301static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
302{
303 int error = -ENOMEM;
304 unsigned long page = get_zeroed_page(GFP_KERNEL);
305 if (page) {
306 error = sysfs_getlink(dentry, (char *) page);
307 if (error < 0)
308 free_page((unsigned long)page);
309 }
310 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
311 return NULL;
312}
313
314static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
315 void *cookie)
316{
317 char *page = nd_get_link(nd);
318 if (!IS_ERR(page))
319 free_page((unsigned long)page);
320}
321
322const struct inode_operations sysfs_symlink_inode_operations = {
323 .setxattr = sysfs_setxattr,
324 .readlink = generic_readlink,
325 .follow_link = sysfs_follow_link,
326 .put_link = sysfs_put_link,
327 .setattr = sysfs_setattr,
328 .getattr = sysfs_getattr,
329 .permission = sysfs_permission,
330};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fbfb3f6..0e2f1cccb812 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,248 +8,36 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h> 11#ifndef __SYSFS_INTERNAL_H
12#include <linux/kobject_ns.h> 12#define __SYSFS_INTERNAL_H
13#include <linux/fs.h>
14#include <linux/rbtree.h>
15 13
16struct sysfs_open_dirent; 14#include <linux/sysfs.h>
17
18/* type-specific structures for sysfs_dirent->s_* union members */
19struct sysfs_elem_dir {
20 struct kobject *kobj;
21
22 unsigned long subdirs;
23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root children;
25};
26
27struct sysfs_elem_symlink {
28 struct sysfs_dirent *target_sd;
29};
30
31struct sysfs_elem_attr {
32 union {
33 struct attribute *attr;
34 struct bin_attribute *bin_attr;
35 };
36 struct sysfs_open_dirent *open;
37};
38
39struct sysfs_inode_attrs {
40 struct iattr ia_iattr;
41 void *ia_secdata;
42 u32 ia_secdata_len;
43};
44
45/*
46 * sysfs_dirent - the building block of sysfs hierarchy. Each and
47 * every sysfs node is represented by single sysfs_dirent.
48 *
49 * As long as s_count reference is held, the sysfs_dirent itself is
50 * accessible. Dereferencing s_elem or any other outer entity
51 * requires s_active reference.
52 */
53struct sysfs_dirent {
54 atomic_t s_count;
55 atomic_t s_active;
56#ifdef CONFIG_DEBUG_LOCK_ALLOC
57 struct lockdep_map dep_map;
58#endif
59 struct sysfs_dirent *s_parent;
60 const char *s_name;
61
62 struct rb_node s_rb;
63
64 union {
65 struct completion *completion;
66 struct sysfs_dirent *removed_list;
67 } u;
68
69 const void *s_ns; /* namespace tag */
70 unsigned int s_hash; /* ns + name hash */
71 union {
72 struct sysfs_elem_dir s_dir;
73 struct sysfs_elem_symlink s_symlink;
74 struct sysfs_elem_attr s_attr;
75 };
76
77 unsigned short s_flags;
78 umode_t s_mode;
79 unsigned int s_ino;
80 struct sysfs_inode_attrs *s_iattr;
81};
82
83#define SD_DEACTIVATED_BIAS INT_MIN
84
85#define SYSFS_TYPE_MASK 0x00ff
86#define SYSFS_DIR 0x0001
87#define SYSFS_KOBJ_ATTR 0x0002
88#define SYSFS_KOBJ_BIN_ATTR 0x0004
89#define SYSFS_KOBJ_LINK 0x0008
90#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
91#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
92
93/* identify any namespace tag on sysfs_dirents */
94#define SYSFS_NS_TYPE_MASK 0xf00
95#define SYSFS_NS_TYPE_SHIFT 8
96
97#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
98#define SYSFS_FLAG_REMOVED 0x02000
99
100static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
101{
102 return sd->s_flags & SYSFS_TYPE_MASK;
103}
104
105/*
106 * Return any namespace tags on this dirent.
107 * enum kobj_ns_type is defined in linux/kobject.h
108 */
109static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
110{
111 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
112}
113
114#ifdef CONFIG_DEBUG_LOCK_ALLOC
115
116#define sysfs_dirent_init_lockdep(sd) \
117do { \
118 struct attribute *attr = sd->s_attr.attr; \
119 struct lock_class_key *key = attr->key; \
120 if (!key) \
121 key = &attr->skey; \
122 \
123 lockdep_init_map(&sd->dep_map, "s_active", key, 0); \
124} while (0)
125
126/* Test for attributes that want to ignore lockdep for read-locking */
127static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
128{
129 int type = sysfs_type(sd);
130
131 return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
132 sd->s_attr.attr->ignore_lockdep;
133}
134
135#else
136
137#define sysfs_dirent_init_lockdep(sd) do {} while (0)
138
139static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
140{
141 return true;
142}
143
144#endif
145
146/*
147 * Context structure to be used while adding/removing nodes.
148 */
149struct sysfs_addrm_cxt {
150 struct sysfs_dirent *removed;
151};
152 15
153/* 16/*
154 * mount.c 17 * mount.c
155 */ 18 */
156 19extern struct kernfs_node *sysfs_root_kn;
157/*
158 * Each sb is associated with a set of namespace tags (i.e.
159 * the network namespace of the task which mounted this sysfs
160 * instance).
161 */
162struct sysfs_super_info {
163 void *ns[KOBJ_NS_TYPES];
164};
165#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
166extern struct sysfs_dirent sysfs_root;
167extern struct kmem_cache *sysfs_dir_cachep;
168 20
169/* 21/*
170 * dir.c 22 * dir.c
171 */ 23 */
172extern struct mutex sysfs_mutex;
173extern spinlock_t sysfs_symlink_target_lock; 24extern spinlock_t sysfs_symlink_target_lock;
174extern const struct dentry_operations sysfs_dentry_ops;
175
176extern const struct file_operations sysfs_dir_operations;
177extern const struct inode_operations sysfs_dir_inode_operations;
178 25
179struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); 26void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
180void sysfs_put_active(struct sysfs_dirent *sd);
181void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
182void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
183int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
184 struct sysfs_dirent *parent_sd);
185int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
186 struct sysfs_dirent *parent_sd);
187void sysfs_remove(struct sysfs_dirent *sd);
188int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
189 const void *ns);
190void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
191
192struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
193 const unsigned char *name,
194 const void *ns);
195struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
196
197void release_sysfs_dirent(struct sysfs_dirent *sd);
198
199int sysfs_create_subdir(struct kobject *kobj, const char *name,
200 struct sysfs_dirent **p_sd);
201
202int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
203 const char *new_name, const void *new_ns);
204
205static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
206{
207 if (sd) {
208 WARN_ON(!atomic_read(&sd->s_count));
209 atomic_inc(&sd->s_count);
210 }
211 return sd;
212}
213#define sysfs_get(sd) __sysfs_get(sd)
214
215static inline void __sysfs_put(struct sysfs_dirent *sd)
216{
217 if (sd && atomic_dec_and_test(&sd->s_count))
218 release_sysfs_dirent(sd);
219}
220#define sysfs_put(sd) __sysfs_put(sd)
221
222/*
223 * inode.c
224 */
225struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
226void sysfs_evict_inode(struct inode *inode);
227int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
228int sysfs_permission(struct inode *inode, int mask);
229int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
230int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
231 struct kstat *stat);
232int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
233 size_t size, int flags);
234int sysfs_inode_init(void);
235 27
236/* 28/*
237 * file.c 29 * file.c
238 */ 30 */
239extern const struct file_operations sysfs_file_operations; 31int sysfs_add_file(struct kernfs_node *parent,
240extern const struct file_operations sysfs_bin_operations; 32 const struct attribute *attr, bool is_bin);
241 33int sysfs_add_file_mode_ns(struct kernfs_node *parent,
242int sysfs_add_file(struct sysfs_dirent *dir_sd, 34 const struct attribute *attr, bool is_bin,
243 const struct attribute *attr, int type);
244
245int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
246 const struct attribute *attr, int type,
247 umode_t amode, const void *ns); 35 umode_t amode, const void *ns);
248void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
249 36
250/* 37/*
251 * symlink.c 38 * symlink.c
252 */ 39 */
253extern const struct inode_operations sysfs_symlink_inode_operations; 40int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
254int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
255 const char *name); 41 const char *name);
42
43#endif /* __SYSFS_INTERNAL_H */
diff --git a/include/linux/component.h b/include/linux/component.h
new file mode 100644
index 000000000000..68870182ca1e
--- /dev/null
+++ b/include/linux/component.h
@@ -0,0 +1,32 @@
#ifndef COMPONENT_H
#define COMPONENT_H

struct device;

/*
 * Callbacks passed to component_add() and component_del().
 * bind() can report failure through its int return; unbind() cannot.
 * NOTE(review): the roles of the two device pointers and of the opaque
 * pointer are not named in this header -- confirm against the
 * implementation before relying on them.
 */
struct component_ops {
	int	(*bind)(struct device *, struct device *, void *);
	void	(*unbind)(struct device *, struct device *, void *);
};

/* component side: a device together with its component_ops */
int component_add(struct device *, const struct component_ops *);
void component_del(struct device *, const struct component_ops *);

/* bulk bind/unbind; the void * is opaque data (meaning not shown here) */
int component_bind_all(struct device *, void *);
void component_unbind_all(struct device *, void *);

/* only used through pointers in this header */
struct master;

/* callbacks passed to component_master_add() / component_master_del() */
struct component_master_ops {
	int	(*add_components)(struct device *, struct master *);
	int	(*bind)(struct device *);
	void	(*unbind)(struct device *);
};

int component_master_add(struct device *, const struct component_master_ops *);
void component_master_del(struct device *,
			  const struct component_master_ops *);

/*
 * Takes a master, a compare callback and the data handed to that callback.
 * NOTE(review): presumably picks the child for which
 * compare(dev, compare_data) is non-zero -- confirm.
 */
int component_master_add_child(struct master *master,
	int (*compare)(struct device *, void *), void *compare_data);

#endif /* COMPONENT_H */
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index e154c1005cd1..59529330efd6 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -68,4 +68,11 @@ static inline void release_firmware(const struct firmware *fw)
68 68
69#endif 69#endif
70 70
/*
 * request_firmware_direct() is a function of its own only when
 * CONFIG_FW_LOADER_USER_HELPER is set; otherwise the name is simply an
 * alias for request_firmware().
 */
#ifndef CONFIG_FW_LOADER_USER_HELPER
#define request_firmware_direct	request_firmware
#else
int request_firmware_direct(const struct firmware **fw, const char *name,
			    struct device *device);
#endif
77
71#endif 78#endif
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
new file mode 100644
index 000000000000..5be9f0228a3b
--- /dev/null
+++ b/include/linux/kernfs.h
@@ -0,0 +1,376 @@
1/*
2 * kernfs.h - pseudo filesystem decoupled from vfs locking
3 *
4 * This file is released under the GPLv2.
5 */
6
7#ifndef __LINUX_KERNFS_H
8#define __LINUX_KERNFS_H
9
10#include <linux/kernel.h>
11#include <linux/err.h>
12#include <linux/list.h>
13#include <linux/mutex.h>
14#include <linux/idr.h>
15#include <linux/lockdep.h>
16#include <linux/rbtree.h>
17#include <linux/atomic.h>
18#include <linux/completion.h>
19
20struct file;
21struct dentry;
22struct iattr;
23struct seq_file;
24struct vm_area_struct;
25struct super_block;
26struct file_system_type;
27
28struct kernfs_open_node;
29struct kernfs_iattrs;
30
/*
 * Node type.  The value lives in the bits of kernfs_node->flags selected
 * by KERNFS_TYPE_MASK and is read back with kernfs_type().
 */
enum kernfs_node_type {
	KERNFS_DIR	= 1 << 0,
	KERNFS_FILE	= 1 << 1,
	KERNFS_LINK	= 1 << 2,
};
36
/* bits of kernfs_node->flags that hold the enum kernfs_node_type value */
#define KERNFS_TYPE_MASK	0x000f
/* alias for KERNFS_FILE */
#define KERNFS_ACTIVE_REF	KERNFS_FILE
/*
 * Every bit outside the type field.  The replacement list is
 * parenthesized so the macro always expands as a single operand.
 */
#define KERNFS_FLAG_MASK	(~KERNFS_TYPE_MASK)
40
/*
 * Flag bits kept in kernfs_node->flags.  All of them lie outside
 * KERNFS_TYPE_MASK.  KERNFS_NS is the bit set by kernfs_enable_ns() and
 * tested by kernfs_ns_enabled().
 */
enum kernfs_node_flag {
	KERNFS_REMOVED		= 1 << 4,
	KERNFS_NS		= 1 << 5,
	KERNFS_HAS_SEQ_SHOW	= 1 << 6,
	KERNFS_HAS_MMAP		= 1 << 7,
	KERNFS_LOCKDEP		= 1 << 8,
	KERNFS_STATIC_NAME	= 1 << 9,
};
49
50/* type-specific structures for kernfs_node union members */
51struct kernfs_elem_dir {
52 unsigned long subdirs;
53 /* children rbtree starts here and goes through kn->rb */
54 struct rb_root children;
55
56 /*
57 * The kernfs hierarchy this directory belongs to. This fits
58 * better directly in kernfs_node but is here to save space.
59 */
60 struct kernfs_root *root;
61};
62
/* payload used through kernfs_node->symlink */
struct kernfs_elem_symlink {
	/* presumably the node the link refers to -- confirm in symlink code */
	struct kernfs_node	*target_kn;
};
66
67struct kernfs_elem_attr {
68 const struct kernfs_ops *ops;
69 struct kernfs_open_node *open;
70 loff_t size;
71};
72
73/*
74 * kernfs_node - the building block of kernfs hierarchy. Each and every
75 * kernfs node is represented by single kernfs_node. Most fields are
76 * private to kernfs and shouldn't be accessed directly by kernfs users.
77 *
78 * As long as s_count reference is held, the kernfs_node itself is
79 * accessible. Dereferencing elem or any other outer entity requires
80 * active reference.
81 */
82struct kernfs_node {
83 atomic_t count;
84 atomic_t active;
85#ifdef CONFIG_DEBUG_LOCK_ALLOC
86 struct lockdep_map dep_map;
87#endif
88 /* the following two fields are published */
89 struct kernfs_node *parent;
90 const char *name;
91
92 struct rb_node rb;
93
94 union {
95 struct completion *completion;
96 struct kernfs_node *removed_list;
97 } u;
98
99 const void *ns; /* namespace tag */
100 unsigned int hash; /* ns + name hash */
101 union {
102 struct kernfs_elem_dir dir;
103 struct kernfs_elem_symlink symlink;
104 struct kernfs_elem_attr attr;
105 };
106
107 void *priv;
108
109 unsigned short flags;
110 umode_t mode;
111 unsigned int ino;
112 struct kernfs_iattrs *iattr;
113};
114
115/*
116 * kernfs_dir_ops may be specified on kernfs_create_root() to support
117 * directory manipulation syscalls. These optional callbacks are invoked
118 * on the matching syscalls and can perform any kernfs operations which
119 * don't necessarily have to be the exact operation requested.
120 */
121struct kernfs_dir_ops {
122 int (*mkdir)(struct kernfs_node *parent, const char *name,
123 umode_t mode);
124 int (*rmdir)(struct kernfs_node *kn);
125 int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
126 const char *new_name);
127};
128
129struct kernfs_root {
130 /* published fields */
131 struct kernfs_node *kn;
132
133 /* private fields, do not use outside kernfs proper */
134 struct ida ino_ida;
135 struct kernfs_dir_ops *dir_ops;
136};
137
138struct kernfs_open_file {
139 /* published fields */
140 struct kernfs_node *kn;
141 struct file *file;
142
143 /* private fields, do not use outside kernfs proper */
144 struct mutex mutex;
145 int event;
146 struct list_head list;
147
148 bool mmapped;
149 const struct vm_operations_struct *vm_ops;
150};
151
152struct kernfs_ops {
153 /*
154 * Read is handled by either seq_file or raw_read().
155 *
156 * If seq_show() is present, seq_file path is active. Other seq
157 * operations are optional and if not implemented, the behavior is
158 * equivalent to single_open(). @sf->private points to the
159 * associated kernfs_open_file.
160 *
161 * read() is bounced through kernel buffer and a read larger than
162 * PAGE_SIZE results in partial operation of PAGE_SIZE.
163 */
164 int (*seq_show)(struct seq_file *sf, void *v);
165
166 void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
167 void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
168 void (*seq_stop)(struct seq_file *sf, void *v);
169
170 ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes,
171 loff_t off);
172
173 /*
174 * write() is bounced through kernel buffer and a write larger than
175 * PAGE_SIZE results in partial operation of PAGE_SIZE.
176 */
177 ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
178 loff_t off);
179
180 int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
181
182#ifdef CONFIG_DEBUG_LOCK_ALLOC
183 struct lock_class_key lockdep_key;
184#endif
185};
186
187#ifdef CONFIG_SYSFS
188
189static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
190{
191 return kn->flags & KERNFS_TYPE_MASK;
192}
193
194/**
195 * kernfs_enable_ns - enable namespace under a directory
196 * @kn: directory of interest, should be empty
197 *
198 * This is to be called right after @kn is created to enable namespace
199 * under it. All children of @kn must have non-NULL namespace tags and
200 * only the ones which match the super_block's tag will be visible.
201 */
202static inline void kernfs_enable_ns(struct kernfs_node *kn)
203{
204 WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
205 WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children));
206 kn->flags |= KERNFS_NS;
207}
208
209/**
210 * kernfs_ns_enabled - test whether namespace is enabled
211 * @kn: the node to test
212 *
213 * Test whether namespace filtering is enabled for the children of @ns.
214 */
215static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
216{
217 return kn->flags & KERNFS_NS;
218}
219
220struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
221 const char *name, const void *ns);
222void kernfs_get(struct kernfs_node *kn);
223void kernfs_put(struct kernfs_node *kn);
224
225struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops,
226 void *priv);
227void kernfs_destroy_root(struct kernfs_root *root);
228
229struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
230 const char *name, umode_t mode,
231 void *priv, const void *ns);
232struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
233 const char *name,
234 umode_t mode, loff_t size,
235 const struct kernfs_ops *ops,
236 void *priv, const void *ns,
237 bool name_is_static,
238 struct lock_class_key *key);
239struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
240 const char *name,
241 struct kernfs_node *target);
242void kernfs_remove(struct kernfs_node *kn);
243int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
244 const void *ns);
245int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
246 const char *new_name, const void *new_ns);
247int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
248void kernfs_notify(struct kernfs_node *kn);
249
250const void *kernfs_super_ns(struct super_block *sb);
251struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
252 struct kernfs_root *root, const void *ns);
253void kernfs_kill_sb(struct super_block *sb);
254
255void kernfs_init(void);
256
257#else /* CONFIG_SYSFS */
258
259static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
260{ return 0; } /* whatever */
261
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_enable_ns(struct kernfs_node *kn)
{
}
263
264static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
265{ return false; }
266
267static inline struct kernfs_node *
268kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
269 const void *ns)
270{ return NULL; }
271
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_get(struct kernfs_node *kn)
{
}
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_put(struct kernfs_node *kn)
{
}
274
275static inline struct kernfs_root *
276kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
277{ return ERR_PTR(-ENOSYS); }
278
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_destroy_root(struct kernfs_root *root)
{
}
280
281static inline struct kernfs_node *
282kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
283 umode_t mode, void *priv, const void *ns)
284{ return ERR_PTR(-ENOSYS); }
285
286static inline struct kernfs_node *
287__kernfs_create_file(struct kernfs_node *parent, const char *name,
288 umode_t mode, loff_t size, const struct kernfs_ops *ops,
289 void *priv, const void *ns, bool name_is_static,
290 struct lock_class_key *key)
291{ return ERR_PTR(-ENOSYS); }
292
293static inline struct kernfs_node *
294kernfs_create_link(struct kernfs_node *parent, const char *name,
295 struct kernfs_node *target)
296{ return ERR_PTR(-ENOSYS); }
297
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_remove(struct kernfs_node *kn)
{
}
299
300static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn,
301 const char *name, const void *ns)
302{ return -ENOSYS; }
303
304static inline int kernfs_rename_ns(struct kernfs_node *kn,
305 struct kernfs_node *new_parent,
306 const char *new_name, const void *new_ns)
307{ return -ENOSYS; }
308
309static inline int kernfs_setattr(struct kernfs_node *kn,
310 const struct iattr *iattr)
311{ return -ENOSYS; }
312
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_notify(struct kernfs_node *kn)
{
}
314
315static inline const void *kernfs_super_ns(struct super_block *sb)
316{ return NULL; }
317
318static inline struct dentry *
319kernfs_mount_ns(struct file_system_type *fs_type, int flags,
320 struct kernfs_root *root, const void *ns)
321{ return ERR_PTR(-ENOSYS); }
322
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_kill_sb(struct super_block *sb)
{
}
324
/* !CONFIG_SYSFS stub: does nothing */
static inline void kernfs_init(void)
{
}
326
327#endif /* CONFIG_SYSFS */
328
329static inline struct kernfs_node *
330kernfs_find_and_get(struct kernfs_node *kn, const char *name)
331{
332 return kernfs_find_and_get_ns(kn, name, NULL);
333}
334
335static inline struct kernfs_node *
336kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
337 void *priv)
338{
339 return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
340}
341
342static inline struct kernfs_node *
343kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
344 umode_t mode, loff_t size, const struct kernfs_ops *ops,
345 void *priv, const void *ns)
346{
347 struct lock_class_key *key = NULL;
348
349#ifdef CONFIG_DEBUG_LOCK_ALLOC
350 key = (struct lock_class_key *)&ops->lockdep_key;
351#endif
352 return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
353 false, key);
354}
355
356static inline struct kernfs_node *
357kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
358 loff_t size, const struct kernfs_ops *ops, void *priv)
359{
360 return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
361}
362
363static inline int kernfs_remove_by_name(struct kernfs_node *parent,
364 const char *name)
365{
366 return kernfs_remove_by_name_ns(parent, name, NULL);
367}
368
369static inline struct dentry *
370kernfs_mount(struct file_system_type *fs_type, int flags,
371 struct kernfs_root *root)
372{
373 return kernfs_mount_ns(fs_type, flags, root, NULL);
374}
375
376#endif /* __LINUX_KERNFS_H */
diff --git a/include/linux/kobj_completion.h b/include/linux/kobj_completion.h
deleted file mode 100644
index a428f6436063..000000000000
--- a/include/linux/kobj_completion.h
+++ /dev/null
@@ -1,18 +0,0 @@
1#ifndef _KOBJ_COMPLETION_H_
2#define _KOBJ_COMPLETION_H_
3
4#include <linux/kobject.h>
5#include <linux/completion.h>
6
7struct kobj_completion {
8 struct kobject kc_kobj;
9 struct completion kc_unregister;
10};
11
12#define kobj_to_kobj_completion(kobj) \
13 container_of(kobj, struct kobj_completion, kc_kobj)
14
15void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype);
16void kobj_completion_release(struct kobject *kobj);
17void kobj_completion_del_and_wait(struct kobj_completion *kc);
18#endif /* _KOBJ_COMPLETION_H_ */
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index e7ba650086ce..926afb6f6b5f 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -64,7 +64,7 @@ struct kobject {
64 struct kobject *parent; 64 struct kobject *parent;
65 struct kset *kset; 65 struct kset *kset;
66 struct kobj_type *ktype; 66 struct kobj_type *ktype;
67 struct sysfs_dirent *sd; 67 struct kernfs_node *sd;
68 struct kref kref; 68 struct kref kref;
69#ifdef CONFIG_DEBUG_KOBJECT_RELEASE 69#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
70 struct delayed_work release; 70 struct delayed_work release;
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 9a6bbf76452d..bb7384e3c3d8 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -35,6 +35,7 @@ struct memory_block {
35}; 35};
36 36
37int arch_get_memory_phys_device(unsigned long start_pfn); 37int arch_get_memory_phys_device(unsigned long start_pfn);
38unsigned long __weak memory_block_size_bytes(void);
38 39
39/* These states are exposed to userspace as text strings in sysfs */ 40/* These states are exposed to userspace as text strings in sysfs */
40#define MEM_ONLINE (1<<0) /* exposed to userspace */ 41#define MEM_ONLINE (1<<0) /* exposed to userspace */
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 6695040a0317..30b2ebee6439 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -12,6 +12,7 @@
12#ifndef _SYSFS_H_ 12#ifndef _SYSFS_H_
13#define _SYSFS_H_ 13#define _SYSFS_H_
14 14
15#include <linux/kernfs.h>
15#include <linux/compiler.h> 16#include <linux/compiler.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/list.h> 18#include <linux/list.h>
@@ -175,8 +176,6 @@ struct sysfs_ops {
175 ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); 176 ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
176}; 177};
177 178
178struct sysfs_dirent;
179
180#ifdef CONFIG_SYSFS 179#ifdef CONFIG_SYSFS
181 180
182int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), 181int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -244,12 +243,6 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
244 const char *link_name); 243 const char *link_name);
245 244
246void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); 245void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
247void sysfs_notify_dirent(struct sysfs_dirent *sd);
248struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
249 const unsigned char *name,
250 const void *ns);
251struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
252void sysfs_put(struct sysfs_dirent *sd);
253 246
254int __must_check sysfs_init(void); 247int __must_check sysfs_init(void);
255 248
@@ -419,22 +412,6 @@ static inline void sysfs_notify(struct kobject *kobj, const char *dir,
419 const char *attr) 412 const char *attr)
420{ 413{
421} 414}
422static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
423{
424}
425static inline struct sysfs_dirent *
426sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, const unsigned char *name,
427 const void *ns)
428{
429 return NULL;
430}
431static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
432{
433 return NULL;
434}
435static inline void sysfs_put(struct sysfs_dirent *sd)
436{
437}
438 415
439static inline int __must_check sysfs_init(void) 416static inline int __must_check sysfs_init(void)
440{ 417{
@@ -461,10 +438,26 @@ static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target
461 return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL); 438 return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL);
462} 439}
463 440
464static inline struct sysfs_dirent * 441static inline void sysfs_notify_dirent(struct kernfs_node *kn)
465sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name) 442{
443 kernfs_notify(kn);
444}
445
446static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent,
447 const unsigned char *name)
448{
449 return kernfs_find_and_get(parent, name);
450}
451
452static inline struct kernfs_node *sysfs_get(struct kernfs_node *kn)
453{
454 kernfs_get(kn);
455 return kn;
456}
457
458static inline void sysfs_put(struct kernfs_node *kn)
466{ 459{
467 return sysfs_get_dirent_ns(parent_sd, name, NULL); 460 kernfs_put(kn);
468} 461}
469 462
470#endif /* _SYSFS_H_ */ 463#endif /* _SYSFS_H_ */
diff --git a/lib/kobject.c b/lib/kobject.c
index 5b4b8886435e..b0b26665c611 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -13,11 +13,11 @@
13 */ 13 */
14 14
15#include <linux/kobject.h> 15#include <linux/kobject.h>
16#include <linux/kobj_completion.h>
17#include <linux/string.h> 16#include <linux/string.h>
18#include <linux/export.h> 17#include <linux/export.h>
19#include <linux/stat.h> 18#include <linux/stat.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/random.h>
21 21
22/** 22/**
23 * kobject_namespace - return @kobj's namespace tag 23 * kobject_namespace - return @kobj's namespace tag
@@ -65,13 +65,17 @@ static int populate_dir(struct kobject *kobj)
65 65
66static int create_dir(struct kobject *kobj) 66static int create_dir(struct kobject *kobj)
67{ 67{
68 const struct kobj_ns_type_operations *ops;
68 int error; 69 int error;
69 70
70 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); 71 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
71 if (!error) { 72 if (error)
72 error = populate_dir(kobj); 73 return error;
73 if (error) 74
74 sysfs_remove_dir(kobj); 75 error = populate_dir(kobj);
76 if (error) {
77 sysfs_remove_dir(kobj);
78 return error;
75 } 79 }
76 80
77 /* 81 /*
@@ -80,7 +84,20 @@ static int create_dir(struct kobject *kobj)
80 */ 84 */
81 sysfs_get(kobj->sd); 85 sysfs_get(kobj->sd);
82 86
83 return error; 87 /*
88 * If @kobj has ns_ops, its children need to be filtered based on
89 * their namespace tags. Enable namespace support on @kobj->sd.
90 */
91 ops = kobj_child_ns_ops(kobj);
92 if (ops) {
93 BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE);
94 BUG_ON(ops->type >= KOBJ_NS_TYPES);
95 BUG_ON(!kobj_ns_type_registered(ops->type));
96
97 kernfs_enable_ns(kobj->sd);
98 }
99
100 return 0;
84} 101}
85 102
86static int get_kobj_path_length(struct kobject *kobj) 103static int get_kobj_path_length(struct kobject *kobj)
@@ -247,8 +264,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
247 return 0; 264 return 0;
248 265
249 kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); 266 kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
250 if (!kobj->name) 267 if (!kobj->name) {
268 kobj->name = old_name;
251 return -ENOMEM; 269 return -ENOMEM;
270 }
252 271
253 /* ewww... some of these buggers have '/' in the name ... */ 272 /* ewww... some of these buggers have '/' in the name ... */
254 while ((s = strchr(kobj->name, '/'))) 273 while ((s = strchr(kobj->name, '/')))
@@ -346,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent,
346 * 365 *
347 * If @parent is set, then the parent of the @kobj will be set to it. 366 * If @parent is set, then the parent of the @kobj will be set to it.
348 * If @parent is NULL, then the parent of the @kobj will be set to the 367 * If @parent is NULL, then the parent of the @kobj will be set to the
349 * kobject associted with the kset assigned to this kobject. If no kset 368 * kobject associated with the kset assigned to this kobject. If no kset
350 * is assigned to the kobject, then the kobject will be located in the 369 * is assigned to the kobject, then the kobject will be located in the
351 * root of the sysfs tree. 370 * root of the sysfs tree.
352 * 371 *
@@ -536,7 +555,7 @@ out:
536 */ 555 */
537void kobject_del(struct kobject *kobj) 556void kobject_del(struct kobject *kobj)
538{ 557{
539 struct sysfs_dirent *sd; 558 struct kernfs_node *sd;
540 559
541 if (!kobj) 560 if (!kobj)
542 return; 561 return;
@@ -625,10 +644,12 @@ static void kobject_release(struct kref *kref)
625{ 644{
626 struct kobject *kobj = container_of(kref, struct kobject, kref); 645 struct kobject *kobj = container_of(kref, struct kobject, kref);
627#ifdef CONFIG_DEBUG_KOBJECT_RELEASE 646#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
628 pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", 647 unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
629 kobject_name(kobj), kobj, __func__, kobj->parent); 648 pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
649 kobject_name(kobj), kobj, __func__, kobj->parent, delay);
630 INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); 650 INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
631 schedule_delayed_work(&kobj->release, HZ); 651
652 schedule_delayed_work(&kobj->release, delay);
632#else 653#else
633 kobject_cleanup(kobj); 654 kobject_cleanup(kobj);
634#endif 655#endif
@@ -760,55 +781,6 @@ const struct sysfs_ops kobj_sysfs_ops = {
760}; 781};
761 782
762/** 783/**
763 * kobj_completion_init - initialize a kobj_completion object.
764 * @kc: kobj_completion
765 * @ktype: type of kobject to initialize
766 *
767 * kobj_completion structures can be embedded within structures with different
768 * lifetime rules. During the release of the enclosing object, we can
769 * wait on the release of the kobject so that we don't free it while it's
770 * still busy.
771 */
772void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
773{
774 init_completion(&kc->kc_unregister);
775 kobject_init(&kc->kc_kobj, ktype);
776}
777EXPORT_SYMBOL_GPL(kobj_completion_init);
778
779/**
780 * kobj_completion_release - release a kobj_completion object
781 * @kobj: kobject embedded in kobj_completion
782 *
783 * Used with kobject_release to notify waiters that the kobject has been
784 * released.
785 */
786void kobj_completion_release(struct kobject *kobj)
787{
788 struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
789 complete(&kc->kc_unregister);
790}
791EXPORT_SYMBOL_GPL(kobj_completion_release);
792
793/**
794 * kobj_completion_del_and_wait - release the kobject and wait for it
795 * @kc: kobj_completion object to release
796 *
797 * Delete the kobject from sysfs and drop the reference count. Then wait
798 * until any other outstanding references are also dropped. This routine
799 * is only necessary once other references may have been taken on the
800 * kobject. Typically this happens when the kobject has been published
801 * to sysfs via kobject_add.
802 */
803void kobj_completion_del_and_wait(struct kobj_completion *kc)
804{
805 kobject_del(&kc->kc_kobj);
806 kobject_put(&kc->kc_kobj);
807 wait_for_completion(&kc->kc_unregister);
808}
809EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
810
811/**
812 * kset_register - initialize and add a kset. 784 * kset_register - initialize and add a kset.
813 * @k: kset. 785 * @k: kset.
814 */ 786 */
@@ -835,6 +807,7 @@ void kset_unregister(struct kset *k)
835{ 807{
836 if (!k) 808 if (!k)
837 return; 809 return;
810 kobject_del(&k->kobj);
838 kobject_put(&k->kobj); 811 kobject_put(&k->kobj);
839} 812}
840 813
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index d0c687fd9802..5dce351f131f 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -262,6 +262,7 @@ baz_error:
262bar_error: 262bar_error:
263 destroy_foo_obj(foo_obj); 263 destroy_foo_obj(foo_obj);
264foo_error: 264foo_error:
265 kset_unregister(example_kset);
265 return -EINVAL; 266 return -EINVAL;
266} 267}
267 268