aboutsummaryrefslogtreecommitdiffstats
path: root/fs/sysfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/sysfs')
-rw-r--r--fs/sysfs/Makefile2
-rw-r--r--fs/sysfs/dir.c1075
-rw-r--r--fs/sysfs/file.c961
-rw-r--r--fs/sysfs/group.c102
-rw-r--r--fs/sysfs/inode.c331
-rw-r--r--fs/sysfs/mount.c185
-rw-r--r--fs/sysfs/symlink.c219
-rw-r--r--fs/sysfs/sysfs.h236
8 files changed, 352 insertions, 2759 deletions
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac183373..6eff6e1205a5 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,4 @@
2# Makefile for the sysfs virtual filesystem 2# Makefile for the sysfs virtual filesystem
3# 3#
4 4
5obj-y := inode.o file.o dir.o symlink.o mount.o group.o 5obj-y := file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d6626e50..ee0d761c3179 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -13,465 +13,31 @@
13#undef DEBUG 13#undef DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h>
17#include <linux/module.h>
18#include <linux/kobject.h> 16#include <linux/kobject.h>
19#include <linux/namei.h>
20#include <linux/idr.h>
21#include <linux/completion.h>
22#include <linux/mutex.h>
23#include <linux/slab.h> 17#include <linux/slab.h>
24#include <linux/security.h>
25#include <linux/hash.h>
26#include "sysfs.h" 18#include "sysfs.h"
27 19
28DEFINE_MUTEX(sysfs_mutex);
29DEFINE_SPINLOCK(sysfs_symlink_target_lock); 20DEFINE_SPINLOCK(sysfs_symlink_target_lock);
30 21
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
32
33static DEFINE_SPINLOCK(sysfs_ino_lock);
34static DEFINE_IDA(sysfs_ino_ida);
35
36/**
37 * sysfs_name_hash
38 * @name: Null terminated string to hash
39 * @ns: Namespace tag to hash
40 *
41 * Returns 31 bit hash of ns + name (so it fits in an off_t )
42 */
43static unsigned int sysfs_name_hash(const char *name, const void *ns)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const char *name,
60 const void *ns, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
73 right);
74}
75
76/**
77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
78 * @sd: sysfs_dirent of interest
79 *
80 * Link @sd into its sibling rbtree which starts from
81 * sd->s_parent->s_dir.children.
82 *
83 * Locking:
84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on susccess -EEXIST on failure.
88 */
89static int sysfs_link_sibling(struct sysfs_dirent *sd)
90{
91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
92 struct rb_node *parent = NULL;
93
94 if (sysfs_type(sd) == SYSFS_DIR)
95 sd->s_parent->s_dir.subdirs++;
96
97 while (*node) {
98 struct sysfs_dirent *pos;
99 int result;
100
101 pos = to_sysfs_dirent(*node);
102 parent = *node;
103 result = sysfs_sd_compare(sd, pos);
104 if (result < 0)
105 node = &pos->s_rb.rb_left;
106 else if (result > 0)
107 node = &pos->s_rb.rb_right;
108 else
109 return -EEXIST;
110 }
111 /* add new node and rebalance the tree */
112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
115}
116
117/**
118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
119 * @sd: sysfs_dirent of interest
120 *
121 * Unlink @sd from its sibling rbtree which starts from
122 * sd->s_parent->s_dir.children.
123 *
124 * Locking:
125 * mutex_lock(sysfs_mutex)
126 */
127static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
128{
129 if (sysfs_type(sd) == SYSFS_DIR)
130 sd->s_parent->s_dir.subdirs--;
131
132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
133}
134
135/**
136 * sysfs_get_active - get an active reference to sysfs_dirent
137 * @sd: sysfs_dirent to get an active reference to
138 *
139 * Get an active reference of @sd. This function is noop if @sd
140 * is NULL.
141 *
142 * RETURNS:
143 * Pointer to @sd on success, NULL on failure.
144 */
145struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
146{
147 if (unlikely(!sd))
148 return NULL;
149
150 if (!atomic_inc_unless_negative(&sd->s_active))
151 return NULL;
152
153 if (likely(!sysfs_ignore_lockdep(sd)))
154 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
155 return sd;
156}
157
158/**
159 * sysfs_put_active - put an active reference to sysfs_dirent
160 * @sd: sysfs_dirent to put an active reference to
161 *
162 * Put an active reference to @sd. This function is noop if @sd
163 * is NULL.
164 */
165void sysfs_put_active(struct sysfs_dirent *sd)
166{
167 int v;
168
169 if (unlikely(!sd))
170 return;
171
172 if (likely(!sysfs_ignore_lockdep(sd)))
173 rwsem_release(&sd->dep_map, 1, _RET_IP_);
174 v = atomic_dec_return(&sd->s_active);
175 if (likely(v != SD_DEACTIVATED_BIAS))
176 return;
177
178 /* atomic_dec_return() is a mb(), we'll always see the updated
179 * sd->u.completion.
180 */
181 complete(sd->u.completion);
182}
183
184/**
185 * sysfs_deactivate - deactivate sysfs_dirent
186 * @sd: sysfs_dirent to deactivate
187 *
188 * Deny new active references and drain existing ones.
189 */
190static void sysfs_deactivate(struct sysfs_dirent *sd)
191{
192 DECLARE_COMPLETION_ONSTACK(wait);
193 int v;
194
195 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
196
197 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
198 return;
199
200 sd->u.completion = (void *)&wait;
201
202 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
203 /* atomic_add_return() is a mb(), put_active() will always see
204 * the updated sd->u.completion.
205 */
206 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
207
208 if (v != SD_DEACTIVATED_BIAS) {
209 lock_contended(&sd->dep_map, _RET_IP_);
210 wait_for_completion(&wait);
211 }
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
215}
216
217static int sysfs_alloc_ino(unsigned int *pino)
218{
219 int ino, rc;
220
221 retry:
222 spin_lock(&sysfs_ino_lock);
223 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
224 spin_unlock(&sysfs_ino_lock);
225
226 if (rc == -EAGAIN) {
227 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
228 goto retry;
229 rc = -ENOMEM;
230 }
231
232 *pino = ino;
233 return rc;
234}
235
236static void sysfs_free_ino(unsigned int ino)
237{
238 spin_lock(&sysfs_ino_lock);
239 ida_remove(&sysfs_ino_ida, ino);
240 spin_unlock(&sysfs_ino_lock);
241}
242
243void release_sysfs_dirent(struct sysfs_dirent *sd)
244{
245 struct sysfs_dirent *parent_sd;
246
247 repeat:
248 /* Moving/renaming is always done while holding reference.
249 * sd->s_parent won't change beneath us.
250 */
251 parent_sd = sd->s_parent;
252
253 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
254 "sysfs: free using entry: %s/%s\n",
255 parent_sd ? parent_sd->s_name : "", sd->s_name);
256
257 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
258 sysfs_put(sd->s_symlink.target_sd);
259 if (sysfs_type(sd) & SYSFS_COPY_NAME)
260 kfree(sd->s_name);
261 if (sd->s_iattr && sd->s_iattr->ia_secdata)
262 security_release_secctx(sd->s_iattr->ia_secdata,
263 sd->s_iattr->ia_secdata_len);
264 kfree(sd->s_iattr);
265 sysfs_free_ino(sd->s_ino);
266 kmem_cache_free(sysfs_dir_cachep, sd);
267
268 sd = parent_sd;
269 if (sd && atomic_dec_and_test(&sd->s_count))
270 goto repeat;
271}
272
273static int sysfs_dentry_delete(const struct dentry *dentry)
274{
275 struct sysfs_dirent *sd = dentry->d_fsdata;
276 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
277}
278
279static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
280{
281 struct sysfs_dirent *sd;
282 int type;
283
284 if (flags & LOOKUP_RCU)
285 return -ECHILD;
286
287 sd = dentry->d_fsdata;
288 mutex_lock(&sysfs_mutex);
289
290 /* The sysfs dirent has been deleted */
291 if (sd->s_flags & SYSFS_FLAG_REMOVED)
292 goto out_bad;
293
294 /* The sysfs dirent has been moved? */
295 if (dentry->d_parent->d_fsdata != sd->s_parent)
296 goto out_bad;
297
298 /* The sysfs dirent has been renamed */
299 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
300 goto out_bad;
301
302 /* The sysfs dirent has been moved to a different namespace */
303 type = KOBJ_NS_TYPE_NONE;
304 if (sd->s_parent) {
305 type = sysfs_ns_type(sd->s_parent);
306 if (type != KOBJ_NS_TYPE_NONE &&
307 sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
308 goto out_bad;
309 }
310
311 mutex_unlock(&sysfs_mutex);
312out_valid:
313 return 1;
314out_bad:
315 /* Remove the dentry from the dcache hashes.
316 * If this is a deleted dentry we use d_drop instead of d_delete
317 * so sysfs doesn't need to cope with negative dentries.
318 *
319 * If this is a dentry that has simply been renamed we
320 * use d_drop to remove it from the dcache lookup on its
321 * old parent. If this dentry persists later when a lookup
322 * is performed at its new name the dentry will be readded
323 * to the dcache hashes.
324 */
325 mutex_unlock(&sysfs_mutex);
326
327 /* If we have submounts we must allow the vfs caches
328 * to lie about the state of the filesystem to prevent
329 * leaks and other nasty things.
330 */
331 if (check_submounts_and_drop(dentry) != 0)
332 goto out_valid;
333
334 return 0;
335}
336
337static void sysfs_dentry_release(struct dentry *dentry)
338{
339 sysfs_put(dentry->d_fsdata);
340}
341
342const struct dentry_operations sysfs_dentry_ops = {
343 .d_revalidate = sysfs_dentry_revalidate,
344 .d_delete = sysfs_dentry_delete,
345 .d_release = sysfs_dentry_release,
346};
347
348struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
349{
350 char *dup_name = NULL;
351 struct sysfs_dirent *sd;
352
353 if (type & SYSFS_COPY_NAME) {
354 name = dup_name = kstrdup(name, GFP_KERNEL);
355 if (!name)
356 return NULL;
357 }
358
359 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
360 if (!sd)
361 goto err_out1;
362
363 if (sysfs_alloc_ino(&sd->s_ino))
364 goto err_out2;
365
366 atomic_set(&sd->s_count, 1);
367 atomic_set(&sd->s_active, 0);
368
369 sd->s_name = name;
370 sd->s_mode = mode;
371 sd->s_flags = type | SYSFS_FLAG_REMOVED;
372
373 return sd;
374
375 err_out2:
376 kmem_cache_free(sysfs_dir_cachep, sd);
377 err_out1:
378 kfree(dup_name);
379 return NULL;
380}
381
382/**
383 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
384 * @acxt: pointer to sysfs_addrm_cxt to be used
385 *
386 * This function is called when the caller is about to add or remove
387 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
388 * to keep and pass context to other addrm functions.
389 *
390 * LOCKING:
391 * Kernel thread context (may sleep). sysfs_mutex is locked on
392 * return.
393 */
394void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
395 __acquires(sysfs_mutex)
396{
397 memset(acxt, 0, sizeof(*acxt));
398
399 mutex_lock(&sysfs_mutex);
400}
401
402/**
403 * __sysfs_add_one - add sysfs_dirent to parent without warning
404 * @acxt: addrm context to use
405 * @sd: sysfs_dirent to be added
406 * @parent_sd: the parent sysfs_dirent to add @sd to
407 *
408 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
409 * the parent inode if @sd is a directory and link into the children
410 * list of the parent.
411 *
412 * This function should be called between calls to
413 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
414 * passed the same @acxt as passed to sysfs_addrm_start().
415 *
416 * LOCKING:
417 * Determined by sysfs_addrm_start().
418 *
419 * RETURNS:
420 * 0 on success, -EEXIST if entry with the given name already
421 * exists.
422 */
423int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
424 struct sysfs_dirent *parent_sd)
425{
426 struct sysfs_inode_attrs *ps_iattr;
427 int ret;
428
429 if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
430 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
431 sysfs_ns_type(parent_sd) ? "required" : "invalid",
432 parent_sd->s_name, sd->s_name);
433 return -EINVAL;
434 }
435
436 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
437 sd->s_parent = sysfs_get(parent_sd);
438
439 ret = sysfs_link_sibling(sd);
440 if (ret)
441 return ret;
442
443 /* Update timestamps on the parent */
444 ps_iattr = parent_sd->s_iattr;
445 if (ps_iattr) {
446 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
447 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
448 }
449
450 /* Mark the entry added into directory tree */
451 sd->s_flags &= ~SYSFS_FLAG_REMOVED;
452
453 return 0;
454}
455
456/** 22/**
457 * sysfs_pathname - return full path to sysfs dirent 23 * sysfs_pathname - return full path to sysfs dirent
458 * @sd: sysfs_dirent whose path we want 24 * @kn: kernfs_node whose path we want
459 * @path: caller allocated buffer of size PATH_MAX 25 * @path: caller allocated buffer of size PATH_MAX
460 * 26 *
461 * Gives the name "/" to the sysfs_root entry; any path returned 27 * Gives the name "/" to the sysfs_root entry; any path returned
462 * is relative to wherever sysfs is mounted. 28 * is relative to wherever sysfs is mounted.
463 */ 29 */
464static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) 30static char *sysfs_pathname(struct kernfs_node *kn, char *path)
465{ 31{
466 if (sd->s_parent) { 32 if (kn->parent) {
467 sysfs_pathname(sd->s_parent, path); 33 sysfs_pathname(kn->parent, path);
468 strlcat(path, "/", PATH_MAX); 34 strlcat(path, "/", PATH_MAX);
469 } 35 }
470 strlcat(path, sd->s_name, PATH_MAX); 36 strlcat(path, kn->name, PATH_MAX);
471 return path; 37 return path;
472} 38}
473 39
474void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) 40void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
475{ 41{
476 char *path; 42 char *path;
477 43
@@ -489,445 +55,34 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
489} 55}
490 56
491/** 57/**
492 * sysfs_add_one - add sysfs_dirent to parent
493 * @acxt: addrm context to use
494 * @sd: sysfs_dirent to be added
495 * @parent_sd: the parent sysfs_dirent to add @sd to
496 *
497 * Get @parent_sd and set @sd->s_parent to it and increment nlink of
498 * the parent inode if @sd is a directory and link into the children
499 * list of the parent.
500 *
501 * This function should be called between calls to
502 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
503 * passed the same @acxt as passed to sysfs_addrm_start().
504 *
505 * LOCKING:
506 * Determined by sysfs_addrm_start().
507 *
508 * RETURNS:
509 * 0 on success, -EEXIST if entry with the given name already
510 * exists.
511 */
512int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
513 struct sysfs_dirent *parent_sd)
514{
515 int ret;
516
517 ret = __sysfs_add_one(acxt, sd, parent_sd);
518
519 if (ret == -EEXIST)
520 sysfs_warn_dup(parent_sd, sd->s_name);
521 return ret;
522}
523
524/**
525 * sysfs_remove_one - remove sysfs_dirent from parent
526 * @acxt: addrm context to use
527 * @sd: sysfs_dirent to be removed
528 *
529 * Mark @sd removed and drop nlink of parent inode if @sd is a
530 * directory. @sd is unlinked from the children list.
531 *
532 * This function should be called between calls to
533 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
534 * passed the same @acxt as passed to sysfs_addrm_start().
535 *
536 * LOCKING:
537 * Determined by sysfs_addrm_start().
538 */
539static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
540 struct sysfs_dirent *sd)
541{
542 struct sysfs_inode_attrs *ps_iattr;
543
544 /*
545 * Removal can be called multiple times on the same node. Only the
546 * first invocation is effective and puts the base ref.
547 */
548 if (sd->s_flags & SYSFS_FLAG_REMOVED)
549 return;
550
551 sysfs_unlink_sibling(sd);
552
553 /* Update timestamps on the parent */
554 ps_iattr = sd->s_parent->s_iattr;
555 if (ps_iattr) {
556 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
557 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
558 }
559
560 sd->s_flags |= SYSFS_FLAG_REMOVED;
561 sd->u.removed_list = acxt->removed;
562 acxt->removed = sd;
563}
564
565/**
566 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
567 * @acxt: addrm context to finish up
568 *
569 * Finish up sysfs_dirent add/remove. Resources acquired by
570 * sysfs_addrm_start() are released and removed sysfs_dirents are
571 * cleaned up.
572 *
573 * LOCKING:
574 * sysfs_mutex is released.
575 */
576void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
577 __releases(sysfs_mutex)
578{
579 /* release resources acquired by sysfs_addrm_start() */
580 mutex_unlock(&sysfs_mutex);
581
582 /* kill removed sysfs_dirents */
583 while (acxt->removed) {
584 struct sysfs_dirent *sd = acxt->removed;
585
586 acxt->removed = sd->u.removed_list;
587
588 sysfs_deactivate(sd);
589 sysfs_unmap_bin_file(sd);
590 sysfs_put(sd);
591 }
592}
593
594/**
595 * sysfs_find_dirent - find sysfs_dirent with the given name
596 * @parent_sd: sysfs_dirent to search under
597 * @name: name to look for
598 * @ns: the namespace tag to use
599 *
600 * Look for sysfs_dirent with name @name under @parent_sd.
601 *
602 * LOCKING:
603 * mutex_lock(sysfs_mutex)
604 *
605 * RETURNS:
606 * Pointer to sysfs_dirent if found, NULL if not.
607 */
608struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
609 const unsigned char *name,
610 const void *ns)
611{
612 struct rb_node *node = parent_sd->s_dir.children.rb_node;
613 unsigned int hash;
614
615 if (!!sysfs_ns_type(parent_sd) != !!ns) {
616 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
617 sysfs_ns_type(parent_sd) ? "required" : "invalid",
618 parent_sd->s_name, name);
619 return NULL;
620 }
621
622 hash = sysfs_name_hash(name, ns);
623 while (node) {
624 struct sysfs_dirent *sd;
625 int result;
626
627 sd = to_sysfs_dirent(node);
628 result = sysfs_name_compare(hash, name, ns, sd);
629 if (result < 0)
630 node = node->rb_left;
631 else if (result > 0)
632 node = node->rb_right;
633 else
634 return sd;
635 }
636 return NULL;
637}
638
639/**
640 * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
641 * @parent_sd: sysfs_dirent to search under
642 * @name: name to look for
643 * @ns: the namespace tag to use
644 *
645 * Look for sysfs_dirent with name @name under @parent_sd and get
646 * it if found.
647 *
648 * LOCKING:
649 * Kernel thread context (may sleep). Grabs sysfs_mutex.
650 *
651 * RETURNS:
652 * Pointer to sysfs_dirent if found, NULL if not.
653 */
654struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
655 const unsigned char *name,
656 const void *ns)
657{
658 struct sysfs_dirent *sd;
659
660 mutex_lock(&sysfs_mutex);
661 sd = sysfs_find_dirent(parent_sd, name, ns);
662 sysfs_get(sd);
663 mutex_unlock(&sysfs_mutex);
664
665 return sd;
666}
667EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
668
669static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
670 enum kobj_ns_type type,
671 const char *name, const void *ns,
672 struct sysfs_dirent **p_sd)
673{
674 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
675 struct sysfs_addrm_cxt acxt;
676 struct sysfs_dirent *sd;
677 int rc;
678
679 /* allocate */
680 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
681 if (!sd)
682 return -ENOMEM;
683
684 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
685 sd->s_ns = ns;
686 sd->s_dir.kobj = kobj;
687
688 /* link in */
689 sysfs_addrm_start(&acxt);
690 rc = sysfs_add_one(&acxt, sd, parent_sd);
691 sysfs_addrm_finish(&acxt);
692
693 if (rc == 0)
694 *p_sd = sd;
695 else
696 sysfs_put(sd);
697
698 return rc;
699}
700
701int sysfs_create_subdir(struct kobject *kobj, const char *name,
702 struct sysfs_dirent **p_sd)
703{
704 return create_dir(kobj, kobj->sd,
705 KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
706}
707
708/**
709 * sysfs_read_ns_type: return associated ns_type
710 * @kobj: the kobject being queried
711 *
712 * Each kobject can be tagged with exactly one namespace type
713 * (i.e. network or user). Return the ns_type associated with
714 * this object if any
715 */
716static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
717{
718 const struct kobj_ns_type_operations *ops;
719 enum kobj_ns_type type;
720
721 ops = kobj_child_ns_ops(kobj);
722 if (!ops)
723 return KOBJ_NS_TYPE_NONE;
724
725 type = ops->type;
726 BUG_ON(type <= KOBJ_NS_TYPE_NONE);
727 BUG_ON(type >= KOBJ_NS_TYPES);
728 BUG_ON(!kobj_ns_type_registered(type));
729
730 return type;
731}
732
733/**
734 * sysfs_create_dir_ns - create a directory for an object with a namespace tag 58 * sysfs_create_dir_ns - create a directory for an object with a namespace tag
735 * @kobj: object we're creating directory for 59 * @kobj: object we're creating directory for
736 * @ns: the namespace tag to use 60 * @ns: the namespace tag to use
737 */ 61 */
738int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) 62int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
739{ 63{
740 enum kobj_ns_type type; 64 struct kernfs_node *parent, *kn;
741 struct sysfs_dirent *parent_sd, *sd;
742 int error = 0;
743 65
744 BUG_ON(!kobj); 66 BUG_ON(!kobj);
745 67
746 if (kobj->parent) 68 if (kobj->parent)
747 parent_sd = kobj->parent->sd; 69 parent = kobj->parent->sd;
748 else 70 else
749 parent_sd = &sysfs_root; 71 parent = sysfs_root_kn;
750 72
751 if (!parent_sd) 73 if (!parent)
752 return -ENOENT; 74 return -ENOENT;
753 75
754 type = sysfs_read_ns_type(kobj); 76 kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
755 77 S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
756 error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); 78 if (IS_ERR(kn)) {
757 if (!error) 79 if (PTR_ERR(kn) == -EEXIST)
758 kobj->sd = sd; 80 sysfs_warn_dup(parent, kobject_name(kobj));
759 return error; 81 return PTR_ERR(kn);
760}
761
762static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 unsigned int flags)
764{
765 struct dentry *ret = NULL;
766 struct dentry *parent = dentry->d_parent;
767 struct sysfs_dirent *parent_sd = parent->d_fsdata;
768 struct sysfs_dirent *sd;
769 struct inode *inode;
770 enum kobj_ns_type type;
771 const void *ns;
772
773 mutex_lock(&sysfs_mutex);
774
775 type = sysfs_ns_type(parent_sd);
776 ns = sysfs_info(dir->i_sb)->ns[type];
777
778 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
779
780 /* no such entry */
781 if (!sd) {
782 ret = ERR_PTR(-ENOENT);
783 goto out_unlock;
784 }
785 dentry->d_fsdata = sysfs_get(sd);
786
787 /* attach dentry and inode */
788 inode = sysfs_get_inode(dir->i_sb, sd);
789 if (!inode) {
790 ret = ERR_PTR(-ENOMEM);
791 goto out_unlock;
792 }
793
794 /* instantiate and hash dentry */
795 ret = d_materialise_unique(dentry, inode);
796 out_unlock:
797 mutex_unlock(&sysfs_mutex);
798 return ret;
799}
800
801const struct inode_operations sysfs_dir_inode_operations = {
802 .lookup = sysfs_lookup,
803 .permission = sysfs_permission,
804 .setattr = sysfs_setattr,
805 .getattr = sysfs_getattr,
806 .setxattr = sysfs_setxattr,
807};
808
809static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
810{
811 struct sysfs_dirent *last;
812
813 while (true) {
814 struct rb_node *rbn;
815
816 last = pos;
817
818 if (sysfs_type(pos) != SYSFS_DIR)
819 break;
820
821 rbn = rb_first(&pos->s_dir.children);
822 if (!rbn)
823 break;
824
825 pos = to_sysfs_dirent(rbn);
826 }
827
828 return last;
829}
830
831/**
832 * sysfs_next_descendant_post - find the next descendant for post-order walk
833 * @pos: the current position (%NULL to initiate traversal)
834 * @root: sysfs_dirent whose descendants to walk
835 *
836 * Find the next descendant to visit for post-order traversal of @root's
837 * descendants. @root is included in the iteration and the last node to be
838 * visited.
839 */
840static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
841 struct sysfs_dirent *root)
842{
843 struct rb_node *rbn;
844
845 lockdep_assert_held(&sysfs_mutex);
846
847 /* if first iteration, visit leftmost descendant which may be root */
848 if (!pos)
849 return sysfs_leftmost_descendant(root);
850
851 /* if we visited @root, we're done */
852 if (pos == root)
853 return NULL;
854
855 /* if there's an unvisited sibling, visit its leftmost descendant */
856 rbn = rb_next(&pos->s_rb);
857 if (rbn)
858 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
859
860 /* no sibling left, visit parent */
861 return pos->s_parent;
862}
863
864static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
865 struct sysfs_dirent *sd)
866{
867 struct sysfs_dirent *pos, *next;
868
869 if (!sd)
870 return;
871
872 pr_debug("sysfs %s: removing\n", sd->s_name);
873
874 next = NULL;
875 do {
876 pos = next;
877 next = sysfs_next_descendant_post(pos, sd);
878 if (pos)
879 sysfs_remove_one(acxt, pos);
880 } while (next);
881}
882
883/**
884 * sysfs_remove - remove a sysfs_dirent recursively
885 * @sd: the sysfs_dirent to remove
886 *
887 * Remove @sd along with all its subdirectories and files.
888 */
889void sysfs_remove(struct sysfs_dirent *sd)
890{
891 struct sysfs_addrm_cxt acxt;
892
893 sysfs_addrm_start(&acxt);
894 __sysfs_remove(&acxt, sd);
895 sysfs_addrm_finish(&acxt);
896}
897
898/**
899 * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
900 * @dir_sd: parent of the target
901 * @name: name of the sysfs_dirent to remove
902 * @ns: namespace tag of the sysfs_dirent to remove
903 *
904 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
905 * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
906 */
907int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
908 const void *ns)
909{
910 struct sysfs_addrm_cxt acxt;
911 struct sysfs_dirent *sd;
912
913 if (!dir_sd) {
914 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
915 name);
916 return -ENOENT;
917 } 82 }
918 83
919 sysfs_addrm_start(&acxt); 84 kobj->sd = kn;
920 85 return 0;
921 sd = sysfs_find_dirent(dir_sd, name, ns);
922 if (sd)
923 __sysfs_remove(&acxt, sd);
924
925 sysfs_addrm_finish(&acxt);
926
927 if (sd)
928 return 0;
929 else
930 return -ENOENT;
931} 86}
932 87
933/** 88/**
@@ -940,207 +95,47 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
940 */ 95 */
941void sysfs_remove_dir(struct kobject *kobj) 96void sysfs_remove_dir(struct kobject *kobj)
942{ 97{
943 struct sysfs_dirent *sd = kobj->sd; 98 struct kernfs_node *kn = kobj->sd;
944 99
945 /* 100 /*
946 * In general, kboject owner is responsible for ensuring removal 101 * In general, kboject owner is responsible for ensuring removal
947 * doesn't race with other operations and sysfs doesn't provide any 102 * doesn't race with other operations and sysfs doesn't provide any
948 * protection; however, when @kobj is used as a symlink target, the 103 * protection; however, when @kobj is used as a symlink target, the
949 * symlinking entity usually doesn't own @kobj and thus has no 104 * symlinking entity usually doesn't own @kobj and thus has no
950 * control over removal. @kobj->sd may be removed anytime and 105 * control over removal. @kobj->sd may be removed anytime
951 * symlink code may end up dereferencing an already freed sd. 106 * and symlink code may end up dereferencing an already freed node.
952 * 107 *
953 * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation 108 * sysfs_symlink_target_lock synchronizes @kobj->sd
954 * against symlink operations so that symlink code can safely 109 * disassociation against symlink operations so that symlink code
955 * dereference @kobj->sd. 110 * can safely dereference @kobj->sd.
956 */ 111 */
957 spin_lock(&sysfs_symlink_target_lock); 112 spin_lock(&sysfs_symlink_target_lock);
958 kobj->sd = NULL; 113 kobj->sd = NULL;
959 spin_unlock(&sysfs_symlink_target_lock); 114 spin_unlock(&sysfs_symlink_target_lock);
960 115
961 if (sd) { 116 if (kn) {
962 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 117 WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
963 sysfs_remove(sd); 118 kernfs_remove(kn);
964 } 119 }
965} 120}
966 121
967int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
968 const char *new_name, const void *new_ns)
969{
970 int error;
971
972 mutex_lock(&sysfs_mutex);
973
974 error = 0;
975 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
976 (strcmp(sd->s_name, new_name) == 0))
977 goto out; /* nothing to rename */
978
979 error = -EEXIST;
980 if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
981 goto out;
982
983 /* rename sysfs_dirent */
984 if (strcmp(sd->s_name, new_name) != 0) {
985 error = -ENOMEM;
986 new_name = kstrdup(new_name, GFP_KERNEL);
987 if (!new_name)
988 goto out;
989
990 kfree(sd->s_name);
991 sd->s_name = new_name;
992 }
993
994 /*
995 * Move to the appropriate place in the appropriate directories rbtree.
996 */
997 sysfs_unlink_sibling(sd);
998 sysfs_get(new_parent_sd);
999 sysfs_put(sd->s_parent);
1000 sd->s_ns = new_ns;
1001 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
1002 sd->s_parent = new_parent_sd;
1003 sysfs_link_sibling(sd);
1004
1005 error = 0;
1006 out:
1007 mutex_unlock(&sysfs_mutex);
1008 return error;
1009}
1010
1011int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, 122int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
1012 const void *new_ns) 123 const void *new_ns)
1013{ 124{
1014 struct sysfs_dirent *parent_sd = kobj->sd->s_parent; 125 struct kernfs_node *parent = kobj->sd->parent;
1015 126
1016 return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); 127 return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
1017} 128}
1018 129
1019int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, 130int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
1020 const void *new_ns) 131 const void *new_ns)
1021{ 132{
1022 struct sysfs_dirent *sd = kobj->sd; 133 struct kernfs_node *kn = kobj->sd;
1023 struct sysfs_dirent *new_parent_sd; 134 struct kernfs_node *new_parent;
1024 135
1025 BUG_ON(!sd->s_parent); 136 BUG_ON(!kn->parent);
1026 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 137 new_parent = new_parent_kobj && new_parent_kobj->sd ?
1027 new_parent_kobj->sd : &sysfs_root; 138 new_parent_kobj->sd : sysfs_root_kn;
1028 139
1029 return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); 140 return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
1030} 141}
1031
1032/* Relationship between s_mode and the DT_xxx types */
1033static inline unsigned char dt_type(struct sysfs_dirent *sd)
1034{
1035 return (sd->s_mode >> 12) & 15;
1036}
1037
1038static int sysfs_dir_release(struct inode *inode, struct file *filp)
1039{
1040 sysfs_put(filp->private_data);
1041 return 0;
1042}
1043
1044static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
1045 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
1046{
1047 if (pos) {
1048 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
1049 pos->s_parent == parent_sd &&
1050 hash == pos->s_hash;
1051 sysfs_put(pos);
1052 if (!valid)
1053 pos = NULL;
1054 }
1055 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1056 struct rb_node *node = parent_sd->s_dir.children.rb_node;
1057 while (node) {
1058 pos = to_sysfs_dirent(node);
1059
1060 if (hash < pos->s_hash)
1061 node = node->rb_left;
1062 else if (hash > pos->s_hash)
1063 node = node->rb_right;
1064 else
1065 break;
1066 }
1067 }
1068 /* Skip over entries in the wrong namespace */
1069 while (pos && pos->s_ns != ns) {
1070 struct rb_node *node = rb_next(&pos->s_rb);
1071 if (!node)
1072 pos = NULL;
1073 else
1074 pos = to_sysfs_dirent(node);
1075 }
1076 return pos;
1077}
1078
1079static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
1080 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
1081{
1082 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
1083 if (pos)
1084 do {
1085 struct rb_node *node = rb_next(&pos->s_rb);
1086 if (!node)
1087 pos = NULL;
1088 else
1089 pos = to_sysfs_dirent(node);
1090 } while (pos && pos->s_ns != ns);
1091 return pos;
1092}
1093
1094static int sysfs_readdir(struct file *file, struct dir_context *ctx)
1095{
1096 struct dentry *dentry = file->f_path.dentry;
1097 struct sysfs_dirent *parent_sd = dentry->d_fsdata;
1098 struct sysfs_dirent *pos = file->private_data;
1099 enum kobj_ns_type type;
1100 const void *ns;
1101
1102 type = sysfs_ns_type(parent_sd);
1103 ns = sysfs_info(dentry->d_sb)->ns[type];
1104
1105 if (!dir_emit_dots(file, ctx))
1106 return 0;
1107 mutex_lock(&sysfs_mutex);
1108 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
1109 pos;
1110 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
1111 const char *name = pos->s_name;
1112 unsigned int type = dt_type(pos);
1113 int len = strlen(name);
1114 ino_t ino = pos->s_ino;
1115 ctx->pos = pos->s_hash;
1116 file->private_data = sysfs_get(pos);
1117
1118 mutex_unlock(&sysfs_mutex);
1119 if (!dir_emit(ctx, name, len, ino, type))
1120 return 0;
1121 mutex_lock(&sysfs_mutex);
1122 }
1123 mutex_unlock(&sysfs_mutex);
1124 file->private_data = NULL;
1125 ctx->pos = INT_MAX;
1126 return 0;
1127}
1128
1129static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1130{
1131 struct inode *inode = file_inode(file);
1132 loff_t ret;
1133
1134 mutex_lock(&inode->i_mutex);
1135 ret = generic_file_llseek(file, offset, whence);
1136 mutex_unlock(&inode->i_mutex);
1137
1138 return ret;
1139}
1140
1141const struct file_operations sysfs_dir_operations = {
1142 .read = generic_read_dir,
1143 .iterate = sysfs_readdir,
1144 .release = sysfs_dir_release,
1145 .llseek = sysfs_dir_llseek,
1146};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 35e7d08fe629..810cf6e613e5 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,70 +14,23 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
18#include <linux/namei.h>
19#include <linux/poll.h>
20#include <linux/list.h> 17#include <linux/list.h>
21#include <linux/mutex.h> 18#include <linux/mutex.h>
22#include <linux/limits.h>
23#include <linux/uaccess.h>
24#include <linux/seq_file.h> 19#include <linux/seq_file.h>
25#include <linux/mm.h>
26 20
27#include "sysfs.h" 21#include "sysfs.h"
22#include "../kernfs/kernfs-internal.h"
28 23
29/* 24/*
30 * There's one sysfs_open_file for each open file and one sysfs_open_dirent 25 * Determine ktype->sysfs_ops for the given kernfs_node. This function
31 * for each sysfs_dirent with one or more open files.
32 *
33 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
34 * protected by sysfs_open_dirent_lock.
35 *
36 * filp->private_data points to seq_file whose ->private points to
37 * sysfs_open_file. sysfs_open_files are chained at
38 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
39 */
40static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
41static DEFINE_MUTEX(sysfs_open_file_mutex);
42
43struct sysfs_open_dirent {
44 atomic_t refcnt;
45 atomic_t event;
46 wait_queue_head_t poll;
47 struct list_head files; /* goes through sysfs_open_file.list */
48};
49
50struct sysfs_open_file {
51 struct sysfs_dirent *sd;
52 struct file *file;
53 struct mutex mutex;
54 int event;
55 struct list_head list;
56
57 bool mmapped;
58 const struct vm_operations_struct *vm_ops;
59};
60
61static bool sysfs_is_bin(struct sysfs_dirent *sd)
62{
63 return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
64}
65
66static struct sysfs_open_file *sysfs_of(struct file *file)
67{
68 return ((struct seq_file *)file->private_data)->private;
69}
70
71/*
72 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
73 * must be called while holding an active reference. 26 * must be called while holding an active reference.
74 */ 27 */
75static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) 28static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn)
76{ 29{
77 struct kobject *kobj = sd->s_parent->s_dir.kobj; 30 struct kobject *kobj = kn->parent->priv;
78 31
79 if (!sysfs_ignore_lockdep(sd)) 32 if (kn->flags & KERNFS_LOCKDEP)
80 lockdep_assert_held(sd); 33 lockdep_assert_held(kn);
81 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; 34 return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
82} 35}
83 36
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
86 * details like buffering and seeking. The following function pipes 39 * details like buffering and seeking. The following function pipes
87 * sysfs_ops->show() result through seq_file. 40 * sysfs_ops->show() result through seq_file.
88 */ 41 */
89static int sysfs_seq_show(struct seq_file *sf, void *v) 42static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
90{ 43{
91 struct sysfs_open_file *of = sf->private; 44 struct kernfs_open_file *of = sf->private;
92 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 45 struct kobject *kobj = of->kn->parent->priv;
93 const struct sysfs_ops *ops; 46 const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
94 char *buf;
95 ssize_t count; 47 ssize_t count;
48 char *buf;
96 49
97 /* acquire buffer and ensure that it's >= PAGE_SIZE */ 50 /* acquire buffer and ensure that it's >= PAGE_SIZE */
98 count = seq_get_buf(sf, &buf); 51 count = seq_get_buf(sf, &buf);
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
102 } 55 }
103 56
104 /* 57 /*
105 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex 58 * Invoke show(). Control may reach here via seq file lseek even
106 * nests outside active ref and is just to ensure that the ops 59 * if @ops->show() isn't implemented.
107 * aren't called concurrently for the same open file.
108 */ 60 */
109 mutex_lock(&of->mutex); 61 if (ops->show) {
110 if (!sysfs_get_active(of->sd)) { 62 count = ops->show(kobj, of->kn->priv, buf);
111 mutex_unlock(&of->mutex); 63 if (count < 0)
112 return -ENODEV; 64 return count;
113 } 65 }
114 66
115 of->event = atomic_read(&of->sd->s_attr.open->event);
116
117 /*
118 * Lookup @ops and invoke show(). Control may reach here via seq
119 * file lseek even if @ops->show() isn't implemented.
120 */
121 ops = sysfs_file_ops(of->sd);
122 if (ops->show)
123 count = ops->show(kobj, of->sd->s_attr.attr, buf);
124 else
125 count = 0;
126
127 sysfs_put_active(of->sd);
128 mutex_unlock(&of->mutex);
129
130 if (count < 0)
131 return count;
132
133 /* 67 /*
134 * The code works fine with PAGE_SIZE return but it's likely to 68 * The code works fine with PAGE_SIZE return but it's likely to
135 * indicate truncated result or overflow in normal use cases. 69 * indicate truncated result or overflow in normal use cases.
@@ -144,726 +78,194 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
144 return 0; 78 return 0;
145} 79}
146 80
147/* 81static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf,
148 * Read method for bin files. As reading a bin file can have side-effects, 82 size_t count, loff_t pos)
149 * the exact offset and bytes specified in read(2) call should be passed to
150 * the read callback making it difficult to use seq_file. Implement
151 * simplistic custom buffering for bin files.
152 */
153static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
154 size_t bytes, loff_t *off)
155{ 83{
156 struct sysfs_open_file *of = sysfs_of(file); 84 struct bin_attribute *battr = of->kn->priv;
157 struct bin_attribute *battr = of->sd->s_attr.bin_attr; 85 struct kobject *kobj = of->kn->parent->priv;
158 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 86 loff_t size = file_inode(of->file)->i_size;
159 loff_t size = file_inode(file)->i_size;
160 int count = min_t(size_t, bytes, PAGE_SIZE);
161 loff_t offs = *off;
162 char *buf;
163 87
164 if (!bytes) 88 if (!count)
165 return 0; 89 return 0;
166 90
167 if (size) { 91 if (size) {
168 if (offs > size) 92 if (pos > size)
169 return 0; 93 return 0;
170 if (offs + count > size) 94 if (pos + count > size)
171 count = size - offs; 95 count = size - pos;
172 }
173
174 buf = kmalloc(count, GFP_KERNEL);
175 if (!buf)
176 return -ENOMEM;
177
178 /* need of->sd for battr, its parent for kobj */
179 mutex_lock(&of->mutex);
180 if (!sysfs_get_active(of->sd)) {
181 count = -ENODEV;
182 mutex_unlock(&of->mutex);
183 goto out_free;
184 }
185
186 if (battr->read)
187 count = battr->read(file, kobj, battr, buf, offs, count);
188 else
189 count = -EIO;
190
191 sysfs_put_active(of->sd);
192 mutex_unlock(&of->mutex);
193
194 if (count < 0)
195 goto out_free;
196
197 if (copy_to_user(userbuf, buf, count)) {
198 count = -EFAULT;
199 goto out_free;
200 } 96 }
201 97
202 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); 98 if (!battr->read)
203 99 return -EIO;
204 *off = offs + count;
205 100
206 out_free: 101 return battr->read(of->file, kobj, battr, buf, pos, count);
207 kfree(buf);
208 return count;
209} 102}
210 103
211/** 104/* kernfs write callback for regular sysfs files */
212 * flush_write_buffer - push buffer to kobject 105static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf,
213 * @of: open file 106 size_t count, loff_t pos)
214 * @buf: data buffer for file
215 * @off: file offset to write to
216 * @count: number of bytes
217 *
218 * Get the correct pointers for the kobject and the attribute we're dealing
219 * with, then call the store() method for it with @buf.
220 */
221static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
222 size_t count)
223{ 107{
224 struct kobject *kobj = of->sd->s_parent->s_dir.kobj; 108 const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
225 int rc = 0; 109 struct kobject *kobj = of->kn->parent->priv;
226
227 /*
228 * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
229 * nests outside active ref and is just to ensure that the ops
230 * aren't called concurrently for the same open file.
231 */
232 mutex_lock(&of->mutex);
233 if (!sysfs_get_active(of->sd)) {
234 mutex_unlock(&of->mutex);
235 return -ENODEV;
236 }
237 110
238 if (sysfs_is_bin(of->sd)) { 111 if (!count)
239 struct bin_attribute *battr = of->sd->s_attr.bin_attr; 112 return 0;
240
241 rc = -EIO;
242 if (battr->write)
243 rc = battr->write(of->file, kobj, battr, buf, off,
244 count);
245 } else {
246 const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
247
248 rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
249 }
250
251 sysfs_put_active(of->sd);
252 mutex_unlock(&of->mutex);
253 113
254 return rc; 114 return ops->store(kobj, of->kn->priv, buf, count);
255} 115}
256 116
257/** 117/* kernfs write callback for bin sysfs files */
258 * sysfs_write_file - write an attribute 118static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf,
259 * @file: file pointer 119 size_t count, loff_t pos)
260 * @user_buf: data to write
261 * @count: number of bytes
262 * @ppos: starting offset
263 *
264 * Copy data in from userland and pass it to the matching
265 * sysfs_ops->store() by invoking flush_write_buffer().
266 *
267 * There is no easy way for us to know if userspace is only doing a partial
268 * write, so we don't support them. We expect the entire buffer to come on
269 * the first write. Hint: if you're writing a value, first read the file,
270 * modify only the the value you're changing, then write entire buffer
271 * back.
272 */
273static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
274 size_t count, loff_t *ppos)
275{ 120{
276 struct sysfs_open_file *of = sysfs_of(file); 121 struct bin_attribute *battr = of->kn->priv;
277 ssize_t len = min_t(size_t, count, PAGE_SIZE); 122 struct kobject *kobj = of->kn->parent->priv;
278 loff_t size = file_inode(file)->i_size; 123 loff_t size = file_inode(of->file)->i_size;
279 char *buf;
280 124
281 if (sysfs_is_bin(of->sd) && size) { 125 if (size) {
282 if (size <= *ppos) 126 if (size <= pos)
283 return 0; 127 return 0;
284 len = min_t(ssize_t, len, size - *ppos); 128 count = min_t(ssize_t, count, size - pos);
285 } 129 }
286 130 if (!count)
287 if (!len)
288 return 0; 131 return 0;
289 132
290 buf = kmalloc(len + 1, GFP_KERNEL); 133 if (!battr->write)
291 if (!buf) 134 return -EIO;
292 return -ENOMEM;
293 135
294 if (copy_from_user(buf, user_buf, len)) { 136 return battr->write(of->file, kobj, battr, buf, pos, count);
295 len = -EFAULT;
296 goto out_free;
297 }
298 buf[len] = '\0'; /* guarantee string termination */
299
300 len = flush_write_buffer(of, buf, *ppos, len);
301 if (len > 0)
302 *ppos += len;
303out_free:
304 kfree(buf);
305 return len;
306}
307
308static void sysfs_bin_vma_open(struct vm_area_struct *vma)
309{
310 struct file *file = vma->vm_file;
311 struct sysfs_open_file *of = sysfs_of(file);
312
313 if (!of->vm_ops)
314 return;
315
316 if (!sysfs_get_active(of->sd))
317 return;
318
319 if (of->vm_ops->open)
320 of->vm_ops->open(vma);
321
322 sysfs_put_active(of->sd);
323} 137}
324 138
325static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 139static int sysfs_kf_bin_mmap(struct kernfs_open_file *of,
140 struct vm_area_struct *vma)
326{ 141{
327 struct file *file = vma->vm_file; 142 struct bin_attribute *battr = of->kn->priv;
328 struct sysfs_open_file *of = sysfs_of(file); 143 struct kobject *kobj = of->kn->parent->priv;
329 int ret;
330 144
331 if (!of->vm_ops) 145 return battr->mmap(of->file, kobj, battr, vma);
332 return VM_FAULT_SIGBUS;
333
334 if (!sysfs_get_active(of->sd))
335 return VM_FAULT_SIGBUS;
336
337 ret = VM_FAULT_SIGBUS;
338 if (of->vm_ops->fault)
339 ret = of->vm_ops->fault(vma, vmf);
340
341 sysfs_put_active(of->sd);
342 return ret;
343} 146}
344 147
345static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, 148void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr)
346 struct vm_fault *vmf)
347{ 149{
348 struct file *file = vma->vm_file; 150 struct kernfs_node *kn = kobj->sd, *tmp;
349 struct sysfs_open_file *of = sysfs_of(file);
350 int ret;
351
352 if (!of->vm_ops)
353 return VM_FAULT_SIGBUS;
354 151
355 if (!sysfs_get_active(of->sd)) 152 if (kn && dir)
356 return VM_FAULT_SIGBUS; 153 kn = kernfs_find_and_get(kn, dir);
357
358 ret = 0;
359 if (of->vm_ops->page_mkwrite)
360 ret = of->vm_ops->page_mkwrite(vma, vmf);
361 else 154 else
362 file_update_time(file); 155 kernfs_get(kn);
363
364 sysfs_put_active(of->sd);
365 return ret;
366}
367
368static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
369 void *buf, int len, int write)
370{
371 struct file *file = vma->vm_file;
372 struct sysfs_open_file *of = sysfs_of(file);
373 int ret;
374
375 if (!of->vm_ops)
376 return -EINVAL;
377
378 if (!sysfs_get_active(of->sd))
379 return -EINVAL;
380
381 ret = -EINVAL;
382 if (of->vm_ops->access)
383 ret = of->vm_ops->access(vma, addr, buf, len, write);
384
385 sysfs_put_active(of->sd);
386 return ret;
387}
388
389#ifdef CONFIG_NUMA
390static int sysfs_bin_set_policy(struct vm_area_struct *vma,
391 struct mempolicy *new)
392{
393 struct file *file = vma->vm_file;
394 struct sysfs_open_file *of = sysfs_of(file);
395 int ret;
396
397 if (!of->vm_ops)
398 return 0;
399
400 if (!sysfs_get_active(of->sd))
401 return -EINVAL;
402
403 ret = 0;
404 if (of->vm_ops->set_policy)
405 ret = of->vm_ops->set_policy(vma, new);
406
407 sysfs_put_active(of->sd);
408 return ret;
409}
410
411static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
412 unsigned long addr)
413{
414 struct file *file = vma->vm_file;
415 struct sysfs_open_file *of = sysfs_of(file);
416 struct mempolicy *pol;
417
418 if (!of->vm_ops)
419 return vma->vm_policy;
420
421 if (!sysfs_get_active(of->sd))
422 return vma->vm_policy;
423
424 pol = vma->vm_policy;
425 if (of->vm_ops->get_policy)
426 pol = of->vm_ops->get_policy(vma, addr);
427
428 sysfs_put_active(of->sd);
429 return pol;
430}
431
432static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
433 const nodemask_t *to, unsigned long flags)
434{
435 struct file *file = vma->vm_file;
436 struct sysfs_open_file *of = sysfs_of(file);
437 int ret;
438
439 if (!of->vm_ops)
440 return 0;
441
442 if (!sysfs_get_active(of->sd))
443 return 0;
444
445 ret = 0;
446 if (of->vm_ops->migrate)
447 ret = of->vm_ops->migrate(vma, from, to, flags);
448
449 sysfs_put_active(of->sd);
450 return ret;
451}
452#endif
453
454static const struct vm_operations_struct sysfs_bin_vm_ops = {
455 .open = sysfs_bin_vma_open,
456 .fault = sysfs_bin_fault,
457 .page_mkwrite = sysfs_bin_page_mkwrite,
458 .access = sysfs_bin_access,
459#ifdef CONFIG_NUMA
460 .set_policy = sysfs_bin_set_policy,
461 .get_policy = sysfs_bin_get_policy,
462 .migrate = sysfs_bin_migrate,
463#endif
464};
465
466static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
467{
468 struct sysfs_open_file *of = sysfs_of(file);
469 struct bin_attribute *battr = of->sd->s_attr.bin_attr;
470 struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
471 int rc;
472
473 mutex_lock(&of->mutex);
474
475 /* need of->sd for battr, its parent for kobj */
476 rc = -ENODEV;
477 if (!sysfs_get_active(of->sd))
478 goto out_unlock;
479
480 if (!battr->mmap)
481 goto out_put;
482
483 rc = battr->mmap(file, kobj, battr, vma);
484 if (rc)
485 goto out_put;
486
487 /*
488 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
489 * to satisfy versions of X which crash if the mmap fails: that
490 * substitutes a new vm_file, and we don't then want bin_vm_ops.
491 */
492 if (vma->vm_file != file)
493 goto out_put;
494
495 rc = -EINVAL;
496 if (of->mmapped && of->vm_ops != vma->vm_ops)
497 goto out_put;
498 156
499 /* 157 if (kn && attr) {
500 * It is not possible to successfully wrap close. 158 tmp = kernfs_find_and_get(kn, attr);
501 * So error if someone is trying to use close. 159 kernfs_put(kn);
502 */ 160 kn = tmp;
503 rc = -EINVAL;
504 if (vma->vm_ops && vma->vm_ops->close)
505 goto out_put;
506
507 rc = 0;
508 of->mmapped = 1;
509 of->vm_ops = vma->vm_ops;
510 vma->vm_ops = &sysfs_bin_vm_ops;
511out_put:
512 sysfs_put_active(of->sd);
513out_unlock:
514 mutex_unlock(&of->mutex);
515
516 return rc;
517}
518
519/**
520 * sysfs_get_open_dirent - get or create sysfs_open_dirent
521 * @sd: target sysfs_dirent
522 * @of: sysfs_open_file for this instance of open
523 *
524 * If @sd->s_attr.open exists, increment its reference count;
525 * otherwise, create one. @of is chained to the files list.
526 *
527 * LOCKING:
528 * Kernel thread context (may sleep).
529 *
530 * RETURNS:
531 * 0 on success, -errno on failure.
532 */
533static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
534 struct sysfs_open_file *of)
535{
536 struct sysfs_open_dirent *od, *new_od = NULL;
537
538 retry:
539 mutex_lock(&sysfs_open_file_mutex);
540 spin_lock_irq(&sysfs_open_dirent_lock);
541
542 if (!sd->s_attr.open && new_od) {
543 sd->s_attr.open = new_od;
544 new_od = NULL;
545 } 161 }
546 162
547 od = sd->s_attr.open; 163 if (kn) {
548 if (od) { 164 kernfs_notify(kn);
549 atomic_inc(&od->refcnt); 165 kernfs_put(kn);
550 list_add_tail(&of->list, &od->files);
551 }
552
553 spin_unlock_irq(&sysfs_open_dirent_lock);
554 mutex_unlock(&sysfs_open_file_mutex);
555
556 if (od) {
557 kfree(new_od);
558 return 0;
559 } 166 }
167}
168EXPORT_SYMBOL_GPL(sysfs_notify);
560 169
561 /* not there, initialize a new one and retry */ 170static const struct kernfs_ops sysfs_file_kfops_empty = {
562 new_od = kmalloc(sizeof(*new_od), GFP_KERNEL); 171};
563 if (!new_od)
564 return -ENOMEM;
565 172
566 atomic_set(&new_od->refcnt, 0); 173static const struct kernfs_ops sysfs_file_kfops_ro = {
567 atomic_set(&new_od->event, 1); 174 .seq_show = sysfs_kf_seq_show,
568 init_waitqueue_head(&new_od->poll); 175};
569 INIT_LIST_HEAD(&new_od->files);
570 goto retry;
571}
572 176
573/** 177static const struct kernfs_ops sysfs_file_kfops_wo = {
574 * sysfs_put_open_dirent - put sysfs_open_dirent 178 .write = sysfs_kf_write,
575 * @sd: target sysfs_dirent 179};
576 * @of: associated sysfs_open_file
577 *
578 * Put @sd->s_attr.open and unlink @of from the files list. If
579 * reference count reaches zero, disassociate and free it.
580 *
581 * LOCKING:
582 * None.
583 */
584static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
585 struct sysfs_open_file *of)
586{
587 struct sysfs_open_dirent *od = sd->s_attr.open;
588 unsigned long flags;
589 180
590 mutex_lock(&sysfs_open_file_mutex); 181static const struct kernfs_ops sysfs_file_kfops_rw = {
591 spin_lock_irqsave(&sysfs_open_dirent_lock, flags); 182 .seq_show = sysfs_kf_seq_show,
183 .write = sysfs_kf_write,
184};
592 185
593 if (of) 186static const struct kernfs_ops sysfs_bin_kfops_ro = {
594 list_del(&of->list); 187 .read = sysfs_kf_bin_read,
188};
595 189
596 if (atomic_dec_and_test(&od->refcnt)) 190static const struct kernfs_ops sysfs_bin_kfops_wo = {
597 sd->s_attr.open = NULL; 191 .write = sysfs_kf_bin_write,
598 else 192};
599 od = NULL;
600 193
601 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); 194static const struct kernfs_ops sysfs_bin_kfops_rw = {
602 mutex_unlock(&sysfs_open_file_mutex); 195 .read = sysfs_kf_bin_read,
196 .write = sysfs_kf_bin_write,
197};
603 198
604 kfree(od); 199static const struct kernfs_ops sysfs_bin_kfops_mmap = {
605} 200 .read = sysfs_kf_bin_read,
201 .write = sysfs_kf_bin_write,
202 .mmap = sysfs_kf_bin_mmap,
203};
606 204
607static int sysfs_open_file(struct inode *inode, struct file *file) 205int sysfs_add_file_mode_ns(struct kernfs_node *parent,
206 const struct attribute *attr, bool is_bin,
207 umode_t mode, const void *ns)
608{ 208{
609 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 209 struct lock_class_key *key = NULL;
610 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 210 const struct kernfs_ops *ops;
611 struct sysfs_open_file *of; 211 struct kernfs_node *kn;
612 bool has_read, has_write; 212 loff_t size;
613 int error = -EACCES;
614
615 /* need attr_sd for attr and ops, its parent for kobj */
616 if (!sysfs_get_active(attr_sd))
617 return -ENODEV;
618 213
619 if (sysfs_is_bin(attr_sd)) { 214 if (!is_bin) {
620 struct bin_attribute *battr = attr_sd->s_attr.bin_attr; 215 struct kobject *kobj = parent->priv;
621 216 const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
622 has_read = battr->read || battr->mmap;
623 has_write = battr->write || battr->mmap;
624 } else {
625 const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
626 217
627 /* every kobject with an attribute needs a ktype assigned */ 218 /* every kobject with an attribute needs a ktype assigned */
628 if (WARN(!ops, KERN_ERR 219 if (WARN(!sysfs_ops, KERN_ERR
629 "missing sysfs attribute operations for kobject: %s\n", 220 "missing sysfs attribute operations for kobject: %s\n",
630 kobject_name(kobj))) 221 kobject_name(kobj)))
631 goto err_out; 222 return -EINVAL;
632 223
633 has_read = ops->show; 224 if (sysfs_ops->show && sysfs_ops->store)
634 has_write = ops->store; 225 ops = &sysfs_file_kfops_rw;
635 } 226 else if (sysfs_ops->show)
636 227 ops = &sysfs_file_kfops_ro;
637 /* check perms and supported operations */ 228 else if (sysfs_ops->store)
638 if ((file->f_mode & FMODE_WRITE) && 229 ops = &sysfs_file_kfops_wo;
639 (!(inode->i_mode & S_IWUGO) || !has_write)) 230 else
640 goto err_out; 231 ops = &sysfs_file_kfops_empty;
641 232
642 if ((file->f_mode & FMODE_READ) && 233 size = PAGE_SIZE;
643 (!(inode->i_mode & S_IRUGO) || !has_read)) 234 } else {
644 goto err_out; 235 struct bin_attribute *battr = (void *)attr;
645 236
646 /* allocate a sysfs_open_file for the file */ 237 if (battr->mmap)
647 error = -ENOMEM; 238 ops = &sysfs_bin_kfops_mmap;
648 of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); 239 else if (battr->read && battr->write)
649 if (!of) 240 ops = &sysfs_bin_kfops_rw;
650 goto err_out; 241 else if (battr->read)
651 242 ops = &sysfs_bin_kfops_ro;
652 /* 243 else if (battr->write)
653 * The following is done to give a different lockdep key to 244 ops = &sysfs_bin_kfops_wo;
654 * @of->mutex for files which implement mmap. This is a rather 245 else
655 * crude way to avoid false positive lockdep warning around 246 ops = &sysfs_file_kfops_empty;
656 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and 247
657 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 248 size = battr->size;
658 * which mm->mmap_sem nests, while holding @of->mutex. As each
659 * open file has a separate mutex, it's okay as long as those don't
660 * happen on the same file. At this point, we can't easily give
661 * each file a separate locking class. Let's differentiate on
662 * whether the file is bin or not for now.
663 */
664 if (sysfs_is_bin(attr_sd))
665 mutex_init(&of->mutex);
666 else
667 mutex_init(&of->mutex);
668
669 of->sd = attr_sd;
670 of->file = file;
671
672 /*
673 * Always instantiate seq_file even if read access doesn't use
674 * seq_file or is not requested. This unifies private data access
675 * and readable regular files are the vast majority anyway.
676 */
677 if (sysfs_is_bin(attr_sd))
678 error = single_open(file, NULL, of);
679 else
680 error = single_open(file, sysfs_seq_show, of);
681 if (error)
682 goto err_free;
683
684 /* seq_file clears PWRITE unconditionally, restore it if WRITE */
685 if (file->f_mode & FMODE_WRITE)
686 file->f_mode |= FMODE_PWRITE;
687
688 /* make sure we have open dirent struct */
689 error = sysfs_get_open_dirent(attr_sd, of);
690 if (error)
691 goto err_close;
692
693 /* open succeeded, put active references */
694 sysfs_put_active(attr_sd);
695 return 0;
696
697err_close:
698 single_release(inode, file);
699err_free:
700 kfree(of);
701err_out:
702 sysfs_put_active(attr_sd);
703 return error;
704}
705
706static int sysfs_release(struct inode *inode, struct file *filp)
707{
708 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
709 struct sysfs_open_file *of = sysfs_of(filp);
710
711 sysfs_put_open_dirent(sd, of);
712 single_release(inode, filp);
713 kfree(of);
714
715 return 0;
716}
717
718void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
719{
720 struct sysfs_open_dirent *od;
721 struct sysfs_open_file *of;
722
723 if (!sysfs_is_bin(sd))
724 return;
725
726 spin_lock_irq(&sysfs_open_dirent_lock);
727 od = sd->s_attr.open;
728 if (od)
729 atomic_inc(&od->refcnt);
730 spin_unlock_irq(&sysfs_open_dirent_lock);
731 if (!od)
732 return;
733
734 mutex_lock(&sysfs_open_file_mutex);
735 list_for_each_entry(of, &od->files, list) {
736 struct inode *inode = file_inode(of->file);
737 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
738 } 249 }
739 mutex_unlock(&sysfs_open_file_mutex);
740
741 sysfs_put_open_dirent(sd, NULL);
742}
743
744/* Sysfs attribute files are pollable. The idea is that you read
745 * the content and then you use 'poll' or 'select' to wait for
746 * the content to change. When the content changes (assuming the
747 * manager for the kobject supports notification), poll will
748 * return POLLERR|POLLPRI, and select will return the fd whether
749 * it is waiting for read, write, or exceptions.
750 * Once poll/select indicates that the value has changed, you
751 * need to close and re-open the file, or seek to 0 and read again.
752 * Reminder: this only works for attributes which actively support
753 * it, and it is not possible to test an attribute from userspace
754 * to see if it supports poll (Neither 'poll' nor 'select' return
755 * an appropriate error code). When in doubt, set a suitable timeout value.
756 */
757static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
758{
759 struct sysfs_open_file *of = sysfs_of(filp);
760 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
761 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
762
763 /* need parent for the kobj, grab both */
764 if (!sysfs_get_active(attr_sd))
765 goto trigger;
766
767 poll_wait(filp, &od->poll, wait);
768 250
769 sysfs_put_active(attr_sd); 251#ifdef CONFIG_DEBUG_LOCK_ALLOC
770 252 if (!attr->ignore_lockdep)
771 if (of->event != atomic_read(&od->event)) 253 key = attr->key ?: (struct lock_class_key *)&attr->skey;
772 goto trigger; 254#endif
773 255 kn = __kernfs_create_file(parent, attr->name, mode, size, ops,
774 return DEFAULT_POLLMASK; 256 (void *)attr, ns, true, key);
775 257 if (IS_ERR(kn)) {
776 trigger: 258 if (PTR_ERR(kn) == -EEXIST)
777 return DEFAULT_POLLMASK|POLLERR|POLLPRI; 259 sysfs_warn_dup(parent, attr->name);
778} 260 return PTR_ERR(kn);
779
780void sysfs_notify_dirent(struct sysfs_dirent *sd)
781{
782 struct sysfs_open_dirent *od;
783 unsigned long flags;
784
785 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
786
787 if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
788 od = sd->s_attr.open;
789 if (od) {
790 atomic_inc(&od->event);
791 wake_up_interruptible(&od->poll);
792 }
793 } 261 }
794 262 return 0;
795 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
796}
797EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
798
799void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
800{
801 struct sysfs_dirent *sd = k->sd;
802
803 mutex_lock(&sysfs_mutex);
804
805 if (sd && dir)
806 sd = sysfs_find_dirent(sd, dir, NULL);
807 if (sd && attr)
808 sd = sysfs_find_dirent(sd, attr, NULL);
809 if (sd)
810 sysfs_notify_dirent(sd);
811
812 mutex_unlock(&sysfs_mutex);
813}
814EXPORT_SYMBOL_GPL(sysfs_notify);
815
816const struct file_operations sysfs_file_operations = {
817 .read = seq_read,
818 .write = sysfs_write_file,
819 .llseek = generic_file_llseek,
820 .open = sysfs_open_file,
821 .release = sysfs_release,
822 .poll = sysfs_poll,
823};
824
825const struct file_operations sysfs_bin_operations = {
826 .read = sysfs_bin_read,
827 .write = sysfs_write_file,
828 .llseek = generic_file_llseek,
829 .mmap = sysfs_bin_mmap,
830 .open = sysfs_open_file,
831 .release = sysfs_release,
832 .poll = sysfs_poll,
833};
834
835int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
836 const struct attribute *attr, int type,
837 umode_t amode, const void *ns)
838{
839 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
840 struct sysfs_addrm_cxt acxt;
841 struct sysfs_dirent *sd;
842 int rc;
843
844 sd = sysfs_new_dirent(attr->name, mode, type);
845 if (!sd)
846 return -ENOMEM;
847
848 sd->s_ns = ns;
849 sd->s_attr.attr = (void *)attr;
850 sysfs_dirent_init_lockdep(sd);
851
852 sysfs_addrm_start(&acxt);
853 rc = sysfs_add_one(&acxt, sd, dir_sd);
854 sysfs_addrm_finish(&acxt);
855
856 if (rc)
857 sysfs_put(sd);
858
859 return rc;
860} 263}
861 264
862 265int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
863int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 266 bool is_bin)
864 int type)
865{ 267{
866 return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); 268 return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
867} 269}
868 270
869/** 271/**
@@ -877,8 +279,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
877{ 279{
878 BUG_ON(!kobj || !kobj->sd || !attr); 280 BUG_ON(!kobj || !kobj->sd || !attr);
879 281
880 return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, 282 return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
881 attr->mode, ns);
882 283
883} 284}
884EXPORT_SYMBOL_GPL(sysfs_create_file_ns); 285EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -906,19 +307,21 @@ EXPORT_SYMBOL_GPL(sysfs_create_files);
906int sysfs_add_file_to_group(struct kobject *kobj, 307int sysfs_add_file_to_group(struct kobject *kobj,
907 const struct attribute *attr, const char *group) 308 const struct attribute *attr, const char *group)
908{ 309{
909 struct sysfs_dirent *dir_sd; 310 struct kernfs_node *parent;
910 int error; 311 int error;
911 312
912 if (group) 313 if (group) {
913 dir_sd = sysfs_get_dirent(kobj->sd, group); 314 parent = kernfs_find_and_get(kobj->sd, group);
914 else 315 } else {
915 dir_sd = sysfs_get(kobj->sd); 316 parent = kobj->sd;
317 kernfs_get(parent);
318 }
916 319
917 if (!dir_sd) 320 if (!parent)
918 return -ENOENT; 321 return -ENOENT;
919 322
920 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); 323 error = sysfs_add_file(parent, attr, false);
921 sysfs_put(dir_sd); 324 kernfs_put(parent);
922 325
923 return error; 326 return error;
924} 327}
@@ -934,23 +337,20 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
934int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, 337int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
935 umode_t mode) 338 umode_t mode)
936{ 339{
937 struct sysfs_dirent *sd; 340 struct kernfs_node *kn;
938 struct iattr newattrs; 341 struct iattr newattrs;
939 int rc; 342 int rc;
940 343
941 mutex_lock(&sysfs_mutex); 344 kn = kernfs_find_and_get(kobj->sd, attr->name);
942 345 if (!kn)
943 rc = -ENOENT; 346 return -ENOENT;
944 sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
945 if (!sd)
946 goto out;
947 347
948 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO); 348 newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO);
949 newattrs.ia_valid = ATTR_MODE; 349 newattrs.ia_valid = ATTR_MODE;
950 rc = sysfs_sd_setattr(sd, &newattrs);
951 350
952 out: 351 rc = kernfs_setattr(kn, &newattrs);
953 mutex_unlock(&sysfs_mutex); 352
353 kernfs_put(kn);
954 return rc; 354 return rc;
955} 355}
956EXPORT_SYMBOL_GPL(sysfs_chmod_file); 356EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -966,9 +366,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
966void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, 366void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
967 const void *ns) 367 const void *ns)
968{ 368{
969 struct sysfs_dirent *dir_sd = kobj->sd; 369 struct kernfs_node *parent = kobj->sd;
970 370
971 sysfs_hash_and_remove(dir_sd, attr->name, ns); 371 kernfs_remove_by_name_ns(parent, attr->name, ns);
972} 372}
973EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); 373EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
974 374
@@ -989,15 +389,18 @@ EXPORT_SYMBOL_GPL(sysfs_remove_files);
989void sysfs_remove_file_from_group(struct kobject *kobj, 389void sysfs_remove_file_from_group(struct kobject *kobj,
990 const struct attribute *attr, const char *group) 390 const struct attribute *attr, const char *group)
991{ 391{
992 struct sysfs_dirent *dir_sd; 392 struct kernfs_node *parent;
993 393
994 if (group) 394 if (group) {
995 dir_sd = sysfs_get_dirent(kobj->sd, group); 395 parent = kernfs_find_and_get(kobj->sd, group);
996 else 396 } else {
997 dir_sd = sysfs_get(kobj->sd); 397 parent = kobj->sd;
998 if (dir_sd) { 398 kernfs_get(parent);
999 sysfs_hash_and_remove(dir_sd, attr->name, NULL); 399 }
1000 sysfs_put(dir_sd); 400
401 if (parent) {
402 kernfs_remove_by_name(parent, attr->name);
403 kernfs_put(parent);
1001 } 404 }
1002} 405}
1003EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 406EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
@@ -1012,7 +415,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
1012{ 415{
1013 BUG_ON(!kobj || !kobj->sd || !attr); 416 BUG_ON(!kobj || !kobj->sd || !attr);
1014 417
1015 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 418 return sysfs_add_file(kobj->sd, &attr->attr, true);
1016} 419}
1017EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 420EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1018 421
@@ -1024,7 +427,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
1024void sysfs_remove_bin_file(struct kobject *kobj, 427void sysfs_remove_bin_file(struct kobject *kobj,
1025 const struct bin_attribute *attr) 428 const struct bin_attribute *attr)
1026{ 429{
1027 sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); 430 kernfs_remove_by_name(kobj->sd, attr->attr.name);
1028} 431}
1029EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); 432EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
1030 433
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10e38ce..6b579387c67a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,7 +18,7 @@
18#include "sysfs.h" 18#include "sysfs.h"
19 19
20 20
21static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 21static void remove_files(struct kernfs_node *parent, struct kobject *kobj,
22 const struct attribute_group *grp) 22 const struct attribute_group *grp)
23{ 23{
24 struct attribute *const *attr; 24 struct attribute *const *attr;
@@ -26,13 +26,13 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
26 26
27 if (grp->attrs) 27 if (grp->attrs)
28 for (attr = grp->attrs; *attr; attr++) 28 for (attr = grp->attrs; *attr; attr++)
29 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 29 kernfs_remove_by_name(parent, (*attr)->name);
30 if (grp->bin_attrs) 30 if (grp->bin_attrs)
31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) 31 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
32 sysfs_remove_bin_file(kobj, *bin_attr); 32 sysfs_remove_bin_file(kobj, *bin_attr);
33} 33}
34 34
35static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 35static int create_files(struct kernfs_node *parent, struct kobject *kobj,
36 const struct attribute_group *grp, int update) 36 const struct attribute_group *grp, int update)
37{ 37{
38 struct attribute *const *attr; 38 struct attribute *const *attr;
@@ -49,22 +49,20 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
49 * re-adding (if required) the file. 49 * re-adding (if required) the file.
50 */ 50 */
51 if (update) 51 if (update)
52 sysfs_hash_and_remove(dir_sd, (*attr)->name, 52 kernfs_remove_by_name(parent, (*attr)->name);
53 NULL);
54 if (grp->is_visible) { 53 if (grp->is_visible) {
55 mode = grp->is_visible(kobj, *attr, i); 54 mode = grp->is_visible(kobj, *attr, i);
56 if (!mode) 55 if (!mode)
57 continue; 56 continue;
58 } 57 }
59 error = sysfs_add_file_mode_ns(dir_sd, *attr, 58 error = sysfs_add_file_mode_ns(parent, *attr, false,
60 SYSFS_KOBJ_ATTR,
61 (*attr)->mode | mode, 59 (*attr)->mode | mode,
62 NULL); 60 NULL);
63 if (unlikely(error)) 61 if (unlikely(error))
64 break; 62 break;
65 } 63 }
66 if (error) { 64 if (error) {
67 remove_files(dir_sd, kobj, grp); 65 remove_files(parent, kobj, grp);
68 goto exit; 66 goto exit;
69 } 67 }
70 } 68 }
@@ -78,7 +76,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
78 break; 76 break;
79 } 77 }
80 if (error) 78 if (error)
81 remove_files(dir_sd, kobj, grp); 79 remove_files(parent, kobj, grp);
82 } 80 }
83exit: 81exit:
84 return error; 82 return error;
@@ -88,7 +86,7 @@ exit:
88static int internal_create_group(struct kobject *kobj, int update, 86static int internal_create_group(struct kobject *kobj, int update,
89 const struct attribute_group *grp) 87 const struct attribute_group *grp)
90{ 88{
91 struct sysfs_dirent *sd; 89 struct kernfs_node *kn;
92 int error; 90 int error;
93 91
94 BUG_ON(!kobj || (!update && !kobj->sd)); 92 BUG_ON(!kobj || (!update && !kobj->sd));
@@ -102,18 +100,22 @@ static int internal_create_group(struct kobject *kobj, int update,
102 return -EINVAL; 100 return -EINVAL;
103 } 101 }
104 if (grp->name) { 102 if (grp->name) {
105 error = sysfs_create_subdir(kobj, grp->name, &sd); 103 kn = kernfs_create_dir(kobj->sd, grp->name,
106 if (error) 104 S_IRWXU | S_IRUGO | S_IXUGO, kobj);
107 return error; 105 if (IS_ERR(kn)) {
106 if (PTR_ERR(kn) == -EEXIST)
107 sysfs_warn_dup(kobj->sd, grp->name);
108 return PTR_ERR(kn);
109 }
108 } else 110 } else
109 sd = kobj->sd; 111 kn = kobj->sd;
110 sysfs_get(sd); 112 kernfs_get(kn);
111 error = create_files(sd, kobj, grp, update); 113 error = create_files(kn, kobj, grp, update);
112 if (error) { 114 if (error) {
113 if (grp->name) 115 if (grp->name)
114 sysfs_remove(sd); 116 kernfs_remove(kn);
115 } 117 }
116 sysfs_put(sd); 118 kernfs_put(kn);
117 return error; 119 return error;
118} 120}
119 121
@@ -203,25 +205,27 @@ EXPORT_SYMBOL_GPL(sysfs_update_group);
203void sysfs_remove_group(struct kobject *kobj, 205void sysfs_remove_group(struct kobject *kobj,
204 const struct attribute_group *grp) 206 const struct attribute_group *grp)
205{ 207{
206 struct sysfs_dirent *dir_sd = kobj->sd; 208 struct kernfs_node *parent = kobj->sd;
207 struct sysfs_dirent *sd; 209 struct kernfs_node *kn;
208 210
209 if (grp->name) { 211 if (grp->name) {
210 sd = sysfs_get_dirent(dir_sd, grp->name); 212 kn = kernfs_find_and_get(parent, grp->name);
211 if (!sd) { 213 if (!kn) {
212 WARN(!sd, KERN_WARNING 214 WARN(!kn, KERN_WARNING
213 "sysfs group %p not found for kobject '%s'\n", 215 "sysfs group %p not found for kobject '%s'\n",
214 grp, kobject_name(kobj)); 216 grp, kobject_name(kobj));
215 return; 217 return;
216 } 218 }
217 } else 219 } else {
218 sd = sysfs_get(dir_sd); 220 kn = parent;
221 kernfs_get(kn);
222 }
219 223
220 remove_files(sd, kobj, grp); 224 remove_files(kn, kobj, grp);
221 if (grp->name) 225 if (grp->name)
222 sysfs_remove(sd); 226 kernfs_remove(kn);
223 227
224 sysfs_put(sd); 228 kernfs_put(kn);
225} 229}
226EXPORT_SYMBOL_GPL(sysfs_remove_group); 230EXPORT_SYMBOL_GPL(sysfs_remove_group);
227 231
@@ -257,22 +261,22 @@ EXPORT_SYMBOL_GPL(sysfs_remove_groups);
257int sysfs_merge_group(struct kobject *kobj, 261int sysfs_merge_group(struct kobject *kobj,
258 const struct attribute_group *grp) 262 const struct attribute_group *grp)
259{ 263{
260 struct sysfs_dirent *dir_sd; 264 struct kernfs_node *parent;
261 int error = 0; 265 int error = 0;
262 struct attribute *const *attr; 266 struct attribute *const *attr;
263 int i; 267 int i;
264 268
265 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 269 parent = kernfs_find_and_get(kobj->sd, grp->name);
266 if (!dir_sd) 270 if (!parent)
267 return -ENOENT; 271 return -ENOENT;
268 272
269 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) 273 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
270 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); 274 error = sysfs_add_file(parent, *attr, false);
271 if (error) { 275 if (error) {
272 while (--i >= 0) 276 while (--i >= 0)
273 sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); 277 kernfs_remove_by_name(parent, (*--attr)->name);
274 } 278 }
275 sysfs_put(dir_sd); 279 kernfs_put(parent);
276 280
277 return error; 281 return error;
278} 282}
@@ -286,14 +290,14 @@ EXPORT_SYMBOL_GPL(sysfs_merge_group);
286void sysfs_unmerge_group(struct kobject *kobj, 290void sysfs_unmerge_group(struct kobject *kobj,
287 const struct attribute_group *grp) 291 const struct attribute_group *grp)
288{ 292{
289 struct sysfs_dirent *dir_sd; 293 struct kernfs_node *parent;
290 struct attribute *const *attr; 294 struct attribute *const *attr;
291 295
292 dir_sd = sysfs_get_dirent(kobj->sd, grp->name); 296 parent = kernfs_find_and_get(kobj->sd, grp->name);
293 if (dir_sd) { 297 if (parent) {
294 for (attr = grp->attrs; *attr; ++attr) 298 for (attr = grp->attrs; *attr; ++attr)
295 sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); 299 kernfs_remove_by_name(parent, (*attr)->name);
296 sysfs_put(dir_sd); 300 kernfs_put(parent);
297 } 301 }
298} 302}
299EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 303EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
@@ -308,15 +312,15 @@ EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
308int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, 312int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
309 struct kobject *target, const char *link_name) 313 struct kobject *target, const char *link_name)
310{ 314{
311 struct sysfs_dirent *dir_sd; 315 struct kernfs_node *parent;
312 int error = 0; 316 int error = 0;
313 317
314 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 318 parent = kernfs_find_and_get(kobj->sd, group_name);
315 if (!dir_sd) 319 if (!parent)
316 return -ENOENT; 320 return -ENOENT;
317 321
318 error = sysfs_create_link_sd(dir_sd, target, link_name); 322 error = sysfs_create_link_sd(parent, target, link_name);
319 sysfs_put(dir_sd); 323 kernfs_put(parent);
320 324
321 return error; 325 return error;
322} 326}
@@ -331,12 +335,12 @@ EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
331void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, 335void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
332 const char *link_name) 336 const char *link_name)
333{ 337{
334 struct sysfs_dirent *dir_sd; 338 struct kernfs_node *parent;
335 339
336 dir_sd = sysfs_get_dirent(kobj->sd, group_name); 340 parent = kernfs_find_and_get(kobj->sd, group_name);
337 if (dir_sd) { 341 if (parent) {
338 sysfs_hash_and_remove(dir_sd, link_name, NULL); 342 kernfs_remove_by_name(parent, link_name);
339 sysfs_put(dir_sd); 343 kernfs_put(parent);
340 } 344 }
341} 345}
342EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); 346EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
deleted file mode 100644
index 1750f790af3b..000000000000
--- a/fs/sysfs/inode.c
+++ /dev/null
@@ -1,331 +0,0 @@
1/*
2 * fs/sysfs/inode.c - basic sysfs inode and dentry operations
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */
12
13#undef DEBUG
14
15#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/backing-dev.h>
18#include <linux/capability.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/slab.h>
22#include <linux/sysfs.h>
23#include <linux/xattr.h>
24#include <linux/security.h>
25#include "sysfs.h"
26
27static const struct address_space_operations sysfs_aops = {
28 .readpage = simple_readpage,
29 .write_begin = simple_write_begin,
30 .write_end = simple_write_end,
31};
32
33static struct backing_dev_info sysfs_backing_dev_info = {
34 .name = "sysfs",
35 .ra_pages = 0, /* No readahead */
36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
37};
38
39static const struct inode_operations sysfs_inode_operations = {
40 .permission = sysfs_permission,
41 .setattr = sysfs_setattr,
42 .getattr = sysfs_getattr,
43 .setxattr = sysfs_setxattr,
44};
45
46int __init sysfs_inode_init(void)
47{
48 return bdi_init(&sysfs_backing_dev_info);
49}
50
51static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
52{
53 struct sysfs_inode_attrs *attrs;
54 struct iattr *iattrs;
55
56 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
57 if (!attrs)
58 return NULL;
59 iattrs = &attrs->ia_iattr;
60
61 /* assign default attributes */
62 iattrs->ia_mode = sd->s_mode;
63 iattrs->ia_uid = GLOBAL_ROOT_UID;
64 iattrs->ia_gid = GLOBAL_ROOT_GID;
65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
66
67 return attrs;
68}
69
70int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
71{
72 struct sysfs_inode_attrs *sd_attrs;
73 struct iattr *iattrs;
74 unsigned int ia_valid = iattr->ia_valid;
75
76 sd_attrs = sd->s_iattr;
77
78 if (!sd_attrs) {
79 /* setting attributes for the first time, allocate now */
80 sd_attrs = sysfs_init_inode_attrs(sd);
81 if (!sd_attrs)
82 return -ENOMEM;
83 sd->s_iattr = sd_attrs;
84 }
85 /* attributes were changed at least once in past */
86 iattrs = &sd_attrs->ia_iattr;
87
88 if (ia_valid & ATTR_UID)
89 iattrs->ia_uid = iattr->ia_uid;
90 if (ia_valid & ATTR_GID)
91 iattrs->ia_gid = iattr->ia_gid;
92 if (ia_valid & ATTR_ATIME)
93 iattrs->ia_atime = iattr->ia_atime;
94 if (ia_valid & ATTR_MTIME)
95 iattrs->ia_mtime = iattr->ia_mtime;
96 if (ia_valid & ATTR_CTIME)
97 iattrs->ia_ctime = iattr->ia_ctime;
98 if (ia_valid & ATTR_MODE) {
99 umode_t mode = iattr->ia_mode;
100 iattrs->ia_mode = sd->s_mode = mode;
101 }
102 return 0;
103}
104
105int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
106{
107 struct inode *inode = dentry->d_inode;
108 struct sysfs_dirent *sd = dentry->d_fsdata;
109 int error;
110
111 if (!sd)
112 return -EINVAL;
113
114 mutex_lock(&sysfs_mutex);
115 error = inode_change_ok(inode, iattr);
116 if (error)
117 goto out;
118
119 error = sysfs_sd_setattr(sd, iattr);
120 if (error)
121 goto out;
122
123 /* this ignores size changes */
124 setattr_copy(inode, iattr);
125
126out:
127 mutex_unlock(&sysfs_mutex);
128 return error;
129}
130
131static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata,
132 u32 *secdata_len)
133{
134 struct sysfs_inode_attrs *iattrs;
135 void *old_secdata;
136 size_t old_secdata_len;
137
138 if (!sd->s_iattr) {
139 sd->s_iattr = sysfs_init_inode_attrs(sd);
140 if (!sd->s_iattr)
141 return -ENOMEM;
142 }
143
144 iattrs = sd->s_iattr;
145 old_secdata = iattrs->ia_secdata;
146 old_secdata_len = iattrs->ia_secdata_len;
147
148 iattrs->ia_secdata = *secdata;
149 iattrs->ia_secdata_len = *secdata_len;
150
151 *secdata = old_secdata;
152 *secdata_len = old_secdata_len;
153 return 0;
154}
155
156int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
157 size_t size, int flags)
158{
159 struct sysfs_dirent *sd = dentry->d_fsdata;
160 void *secdata;
161 int error;
162 u32 secdata_len = 0;
163
164 if (!sd)
165 return -EINVAL;
166
167 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
168 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
169 error = security_inode_setsecurity(dentry->d_inode, suffix,
170 value, size, flags);
171 if (error)
172 goto out;
173 error = security_inode_getsecctx(dentry->d_inode,
174 &secdata, &secdata_len);
175 if (error)
176 goto out;
177
178 mutex_lock(&sysfs_mutex);
179 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
180 mutex_unlock(&sysfs_mutex);
181
182 if (secdata)
183 security_release_secctx(secdata, secdata_len);
184 } else
185 return -EINVAL;
186out:
187 return error;
188}
189
190static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
191{
192 inode->i_mode = mode;
193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
194}
195
196static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
197{
198 inode->i_uid = iattr->ia_uid;
199 inode->i_gid = iattr->ia_gid;
200 inode->i_atime = iattr->ia_atime;
201 inode->i_mtime = iattr->ia_mtime;
202 inode->i_ctime = iattr->ia_ctime;
203}
204
205static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
206{
207 struct sysfs_inode_attrs *iattrs = sd->s_iattr;
208
209 inode->i_mode = sd->s_mode;
210 if (iattrs) {
211 /* sysfs_dirent has non-default attributes
212 * get them from persistent copy in sysfs_dirent
213 */
214 set_inode_attr(inode, &iattrs->ia_iattr);
215 security_inode_notifysecctx(inode,
216 iattrs->ia_secdata,
217 iattrs->ia_secdata_len);
218 }
219
220 if (sysfs_type(sd) == SYSFS_DIR)
221 set_nlink(inode, sd->s_dir.subdirs + 2);
222}
223
224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
225 struct kstat *stat)
226{
227 struct sysfs_dirent *sd = dentry->d_fsdata;
228 struct inode *inode = dentry->d_inode;
229
230 mutex_lock(&sysfs_mutex);
231 sysfs_refresh_inode(sd, inode);
232 mutex_unlock(&sysfs_mutex);
233
234 generic_fillattr(inode, stat);
235 return 0;
236}
237
238static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
239{
240 struct bin_attribute *bin_attr;
241
242 inode->i_private = sysfs_get(sd);
243 inode->i_mapping->a_ops = &sysfs_aops;
244 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
245 inode->i_op = &sysfs_inode_operations;
246
247 set_default_inode_attr(inode, sd->s_mode);
248 sysfs_refresh_inode(sd, inode);
249
250 /* initialize inode according to type */
251 switch (sysfs_type(sd)) {
252 case SYSFS_DIR:
253 inode->i_op = &sysfs_dir_inode_operations;
254 inode->i_fop = &sysfs_dir_operations;
255 break;
256 case SYSFS_KOBJ_ATTR:
257 inode->i_size = PAGE_SIZE;
258 inode->i_fop = &sysfs_file_operations;
259 break;
260 case SYSFS_KOBJ_BIN_ATTR:
261 bin_attr = sd->s_attr.bin_attr;
262 inode->i_size = bin_attr->size;
263 inode->i_fop = &sysfs_bin_operations;
264 break;
265 case SYSFS_KOBJ_LINK:
266 inode->i_op = &sysfs_symlink_inode_operations;
267 break;
268 default:
269 BUG();
270 }
271
272 unlock_new_inode(inode);
273}
274
275/**
276 * sysfs_get_inode - get inode for sysfs_dirent
277 * @sb: super block
278 * @sd: sysfs_dirent to allocate inode for
279 *
280 * Get inode for @sd. If such inode doesn't exist, a new inode
281 * is allocated and basics are initialized. New inode is
282 * returned locked.
283 *
284 * LOCKING:
285 * Kernel thread context (may sleep).
286 *
287 * RETURNS:
288 * Pointer to allocated inode on success, NULL on failure.
289 */
290struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
291{
292 struct inode *inode;
293
294 inode = iget_locked(sb, sd->s_ino);
295 if (inode && (inode->i_state & I_NEW))
296 sysfs_init_inode(sd, inode);
297
298 return inode;
299}
300
301/*
302 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
303 * To prevent the sysfs inode numbers from being freed prematurely we take a
304 * reference to sysfs_dirent from the sysfs inode. A
305 * super_operations.evict_inode() implementation is needed to drop that
306 * reference upon inode destruction.
307 */
308void sysfs_evict_inode(struct inode *inode)
309{
310 struct sysfs_dirent *sd = inode->i_private;
311
312 truncate_inode_pages(&inode->i_data, 0);
313 clear_inode(inode);
314 sysfs_put(sd);
315}
316
317int sysfs_permission(struct inode *inode, int mask)
318{
319 struct sysfs_dirent *sd;
320
321 if (mask & MAY_NOT_BLOCK)
322 return -ECHILD;
323
324 sd = inode->i_private;
325
326 mutex_lock(&sysfs_mutex);
327 sysfs_refresh_inode(sd, inode);
328 mutex_unlock(&sysfs_mutex);
329
330 return generic_permission(inode, mask);
331}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 834ec2cdb7a3..3eaf5c6622eb 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -14,146 +14,42 @@
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/pagemap.h>
18#include <linux/init.h> 17#include <linux/init.h>
19#include <linux/module.h>
20#include <linux/magic.h>
21#include <linux/slab.h>
22#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
23 19
24#include "sysfs.h" 20#include "sysfs.h"
25 21
26 22static struct kernfs_root *sysfs_root;
27static struct vfsmount *sysfs_mnt; 23struct kernfs_node *sysfs_root_kn;
28struct kmem_cache *sysfs_dir_cachep;
29
30static const struct super_operations sysfs_ops = {
31 .statfs = simple_statfs,
32 .drop_inode = generic_delete_inode,
33 .evict_inode = sysfs_evict_inode,
34};
35
36struct sysfs_dirent sysfs_root = {
37 .s_name = "",
38 .s_count = ATOMIC_INIT(1),
39 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
40 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
41 .s_ino = 1,
42};
43
44static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
45{
46 struct inode *inode;
47 struct dentry *root;
48
49 sb->s_blocksize = PAGE_CACHE_SIZE;
50 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
51 sb->s_magic = SYSFS_MAGIC;
52 sb->s_op = &sysfs_ops;
53 sb->s_time_gran = 1;
54
55 /* get root inode, initialize and unlock it */
56 mutex_lock(&sysfs_mutex);
57 inode = sysfs_get_inode(sb, &sysfs_root);
58 mutex_unlock(&sysfs_mutex);
59 if (!inode) {
60 pr_debug("sysfs: could not get root inode\n");
61 return -ENOMEM;
62 }
63
64 /* instantiate and link root dentry */
65 root = d_make_root(inode);
66 if (!root) {
67 pr_debug("%s: could not get root dentry!\n", __func__);
68 return -ENOMEM;
69 }
70 root->d_fsdata = &sysfs_root;
71 sb->s_root = root;
72 sb->s_d_op = &sysfs_dentry_ops;
73 return 0;
74}
75
76static int sysfs_test_super(struct super_block *sb, void *data)
77{
78 struct sysfs_super_info *sb_info = sysfs_info(sb);
79 struct sysfs_super_info *info = data;
80 enum kobj_ns_type type;
81 int found = 1;
82
83 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
84 if (sb_info->ns[type] != info->ns[type])
85 found = 0;
86 }
87 return found;
88}
89
90static int sysfs_set_super(struct super_block *sb, void *data)
91{
92 int error;
93 error = set_anon_super(sb, data);
94 if (!error)
95 sb->s_fs_info = data;
96 return error;
97}
98
99static void free_sysfs_super_info(struct sysfs_super_info *info)
100{
101 int type;
102 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
103 kobj_ns_drop(type, info->ns[type]);
104 kfree(info);
105}
106 24
107static struct dentry *sysfs_mount(struct file_system_type *fs_type, 25static struct dentry *sysfs_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data) 26 int flags, const char *dev_name, void *data)
109{ 27{
110 struct sysfs_super_info *info; 28 struct dentry *root;
111 enum kobj_ns_type type; 29 void *ns;
112 struct super_block *sb; 30 bool new_sb;
113 int error;
114 31
115 if (!(flags & MS_KERNMOUNT)) { 32 if (!(flags & MS_KERNMOUNT)) {
116 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 33 if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
117 return ERR_PTR(-EPERM); 34 return ERR_PTR(-EPERM);
118 35
119 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { 36 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
120 if (!kobj_ns_current_may_mount(type)) 37 return ERR_PTR(-EPERM);
121 return ERR_PTR(-EPERM);
122 }
123 }
124
125 info = kzalloc(sizeof(*info), GFP_KERNEL);
126 if (!info)
127 return ERR_PTR(-ENOMEM);
128
129 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
130 info->ns[type] = kobj_ns_grab_current(type);
131
132 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
133 if (IS_ERR(sb) || sb->s_fs_info != info)
134 free_sysfs_super_info(info);
135 if (IS_ERR(sb))
136 return ERR_CAST(sb);
137 if (!sb->s_root) {
138 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
139 if (error) {
140 deactivate_locked_super(sb);
141 return ERR_PTR(error);
142 }
143 sb->s_flags |= MS_ACTIVE;
144 } 38 }
145 39
146 return dget(sb->s_root); 40 ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
41 root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns);
42 if (IS_ERR(root) || !new_sb)
43 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
44 return root;
147} 45}
148 46
149static void sysfs_kill_sb(struct super_block *sb) 47static void sysfs_kill_sb(struct super_block *sb)
150{ 48{
151 struct sysfs_super_info *info = sysfs_info(sb); 49 void *ns = (void *)kernfs_super_ns(sb);
152 /* Remove the superblock from fs_supers/s_instances 50
153 * so we can't find it, before freeing sysfs_super_info. 51 kernfs_kill_sb(sb);
154 */ 52 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
155 kill_anon_super(sb);
156 free_sysfs_super_info(info);
157} 53}
158 54
159static struct file_system_type sysfs_fs_type = { 55static struct file_system_type sysfs_fs_type = {
@@ -165,48 +61,19 @@ static struct file_system_type sysfs_fs_type = {
165 61
166int __init sysfs_init(void) 62int __init sysfs_init(void)
167{ 63{
168 int err = -ENOMEM; 64 int err;
169 65
170 sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", 66 sysfs_root = kernfs_create_root(NULL, NULL);
171 sizeof(struct sysfs_dirent), 67 if (IS_ERR(sysfs_root))
172 0, 0, NULL); 68 return PTR_ERR(sysfs_root);
173 if (!sysfs_dir_cachep)
174 goto out;
175 69
176 err = sysfs_inode_init(); 70 sysfs_root_kn = sysfs_root->kn;
177 if (err)
178 goto out_err;
179 71
180 err = register_filesystem(&sysfs_fs_type); 72 err = register_filesystem(&sysfs_fs_type);
181 if (!err) { 73 if (err) {
182 sysfs_mnt = kern_mount(&sysfs_fs_type); 74 kernfs_destroy_root(sysfs_root);
183 if (IS_ERR(sysfs_mnt)) { 75 return err;
184 printk(KERN_ERR "sysfs: could not mount!\n"); 76 }
185 err = PTR_ERR(sysfs_mnt);
186 sysfs_mnt = NULL;
187 unregister_filesystem(&sysfs_fs_type);
188 goto out_err;
189 }
190 } else
191 goto out_err;
192out:
193 return err;
194out_err:
195 kmem_cache_destroy(sysfs_dir_cachep);
196 sysfs_dir_cachep = NULL;
197 goto out;
198}
199
200#undef sysfs_get
201struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
202{
203 return __sysfs_get(sd);
204}
205EXPORT_SYMBOL_GPL(sysfs_get);
206 77
207#undef sysfs_put 78 return 0;
208void sysfs_put(struct sysfs_dirent *sd)
209{
210 __sysfs_put(sd);
211} 79}
212EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1bf1a09..aecb15f84557 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,109 +11,73 @@
11 */ 11 */
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/gfp.h>
15#include <linux/mount.h>
16#include <linux/module.h> 14#include <linux/module.h>
17#include <linux/kobject.h> 15#include <linux/kobject.h>
18#include <linux/namei.h>
19#include <linux/mutex.h> 16#include <linux/mutex.h>
20#include <linux/security.h> 17#include <linux/security.h>
21 18
22#include "sysfs.h" 19#include "sysfs.h"
23 20
24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, 21static int sysfs_do_create_link_sd(struct kernfs_node *parent,
25 struct kobject *target, 22 struct kobject *target_kobj,
26 const char *name, int warn) 23 const char *name, int warn)
27{ 24{
28 struct sysfs_dirent *target_sd = NULL; 25 struct kernfs_node *kn, *target = NULL;
29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type;
32 int error;
33 26
34 BUG_ON(!name || !parent_sd); 27 BUG_ON(!name || !parent);
35 28
36 /* 29 /*
37 * We don't own @target and it may be removed at any time. 30 * We don't own @target_kobj and it may be removed at any time.
38 * Synchronize using sysfs_symlink_target_lock. See 31 * Synchronize using sysfs_symlink_target_lock. See
39 * sysfs_remove_dir() for details. 32 * sysfs_remove_dir() for details.
40 */ 33 */
41 spin_lock(&sysfs_symlink_target_lock); 34 spin_lock(&sysfs_symlink_target_lock);
42 if (target->sd) 35 if (target_kobj->sd) {
43 target_sd = sysfs_get(target->sd); 36 target = target_kobj->sd;
37 kernfs_get(target);
38 }
44 spin_unlock(&sysfs_symlink_target_lock); 39 spin_unlock(&sysfs_symlink_target_lock);
45 40
46 error = -ENOENT; 41 if (!target)
47 if (!target_sd) 42 return -ENOENT;
48 goto out_put;
49
50 error = -ENOMEM;
51 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
52 if (!sd)
53 goto out_put;
54 43
55 ns_type = sysfs_ns_type(parent_sd); 44 kn = kernfs_create_link(parent, name, target);
56 if (ns_type) 45 kernfs_put(target);
57 sd->s_ns = target_sd->s_ns;
58 sd->s_symlink.target_sd = target_sd;
59 target_sd = NULL; /* reference is now owned by the symlink */
60
61 sysfs_addrm_start(&acxt);
62 /* Symlinks must be between directories with the same ns_type */
63 if (!ns_type ||
64 (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
65 if (warn)
66 error = sysfs_add_one(&acxt, sd, parent_sd);
67 else
68 error = __sysfs_add_one(&acxt, sd, parent_sd);
69 } else {
70 error = -EINVAL;
71 WARN(1, KERN_WARNING
72 "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
73 parent_sd->s_name,
74 sd->s_name,
75 sd->s_symlink.target_sd->s_parent->s_name,
76 sd->s_symlink.target_sd->s_name);
77 }
78 sysfs_addrm_finish(&acxt);
79 46
80 if (error) 47 if (!IS_ERR(kn))
81 goto out_put; 48 return 0;
82 49
83 return 0; 50 if (warn && PTR_ERR(kn) == -EEXIST)
84 51 sysfs_warn_dup(parent, name);
85 out_put: 52 return PTR_ERR(kn);
86 sysfs_put(target_sd);
87 sysfs_put(sd);
88 return error;
89} 53}
90 54
91/** 55/**
92 * sysfs_create_link_sd - create symlink to a given object. 56 * sysfs_create_link_sd - create symlink to a given object.
93 * @sd: directory we're creating the link in. 57 * @kn: directory we're creating the link in.
94 * @target: object we're pointing to. 58 * @target: object we're pointing to.
95 * @name: name of the symlink. 59 * @name: name of the symlink.
96 */ 60 */
97int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, 61int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
98 const char *name) 62 const char *name)
99{ 63{
100 return sysfs_do_create_link_sd(sd, target, name, 1); 64 return sysfs_do_create_link_sd(kn, target, name, 1);
101} 65}
102 66
103static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 67static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
104 const char *name, int warn) 68 const char *name, int warn)
105{ 69{
106 struct sysfs_dirent *parent_sd = NULL; 70 struct kernfs_node *parent = NULL;
107 71
108 if (!kobj) 72 if (!kobj)
109 parent_sd = &sysfs_root; 73 parent = sysfs_root_kn;
110 else 74 else
111 parent_sd = kobj->sd; 75 parent = kobj->sd;
112 76
113 if (!parent_sd) 77 if (!parent)
114 return -EFAULT; 78 return -EFAULT;
115 79
116 return sysfs_do_create_link_sd(parent_sd, target, name, warn); 80 return sysfs_do_create_link_sd(parent, target, name, warn);
117} 81}
118 82
119/** 83/**
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
164 * sysfs_remove_dir() for details. 128 * sysfs_remove_dir() for details.
165 */ 129 */
166 spin_lock(&sysfs_symlink_target_lock); 130 spin_lock(&sysfs_symlink_target_lock);
167 if (targ->sd && sysfs_ns_type(kobj->sd)) 131 if (targ->sd && kernfs_ns_enabled(kobj->sd))
168 ns = targ->sd->s_ns; 132 ns = targ->sd->ns;
169 spin_unlock(&sysfs_symlink_target_lock); 133 spin_unlock(&sysfs_symlink_target_lock);
170 sysfs_hash_and_remove(kobj->sd, name, ns); 134 kernfs_remove_by_name_ns(kobj->sd, name, ns);
171} 135}
172 136
173/** 137/**
@@ -177,14 +141,14 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
177 */ 141 */
178void sysfs_remove_link(struct kobject *kobj, const char *name) 142void sysfs_remove_link(struct kobject *kobj, const char *name)
179{ 143{
180 struct sysfs_dirent *parent_sd = NULL; 144 struct kernfs_node *parent = NULL;
181 145
182 if (!kobj) 146 if (!kobj)
183 parent_sd = &sysfs_root; 147 parent = sysfs_root_kn;
184 else 148 else
185 parent_sd = kobj->sd; 149 parent = kobj->sd;
186 150
187 sysfs_hash_and_remove(parent_sd, name, NULL); 151 kernfs_remove_by_name(parent, name);
188} 152}
189EXPORT_SYMBOL_GPL(sysfs_remove_link); 153EXPORT_SYMBOL_GPL(sysfs_remove_link);
190 154
@@ -201,130 +165,33 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link);
201int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, 165int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
202 const char *old, const char *new, const void *new_ns) 166 const char *old, const char *new, const void *new_ns)
203{ 167{
204 struct sysfs_dirent *parent_sd, *sd = NULL; 168 struct kernfs_node *parent, *kn = NULL;
205 const void *old_ns = NULL; 169 const void *old_ns = NULL;
206 int result; 170 int result;
207 171
208 if (!kobj) 172 if (!kobj)
209 parent_sd = &sysfs_root; 173 parent = sysfs_root_kn;
210 else 174 else
211 parent_sd = kobj->sd; 175 parent = kobj->sd;
212 176
213 if (targ->sd) 177 if (targ->sd)
214 old_ns = targ->sd->s_ns; 178 old_ns = targ->sd->ns;
215 179
216 result = -ENOENT; 180 result = -ENOENT;
217 sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); 181 kn = kernfs_find_and_get_ns(parent, old, old_ns);
218 if (!sd) 182 if (!kn)
219 goto out; 183 goto out;
220 184
221 result = -EINVAL; 185 result = -EINVAL;
222 if (sysfs_type(sd) != SYSFS_KOBJ_LINK) 186 if (kernfs_type(kn) != KERNFS_LINK)
223 goto out; 187 goto out;
224 if (sd->s_symlink.target_sd->s_dir.kobj != targ) 188 if (kn->symlink.target_kn->priv != targ)
225 goto out; 189 goto out;
226 190
227 result = sysfs_rename(sd, parent_sd, new, new_ns); 191 result = kernfs_rename_ns(kn, parent, new, new_ns);
228 192
229out: 193out:
230 sysfs_put(sd); 194 kernfs_put(kn);
231 return result; 195 return result;
232} 196}
233EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); 197EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
234
235static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
236 struct sysfs_dirent *target_sd, char *path)
237{
238 struct sysfs_dirent *base, *sd;
239 char *s = path;
240 int len = 0;
241
242 /* go up to the root, stop at the base */
243 base = parent_sd;
244 while (base->s_parent) {
245 sd = target_sd->s_parent;
246 while (sd->s_parent && base != sd)
247 sd = sd->s_parent;
248
249 if (base == sd)
250 break;
251
252 strcpy(s, "../");
253 s += 3;
254 base = base->s_parent;
255 }
256
257 /* determine end of target string for reverse fillup */
258 sd = target_sd;
259 while (sd->s_parent && sd != base) {
260 len += strlen(sd->s_name) + 1;
261 sd = sd->s_parent;
262 }
263
264 /* check limits */
265 if (len < 2)
266 return -EINVAL;
267 len--;
268 if ((s - path) + len > PATH_MAX)
269 return -ENAMETOOLONG;
270
271 /* reverse fillup of target string from target to base */
272 sd = target_sd;
273 while (sd->s_parent && sd != base) {
274 int slen = strlen(sd->s_name);
275
276 len -= slen;
277 strncpy(s + len, sd->s_name, slen);
278 if (len)
279 s[--len] = '/';
280
281 sd = sd->s_parent;
282 }
283
284 return 0;
285}
286
287static int sysfs_getlink(struct dentry *dentry, char *path)
288{
289 struct sysfs_dirent *sd = dentry->d_fsdata;
290 struct sysfs_dirent *parent_sd = sd->s_parent;
291 struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
292 int error;
293
294 mutex_lock(&sysfs_mutex);
295 error = sysfs_get_target_path(parent_sd, target_sd, path);
296 mutex_unlock(&sysfs_mutex);
297
298 return error;
299}
300
301static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
302{
303 int error = -ENOMEM;
304 unsigned long page = get_zeroed_page(GFP_KERNEL);
305 if (page) {
306 error = sysfs_getlink(dentry, (char *) page);
307 if (error < 0)
308 free_page((unsigned long)page);
309 }
310 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
311 return NULL;
312}
313
314static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
315 void *cookie)
316{
317 char *page = nd_get_link(nd);
318 if (!IS_ERR(page))
319 free_page((unsigned long)page);
320}
321
322const struct inode_operations sysfs_symlink_inode_operations = {
323 .setxattr = sysfs_setxattr,
324 .readlink = generic_readlink,
325 .follow_link = sysfs_follow_link,
326 .put_link = sysfs_put_link,
327 .setattr = sysfs_setattr,
328 .getattr = sysfs_getattr,
329 .permission = sysfs_permission,
330};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fbfb3f6..0e2f1cccb812 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,248 +8,36 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h> 11#ifndef __SYSFS_INTERNAL_H
12#include <linux/kobject_ns.h> 12#define __SYSFS_INTERNAL_H
13#include <linux/fs.h>
14#include <linux/rbtree.h>
15 13
16struct sysfs_open_dirent; 14#include <linux/sysfs.h>
17
18/* type-specific structures for sysfs_dirent->s_* union members */
19struct sysfs_elem_dir {
20 struct kobject *kobj;
21
22 unsigned long subdirs;
23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root children;
25};
26
27struct sysfs_elem_symlink {
28 struct sysfs_dirent *target_sd;
29};
30
31struct sysfs_elem_attr {
32 union {
33 struct attribute *attr;
34 struct bin_attribute *bin_attr;
35 };
36 struct sysfs_open_dirent *open;
37};
38
39struct sysfs_inode_attrs {
40 struct iattr ia_iattr;
41 void *ia_secdata;
42 u32 ia_secdata_len;
43};
44
45/*
46 * sysfs_dirent - the building block of sysfs hierarchy. Each and
47 * every sysfs node is represented by single sysfs_dirent.
48 *
49 * As long as s_count reference is held, the sysfs_dirent itself is
50 * accessible. Dereferencing s_elem or any other outer entity
51 * requires s_active reference.
52 */
53struct sysfs_dirent {
54 atomic_t s_count;
55 atomic_t s_active;
56#ifdef CONFIG_DEBUG_LOCK_ALLOC
57 struct lockdep_map dep_map;
58#endif
59 struct sysfs_dirent *s_parent;
60 const char *s_name;
61
62 struct rb_node s_rb;
63
64 union {
65 struct completion *completion;
66 struct sysfs_dirent *removed_list;
67 } u;
68
69 const void *s_ns; /* namespace tag */
70 unsigned int s_hash; /* ns + name hash */
71 union {
72 struct sysfs_elem_dir s_dir;
73 struct sysfs_elem_symlink s_symlink;
74 struct sysfs_elem_attr s_attr;
75 };
76
77 unsigned short s_flags;
78 umode_t s_mode;
79 unsigned int s_ino;
80 struct sysfs_inode_attrs *s_iattr;
81};
82
83#define SD_DEACTIVATED_BIAS INT_MIN
84
85#define SYSFS_TYPE_MASK 0x00ff
86#define SYSFS_DIR 0x0001
87#define SYSFS_KOBJ_ATTR 0x0002
88#define SYSFS_KOBJ_BIN_ATTR 0x0004
89#define SYSFS_KOBJ_LINK 0x0008
90#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
91#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
92
93/* identify any namespace tag on sysfs_dirents */
94#define SYSFS_NS_TYPE_MASK 0xf00
95#define SYSFS_NS_TYPE_SHIFT 8
96
97#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
98#define SYSFS_FLAG_REMOVED 0x02000
99
100static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
101{
102 return sd->s_flags & SYSFS_TYPE_MASK;
103}
104
105/*
106 * Return any namespace tags on this dirent.
107 * enum kobj_ns_type is defined in linux/kobject.h
108 */
109static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
110{
111 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
112}
113
114#ifdef CONFIG_DEBUG_LOCK_ALLOC
115
116#define sysfs_dirent_init_lockdep(sd) \
117do { \
118 struct attribute *attr = sd->s_attr.attr; \
119 struct lock_class_key *key = attr->key; \
120 if (!key) \
121 key = &attr->skey; \
122 \
123 lockdep_init_map(&sd->dep_map, "s_active", key, 0); \
124} while (0)
125
126/* Test for attributes that want to ignore lockdep for read-locking */
127static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
128{
129 int type = sysfs_type(sd);
130
131 return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
132 sd->s_attr.attr->ignore_lockdep;
133}
134
135#else
136
137#define sysfs_dirent_init_lockdep(sd) do {} while (0)
138
139static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
140{
141 return true;
142}
143
144#endif
145
146/*
147 * Context structure to be used while adding/removing nodes.
148 */
149struct sysfs_addrm_cxt {
150 struct sysfs_dirent *removed;
151};
152 15
153/* 16/*
154 * mount.c 17 * mount.c
155 */ 18 */
156 19extern struct kernfs_node *sysfs_root_kn;
157/*
158 * Each sb is associated with a set of namespace tags (i.e.
159 * the network namespace of the task which mounted this sysfs
160 * instance).
161 */
162struct sysfs_super_info {
163 void *ns[KOBJ_NS_TYPES];
164};
165#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
166extern struct sysfs_dirent sysfs_root;
167extern struct kmem_cache *sysfs_dir_cachep;
168 20
169/* 21/*
170 * dir.c 22 * dir.c
171 */ 23 */
172extern struct mutex sysfs_mutex;
173extern spinlock_t sysfs_symlink_target_lock; 24extern spinlock_t sysfs_symlink_target_lock;
174extern const struct dentry_operations sysfs_dentry_ops;
175
176extern const struct file_operations sysfs_dir_operations;
177extern const struct inode_operations sysfs_dir_inode_operations;
178 25
179struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); 26void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
180void sysfs_put_active(struct sysfs_dirent *sd);
181void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
182void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
183int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
184 struct sysfs_dirent *parent_sd);
185int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
186 struct sysfs_dirent *parent_sd);
187void sysfs_remove(struct sysfs_dirent *sd);
188int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
189 const void *ns);
190void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
191
192struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
193 const unsigned char *name,
194 const void *ns);
195struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
196
197void release_sysfs_dirent(struct sysfs_dirent *sd);
198
199int sysfs_create_subdir(struct kobject *kobj, const char *name,
200 struct sysfs_dirent **p_sd);
201
202int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
203 const char *new_name, const void *new_ns);
204
205static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
206{
207 if (sd) {
208 WARN_ON(!atomic_read(&sd->s_count));
209 atomic_inc(&sd->s_count);
210 }
211 return sd;
212}
213#define sysfs_get(sd) __sysfs_get(sd)
214
215static inline void __sysfs_put(struct sysfs_dirent *sd)
216{
217 if (sd && atomic_dec_and_test(&sd->s_count))
218 release_sysfs_dirent(sd);
219}
220#define sysfs_put(sd) __sysfs_put(sd)
221
222/*
223 * inode.c
224 */
225struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
226void sysfs_evict_inode(struct inode *inode);
227int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
228int sysfs_permission(struct inode *inode, int mask);
229int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
230int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
231 struct kstat *stat);
232int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
233 size_t size, int flags);
234int sysfs_inode_init(void);
235 27
236/* 28/*
237 * file.c 29 * file.c
238 */ 30 */
239extern const struct file_operations sysfs_file_operations; 31int sysfs_add_file(struct kernfs_node *parent,
240extern const struct file_operations sysfs_bin_operations; 32 const struct attribute *attr, bool is_bin);
241 33int sysfs_add_file_mode_ns(struct kernfs_node *parent,
242int sysfs_add_file(struct sysfs_dirent *dir_sd, 34 const struct attribute *attr, bool is_bin,
243 const struct attribute *attr, int type);
244
245int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
246 const struct attribute *attr, int type,
247 umode_t amode, const void *ns); 35 umode_t amode, const void *ns);
248void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
249 36
250/* 37/*
251 * symlink.c 38 * symlink.c
252 */ 39 */
253extern const struct inode_operations sysfs_symlink_inode_operations; 40int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
254int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
255 const char *name); 41 const char *name);
42
43#endif /* __SYSFS_INTERNAL_H */