aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-11-28 14:54:34 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-11-29 21:08:39 -0500
commit414985ae23c031efbd6d16d484dea8b5de28b8f7 (patch)
tree06813b5fec1e2e46d9a86cd79e0dd9d23a8d51a0 /fs
parentfd7b9f7b9776b11df629e9dd3865320bf57ce588 (diff)
sysfs, kernfs: move file core code to fs/kernfs/file.c
Move core file code to fs/kernfs/file.c. fs/sysfs/file.c now contains sysfs kernfs_ops callbacks, sysfs wrappers around kernfs interfaces, and sysfs_schedule_callback(). The respective declarations in fs/sysfs/sysfs.h are moved to fs/kernfs/kernfs-internal.h. This is pure relocation. v2: Refreshed on top of the v2 of "sysfs, kernfs: prepare read path for kernfs". v3: Refreshed on top of the v3 of "sysfs, kernfs: prepare read path for kernfs". Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/kernfs/file.c805
-rw-r--r--fs/kernfs/kernfs-internal.h7
-rw-r--r--fs/sysfs/file.c802
-rw-r--r--fs/sysfs/sysfs.h4
4 files changed, 813 insertions, 805 deletions
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 90b1e88dad44..fa172e86047f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -7,3 +7,808 @@
7 * 7 *
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <linux/slab.h>
14#include <linux/poll.h>
15#include <linux/pagemap.h>
16#include <linux/poll.h>
17#include <linux/sched.h>
18
19#include "kernfs-internal.h"
20
21/*
22 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
23 * for each sysfs_dirent with one or more open files.
24 *
25 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
26 * protected by sysfs_open_dirent_lock.
27 *
28 * filp->private_data points to seq_file whose ->private points to
29 * sysfs_open_file. sysfs_open_files are chained at
30 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
31 */
/* protects sysfs_dirent->s_attr.open pointer installation/clearing */
static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
/* protects the ->files lists chained below */
static DEFINE_MUTEX(sysfs_open_file_mutex);

/* one per sysfs_dirent with open files; see block comment above */
struct sysfs_open_dirent {
	atomic_t		refcnt;	/* open files plus transient refs */
	atomic_t		event;	/* bumped by kernfs_notify() */
	wait_queue_head_t	poll;	/* poll(2) waiters on this node */
	struct list_head	files; /* goes through sysfs_open_file.list */
};
41
42static struct sysfs_open_file *sysfs_of(struct file *file)
43{
44 return ((struct seq_file *)file->private_data)->private;
45}
46
47/*
48 * Determine the kernfs_ops for the given sysfs_dirent. This function must
49 * be called while holding an active reference.
50 */
51static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
52{
53 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
54 lockdep_assert_held(sd);
55 return sd->s_attr.ops;
56}
57
/* seq_file ->start(): acquire @of->mutex and an active ref for the pass. */
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct sysfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 *
	 * The mutex is intentionally left held on return - including the
	 * ERR_PTR() failure return - and released in kernfs_seq_stop(),
	 * which seq_file calls after every ->start().
	 *
	 * NOTE(review): on the ERR_PTR(-ENODEV) path no active ref is
	 * held, yet kernfs_seq_stop() still does sysfs_put_active() -
	 * looks unbalanced; verify against the seq_file contract.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->sd);
	if (ops->seq_start) {
		return ops->seq_start(sf, ppos);
	} else {
		/*
		 * The same behavior and code as single_open().  Returns
		 * !NULL if pos is at the beginning; otherwise, NULL.
		 */
		return NULL + !*ppos;
	}
}
82
83static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
84{
85 struct sysfs_open_file *of = sf->private;
86 const struct kernfs_ops *ops = kernfs_ops(of->sd);
87
88 if (ops->seq_next) {
89 return ops->seq_next(sf, v, ppos);
90 } else {
91 /*
92 * The same behavior and code as single_open(), always
93 * terminate after the initial read.
94 */
95 ++*ppos;
96 return NULL;
97 }
98}
99
100static void kernfs_seq_stop(struct seq_file *sf, void *v)
101{
102 struct sysfs_open_file *of = sf->private;
103 const struct kernfs_ops *ops = kernfs_ops(of->sd);
104
105 if (ops->seq_stop)
106 ops->seq_stop(sf, v);
107
108 sysfs_put_active(of->sd);
109 mutex_unlock(&of->mutex);
110}
111
112static int kernfs_seq_show(struct seq_file *sf, void *v)
113{
114 struct sysfs_open_file *of = sf->private;
115
116 of->event = atomic_read(&of->sd->s_attr.open->event);
117
118 return of->sd->s_attr.ops->seq_show(sf, v);
119}
120
/* seq_file ops used for nodes whose kernfs_ops implement ->seq_show() */
static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};
127
128/*
129 * As reading a bin file can have side-effects, the exact offset and bytes
130 * specified in read(2) call should be passed to the read callback making
131 * it difficult to use seq_file. Implement simplistic custom buffering for
132 * bin files.
133 */
134static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
135 char __user *user_buf, size_t count,
136 loff_t *ppos)
137{
138 ssize_t len = min_t(size_t, count, PAGE_SIZE);
139 const struct kernfs_ops *ops;
140 char *buf;
141
142 buf = kmalloc(len, GFP_KERNEL);
143 if (!buf)
144 return -ENOMEM;
145
146 /*
147 * @of->mutex nests outside active ref and is just to ensure that
148 * the ops aren't called concurrently for the same open file.
149 */
150 mutex_lock(&of->mutex);
151 if (!sysfs_get_active(of->sd)) {
152 len = -ENODEV;
153 mutex_unlock(&of->mutex);
154 goto out_free;
155 }
156
157 ops = kernfs_ops(of->sd);
158 if (ops->read)
159 len = ops->read(of, buf, len, *ppos);
160 else
161 len = -EINVAL;
162
163 sysfs_put_active(of->sd);
164 mutex_unlock(&of->mutex);
165
166 if (len < 0)
167 goto out_free;
168
169 if (copy_to_user(user_buf, buf, len)) {
170 len = -EFAULT;
171 goto out_free;
172 }
173
174 *ppos += len;
175
176 out_free:
177 kfree(buf);
178 return len;
179}
180
181/**
182 * kernfs_file_read - kernfs vfs read callback
183 * @file: file pointer
184 * @user_buf: data to write
185 * @count: number of bytes
186 * @ppos: starting offset
187 */
188static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
189 size_t count, loff_t *ppos)
190{
191 struct sysfs_open_file *of = sysfs_of(file);
192
193 if (of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW)
194 return seq_read(file, user_buf, count, ppos);
195 else
196 return kernfs_file_direct_read(of, user_buf, count, ppos);
197}
198
199/**
200 * kernfs_file_write - kernfs vfs write callback
201 * @file: file pointer
202 * @user_buf: data to write
203 * @count: number of bytes
204 * @ppos: starting offset
205 *
206 * Copy data in from userland and pass it to the matching kernfs write
207 * operation.
208 *
209 * There is no easy way for us to know if userspace is only doing a partial
210 * write, so we don't support them. We expect the entire buffer to come on
211 * the first write. Hint: if you're writing a value, first read the file,
212 * modify only the the value you're changing, then write entire buffer
213 * back.
214 */
215static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
216 size_t count, loff_t *ppos)
217{
218 struct sysfs_open_file *of = sysfs_of(file);
219 ssize_t len = min_t(size_t, count, PAGE_SIZE);
220 const struct kernfs_ops *ops;
221 char *buf;
222
223 buf = kmalloc(len + 1, GFP_KERNEL);
224 if (!buf)
225 return -ENOMEM;
226
227 if (copy_from_user(buf, user_buf, len)) {
228 len = -EFAULT;
229 goto out_free;
230 }
231 buf[len] = '\0'; /* guarantee string termination */
232
233 /*
234 * @of->mutex nests outside active ref and is just to ensure that
235 * the ops aren't called concurrently for the same open file.
236 */
237 mutex_lock(&of->mutex);
238 if (!sysfs_get_active(of->sd)) {
239 mutex_unlock(&of->mutex);
240 len = -ENODEV;
241 goto out_free;
242 }
243
244 ops = kernfs_ops(of->sd);
245 if (ops->write)
246 len = ops->write(of, buf, len, *ppos);
247 else
248 len = -EINVAL;
249
250 sysfs_put_active(of->sd);
251 mutex_unlock(&of->mutex);
252
253 if (len > 0)
254 *ppos += len;
255out_free:
256 kfree(buf);
257 return len;
258}
259
260static void kernfs_vma_open(struct vm_area_struct *vma)
261{
262 struct file *file = vma->vm_file;
263 struct sysfs_open_file *of = sysfs_of(file);
264
265 if (!of->vm_ops)
266 return;
267
268 if (!sysfs_get_active(of->sd))
269 return;
270
271 if (of->vm_ops->open)
272 of->vm_ops->open(vma);
273
274 sysfs_put_active(of->sd);
275}
276
277static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
278{
279 struct file *file = vma->vm_file;
280 struct sysfs_open_file *of = sysfs_of(file);
281 int ret;
282
283 if (!of->vm_ops)
284 return VM_FAULT_SIGBUS;
285
286 if (!sysfs_get_active(of->sd))
287 return VM_FAULT_SIGBUS;
288
289 ret = VM_FAULT_SIGBUS;
290 if (of->vm_ops->fault)
291 ret = of->vm_ops->fault(vma, vmf);
292
293 sysfs_put_active(of->sd);
294 return ret;
295}
296
297static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
298 struct vm_fault *vmf)
299{
300 struct file *file = vma->vm_file;
301 struct sysfs_open_file *of = sysfs_of(file);
302 int ret;
303
304 if (!of->vm_ops)
305 return VM_FAULT_SIGBUS;
306
307 if (!sysfs_get_active(of->sd))
308 return VM_FAULT_SIGBUS;
309
310 ret = 0;
311 if (of->vm_ops->page_mkwrite)
312 ret = of->vm_ops->page_mkwrite(vma, vmf);
313 else
314 file_update_time(file);
315
316 sysfs_put_active(of->sd);
317 return ret;
318}
319
320static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
321 void *buf, int len, int write)
322{
323 struct file *file = vma->vm_file;
324 struct sysfs_open_file *of = sysfs_of(file);
325 int ret;
326
327 if (!of->vm_ops)
328 return -EINVAL;
329
330 if (!sysfs_get_active(of->sd))
331 return -EINVAL;
332
333 ret = -EINVAL;
334 if (of->vm_ops->access)
335 ret = of->vm_ops->access(vma, addr, buf, len, write);
336
337 sysfs_put_active(of->sd);
338 return ret;
339}
340
341#ifdef CONFIG_NUMA
342static int kernfs_vma_set_policy(struct vm_area_struct *vma,
343 struct mempolicy *new)
344{
345 struct file *file = vma->vm_file;
346 struct sysfs_open_file *of = sysfs_of(file);
347 int ret;
348
349 if (!of->vm_ops)
350 return 0;
351
352 if (!sysfs_get_active(of->sd))
353 return -EINVAL;
354
355 ret = 0;
356 if (of->vm_ops->set_policy)
357 ret = of->vm_ops->set_policy(vma, new);
358
359 sysfs_put_active(of->sd);
360 return ret;
361}
362
363static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
364 unsigned long addr)
365{
366 struct file *file = vma->vm_file;
367 struct sysfs_open_file *of = sysfs_of(file);
368 struct mempolicy *pol;
369
370 if (!of->vm_ops)
371 return vma->vm_policy;
372
373 if (!sysfs_get_active(of->sd))
374 return vma->vm_policy;
375
376 pol = vma->vm_policy;
377 if (of->vm_ops->get_policy)
378 pol = of->vm_ops->get_policy(vma, addr);
379
380 sysfs_put_active(of->sd);
381 return pol;
382}
383
384static int kernfs_vma_migrate(struct vm_area_struct *vma,
385 const nodemask_t *from, const nodemask_t *to,
386 unsigned long flags)
387{
388 struct file *file = vma->vm_file;
389 struct sysfs_open_file *of = sysfs_of(file);
390 int ret;
391
392 if (!of->vm_ops)
393 return 0;
394
395 if (!sysfs_get_active(of->sd))
396 return 0;
397
398 ret = 0;
399 if (of->vm_ops->migrate)
400 ret = of->vm_ops->migrate(vma, from, to, flags);
401
402 sysfs_put_active(of->sd);
403 return ret;
404}
405#endif
406
/*
 * vm_ops installed on kernfs-backed mappings.  Each callback forwards
 * to the provider's vm_ops stashed in sysfs_open_file->vm_ops, taking
 * an active reference around the call.
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
	.migrate	= kernfs_vma_migrate,
#endif
};
418
/* vfs ->mmap(): run the kernfs_ops mmap and interpose kernfs_vm_ops. */
static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct sysfs_open_file *of = sysfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!sysfs_get_active(of->sd))
		goto out_unlock;

	ops = kernfs_ops(of->sd);
	/* without an ->mmap op, rc stays -ENODEV and we fail below */
	if (ops->mmap)
		rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 * (Note: this path returns rc == 0, i.e. success, untouched.)
	 */
	if (vma->vm_file != file)
		goto out_put;

	/* an earlier mmap on this file must have installed the same vm_ops */
	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	/* stash provider vm_ops and interpose our wrappers */
	of->mmapped = 1;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	sysfs_put_active(of->sd);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}
468
/**
 * sysfs_get_open_dirent - get or create sysfs_open_dirent
 * @sd: target sysfs_dirent
 * @of: sysfs_open_file for this instance of open
 *
 * If @sd->s_attr.open exists, increment its reference count;
 * otherwise, create one.  @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
				 struct sysfs_open_file *of)
{
	struct sysfs_open_dirent *od, *new_od = NULL;

 retry:
	mutex_lock(&sysfs_open_file_mutex);
	spin_lock_irq(&sysfs_open_dirent_lock);

	/* second pass: install the dirent allocated below, if still needed */
	if (!sd->s_attr.open && new_od) {
		sd->s_attr.open = new_od;
		new_od = NULL;
	}

	od = sd->s_attr.open;
	if (od) {
		atomic_inc(&od->refcnt);
		list_add_tail(&of->list, &od->files);
	}

	spin_unlock_irq(&sysfs_open_dirent_lock);
	mutex_unlock(&sysfs_open_file_mutex);

	if (od) {
		/* someone may have raced and installed theirs; drop ours */
		kfree(new_od);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
	if (!new_od)
		return -ENOMEM;

	/* refcnt starts at 0; the atomic_inc on the retry pass makes it 1 */
	atomic_set(&new_od->refcnt, 0);
	atomic_set(&new_od->event, 1);
	init_waitqueue_head(&new_od->poll);
	INIT_LIST_HEAD(&new_od->files);
	goto retry;
}
522
/**
 * sysfs_put_open_dirent - put sysfs_open_dirent
 * @sd: target sysfs_dirent
 * @of: associated sysfs_open_file (%NULL to drop a bare reference)
 *
 * Put @sd->s_attr.open and unlink @of from the files list.  If
 * reference count reaches zero, disassociate and free it.
 *
 * LOCKING:
 * None.
 */
static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
				  struct sysfs_open_file *of)
{
	struct sysfs_open_dirent *od = sd->s_attr.open;
	unsigned long flags;

	mutex_lock(&sysfs_open_file_mutex);
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);

	if (of)
		list_del(&of->list);

	/* keep @od non-NULL only when this was the last reference */
	if (atomic_dec_and_test(&od->refcnt))
		sd->s_attr.open = NULL;
	else
		od = NULL;

	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
	mutex_unlock(&sysfs_open_file_mutex);

	/* freed outside the locks; NULL when others still hold refs */
	kfree(od);
}
556
/* vfs ->open(): set up seq_file, sysfs_open_file and the open dirent. */
static int kernfs_file_open(struct inode *inode, struct file *file)
{
	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
	const struct kernfs_ops *ops;
	struct sysfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	/* active ref held across setup; dropped on every exit path */
	if (!sysfs_get_active(attr_sd))
		return -ENODEV;

	ops = kernfs_ops(attr_sd);

	/* an mmap-capable node counts as both readable and writable */
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* check perms and supported operations */
	if ((file->f_mode & FMODE_WRITE) &&
	    (!(inode->i_mode & S_IWUGO) || !has_write))
		goto err_out;

	if ((file->f_mode & FMODE_READ) &&
	    (!(inode->i_mode & S_IRUGO) || !has_read))
		goto err_out;

	/* allocate a sysfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * NOTE: the two branches look identical but are not redundant -
	 * mutex_init() is a macro creating a distinct static
	 * lock_class_key per call site, so each branch yields its own
	 * lockdep class.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->sd = attr_sd;
	of->file = file;

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	((struct seq_file *)file->private_data)->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open dirent struct */
	error = sysfs_get_open_dirent(attr_sd, of);
	if (error)
		goto err_close;

	/* open succeeded, put active references */
	sysfs_put_active(attr_sd);
	return 0;

err_close:
	seq_release(inode, file);
err_free:
	kfree(of);
err_out:
	sysfs_put_active(attr_sd);
	return error;
}
644
645static int kernfs_file_release(struct inode *inode, struct file *filp)
646{
647 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
648 struct sysfs_open_file *of = sysfs_of(filp);
649
650 sysfs_put_open_dirent(sd, of);
651 seq_release(inode, filp);
652 kfree(of);
653
654 return 0;
655}
656
/*
 * Zap all user mappings of @sd's open files.  Only relevant for nodes
 * flagged SYSFS_FLAG_HAS_MMAP; used so stale mappings can't reach the
 * provider once the node goes away.
 */
void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	struct sysfs_open_file *of;

	if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
		return;

	/* pin the open_dirent so it can't go away while we walk ->files */
	spin_lock_irq(&sysfs_open_dirent_lock);
	od = sd->s_attr.open;
	if (od)
		atomic_inc(&od->refcnt);
	spin_unlock_irq(&sysfs_open_dirent_lock);
	if (!od)
		return;

	mutex_lock(&sysfs_open_file_mutex);
	list_for_each_entry(of, &od->files, list) {
		struct inode *inode = file_inode(of->file);
		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
	}
	mutex_unlock(&sysfs_open_file_mutex);

	/* drop the temporary reference taken above */
	sysfs_put_open_dirent(sd, NULL);
}
682
683/* Sysfs attribute files are pollable. The idea is that you read
684 * the content and then you use 'poll' or 'select' to wait for
685 * the content to change. When the content changes (assuming the
686 * manager for the kobject supports notification), poll will
687 * return POLLERR|POLLPRI, and select will return the fd whether
688 * it is waiting for read, write, or exceptions.
689 * Once poll/select indicates that the value has changed, you
690 * need to close and re-open the file, or seek to 0 and read again.
691 * Reminder: this only works for attributes which actively support
692 * it, and it is not possible to test an attribute from userspace
693 * to see if it supports poll (Neither 'poll' nor 'select' return
694 * an appropriate error code). When in doubt, set a suitable timeout value.
695 */
696static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
697{
698 struct sysfs_open_file *of = sysfs_of(filp);
699 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
700 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
701
702 /* need parent for the kobj, grab both */
703 if (!sysfs_get_active(attr_sd))
704 goto trigger;
705
706 poll_wait(filp, &od->poll, wait);
707
708 sysfs_put_active(attr_sd);
709
710 if (of->event != atomic_read(&od->event))
711 goto trigger;
712
713 return DEFAULT_POLLMASK;
714
715 trigger:
716 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
717}
718
/**
 * kernfs_notify - notify a kernfs file
 * @sd: file to notify
 *
 * Notify @sd such that poll(2) on @sd wakes up.
 */
void kernfs_notify(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	unsigned long flags;

	/* irqsave - presumably callable from irq-disabled contexts; verify */
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);

	/* only attribute nodes carry an open_dirent to notify */
	if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
		od = sd->s_attr.open;
		if (od) {
			/* mark content changed and wake pollers */
			atomic_inc(&od->event);
			wake_up_interruptible(&od->poll);
		}
	}

	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
743
/* file_operations shared by all kernfs regular files */
const struct file_operations kernfs_file_operations = {
	.read		= kernfs_file_read,
	.write		= kernfs_file_write,
	.llseek		= generic_file_llseek,
	.mmap		= kernfs_file_mmap,
	.open		= kernfs_file_open,
	.release	= kernfs_file_release,
	.poll		= kernfs_file_poll,
};
753
/**
 * kernfs_create_file_ns_key - create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
					       const char *name,
					       umode_t mode, loff_t size,
					       const struct kernfs_ops *ops,
					       void *priv, const void *ns,
					       struct lock_class_key *key)
{
	struct sysfs_addrm_cxt acxt;
	struct sysfs_dirent *sd;
	int rc;

	/* only permission bits of @mode are kept; type forced to regular */
	sd = sysfs_new_dirent(name, (mode & S_IALLUGO) | S_IFREG,
			      SYSFS_KOBJ_ATTR);
	if (!sd)
		return ERR_PTR(-ENOMEM);

	sd->s_attr.ops = ops;
	sd->s_attr.size = size;
	sd->s_ns = ns;
	sd->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&sd->dep_map, "s_active", key, 0);
		sd->s_flags |= SYSFS_FLAG_LOCKDEP;
	}
#endif

	/*
	 * sd->s_attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
	if (ops->mmap)
		sd->s_flags |= SYSFS_FLAG_HAS_MMAP;

	sysfs_addrm_start(&acxt);
	rc = sysfs_add_one(&acxt, sd, parent);
	sysfs_addrm_finish(&acxt);

	/* on add failure, drop our reference and propagate the error */
	if (rc) {
		kernfs_put(sd);
		return ERR_PTR(rc);
	}
	return sd;
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 31f0dbe1881b..38e3a163e5ad 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -142,4 +142,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
142void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 142void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
143struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); 143struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
144 144
145/*
146 * file.c
147 */
148extern const struct file_operations kernfs_file_operations;
149
150void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
151
145#endif /* __KERNFS_INTERNAL_H */ 152#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 7f0a79fa2ed8..ac77d2be3c31 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,54 +14,12 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
18#include <linux/namei.h>
19#include <linux/poll.h>
20#include <linux/list.h> 17#include <linux/list.h>
21#include <linux/mutex.h> 18#include <linux/mutex.h>
22#include <linux/limits.h>
23#include <linux/uaccess.h>
24#include <linux/seq_file.h> 19#include <linux/seq_file.h>
25#include <linux/mm.h>
26 20
27#include "sysfs.h" 21#include "sysfs.h"
28 22#include "../kernfs/kernfs-internal.h"
29/*
30 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
31 * for each sysfs_dirent with one or more open files.
32 *
33 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
34 * protected by sysfs_open_dirent_lock.
35 *
36 * filp->private_data points to seq_file whose ->private points to
37 * sysfs_open_file. sysfs_open_files are chained at
38 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
39 */
40static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
41static DEFINE_MUTEX(sysfs_open_file_mutex);
42
43struct sysfs_open_dirent {
44 atomic_t refcnt;
45 atomic_t event;
46 wait_queue_head_t poll;
47 struct list_head files; /* goes through sysfs_open_file.list */
48};
49
50static struct sysfs_open_file *sysfs_of(struct file *file)
51{
52 return ((struct seq_file *)file->private_data)->private;
53}
54
55/*
56 * Determine the kernfs_ops for the given sysfs_dirent. This function must
57 * be called while holding an active reference.
58 */
59static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
60{
61 if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
62 lockdep_assert_held(sd);
63 return sd->s_attr.ops;
64}
65 23
66/* 24/*
67 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function 25 * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
@@ -143,147 +101,6 @@ static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf,
143 return battr->read(of->file, kobj, battr, buf, pos, count); 101 return battr->read(of->file, kobj, battr, buf, pos, count);
144} 102}
145 103
146static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
147{
148 struct sysfs_open_file *of = sf->private;
149 const struct kernfs_ops *ops;
150
151 /*
152 * @of->mutex nests outside active ref and is just to ensure that
153 * the ops aren't called concurrently for the same open file.
154 */
155 mutex_lock(&of->mutex);
156 if (!sysfs_get_active(of->sd))
157 return ERR_PTR(-ENODEV);
158
159 ops = kernfs_ops(of->sd);
160 if (ops->seq_start) {
161 return ops->seq_start(sf, ppos);
162 } else {
163 /*
164 * The same behavior and code as single_open(). Returns
165 * !NULL if pos is at the beginning; otherwise, NULL.
166 */
167 return NULL + !*ppos;
168 }
169}
170
171static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
172{
173 struct sysfs_open_file *of = sf->private;
174 const struct kernfs_ops *ops = kernfs_ops(of->sd);
175
176 if (ops->seq_next) {
177 return ops->seq_next(sf, v, ppos);
178 } else {
179 /*
180 * The same behavior and code as single_open(), always
181 * terminate after the initial read.
182 */
183 ++*ppos;
184 return NULL;
185 }
186}
187
188static void kernfs_seq_stop(struct seq_file *sf, void *v)
189{
190 struct sysfs_open_file *of = sf->private;
191 const struct kernfs_ops *ops = kernfs_ops(of->sd);
192
193 if (ops->seq_stop)
194 ops->seq_stop(sf, v);
195
196 sysfs_put_active(of->sd);
197 mutex_unlock(&of->mutex);
198}
199
200static int kernfs_seq_show(struct seq_file *sf, void *v)
201{
202 struct sysfs_open_file *of = sf->private;
203
204 of->event = atomic_read(&of->sd->s_attr.open->event);
205
206 return of->sd->s_attr.ops->seq_show(sf, v);
207}
208
209static const struct seq_operations kernfs_seq_ops = {
210 .start = kernfs_seq_start,
211 .next = kernfs_seq_next,
212 .stop = kernfs_seq_stop,
213 .show = kernfs_seq_show,
214};
215
216/*
217 * As reading a bin file can have side-effects, the exact offset and bytes
218 * specified in read(2) call should be passed to the read callback making
219 * it difficult to use seq_file. Implement simplistic custom buffering for
220 * bin files.
221 */
222static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
223 char __user *user_buf, size_t count,
224 loff_t *ppos)
225{
226 ssize_t len = min_t(size_t, count, PAGE_SIZE);
227 const struct kernfs_ops *ops;
228 char *buf;
229
230 buf = kmalloc(len, GFP_KERNEL);
231 if (!buf)
232 return -ENOMEM;
233
234 /*
235 * @of->mutex nests outside active ref and is just to ensure that
236 * the ops aren't called concurrently for the same open file.
237 */
238 mutex_lock(&of->mutex);
239 if (!sysfs_get_active(of->sd)) {
240 len = -ENODEV;
241 mutex_unlock(&of->mutex);
242 goto out_free;
243 }
244
245 ops = kernfs_ops(of->sd);
246 if (ops->read)
247 len = ops->read(of, buf, len, *ppos);
248 else
249 len = -EINVAL;
250
251 sysfs_put_active(of->sd);
252 mutex_unlock(&of->mutex);
253
254 if (len < 0)
255 goto out_free;
256
257 if (copy_to_user(user_buf, buf, len)) {
258 len = -EFAULT;
259 goto out_free;
260 }
261
262 *ppos += len;
263
264 out_free:
265 kfree(buf);
266 return len;
267}
268
269/**
270 * kernfs_file_read - kernfs vfs read callback
271 * @file: file pointer
272 * @user_buf: data to write
273 * @count: number of bytes
274 * @ppos: starting offset
275 */
276static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
277 size_t count, loff_t *ppos)
278{
279 struct sysfs_open_file *of = sysfs_of(file);
280
281 if (of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW)
282 return seq_read(file, user_buf, count, ppos);
283 else
284 return kernfs_file_direct_read(of, user_buf, count, ppos);
285}
286
287/* kernfs write callback for regular sysfs files */ 104/* kernfs write callback for regular sysfs files */
288static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf, 105static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf,
289 size_t count, loff_t pos) 106 size_t count, loff_t pos)
@@ -319,67 +136,6 @@ static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf,
319 return battr->write(of->file, kobj, battr, buf, pos, count); 136 return battr->write(of->file, kobj, battr, buf, pos, count);
320} 137}
321 138
322/**
323 * kernfs_file_write - kernfs vfs write callback
324 * @file: file pointer
325 * @user_buf: data to write
326 * @count: number of bytes
327 * @ppos: starting offset
328 *
329 * Copy data in from userland and pass it to the matching kernfs write
330 * operation.
331 *
332 * There is no easy way for us to know if userspace is only doing a partial
333 * write, so we don't support them. We expect the entire buffer to come on
334 * the first write. Hint: if you're writing a value, first read the file,
335 * modify only the the value you're changing, then write entire buffer
336 * back.
337 */
338static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
339 size_t count, loff_t *ppos)
340{
341 struct sysfs_open_file *of = sysfs_of(file);
342 ssize_t len = min_t(size_t, count, PAGE_SIZE);
343 const struct kernfs_ops *ops;
344 char *buf;
345
346 buf = kmalloc(len + 1, GFP_KERNEL);
347 if (!buf)
348 return -ENOMEM;
349
350 if (copy_from_user(buf, user_buf, len)) {
351 len = -EFAULT;
352 goto out_free;
353 }
354 buf[len] = '\0'; /* guarantee string termination */
355
356 /*
357 * @of->mutex nests outside active ref and is just to ensure that
358 * the ops aren't called concurrently for the same open file.
359 */
360 mutex_lock(&of->mutex);
361 if (!sysfs_get_active(of->sd)) {
362 mutex_unlock(&of->mutex);
363 len = -ENODEV;
364 goto out_free;
365 }
366
367 ops = kernfs_ops(of->sd);
368 if (ops->write)
369 len = ops->write(of, buf, len, *ppos);
370 else
371 len = -EINVAL;
372
373 sysfs_put_active(of->sd);
374 mutex_unlock(&of->mutex);
375
376 if (len > 0)
377 *ppos += len;
378out_free:
379 kfree(buf);
380 return len;
381}
382
383static int sysfs_kf_bin_mmap(struct sysfs_open_file *of, 139static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
384 struct vm_area_struct *vma) 140 struct vm_area_struct *vma)
385{ 141{
@@ -392,490 +148,6 @@ static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
392 return battr->mmap(of->file, kobj, battr, vma); 148 return battr->mmap(of->file, kobj, battr, vma);
393} 149}
394 150
395static void kernfs_vma_open(struct vm_area_struct *vma)
396{
397 struct file *file = vma->vm_file;
398 struct sysfs_open_file *of = sysfs_of(file);
399
400 if (!of->vm_ops)
401 return;
402
403 if (!sysfs_get_active(of->sd))
404 return;
405
406 if (of->vm_ops->open)
407 of->vm_ops->open(vma);
408
409 sysfs_put_active(of->sd);
410}
411
412static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
413{
414 struct file *file = vma->vm_file;
415 struct sysfs_open_file *of = sysfs_of(file);
416 int ret;
417
418 if (!of->vm_ops)
419 return VM_FAULT_SIGBUS;
420
421 if (!sysfs_get_active(of->sd))
422 return VM_FAULT_SIGBUS;
423
424 ret = VM_FAULT_SIGBUS;
425 if (of->vm_ops->fault)
426 ret = of->vm_ops->fault(vma, vmf);
427
428 sysfs_put_active(of->sd);
429 return ret;
430}
431
432static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
433 struct vm_fault *vmf)
434{
435 struct file *file = vma->vm_file;
436 struct sysfs_open_file *of = sysfs_of(file);
437 int ret;
438
439 if (!of->vm_ops)
440 return VM_FAULT_SIGBUS;
441
442 if (!sysfs_get_active(of->sd))
443 return VM_FAULT_SIGBUS;
444
445 ret = 0;
446 if (of->vm_ops->page_mkwrite)
447 ret = of->vm_ops->page_mkwrite(vma, vmf);
448 else
449 file_update_time(file);
450
451 sysfs_put_active(of->sd);
452 return ret;
453}
454
455static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
456 void *buf, int len, int write)
457{
458 struct file *file = vma->vm_file;
459 struct sysfs_open_file *of = sysfs_of(file);
460 int ret;
461
462 if (!of->vm_ops)
463 return -EINVAL;
464
465 if (!sysfs_get_active(of->sd))
466 return -EINVAL;
467
468 ret = -EINVAL;
469 if (of->vm_ops->access)
470 ret = of->vm_ops->access(vma, addr, buf, len, write);
471
472 sysfs_put_active(of->sd);
473 return ret;
474}
475
476#ifdef CONFIG_NUMA
477static int kernfs_vma_set_policy(struct vm_area_struct *vma,
478 struct mempolicy *new)
479{
480 struct file *file = vma->vm_file;
481 struct sysfs_open_file *of = sysfs_of(file);
482 int ret;
483
484 if (!of->vm_ops)
485 return 0;
486
487 if (!sysfs_get_active(of->sd))
488 return -EINVAL;
489
490 ret = 0;
491 if (of->vm_ops->set_policy)
492 ret = of->vm_ops->set_policy(vma, new);
493
494 sysfs_put_active(of->sd);
495 return ret;
496}
497
498static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
499 unsigned long addr)
500{
501 struct file *file = vma->vm_file;
502 struct sysfs_open_file *of = sysfs_of(file);
503 struct mempolicy *pol;
504
505 if (!of->vm_ops)
506 return vma->vm_policy;
507
508 if (!sysfs_get_active(of->sd))
509 return vma->vm_policy;
510
511 pol = vma->vm_policy;
512 if (of->vm_ops->get_policy)
513 pol = of->vm_ops->get_policy(vma, addr);
514
515 sysfs_put_active(of->sd);
516 return pol;
517}
518
519static int kernfs_vma_migrate(struct vm_area_struct *vma,
520 const nodemask_t *from, const nodemask_t *to,
521 unsigned long flags)
522{
523 struct file *file = vma->vm_file;
524 struct sysfs_open_file *of = sysfs_of(file);
525 int ret;
526
527 if (!of->vm_ops)
528 return 0;
529
530 if (!sysfs_get_active(of->sd))
531 return 0;
532
533 ret = 0;
534 if (of->vm_ops->migrate)
535 ret = of->vm_ops->migrate(vma, from, to, flags);
536
537 sysfs_put_active(of->sd);
538 return ret;
539}
540#endif
541
/*
 * vm_operations installed on every kernfs-backed mapping by
 * kernfs_file_mmap(). Each callback grabs an active reference on the
 * backing sysfs_dirent before forwarding to the vm_ops the mmap
 * implementation originally installed (saved in of->vm_ops).
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
	.migrate	= kernfs_vma_migrate,
#endif
};
553
/**
 * kernfs_file_mmap - kernfs vfs mmap callback
 * @file: file being mapped
 * @vma: vm area to map
 *
 * Invoke the kernfs ->mmap operation under @of->mutex and an active
 * reference, then interpose kernfs_vm_ops in front of whatever vm_ops
 * the implementation installed, so that later vma callbacks can
 * re-acquire the active reference.
 */
static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct sysfs_open_file *of = sysfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!sysfs_get_active(of->sd))
		goto out_unlock;

	/* no ->mmap leaves rc at -ENODEV */
	ops = kernfs_ops(of->sd);
	if (ops->mmap)
		rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 * (rc is 0 here, so the mmap itself still succeeds unwrapped.)
	 */
	if (vma->vm_file != file)
		goto out_put;

	/* a re-mmap must install the same vm_ops as the first one */
	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	/* remember the implementation's vm_ops and interpose ours */
	of->mmapped = 1;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	sysfs_put_active(of->sd);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}
603
/**
 * sysfs_get_open_dirent - get or create sysfs_open_dirent
 * @sd: target sysfs_dirent
 * @of: sysfs_open_file for this instance of open
 *
 * If @sd->s_attr.open exists, increment its reference count;
 * otherwise, create one. @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
				 struct sysfs_open_file *of)
{
	struct sysfs_open_dirent *od, *new_od = NULL;

 retry:
	/*
	 * Allocation can't happen under the locks, so optimistically
	 * take the locks, install a previously allocated @new_od if
	 * still needed, and retry after allocating if none existed.
	 */
	mutex_lock(&sysfs_open_file_mutex);
	spin_lock_irq(&sysfs_open_dirent_lock);

	if (!sd->s_attr.open && new_od) {
		sd->s_attr.open = new_od;
		new_od = NULL;	/* ownership transferred to @sd */
	}

	od = sd->s_attr.open;
	if (od) {
		atomic_inc(&od->refcnt);
		list_add_tail(&of->list, &od->files);
	}

	spin_unlock_irq(&sysfs_open_dirent_lock);
	mutex_unlock(&sysfs_open_file_mutex);

	if (od) {
		/* someone else may have installed one while we allocated */
		kfree(new_od);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
	if (!new_od)
		return -ENOMEM;

	atomic_set(&new_od->refcnt, 0);
	atomic_set(&new_od->event, 1);
	init_waitqueue_head(&new_od->poll);
	INIT_LIST_HEAD(&new_od->files);
	goto retry;
}
657
/**
 * sysfs_put_open_dirent - put sysfs_open_dirent
 * @sd: target sysfs_dirent
 * @of: associated sysfs_open_file (%NULL to only drop the reference)
 *
 * Put @sd->s_attr.open and unlink @of from the files list. If
 * reference count reaches zero, disassociate and free it.
 *
 * LOCKING:
 * None.
 */
static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
				  struct sysfs_open_file *of)
{
	struct sysfs_open_dirent *od = sd->s_attr.open;
	unsigned long flags;

	mutex_lock(&sysfs_open_file_mutex);
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);

	if (of)
		list_del(&of->list);

	/* @od is kept non-NULL only when we dropped the last reference */
	if (atomic_dec_and_test(&od->refcnt))
		sd->s_attr.open = NULL;
	else
		od = NULL;

	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
	mutex_unlock(&sysfs_open_file_mutex);

	/* free outside the locks; no-op when @od was set to NULL above */
	kfree(od);
}
691
/**
 * kernfs_file_open - kernfs vfs open callback
 * @inode: inode being opened
 * @file: new file
 *
 * Validate requested access against the node's mode and implemented
 * ops, allocate the per-open sysfs_open_file, set up seq_file backing
 * and attach to the node's open dirent.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_file_open(struct inode *inode, struct file *file)
{
	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
	const struct kernfs_ops *ops;
	struct sysfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!sysfs_get_active(attr_sd))
		return -ENODEV;

	ops = kernfs_ops(attr_sd);

	/* mmap implies both readability and writability */
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* check perms and supported operations */
	if ((file->f_mode & FMODE_WRITE) &&
	    (!(inode->i_mode & S_IWUGO) || !has_write))
		goto err_out;

	if ((file->f_mode & FMODE_READ) &&
	    (!(inode->i_mode & S_IRUGO) || !has_read))
		goto err_out;

	/* allocate a sysfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap. This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex. As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file. At this point, we can't easily give
	 * each file a separate locking class. Let's differentiate on
	 * whether the file has mmap or not for now.
	 */
	if (has_mmap)
		/*
		 * NOTE: the branches are intentionally identical in
		 * source - mutex_init() is a macro that creates a
		 * distinct lockdep class per call site, which is the
		 * whole point of the branch.
		 */
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->sd = attr_sd;
	of->file = file;

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested. This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	((struct seq_file *)file->private_data)->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open dirent struct */
	error = sysfs_get_open_dirent(attr_sd, of);
	if (error)
		goto err_close;

	/* open succeeded, put active references */
	sysfs_put_active(attr_sd);
	return 0;

err_close:
	seq_release(inode, file);
err_free:
	kfree(of);
err_out:
	sysfs_put_active(attr_sd);
	return error;
}
779
780static int kernfs_file_release(struct inode *inode, struct file *filp)
781{
782 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
783 struct sysfs_open_file *of = sysfs_of(filp);
784
785 sysfs_put_open_dirent(sd, of);
786 seq_release(inode, filp);
787 kfree(of);
788
789 return 0;
790}
791
/*
 * Tear down existing userspace mappings of @sd's open files (used when
 * the node goes away). Only relevant for nodes whose ops implement
 * mmap, per the SYSFS_FLAG_HAS_MMAP cache.
 */
void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	struct sysfs_open_file *of;

	if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
		return;

	/* pin the open dirent so it survives while we walk ->files */
	spin_lock_irq(&sysfs_open_dirent_lock);
	od = sd->s_attr.open;
	if (od)
		atomic_inc(&od->refcnt);
	spin_unlock_irq(&sysfs_open_dirent_lock);
	if (!od)
		return;

	/* ->files is protected by sysfs_open_file_mutex */
	mutex_lock(&sysfs_open_file_mutex);
	list_for_each_entry(of, &od->files, list) {
		struct inode *inode = file_inode(of->file);
		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
	}
	mutex_unlock(&sysfs_open_file_mutex);

	/* drop the reference taken above (@of is NULL: nothing to unlink) */
	sysfs_put_open_dirent(sd, NULL);
}
817
/* Sysfs attribute files are pollable. The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change. When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return POLLERR|POLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (Neither 'poll' nor 'select' return
 * an appropriate error code). When in doubt, set a suitable timeout value.
 */
static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
{
	struct sysfs_open_file *of = sysfs_of(filp);
	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
	/* od stays valid while filp is open: the open chained us to it */
	struct sysfs_open_dirent *od = attr_sd->s_attr.open;

	/* need parent for the kobj, grab both */
	if (!sysfs_get_active(attr_sd))
		goto trigger;

	poll_wait(filp, &od->poll, wait);

	sysfs_put_active(attr_sd);

	/* a kernfs_notify() since our last read bumped od->event */
	if (of->event != atomic_read(&od->event))
		goto trigger;

	return DEFAULT_POLLMASK;

 trigger:
	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}
853
854/**
855 * kernfs_notify - notify a kernfs file
856 * @sd: file to notify
857 *
858 * Notify @sd such that poll(2) on @sd wakes up.
859 */
860void kernfs_notify(struct sysfs_dirent *sd)
861{
862 struct sysfs_open_dirent *od;
863 unsigned long flags;
864
865 spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
866
867 if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
868 od = sd->s_attr.open;
869 if (od) {
870 atomic_inc(&od->event);
871 wake_up_interruptible(&od->poll);
872 }
873 }
874
875 spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
876}
877EXPORT_SYMBOL_GPL(kernfs_notify);
878
879void sysfs_notify(struct kobject *k, const char *dir, const char *attr) 151void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
880{ 152{
881 struct sysfs_dirent *sd = k->sd, *tmp; 153 struct sysfs_dirent *sd = k->sd, *tmp;
@@ -898,16 +170,6 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
898} 170}
899EXPORT_SYMBOL_GPL(sysfs_notify); 171EXPORT_SYMBOL_GPL(sysfs_notify);
900 172
/* file_operations installed on every regular kernfs file */
const struct file_operations kernfs_file_operations = {
	.read		= kernfs_file_read,
	.write		= kernfs_file_write,
	.llseek		= generic_file_llseek,
	.mmap		= kernfs_file_mmap,
	.open		= kernfs_file_open,
	.release	= kernfs_file_release,
	.poll		= kernfs_file_poll,
};
910
/* kernfs_ops with no callbacks at all */
static const struct kernfs_ops sysfs_file_kfops_empty = {
};
913 175
@@ -996,68 +258,6 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
996 return 0; 258 return 0;
997} 259}
998 260
/**
 * kernfs_create_file_ns_key - create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
					       const char *name,
					       umode_t mode, loff_t size,
					       const struct kernfs_ops *ops,
					       void *priv, const void *ns,
					       struct lock_class_key *key)
{
	struct sysfs_addrm_cxt acxt;
	struct sysfs_dirent *sd;
	int rc;

	/* only the permission bits of @mode are honored */
	sd = sysfs_new_dirent(name, (mode & S_IALLUGO) | S_IFREG,
			      SYSFS_KOBJ_ATTR);
	if (!sd)
		return ERR_PTR(-ENOMEM);

	sd->s_attr.ops = ops;
	sd->s_attr.size = size;
	sd->s_ns = ns;
	sd->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&sd->dep_map, "s_active", key, 0);
		sd->s_flags |= SYSFS_FLAG_LOCKDEP;
	}
#endif

	/*
	 * sd->s_attr.ops is accessible only while holding active ref. We
	 * need to know whether some ops are implemented outside active
	 * ref. Cache their existence in flags.
	 */
	if (ops->seq_show)
		sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
	if (ops->mmap)
		sd->s_flags |= SYSFS_FLAG_HAS_MMAP;

	sysfs_addrm_start(&acxt);
	rc = sysfs_add_one(&acxt, sd, parent);
	sysfs_addrm_finish(&acxt);

	/* sysfs_add_one() took its own reference; drop ours on failure */
	if (rc) {
		kernfs_put(sd);
		return ERR_PTR(rc);
	}
	return sd;
}
1060
1061int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 261int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
1062 bool is_bin) 262 bool is_bin)
1063{ 263{
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 972b4a4a5f90..4b8b60d834cc 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -41,15 +41,11 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
41/* 41/*
42 * file.c 42 * file.c
43 */ 43 */
44extern const struct file_operations kernfs_file_operations;
45
46int sysfs_add_file(struct sysfs_dirent *dir_sd, 44int sysfs_add_file(struct sysfs_dirent *dir_sd,
47 const struct attribute *attr, bool is_bin); 45 const struct attribute *attr, bool is_bin);
48
49int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, 46int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
50 const struct attribute *attr, bool is_bin, 47 const struct attribute *attr, bool is_bin,
51 umode_t amode, const void *ns); 48 umode_t amode, const void *ns);
52void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
53 49
54/* 50/*
55 * symlink.c 51 * symlink.c