aboutsummaryrefslogtreecommitdiffstats
path: root/fs/tracefs/inode.c
diff options
context:
space:
mode:
author Steven Rostedt (Red Hat) <rostedt@goodmis.org> 2015-01-20 11:36:55 -0500
committer Steven Rostedt <rostedt@goodmis.org> 2015-02-03 12:48:40 -0500
commit 4282d60689d4f21b40692029080440cc58e8a17d (patch)
tree 6ae2bcc5e17dbf11fcbdb0f066224e146d273097 /fs/tracefs/inode.c
parent 09d23a1d8a82e814bd56a4f121b80ea8214ac49d (diff)
tracefs: Add new tracefs file system
Add a separate file system to handle the tracing directory. Currently it is part of debugfs, but that is starting to show its limits.

One thing is that in order to access the tracing infrastructure, you need to mount debugfs. As that includes debugging from all sorts of subsystems in the kernel, it is not considered advisable to mount such an all-encompassing debugging system. Having the tracing system in its own file system gives access to the tracing subsystem without needing to include all other systems.

Another problem with tracing using the debugfs system is that the instances use mkdir to create sub buffers. debugfs does not support mkdir from userspace, so to implement it, special hacks were used. By controlling the file system that the tracing infrastructure uses, this can be properly done without hacks.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'fs/tracefs/inode.c')
-rw-r--r-- fs/tracefs/inode.c 522
1 files changed, 522 insertions, 0 deletions
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..5b1547a452d8
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,522 @@
1/*
2 * inode.c - part of tracefs, a pseudo file system for activating tracing
3 *
4 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
5 *
6 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * tracefs is the file system that is used by the tracing infrastructure.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/namei.h>
20#include <linux/tracefs.h>
21#include <linux/fsnotify.h>
22#include <linux/seq_file.h>
23#include <linux/parser.h>
24#include <linux/magic.h>
25#include <linux/slab.h>
26
27#define TRACEFS_DEFAULT_MODE 0700
28
29static struct vfsmount *tracefs_mount;
30static int tracefs_mount_count;
31static bool tracefs_registered;
32
33static ssize_t default_read_file(struct file *file, char __user *buf,
34 size_t count, loff_t *ppos)
35{
36 return 0;
37}
38
39static ssize_t default_write_file(struct file *file, const char __user *buf,
40 size_t count, loff_t *ppos)
41{
42 return count;
43}
44
45static const struct file_operations tracefs_file_operations = {
46 .read = default_read_file,
47 .write = default_write_file,
48 .open = simple_open,
49 .llseek = noop_llseek,
50};
51
52static struct inode *tracefs_get_inode(struct super_block *sb)
53{
54 struct inode *inode = new_inode(sb);
55 if (inode) {
56 inode->i_ino = get_next_ino();
57 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
58 }
59 return inode;
60}
61
62struct tracefs_mount_opts {
63 kuid_t uid;
64 kgid_t gid;
65 umode_t mode;
66};
67
68enum {
69 Opt_uid,
70 Opt_gid,
71 Opt_mode,
72 Opt_err
73};
74
75static const match_table_t tokens = {
76 {Opt_uid, "uid=%u"},
77 {Opt_gid, "gid=%u"},
78 {Opt_mode, "mode=%o"},
79 {Opt_err, NULL}
80};
81
82struct tracefs_fs_info {
83 struct tracefs_mount_opts mount_opts;
84};
85
86static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
87{
88 substring_t args[MAX_OPT_ARGS];
89 int option;
90 int token;
91 kuid_t uid;
92 kgid_t gid;
93 char *p;
94
95 opts->mode = TRACEFS_DEFAULT_MODE;
96
97 while ((p = strsep(&data, ",")) != NULL) {
98 if (!*p)
99 continue;
100
101 token = match_token(p, tokens, args);
102 switch (token) {
103 case Opt_uid:
104 if (match_int(&args[0], &option))
105 return -EINVAL;
106 uid = make_kuid(current_user_ns(), option);
107 if (!uid_valid(uid))
108 return -EINVAL;
109 opts->uid = uid;
110 break;
111 case Opt_gid:
112 if (match_int(&args[0], &option))
113 return -EINVAL;
114 gid = make_kgid(current_user_ns(), option);
115 if (!gid_valid(gid))
116 return -EINVAL;
117 opts->gid = gid;
118 break;
119 case Opt_mode:
120 if (match_octal(&args[0], &option))
121 return -EINVAL;
122 opts->mode = option & S_IALLUGO;
123 break;
124 /*
125 * We might like to report bad mount options here;
126 * but traditionally tracefs has ignored all mount options
127 */
128 }
129 }
130
131 return 0;
132}
133
134static int tracefs_apply_options(struct super_block *sb)
135{
136 struct tracefs_fs_info *fsi = sb->s_fs_info;
137 struct inode *inode = sb->s_root->d_inode;
138 struct tracefs_mount_opts *opts = &fsi->mount_opts;
139
140 inode->i_mode &= ~S_IALLUGO;
141 inode->i_mode |= opts->mode;
142
143 inode->i_uid = opts->uid;
144 inode->i_gid = opts->gid;
145
146 return 0;
147}
148
149static int tracefs_remount(struct super_block *sb, int *flags, char *data)
150{
151 int err;
152 struct tracefs_fs_info *fsi = sb->s_fs_info;
153
154 sync_filesystem(sb);
155 err = tracefs_parse_options(data, &fsi->mount_opts);
156 if (err)
157 goto fail;
158
159 tracefs_apply_options(sb);
160
161fail:
162 return err;
163}
164
165static int tracefs_show_options(struct seq_file *m, struct dentry *root)
166{
167 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
168 struct tracefs_mount_opts *opts = &fsi->mount_opts;
169
170 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
171 seq_printf(m, ",uid=%u",
172 from_kuid_munged(&init_user_ns, opts->uid));
173 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
174 seq_printf(m, ",gid=%u",
175 from_kgid_munged(&init_user_ns, opts->gid));
176 if (opts->mode != TRACEFS_DEFAULT_MODE)
177 seq_printf(m, ",mode=%o", opts->mode);
178
179 return 0;
180}
181
182static const struct super_operations tracefs_super_operations = {
183 .statfs = simple_statfs,
184 .remount_fs = tracefs_remount,
185 .show_options = tracefs_show_options,
186};
187
188static int trace_fill_super(struct super_block *sb, void *data, int silent)
189{
190 static struct tree_descr trace_files[] = {{""}};
191 struct tracefs_fs_info *fsi;
192 int err;
193
194 save_mount_options(sb, data);
195
196 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
197 sb->s_fs_info = fsi;
198 if (!fsi) {
199 err = -ENOMEM;
200 goto fail;
201 }
202
203 err = tracefs_parse_options(data, &fsi->mount_opts);
204 if (err)
205 goto fail;
206
207 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
208 if (err)
209 goto fail;
210
211 sb->s_op = &tracefs_super_operations;
212
213 tracefs_apply_options(sb);
214
215 return 0;
216
217fail:
218 kfree(fsi);
219 sb->s_fs_info = NULL;
220 return err;
221}
222
223static struct dentry *trace_mount(struct file_system_type *fs_type,
224 int flags, const char *dev_name,
225 void *data)
226{
227 return mount_single(fs_type, flags, data, trace_fill_super);
228}
229
230static struct file_system_type trace_fs_type = {
231 .owner = THIS_MODULE,
232 .name = "tracefs",
233 .mount = trace_mount,
234 .kill_sb = kill_litter_super,
235};
236MODULE_ALIAS_FS("tracefs");
237
238static struct dentry *start_creating(const char *name, struct dentry *parent)
239{
240 struct dentry *dentry;
241 int error;
242
243 pr_debug("tracefs: creating file '%s'\n",name);
244
245 error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
246 &tracefs_mount_count);
247 if (error)
248 return ERR_PTR(error);
249
250 /* If the parent is not specified, we create it in the root.
251 * We need the root dentry to do this, which is in the super
252 * block. A pointer to that is in the struct vfsmount that we
253 * have around.
254 */
255 if (!parent)
256 parent = tracefs_mount->mnt_root;
257
258 mutex_lock(&parent->d_inode->i_mutex);
259 dentry = lookup_one_len(name, parent, strlen(name));
260 if (!IS_ERR(dentry) && dentry->d_inode) {
261 dput(dentry);
262 dentry = ERR_PTR(-EEXIST);
263 }
264 if (IS_ERR(dentry))
265 mutex_unlock(&parent->d_inode->i_mutex);
266 return dentry;
267}
268
269static struct dentry *failed_creating(struct dentry *dentry)
270{
271 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
272 dput(dentry);
273 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
274 return NULL;
275}
276
277static struct dentry *end_creating(struct dentry *dentry)
278{
279 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
280 return dentry;
281}
282
283/**
284 * tracefs_create_file - create a file in the tracefs filesystem
285 * @name: a pointer to a string containing the name of the file to create.
286 * @mode: the permission that the file should have.
287 * @parent: a pointer to the parent dentry for this file. This should be a
288 * directory dentry if set. If this parameter is NULL, then the
289 * file will be created in the root of the tracefs filesystem.
290 * @data: a pointer to something that the caller will want to get to later
291 * on. The inode.i_private pointer will point to this value on
292 * the open() call.
293 * @fops: a pointer to a struct file_operations that should be used for
294 * this file.
295 *
296 * This is the basic "create a file" function for tracefs. It allows for a
297 * wide range of flexibility in creating a file, or a directory (if you want
298 * to create a directory, the tracefs_create_dir() function is
299 * recommended to be used instead.)
300 *
301 * This function will return a pointer to a dentry if it succeeds. This
302 * pointer must be passed to the tracefs_remove() function when the file is
303 * to be removed (no automatic cleanup happens if your module is unloaded,
304 * you are responsible here.) If an error occurs, %NULL will be returned.
305 *
306 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
307 * returned.
308 */
309struct dentry *tracefs_create_file(const char *name, umode_t mode,
310 struct dentry *parent, void *data,
311 const struct file_operations *fops)
312{
313 struct dentry *dentry;
314 struct inode *inode;
315
316 if (!(mode & S_IFMT))
317 mode |= S_IFREG;
318 BUG_ON(!S_ISREG(mode));
319 dentry = start_creating(name, parent);
320
321 if (IS_ERR(dentry))
322 return NULL;
323
324 inode = tracefs_get_inode(dentry->d_sb);
325 if (unlikely(!inode))
326 return failed_creating(dentry);
327
328 inode->i_mode = mode;
329 inode->i_fop = fops ? fops : &tracefs_file_operations;
330 inode->i_private = data;
331 d_instantiate(dentry, inode);
332 fsnotify_create(dentry->d_parent->d_inode, dentry);
333 return end_creating(dentry);
334}
335
336/**
337 * tracefs_create_dir - create a directory in the tracefs filesystem
338 * @name: a pointer to a string containing the name of the directory to
339 * create.
340 * @parent: a pointer to the parent dentry for this file. This should be a
341 * directory dentry if set. If this parameter is NULL, then the
342 * directory will be created in the root of the tracefs filesystem.
343 *
344 * This function creates a directory in tracefs with the given name.
345 *
346 * This function will return a pointer to a dentry if it succeeds. This
347 * pointer must be passed to the tracefs_remove() function when the file is
348 * to be removed. If an error occurs, %NULL will be returned.
349 *
350 * If tracing is not enabled in the kernel, the value -%ENODEV will be
351 * returned.
352 */
353struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
354{
355 struct dentry *dentry = start_creating(name, parent);
356 struct inode *inode;
357
358 if (IS_ERR(dentry))
359 return NULL;
360
361 inode = tracefs_get_inode(dentry->d_sb);
362 if (unlikely(!inode))
363 return failed_creating(dentry);
364
365 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
366 inode->i_op = &simple_dir_inode_operations;
367 inode->i_fop = &simple_dir_operations;
368
369 /* directory inodes start off with i_nlink == 2 (for "." entry) */
370 inc_nlink(inode);
371 d_instantiate(dentry, inode);
372 inc_nlink(dentry->d_parent->d_inode);
373 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
374 return end_creating(dentry);
375}
376
377static inline int tracefs_positive(struct dentry *dentry)
378{
379 return dentry->d_inode && !d_unhashed(dentry);
380}
381
382static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
383{
384 int ret = 0;
385
386 if (tracefs_positive(dentry)) {
387 if (dentry->d_inode) {
388 dget(dentry);
389 switch (dentry->d_inode->i_mode & S_IFMT) {
390 case S_IFDIR:
391 ret = simple_rmdir(parent->d_inode, dentry);
392 break;
393 default:
394 simple_unlink(parent->d_inode, dentry);
395 break;
396 }
397 if (!ret)
398 d_delete(dentry);
399 dput(dentry);
400 }
401 }
402 return ret;
403}
404
405/**
406 * tracefs_remove - removes a file or directory from the tracefs filesystem
407 * @dentry: a pointer to a the dentry of the file or directory to be
408 * removed.
409 *
410 * This function removes a file or directory in tracefs that was previously
411 * created with a call to another tracefs function (like
412 * tracefs_create_file() or variants thereof.)
413 */
414void tracefs_remove(struct dentry *dentry)
415{
416 struct dentry *parent;
417 int ret;
418
419 if (IS_ERR_OR_NULL(dentry))
420 return;
421
422 parent = dentry->d_parent;
423 if (!parent || !parent->d_inode)
424 return;
425
426 mutex_lock(&parent->d_inode->i_mutex);
427 ret = __tracefs_remove(dentry, parent);
428 mutex_unlock(&parent->d_inode->i_mutex);
429 if (!ret)
430 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
431}
432
433/**
434 * tracefs_remove_recursive - recursively removes a directory
435 * @dentry: a pointer to a the dentry of the directory to be removed.
436 *
437 * This function recursively removes a directory tree in tracefs that
438 * was previously created with a call to another tracefs function
439 * (like tracefs_create_file() or variants thereof.)
440 */
441void tracefs_remove_recursive(struct dentry *dentry)
442{
443 struct dentry *child, *parent;
444
445 if (IS_ERR_OR_NULL(dentry))
446 return;
447
448 parent = dentry->d_parent;
449 if (!parent || !parent->d_inode)
450 return;
451
452 parent = dentry;
453 down:
454 mutex_lock(&parent->d_inode->i_mutex);
455 loop:
456 /*
457 * The parent->d_subdirs is protected by the d_lock. Outside that
458 * lock, the child can be unlinked and set to be freed which can
459 * use the d_u.d_child as the rcu head and corrupt this list.
460 */
461 spin_lock(&parent->d_lock);
462 list_for_each_entry(child, &parent->d_subdirs, d_child) {
463 if (!tracefs_positive(child))
464 continue;
465
466 /* perhaps simple_empty(child) makes more sense */
467 if (!list_empty(&child->d_subdirs)) {
468 spin_unlock(&parent->d_lock);
469 mutex_unlock(&parent->d_inode->i_mutex);
470 parent = child;
471 goto down;
472 }
473
474 spin_unlock(&parent->d_lock);
475
476 if (!__tracefs_remove(child, parent))
477 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
478
479 /*
480 * The parent->d_lock protects agaist child from unlinking
481 * from d_subdirs. When releasing the parent->d_lock we can
482 * no longer trust that the next pointer is valid.
483 * Restart the loop. We'll skip this one with the
484 * tracefs_positive() check.
485 */
486 goto loop;
487 }
488 spin_unlock(&parent->d_lock);
489
490 mutex_unlock(&parent->d_inode->i_mutex);
491 child = parent;
492 parent = parent->d_parent;
493 mutex_lock(&parent->d_inode->i_mutex);
494
495 if (child != dentry)
496 /* go up */
497 goto loop;
498
499 if (!__tracefs_remove(child, parent))
500 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
501 mutex_unlock(&parent->d_inode->i_mutex);
502}
503
504/**
505 * tracefs_initialized - Tells whether tracefs has been registered
506 */
507bool tracefs_initialized(void)
508{
509 return tracefs_registered;
510}
511
512static int __init tracefs_init(void)
513{
514 int retval;
515
516 retval = register_filesystem(&trace_fs_type);
517 if (!retval)
518 tracefs_registered = true;
519
520 return retval;
521}
522core_initcall(tracefs_init);