aboutsummaryrefslogtreecommitdiffstats
path: root/fs/relayfs/inode.c
diff options
context:
space:
mode:
authorTom Zanussi <zanussi@us.ibm.com>2005-09-06 18:16:30 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-07 19:57:18 -0400
commite82894f84dbba130ab46c97748c03647f8204f92 (patch)
treedbf20825db44037f0db5d0696d43457292c546c3 /fs/relayfs/inode.c
parent8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 (diff)
[PATCH] relayfs
Here's the latest version of relayfs, against linux-2.6.11-mm2. I'm hoping you'll consider putting this version back into your tree - the previous rounds of comment seem to have shaken out all the API issues and the number of comments on the code itself have also steadily dwindled. This patch is essentially the same as the relayfs redux part 5 patch, with some minor changes based on reviewer comments. Thanks again to Pekka Enberg for those. The patch size without documentation is now a little smaller at just over 40k. Here's a detailed list of the changes: - removed the attribute_flags in relay open and changed it to a boolean specifying either overwrite or no-overwrite mode, and removed everything referencing the attribute flags. - added a check for NULL names in relayfs_create_entry() - got rid of the unnecessary multiple labels in relay_create_buf() - some minor simplification of relay_alloc_buf() which got rid of a couple params - updated the Documentation In addition, this version (through code contained in the relay-apps tarball linked to below, not as part of the relayfs patch) tries to make it as easy as possible to create the cooperating kernel/user pieces of a typical and common type of logging application, one where kernel logging is kicked off when a user space data collection app starts and stops when the collection app exits, with the data being automatically logged to disk in between. To create this type of application, you basically just include a header file (relay-app.h, included in the relay-apps tarball) in your kernel module, define a couple of callbacks and call an initialization function, and on the user side call a single function that sets up and continuously monitors the buffers, and writes data to files as it becomes available. Channels are created when the collection app is started and destroyed when it exits, not when the kernel module is inserted, so different channel buffer sizes can be specified for each separate run via command-line options. See the README in the relay-apps tarball for details. Also included in the relay-apps tarball are a couple examples demonstrating how you can use this to create quick and dirty kernel logging/debugging applications. They are: - tprintk, short for 'tee printk', which temporarily puts a kprobe on printk() and writes a duplicate stream of printk output to a relayfs channel. This could be used anywhere there's printk() debugging code in the kernel which you'd like to exercise, but would rather not have your system logs cluttered with debugging junk. You'd probably want to kill klogd while you do this, otherwise there wouldn't be much point (since putting a kprobe on printk() doesn't change the output of printk()). I've used this method to temporarily divert the packet logging output of the iptables LOG target from the system logs to relayfs files instead, for instance. - klog, which just provides a printk-like formatted logging function on top of relayfs. Again, you can use this to keep stuff out of your system logs if used in place of printk. The example applications can be found here: http://prdownloads.sourceforge.net/dprobes/relay-apps.tar.gz?download From: Christoph Hellwig <hch@lst.de> avoid lookup_hash usage in relayfs Signed-off-by: Tom Zanussi <zanussi@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/relayfs/inode.c')
-rw-r--r--fs/relayfs/inode.c609
1 files changed, 609 insertions, 0 deletions
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
new file mode 100644
index 000000000000..0f7f88d067ad
--- /dev/null
+++ b/fs/relayfs/inode.c
@@ -0,0 +1,609 @@
1/*
2 * VFS-related code for RelayFS, a high-speed data relay filesystem.
3 *
4 * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
5 * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
6 *
7 * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pagemap.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/backing-dev.h>
19#include <linux/namei.h>
20#include <linux/poll.h>
21#include <linux/relayfs_fs.h>
22#include "relay.h"
23#include "buffers.h"
24
25#define RELAYFS_MAGIC 0xF0B4A981
26
27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count;
29static kmem_cache_t * relayfs_inode_cachep;
30
31static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34};
35
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
37 struct rchan *chan)
38{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode;
41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb);
50 if (!inode) {
51 relay_destroy_buf(buf);
52 return NULL;
53 }
54
55 inode->i_mode = mode;
56 inode->i_uid = 0;
57 inode->i_gid = 0;
58 inode->i_blksize = PAGE_CACHE_SIZE;
59 inode->i_blocks = 0;
60 inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) {
63 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations;
65 RELAYFS_I(inode)->buf = buf;
66 break;
67 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations;
69 inode->i_fop = &simple_dir_operations;
70
71 /* directory inodes start off with i_nlink == 2 (for "." entry) */
72 inode->i_nlink++;
73 break;
74 default:
75 break;
76 }
77
78 return inode;
79}
80
81/**
82 * relayfs_create_entry - create a relayfs directory or file
83 * @name: the name of the file to create
84 * @parent: parent directory
85 * @mode: mode
86 * @chan: relay channel associated with the file
87 *
88 * Returns the new dentry, NULL on failure
89 *
90 * Creates a file or directory with the specifed permissions.
91 */
92static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent,
94 int mode,
95 struct rchan *chan)
96{
97 struct dentry *d;
98 struct inode *inode;
99 int error = 0;
100
101 BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
102
103 error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
104 if (error) {
105 printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
106 return NULL;
107 }
108
109 if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
110 parent = relayfs_mount->mnt_sb->s_root;
111
112 if (!parent) {
113 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
114 return NULL;
115 }
116
117 parent = dget(parent);
118 down(&parent->d_inode->i_sem);
119 d = lookup_one_len(name, parent, strlen(name));
120 if (IS_ERR(d)) {
121 d = NULL;
122 goto release_mount;
123 }
124
125 if (d->d_inode) {
126 d = NULL;
127 goto release_mount;
128 }
129
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
131 if (!inode) {
132 d = NULL;
133 goto release_mount;
134 }
135
136 d_instantiate(d, inode);
137 dget(d); /* Extra count - pin the dentry in core */
138
139 if (S_ISDIR(mode))
140 parent->d_inode->i_nlink++;
141
142 goto exit;
143
144release_mount:
145 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
146
147exit:
148 up(&parent->d_inode->i_sem);
149 dput(parent);
150 return d;
151}
152
153/**
154 * relayfs_create_file - create a file in the relay filesystem
155 * @name: the name of the file to create
156 * @parent: parent directory
157 * @mode: mode, if not specied the default perms are used
158 * @chan: channel associated with the file
159 *
160 * Returns file dentry if successful, NULL otherwise.
161 *
162 * The file will be created user r on behalf of current user.
163 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
165 int mode, struct rchan *chan)
166{
167 if (!mode)
168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG;
170
171 return relayfs_create_entry(name, parent, mode, chan);
172}
173
174/**
175 * relayfs_create_dir - create a directory in the relay filesystem
176 * @name: the name of the directory to create
177 * @parent: parent directory, NULL if parent should be fs root
178 *
179 * Returns directory dentry if successful, NULL otherwise.
180 *
181 * The directory will be created world rwx on behalf of current user.
182 */
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL);
187}
188
189/**
190 * relayfs_remove - remove a file or directory in the relay filesystem
191 * @dentry: file or directory dentry
192 *
193 * Returns 0 if successful, negative otherwise.
194 */
195int relayfs_remove(struct dentry *dentry)
196{
197 struct dentry *parent;
198 int error = 0;
199
200 if (!dentry)
201 return -EINVAL;
202 parent = dentry->d_parent;
203 if (!parent)
204 return -EINVAL;
205
206 parent = dget(parent);
207 down(&parent->d_inode->i_sem);
208 if (dentry->d_inode) {
209 if (S_ISDIR(dentry->d_inode->i_mode))
210 error = simple_rmdir(parent->d_inode, dentry);
211 else
212 error = simple_unlink(parent->d_inode, dentry);
213 if (!error)
214 d_delete(dentry);
215 }
216 if (!error)
217 dput(dentry);
218 up(&parent->d_inode->i_sem);
219 dput(parent);
220
221 if (!error)
222 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
223
224 return error;
225}
226
227/**
228 * relayfs_remove_dir - remove a directory in the relay filesystem
229 * @dentry: directory dentry
230 *
231 * Returns 0 if successful, negative otherwise.
232 */
233int relayfs_remove_dir(struct dentry *dentry)
234{
235 return relayfs_remove(dentry);
236}
237
238/**
239 * relayfs_open - open file op for relayfs files
240 * @inode: the inode
241 * @filp: the file
242 *
243 * Increments the channel buffer refcount.
244 */
245static int relayfs_open(struct inode *inode, struct file *filp)
246{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
248 kref_get(&buf->kref);
249
250 return 0;
251}
252
253/**
254 * relayfs_mmap - mmap file op for relayfs files
255 * @filp: the file
256 * @vma: the vma describing what to map
257 *
258 * Calls upon relay_mmap_buf to map the file into user space.
259 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
261{
262 struct inode *inode = filp->f_dentry->d_inode;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
264}
265
266/**
267 * relayfs_poll - poll file op for relayfs files
268 * @filp: the file
269 * @wait: poll table
270 *
271 * Poll implemention.
272 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
274{
275 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278
279 if (buf->finalized)
280 return POLLERR;
281
282 if (filp->f_mode & FMODE_READ) {
283 poll_wait(filp, &buf->read_wait, wait);
284 if (!relay_buf_empty(buf))
285 mask |= POLLIN | POLLRDNORM;
286 }
287
288 return mask;
289}
290
291/**
292 * relayfs_release - release file op for relayfs files
293 * @inode: the inode
294 * @filp: the file
295 *
296 * Decrements the channel refcount, as the filesystem is
297 * no longer using it.
298 */
299static int relayfs_release(struct inode *inode, struct file *filp)
300{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
302 kref_put(&buf->kref, relay_remove_buf);
303
304 return 0;
305}
306
307/**
308 * relayfs_read_consume - update the consumed count for the buffer
309 */
310static void relayfs_read_consume(struct rchan_buf *buf,
311 size_t read_pos,
312 size_t bytes_consumed)
313{
314 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs;
316 size_t read_subbuf;
317
318 if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
319 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
320 buf->bytes_consumed = 0;
321 }
322
323 buf->bytes_consumed += bytes_consumed;
324 read_subbuf = read_pos / buf->chan->subbuf_size;
325 if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
326 if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
327 (buf->offset == subbuf_size))
328 return;
329 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
330 buf->bytes_consumed = 0;
331 }
332}
333
334/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available?
336 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
338{
339 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size;
341 size_t n_subbufs = buf->chan->n_subbufs;
342 size_t produced = buf->subbufs_produced % n_subbufs;
343 size_t consumed = buf->subbufs_consumed % n_subbufs;
344
345 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
346
347 if (consumed > produced) {
348 if ((produced > n_subbufs) &&
349 (produced + n_subbufs - consumed <= n_subbufs))
350 produced += n_subbufs;
351 } else if (consumed == produced) {
352 if (buf->offset > subbuf_size) {
353 produced += n_subbufs;
354 if (buf->subbufs_produced == buf->subbufs_consumed)
355 consumed += n_subbufs;
356 }
357 }
358
359 if (buf->offset > subbuf_size)
360 bytes_produced = (produced - 1) * subbuf_size + write_offset;
361 else
362 bytes_produced = produced * subbuf_size + write_offset;
363 bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
364
365 if (bytes_produced == bytes_consumed)
366 return 0;
367
368 relayfs_read_consume(buf, read_pos, 0);
369
370 return 1;
371}
372
373/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer
375 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf)
378{
379 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset;
381 size_t subbuf_size = buf->chan->subbuf_size;
382
383 write_subbuf = (buf->data - buf->start) / subbuf_size;
384 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
385 read_subbuf = read_pos / subbuf_size;
386 read_offset = read_pos % subbuf_size;
387 padding = buf->padding[read_subbuf];
388
389 if (read_subbuf == write_subbuf) {
390 if (read_offset + padding < write_offset)
391 avail = write_offset - (read_offset + padding);
392 } else
393 avail = (subbuf_size - padding) - read_offset;
394
395 return avail;
396}
397
398/**
399 * relayfs_read_start_pos - find the first available byte to read
400 *
401 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise
403 * return the original value.
404 */
405static size_t relayfs_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf)
407{
408 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size;
410 size_t n_subbufs = buf->chan->n_subbufs;
411
412 read_subbuf = read_pos / subbuf_size;
413 padding = buf->padding[read_subbuf];
414 padding_start = (read_subbuf + 1) * subbuf_size - padding;
415 padding_end = (read_subbuf + 1) * subbuf_size;
416 if (read_pos >= padding_start && read_pos < padding_end) {
417 read_subbuf = (read_subbuf + 1) % n_subbufs;
418 read_pos = read_subbuf * subbuf_size;
419 }
420
421 return read_pos;
422}
423
424/**
425 * relayfs_read_end_pos - return the new read position
426 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos,
429 size_t count)
430{
431 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size;
433 size_t n_subbufs = buf->chan->n_subbufs;
434
435 read_subbuf = read_pos / subbuf_size;
436 padding = buf->padding[read_subbuf];
437 if (read_pos % subbuf_size + count + padding == subbuf_size)
438 end_pos = (read_subbuf + 1) * subbuf_size;
439 else
440 end_pos = read_pos + count;
441 if (end_pos >= subbuf_size * n_subbufs)
442 end_pos = 0;
443
444 return end_pos;
445}
446
447/**
448 * relayfs_read - read file op for relayfs files
449 * @filp: the file
450 * @buffer: the userspace buffer
451 * @count: number of bytes to read
452 * @ppos: position to read from
453 *
454 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller.
456 */
457static ssize_t relayfs_read(struct file *filp,
458 char __user *buffer,
459 size_t count,
460 loff_t *ppos)
461{
462 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail;
465 ssize_t ret = 0;
466 void *from;
467
468 down(&inode->i_sem);
469 if(!relayfs_read_avail(buf, *ppos))
470 goto out;
471
472 read_start = relayfs_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf);
474 if (!avail)
475 goto out;
476
477 from = buf->start + read_start;
478 ret = count = min(count, avail);
479 if (copy_to_user(buffer, from, count)) {
480 ret = -EFAULT;
481 goto out;
482 }
483 relayfs_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count);
485out:
486 up(&inode->i_sem);
487 return ret;
488}
489
490/**
491 * relayfs alloc_inode() implementation
492 */
493static struct inode *relayfs_alloc_inode(struct super_block *sb)
494{
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek,
527 .release = relayfs_release,
528};
529
530static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535};
536
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
538{
539 struct inode *inode;
540 struct dentry *root;
541 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
542
543 sb->s_blocksize = PAGE_CACHE_SIZE;
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL);
548
549 if (!inode)
550 return -ENOMEM;
551
552 root = d_alloc_root(inode);
553 if (!root) {
554 iput(inode);
555 return -ENOMEM;
556 }
557 sb->s_root = root;
558
559 return 0;
560}
561
562static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
563 int flags, const char *dev_name,
564 void *data)
565{
566 return get_sb_single(fs_type, flags, data, relayfs_fill_super);
567}
568
569static struct file_system_type relayfs_fs_type = {
570 .owner = THIS_MODULE,
571 .name = "relayfs",
572 .get_sb = relayfs_get_sb,
573 .kill_sb = kill_litter_super,
574};
575
576static int __init init_relayfs_fs(void)
577{
578 int err;
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591}
592
593static void __exit exit_relayfs_fs(void)
594{
595 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597}
598
599module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs)
601
602EXPORT_SYMBOL_GPL(relayfs_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir);
605
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem");
608MODULE_LICENSE("GPL");
609