aboutsummaryrefslogtreecommitdiffstats
path: root/fs/relayfs
diff options
context:
space:
mode:
authorTom Zanussi <zanussi@us.ibm.com>2005-09-06 18:16:30 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-07 19:57:18 -0400
commite82894f84dbba130ab46c97748c03647f8204f92 (patch)
treedbf20825db44037f0db5d0696d43457292c546c3 /fs/relayfs
parent8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 (diff)
[PATCH] relayfs
Here's the latest version of relayfs, against linux-2.6.11-mm2. I'm hoping you'll consider putting this version back into your tree - the previous rounds of comment seem to have shaken out all the API issues and the number of comments on the code itself have also steadily dwindled. This patch is essentially the same as the relayfs redux part 5 patch, with some minor changes based on reviewer comments. Thanks again to Pekka Enberg for those. The patch size without documentation is now a little smaller at just over 40k. Here's a detailed list of the changes: - removed the attribute_flags in relay open and changed it to a boolean specifying either overwrite or no-overwrite mode, and removed everything referencing the attribute flags. - added a check for NULL names in relayfs_create_entry() - got rid of the unnecessary multiple labels in relay_create_buf() - some minor simplification of relay_alloc_buf() which got rid of a couple params - updated the Documentation In addition, this version (through code contained in the relay-apps tarball linked to below, not as part of the relayfs patch) tries to make it as easy as possible to create the cooperating kernel/user pieces of a typical and common type of logging application, one where kernel logging is kicked off when a user space data collection app starts and stops when the collection app exits, with the data being automatically logged to disk in between. To create this type of application, you basically just include a header file (relay-app.h, included in the relay-apps tarball) in your kernel module, define a couple of callbacks and call an initialization function, and on the user side call a single function that sets up and continuously monitors the buffers, and writes data to files as it becomes available. 
Channels are created when the collection app is started and destroyed when it exits, not when the kernel module is inserted, so different channel buffer sizes can be specified for each separate run via command-line options. See the README in the relay-apps tarball for details. Also included in the relay-apps tarball are a couple examples demonstrating how you can use this to create quick and dirty kernel logging/debugging applications. They are: - tprintk, short for 'tee printk', which temporarily puts a kprobe on printk() and writes a duplicate stream of printk output to a relayfs channel. This could be used anywhere there's printk() debugging code in the kernel which you'd like to exercise, but would rather not have your system logs cluttered with debugging junk. You'd probably want to kill klogd while you do this, otherwise there wouldn't be much point (since putting a kprobe on printk() doesn't change the output of printk()). I've used this method to temporarily divert the packet logging output of the iptables LOG target from the system logs to relayfs files instead, for instance. - klog, which just provides a printk-like formatted logging function on top of relayfs. Again, you can use this to keep stuff out of your system logs if used in place of printk. The example applications can be found here: http://prdownloads.sourceforge.net/dprobes/relay-apps.tar.gz?download From: Christoph Hellwig <hch@lst.de> avoid lookup_hash usage in relayfs Signed-off-by: Tom Zanussi <zanussi@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/relayfs')
-rw-r--r--fs/relayfs/Makefile4
-rw-r--r--fs/relayfs/buffers.c189
-rw-r--r--fs/relayfs/buffers.h12
-rw-r--r--fs/relayfs/inode.c609
-rw-r--r--fs/relayfs/relay.c431
-rw-r--r--fs/relayfs/relay.h12
6 files changed, 1257 insertions, 0 deletions
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile
new file mode 100644
index 000000000000..e76e182cdb38
--- /dev/null
+++ b/fs/relayfs/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_RELAYFS_FS) += relayfs.o
2
3relayfs-y := relay.o inode.o buffers.o
4
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
new file mode 100644
index 000000000000..2aa8e2719999
--- /dev/null
+++ b/fs/relayfs/buffers.c
@@ -0,0 +1,189 @@
1/*
2 * RelayFS buffer management code.
3 *
4 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
5 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
6 *
7 * This file is released under the GPL.
8 */
9
10#include <linux/module.h>
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/relayfs_fs.h>
14#include "relay.h"
15#include "buffers.h"
16
17/*
18 * close() vm_op implementation for relayfs file mapping.
19 */
20static void relay_file_mmap_close(struct vm_area_struct *vma)
21{
22 struct rchan_buf *buf = vma->vm_private_data;
23 buf->chan->cb->buf_unmapped(buf, vma->vm_file);
24}
25
26/*
27 * nopage() vm_op implementation for relayfs file mapping.
28 */
29static struct page *relay_buf_nopage(struct vm_area_struct *vma,
30 unsigned long address,
31 int *type)
32{
33 struct page *page;
34 struct rchan_buf *buf = vma->vm_private_data;
35 unsigned long offset = address - vma->vm_start;
36
37 if (address > vma->vm_end)
38 return NOPAGE_SIGBUS; /* Disallow mremap */
39 if (!buf)
40 return NOPAGE_OOM;
41
42 page = vmalloc_to_page(buf->start + offset);
43 if (!page)
44 return NOPAGE_OOM;
45 get_page(page);
46
47 if (type)
48 *type = VM_FAULT_MINOR;
49
50 return page;
51}
52
53/*
54 * vm_ops for relay file mappings.
55 */
56static struct vm_operations_struct relay_file_mmap_ops = {
57 .nopage = relay_buf_nopage,
58 .close = relay_file_mmap_close,
59};
60
61/**
62 * relay_mmap_buf: - mmap channel buffer to process address space
63 * @buf: relay channel buffer
64 * @vma: vm_area_struct describing memory to be mapped
65 *
66 * Returns 0 if ok, negative on error
67 *
68 * Caller should already have grabbed mmap_sem.
69 */
70int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
71{
72 unsigned long length = vma->vm_end - vma->vm_start;
73 struct file *filp = vma->vm_file;
74
75 if (!buf)
76 return -EBADF;
77
78 if (length != (unsigned long)buf->chan->alloc_size)
79 return -EINVAL;
80
81 vma->vm_ops = &relay_file_mmap_ops;
82 vma->vm_private_data = buf;
83 buf->chan->cb->buf_mapped(buf, filp);
84
85 return 0;
86}
87
88/**
89 * relay_alloc_buf - allocate a channel buffer
90 * @buf: the buffer struct
91 * @size: total size of the buffer
92 *
93 * Returns a pointer to the resulting buffer, NULL if unsuccessful
94 */
95static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
96{
97 void *mem;
98 unsigned int i, j, n_pages;
99
100 size = PAGE_ALIGN(size);
101 n_pages = size >> PAGE_SHIFT;
102
103 buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
104 if (!buf->page_array)
105 return NULL;
106
107 for (i = 0; i < n_pages; i++) {
108 buf->page_array[i] = alloc_page(GFP_KERNEL);
109 if (unlikely(!buf->page_array[i]))
110 goto depopulate;
111 }
112 mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL);
113 if (!mem)
114 goto depopulate;
115
116 memset(mem, 0, size);
117 buf->page_count = n_pages;
118 return mem;
119
120depopulate:
121 for (j = 0; j < i; j++)
122 __free_page(buf->page_array[j]);
123 kfree(buf->page_array);
124 return NULL;
125}
126
127/**
128 * relay_create_buf - allocate and initialize a channel buffer
129 * @alloc_size: size of the buffer to allocate
130 * @n_subbufs: number of sub-buffers in the channel
131 *
132 * Returns channel buffer if successful, NULL otherwise
133 */
134struct rchan_buf *relay_create_buf(struct rchan *chan)
135{
136 struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
137 if (!buf)
138 return NULL;
139
140 buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
141 if (!buf->padding)
142 goto free_buf;
143
144 buf->start = relay_alloc_buf(buf, chan->alloc_size);
145 if (!buf->start)
146 goto free_buf;
147
148 buf->chan = chan;
149 kref_get(&buf->chan->kref);
150 return buf;
151
152free_buf:
153 kfree(buf->padding);
154 kfree(buf);
155 return NULL;
156}
157
158/**
159 * relay_destroy_buf - destroy an rchan_buf struct and associated buffer
160 * @buf: the buffer struct
161 */
162void relay_destroy_buf(struct rchan_buf *buf)
163{
164 struct rchan *chan = buf->chan;
165 unsigned int i;
166
167 if (likely(buf->start)) {
168 vunmap(buf->start);
169 for (i = 0; i < buf->page_count; i++)
170 __free_page(buf->page_array[i]);
171 kfree(buf->page_array);
172 }
173 kfree(buf->padding);
174 kfree(buf);
175 kref_put(&chan->kref, relay_destroy_channel);
176}
177
178/**
179 * relay_remove_buf - remove a channel buffer
180 *
181 * Removes the file from the relayfs fileystem, which also frees the
182 * rchan_buf_struct and the channel buffer. Should only be called from
183 * kref_put().
184 */
185void relay_remove_buf(struct kref *kref)
186{
187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
188 relayfs_remove(buf->dentry);
189}
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h
new file mode 100644
index 000000000000..37a12493f641
--- /dev/null
+++ b/fs/relayfs/buffers.h
@@ -0,0 +1,12 @@
1#ifndef _BUFFERS_H
2#define _BUFFERS_H
3
/*
 * This inspired by rtai/shmem: for x >= 1, round x up to a multiple of
 * PAGE_SIZE.  The whole expansion is parenthesized so the macro can be
 * used safely inside a larger expression.
 */
#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
6
7extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
8extern struct rchan_buf *relay_create_buf(struct rchan *chan);
9extern void relay_destroy_buf(struct rchan_buf *buf);
10extern void relay_remove_buf(struct kref *kref);
11
12#endif/* _BUFFERS_H */
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
new file mode 100644
index 000000000000..0f7f88d067ad
--- /dev/null
+++ b/fs/relayfs/inode.c
@@ -0,0 +1,609 @@
1/*
2 * VFS-related code for RelayFS, a high-speed data relay filesystem.
3 *
4 * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
5 * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
6 *
7 * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pagemap.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/backing-dev.h>
19#include <linux/namei.h>
20#include <linux/poll.h>
21#include <linux/relayfs_fs.h>
22#include "relay.h"
23#include "buffers.h"
24
25#define RELAYFS_MAGIC 0xF0B4A981
26
/* Mount and pin count passed to simple_pin_fs()/simple_release_fs(). */
27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count;
/* Slab cache from which relayfs_inode_info objects are allocated. */
29static kmem_cache_t * relayfs_inode_cachep;
30
31static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34};
35
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
37 struct rchan *chan)
38{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode;
41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb);
50 if (!inode) {
51 relay_destroy_buf(buf);
52 return NULL;
53 }
54
55 inode->i_mode = mode;
56 inode->i_uid = 0;
57 inode->i_gid = 0;
58 inode->i_blksize = PAGE_CACHE_SIZE;
59 inode->i_blocks = 0;
60 inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) {
63 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations;
65 RELAYFS_I(inode)->buf = buf;
66 break;
67 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations;
69 inode->i_fop = &simple_dir_operations;
70
71 /* directory inodes start off with i_nlink == 2 (for "." entry) */
72 inode->i_nlink++;
73 break;
74 default:
75 break;
76 }
77
78 return inode;
79}
80
81/**
82 * relayfs_create_entry - create a relayfs directory or file
83 * @name: the name of the file to create
84 * @parent: parent directory
85 * @mode: mode
86 * @chan: relay channel associated with the file
87 *
88 * Returns the new dentry, NULL on failure
89 *
90 * Creates a file or directory with the specifed permissions.
91 */
92static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent,
94 int mode,
95 struct rchan *chan)
96{
97 struct dentry *d;
98 struct inode *inode;
99 int error = 0;
100
101 BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
102
103 error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
104 if (error) {
105 printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
106 return NULL;
107 }
108
109 if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
110 parent = relayfs_mount->mnt_sb->s_root;
111
112 if (!parent) {
113 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
114 return NULL;
115 }
116
117 parent = dget(parent);
118 down(&parent->d_inode->i_sem);
119 d = lookup_one_len(name, parent, strlen(name));
120 if (IS_ERR(d)) {
121 d = NULL;
122 goto release_mount;
123 }
124
125 if (d->d_inode) {
126 d = NULL;
127 goto release_mount;
128 }
129
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
131 if (!inode) {
132 d = NULL;
133 goto release_mount;
134 }
135
136 d_instantiate(d, inode);
137 dget(d); /* Extra count - pin the dentry in core */
138
139 if (S_ISDIR(mode))
140 parent->d_inode->i_nlink++;
141
142 goto exit;
143
144release_mount:
145 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
146
147exit:
148 up(&parent->d_inode->i_sem);
149 dput(parent);
150 return d;
151}
152
153/**
154 * relayfs_create_file - create a file in the relay filesystem
155 * @name: the name of the file to create
156 * @parent: parent directory
157 * @mode: mode, if not specied the default perms are used
158 * @chan: channel associated with the file
159 *
160 * Returns file dentry if successful, NULL otherwise.
161 *
162 * The file will be created user r on behalf of current user.
163 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
165 int mode, struct rchan *chan)
166{
167 if (!mode)
168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG;
170
171 return relayfs_create_entry(name, parent, mode, chan);
172}
173
174/**
175 * relayfs_create_dir - create a directory in the relay filesystem
176 * @name: the name of the directory to create
177 * @parent: parent directory, NULL if parent should be fs root
178 *
179 * Returns directory dentry if successful, NULL otherwise.
180 *
181 * The directory will be created world rwx on behalf of current user.
182 */
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL);
187}
188
189/**
190 * relayfs_remove - remove a file or directory in the relay filesystem
191 * @dentry: file or directory dentry
192 *
193 * Returns 0 if successful, negative otherwise.
194 */
195int relayfs_remove(struct dentry *dentry)
196{
197 struct dentry *parent;
198 int error = 0;
199
200 if (!dentry)
201 return -EINVAL;
202 parent = dentry->d_parent;
203 if (!parent)
204 return -EINVAL;
205
206 parent = dget(parent);
207 down(&parent->d_inode->i_sem);
208 if (dentry->d_inode) {
209 if (S_ISDIR(dentry->d_inode->i_mode))
210 error = simple_rmdir(parent->d_inode, dentry);
211 else
212 error = simple_unlink(parent->d_inode, dentry);
213 if (!error)
214 d_delete(dentry);
215 }
216 if (!error)
217 dput(dentry);
218 up(&parent->d_inode->i_sem);
219 dput(parent);
220
221 if (!error)
222 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
223
224 return error;
225}
226
/**
 *	relayfs_remove_dir - remove a directory in the relay filesystem
 *	@dentry: directory dentry
 *
 *	Thin wrapper around relayfs_remove().
 *
 *	Returns 0 if successful, negative otherwise.
 */
int relayfs_remove_dir(struct dentry *dentry)
{
	int rc = relayfs_remove(dentry);

	return rc;
}
237
238/**
239 * relayfs_open - open file op for relayfs files
240 * @inode: the inode
241 * @filp: the file
242 *
243 * Increments the channel buffer refcount.
244 */
245static int relayfs_open(struct inode *inode, struct file *filp)
246{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
248 kref_get(&buf->kref);
249
250 return 0;
251}
252
253/**
254 * relayfs_mmap - mmap file op for relayfs files
255 * @filp: the file
256 * @vma: the vma describing what to map
257 *
258 * Calls upon relay_mmap_buf to map the file into user space.
259 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
261{
262 struct inode *inode = filp->f_dentry->d_inode;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
264}
265
266/**
267 * relayfs_poll - poll file op for relayfs files
268 * @filp: the file
269 * @wait: poll table
270 *
271 * Poll implemention.
272 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
274{
275 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278
279 if (buf->finalized)
280 return POLLERR;
281
282 if (filp->f_mode & FMODE_READ) {
283 poll_wait(filp, &buf->read_wait, wait);
284 if (!relay_buf_empty(buf))
285 mask |= POLLIN | POLLRDNORM;
286 }
287
288 return mask;
289}
290
291/**
292 * relayfs_release - release file op for relayfs files
293 * @inode: the inode
294 * @filp: the file
295 *
296 * Decrements the channel refcount, as the filesystem is
297 * no longer using it.
298 */
299static int relayfs_release(struct inode *inode, struct file *filp)
300{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
302 kref_put(&buf->kref, relay_remove_buf);
303
304 return 0;
305}
306
307/**
308 * relayfs_read_consume - update the consumed count for the buffer
 * @buf: channel buffer being read
 * @read_pos: byte position in the buffer the read started from; selects
 *            the sub-buffer whose padding is taken into account
 * @bytes_consumed: number of bytes to add to buf->bytes_consumed
 *
 * Reports a sub-buffer as consumed via relay_subbufs_consumed() whenever
 * the running byte count shows that a whole sub-buffer has been read.
309 */
310static void relayfs_read_consume(struct rchan_buf *buf,
311 size_t read_pos,
312 size_t bytes_consumed)
313{
314 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs;
316 size_t read_subbuf;
317
 /* Adding these bytes would exceed one sub-buffer: report it consumed
  * and restart the per-sub-buffer byte count. */
318 if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
319 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
320 buf->bytes_consumed = 0;
321 }
322
323 buf->bytes_consumed += bytes_consumed;
324 read_subbuf = read_pos / buf->chan->subbuf_size;
 /* Bytes read plus this sub-buffer's padding fill the sub-buffer exactly. */
325 if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
 /* Skip the report when this is the sub-buffer indexed by
  * subbufs_produced and buf->offset equals subbuf_size. */
326 if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
327 (buf->offset == subbuf_size))
328 return;
329 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
330 buf->bytes_consumed = 0;
331 }
332}
333
334/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available?
 * @buf: channel buffer being read
 * @read_pos: current read position, passed on to relayfs_read_consume()
 *
 * Compares a produced byte count with a consumed byte count, both derived
 * from the sub-buffer counters taken modulo n_subbufs.  Returns 0 when
 * they are equal; otherwise calls relayfs_read_consume() with a zero byte
 * count and returns 1.
336 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
338{
339 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size;
341 size_t n_subbufs = buf->chan->n_subbufs;
342 size_t produced = buf->subbufs_produced % n_subbufs;
343 size_t consumed = buf->subbufs_consumed % n_subbufs;
344
 /* Clamp the write offset to the sub-buffer size. */
345 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
346
347 if (consumed > produced) {
 /* NOTE(review): produced is a remainder modulo n_subbufs, so
  * (produced > n_subbufs) looks like it can never be true - confirm. */
348 if ((produced > n_subbufs) &&
349 (produced + n_subbufs - consumed <= n_subbufs))
350 produced += n_subbufs;
351 } else if (consumed == produced) {
352 if (buf->offset > subbuf_size) {
353 produced += n_subbufs;
354 if (buf->subbufs_produced == buf->subbufs_consumed)
355 consumed += n_subbufs;
356 }
357 }
358
359 if (buf->offset > subbuf_size)
360 bytes_produced = (produced - 1) * subbuf_size + write_offset;
361 else
362 bytes_produced = produced * subbuf_size + write_offset;
363 bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
364
365 if (bytes_produced == bytes_consumed)
366 return 0;
367
 /* Zero-byte consume: lets the consumed accounting advance if needed. */
368 relayfs_read_consume(buf, read_pos, 0);
369
370 return 1;
371}
372
373/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer
375 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf)
378{
379 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset;
381 size_t subbuf_size = buf->chan->subbuf_size;
382
383 write_subbuf = (buf->data - buf->start) / subbuf_size;
384 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
385 read_subbuf = read_pos / subbuf_size;
386 read_offset = read_pos % subbuf_size;
387 padding = buf->padding[read_subbuf];
388
389 if (read_subbuf == write_subbuf) {
390 if (read_offset + padding < write_offset)
391 avail = write_offset - (read_offset + padding);
392 } else
393 avail = (subbuf_size - padding) - read_offset;
394
395 return avail;
396}
397
398/**
399 * relayfs_read_start_pos - find the first available byte to read
400 *
401 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise
403 * return the original value.
404 */
405static size_t relayfs_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf)
407{
408 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size;
410 size_t n_subbufs = buf->chan->n_subbufs;
411
412 read_subbuf = read_pos / subbuf_size;
413 padding = buf->padding[read_subbuf];
414 padding_start = (read_subbuf + 1) * subbuf_size - padding;
415 padding_end = (read_subbuf + 1) * subbuf_size;
416 if (read_pos >= padding_start && read_pos < padding_end) {
417 read_subbuf = (read_subbuf + 1) % n_subbufs;
418 read_pos = read_subbuf * subbuf_size;
419 }
420
421 return read_pos;
422}
423
424/**
425 * relayfs_read_end_pos - return the new read position
426 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos,
429 size_t count)
430{
431 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size;
433 size_t n_subbufs = buf->chan->n_subbufs;
434
435 read_subbuf = read_pos / subbuf_size;
436 padding = buf->padding[read_subbuf];
437 if (read_pos % subbuf_size + count + padding == subbuf_size)
438 end_pos = (read_subbuf + 1) * subbuf_size;
439 else
440 end_pos = read_pos + count;
441 if (end_pos >= subbuf_size * n_subbufs)
442 end_pos = 0;
443
444 return end_pos;
445}
446
447/**
448 * relayfs_read - read file op for relayfs files
449 * @filp: the file
450 * @buffer: the userspace buffer
451 * @count: number of bytes to read
452 * @ppos: position to read from
453 *
454 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller.
456 */
457static ssize_t relayfs_read(struct file *filp,
458 char __user *buffer,
459 size_t count,
460 loff_t *ppos)
461{
462 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail;
465 ssize_t ret = 0;
466 void *from;
467
468 down(&inode->i_sem);
469 if(!relayfs_read_avail(buf, *ppos))
470 goto out;
471
472 read_start = relayfs_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf);
474 if (!avail)
475 goto out;
476
477 from = buf->start + read_start;
478 ret = count = min(count, avail);
479 if (copy_to_user(buffer, from, count)) {
480 ret = -EFAULT;
481 goto out;
482 }
483 relayfs_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count);
485out:
486 up(&inode->i_sem);
487 return ret;
488}
489
490/**
491 * relayfs alloc_inode() implementation
492 */
493static struct inode *relayfs_alloc_inode(struct super_block *sb)
494{
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek,
527 .release = relayfs_release,
528};
529
530static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535};
536
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
538{
539 struct inode *inode;
540 struct dentry *root;
541 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
542
543 sb->s_blocksize = PAGE_CACHE_SIZE;
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL);
548
549 if (!inode)
550 return -ENOMEM;
551
552 root = d_alloc_root(inode);
553 if (!root) {
554 iput(inode);
555 return -ENOMEM;
556 }
557 sb->s_root = root;
558
559 return 0;
560}
561
562static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
563 int flags, const char *dev_name,
564 void *data)
565{
566 return get_sb_single(fs_type, flags, data, relayfs_fill_super);
567}
568
569static struct file_system_type relayfs_fs_type = {
570 .owner = THIS_MODULE,
571 .name = "relayfs",
572 .get_sb = relayfs_get_sb,
573 .kill_sb = kill_litter_super,
574};
575
576static int __init init_relayfs_fs(void)
577{
578 int err;
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591}
592
593static void __exit exit_relayfs_fs(void)
594{
595 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597}
598
599module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs)
601
602EXPORT_SYMBOL_GPL(relayfs_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir);
605
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem");
608MODULE_LICENSE("GPL");
609
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
new file mode 100644
index 000000000000..16446a15c96d
--- /dev/null
+++ b/fs/relayfs/relay.c
@@ -0,0 +1,431 @@
1/*
2 * Public API and common code for RelayFS.
3 *
4 * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
5 *
6 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
7 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/errno.h>
13#include <linux/stddef.h>
14#include <linux/slab.h>
15#include <linux/module.h>
16#include <linux/string.h>
17#include <linux/relayfs_fs.h>
18#include "relay.h"
19#include "buffers.h"
20
21/**
22 * relay_buf_empty - boolean, is the channel buffer empty?
23 * @buf: channel buffer
24 *
25 * Returns 1 if the buffer is empty, 0 otherwise.
26 */
27int relay_buf_empty(struct rchan_buf *buf)
28{
29 return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
30}
31
32/**
33 * relay_buf_full - boolean, is the channel buffer full?
34 * @buf: channel buffer
35 *
36 * Returns 1 if the buffer is full, 0 otherwise.
37 */
38int relay_buf_full(struct rchan_buf *buf)
39{
40 size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
41 return (ready >= buf->chan->n_subbufs) ? 1 : 0;
42}
43
44/*
45 * High-level relayfs kernel API and associated functions.
46 */
47
48/*
49 * rchan_callback implementations defining default channel behavior. Used
50 * in place of corresponding NULL values in client callback struct.
51 */
52
53/*
54 * subbuf_start() default callback. Does nothing.
55 */
56static int subbuf_start_default_callback (struct rchan_buf *buf,
57 void *subbuf,
58 void *prev_subbuf,
59 size_t prev_padding)
60{
61 if (relay_buf_full(buf))
62 return 0;
63
64 return 1;
65}
66
/*
 * buf_mapped() default callback.  Intentionally empty: it is installed
 * when the client supplies no buf_mapped handler of its own.
 */
static void buf_mapped_default_callback(struct rchan_buf *buf,
					struct file *filp)
{
	/* nothing to do */
}
74
/*
 * buf_unmapped() default callback.  Intentionally empty: it is installed
 * when the client supplies no buf_unmapped handler of its own.
 */
static void buf_unmapped_default_callback(struct rchan_buf *buf,
					  struct file *filp)
{
	/* nothing to do */
}
82
83/* relay channel default callbacks */
84static struct rchan_callbacks default_channel_callbacks = {
85 .subbuf_start = subbuf_start_default_callback,
86 .buf_mapped = buf_mapped_default_callback,
87 .buf_unmapped = buf_unmapped_default_callback,
88};
89
90/**
91 * wakeup_readers - wake up readers waiting on a channel
92 * @private: the channel buffer
93 *
94 * This is the work function used to defer reader waking. The
95 * reason waking is deferred is that calling directly from write
96 * causes problems if you're writing from say the scheduler.
97 */
98static void wakeup_readers(void *private)
99{
100 struct rchan_buf *buf = private;
101 wake_up_interruptible(&buf->read_wait);
102}
103
104/**
105 * __relay_reset - reset a channel buffer
106 * @buf: the channel buffer
107 * @init: 1 if this is a first-time initialization
108 *
109 * See relay_reset for description of effect.
110 */
111static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
112{
113 size_t i;
114
115 if (init) {
116 init_waitqueue_head(&buf->read_wait);
117 kref_init(&buf->kref);
118 INIT_WORK(&buf->wake_readers, NULL, NULL);
119 } else {
120 cancel_delayed_work(&buf->wake_readers);
121 flush_scheduled_work();
122 }
123
124 buf->subbufs_produced = 0;
125 buf->subbufs_consumed = 0;
126 buf->bytes_consumed = 0;
127 buf->finalized = 0;
128 buf->data = buf->start;
129 buf->offset = 0;
130
131 for (i = 0; i < buf->chan->n_subbufs; i++)
132 buf->padding[i] = 0;
133
134 buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
135}
136
137/**
138 * relay_reset - reset the channel
139 * @chan: the channel
140 *
141 * This has the effect of erasing all data from all channel buffers
142 * and restarting the channel in its initial state. The buffers
143 * are not freed, so any mappings are still in effect.
144 *
145 * NOTE: Care should be taken that the channel isn't actually
146 * being used by anything when this call is made.
147 */
148void relay_reset(struct rchan *chan)
149{
150 unsigned int i;
151
152 if (!chan)
153 return;
154
155 for (i = 0; i < NR_CPUS; i++) {
156 if (!chan->buf[i])
157 continue;
158 __relay_reset(chan->buf[i], 0);
159 }
160}
161
162/**
163 * relay_open_buf - create a new channel buffer in relayfs
164 *
165 * Internal - used by relay_open().
166 */
167static struct rchan_buf *relay_open_buf(struct rchan *chan,
168 const char *filename,
169 struct dentry *parent)
170{
171 struct rchan_buf *buf;
172 struct dentry *dentry;
173
174 /* Create file in fs */
175 dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
176 if (!dentry)
177 return NULL;
178
179 buf = RELAYFS_I(dentry->d_inode)->buf;
180 buf->dentry = dentry;
181 __relay_reset(buf, 1);
182
183 return buf;
184}
185
186/**
187 * relay_close_buf - close a channel buffer
188 * @buf: channel buffer
189 *
190 * Marks the buffer finalized and restores the default callbacks.
191 * The channel buffer and channel buffer data structure are then freed
192 * automatically when the last reference is given up.
193 */
194static inline void relay_close_buf(struct rchan_buf *buf)
195{
196 buf->finalized = 1;
197 buf->chan->cb = &default_channel_callbacks;
198 cancel_delayed_work(&buf->wake_readers);
199 flush_scheduled_work();
200 kref_put(&buf->kref, relay_remove_buf);
201}
202
203static inline void setup_callbacks(struct rchan *chan,
204 struct rchan_callbacks *cb)
205{
206 if (!cb) {
207 chan->cb = &default_channel_callbacks;
208 return;
209 }
210
211 if (!cb->subbuf_start)
212 cb->subbuf_start = subbuf_start_default_callback;
213 if (!cb->buf_mapped)
214 cb->buf_mapped = buf_mapped_default_callback;
215 if (!cb->buf_unmapped)
216 cb->buf_unmapped = buf_unmapped_default_callback;
217 chan->cb = cb;
218}
219
220/**
221 * relay_open - create a new relayfs channel
222 * @base_filename: base name of files to create
223 * @parent: dentry of parent directory, NULL for root directory
224 * @subbuf_size: size of sub-buffers
225 * @n_subbufs: number of sub-buffers
226 * @cb: client callback functions
227 *
228 * Returns channel pointer if successful, NULL otherwise.
229 *
230 * Creates a channel buffer for each cpu using the sizes and
231 * attributes specified. The created channel buffer files
232 * will be named base_filename0...base_filenameN-1. File
233 * permissions will be S_IRUSR.
234 */
235struct rchan *relay_open(const char *base_filename,
236 struct dentry *parent,
237 size_t subbuf_size,
238 size_t n_subbufs,
239 struct rchan_callbacks *cb)
240{
241 unsigned int i;
242 struct rchan *chan;
243 char *tmpname;
244
245 if (!base_filename)
246 return NULL;
247
248 if (!(subbuf_size && n_subbufs))
249 return NULL;
250
251 chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
252 if (!chan)
253 return NULL;
254
255 chan->version = RELAYFS_CHANNEL_VERSION;
256 chan->n_subbufs = n_subbufs;
257 chan->subbuf_size = subbuf_size;
258 chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
259 setup_callbacks(chan, cb);
260 kref_init(&chan->kref);
261
262 tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
263 if (!tmpname)
264 goto free_chan;
265
266 for_each_online_cpu(i) {
267 sprintf(tmpname, "%s%d", base_filename, i);
268 chan->buf[i] = relay_open_buf(chan, tmpname, parent);
269 chan->buf[i]->cpu = i;
270 if (!chan->buf[i])
271 goto free_bufs;
272 }
273
274 kfree(tmpname);
275 return chan;
276
277free_bufs:
278 for (i = 0; i < NR_CPUS; i++) {
279 if (!chan->buf[i])
280 break;
281 relay_close_buf(chan->buf[i]);
282 }
283 kfree(tmpname);
284
285free_chan:
286 kref_put(&chan->kref, relay_destroy_channel);
287 return NULL;
288}
289
290/**
291 * relay_switch_subbuf - switch to a new sub-buffer
292 * @buf: channel buffer
293 * @length: size of current event
294 *
295 * Returns either the length passed in or 0 if full.
296
297 * Performs sub-buffer-switch tasks such as invoking callbacks,
298 * updating padding counts, waking up readers, etc.
299 */
300size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
301{
302 void *old, *new;
303 size_t old_subbuf, new_subbuf;
304
305 if (unlikely(length > buf->chan->subbuf_size))
306 goto toobig;
307
308 if (buf->offset != buf->chan->subbuf_size + 1) {
309 buf->prev_padding = buf->chan->subbuf_size - buf->offset;
310 old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
311 buf->padding[old_subbuf] = buf->prev_padding;
312 buf->subbufs_produced++;
313 if (waitqueue_active(&buf->read_wait)) {
314 PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
315 schedule_delayed_work(&buf->wake_readers, 1);
316 }
317 }
318
319 old = buf->data;
320 new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
321 new = buf->start + new_subbuf * buf->chan->subbuf_size;
322 buf->offset = 0;
323 if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
324 buf->offset = buf->chan->subbuf_size + 1;
325 return 0;
326 }
327 buf->data = new;
328 buf->padding[new_subbuf] = 0;
329
330 if (unlikely(length + buf->offset > buf->chan->subbuf_size))
331 goto toobig;
332
333 return length;
334
335toobig:
336 printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
337 WARN_ON(1);
338 return 0;
339}
340
341/**
342 * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
343 * @chan: the channel
344 * @cpu: the cpu associated with the channel buffer to update
345 * @subbufs_consumed: number of sub-buffers to add to current buf's count
346 *
347 * Adds to the channel buffer's consumed sub-buffer count.
348 * subbufs_consumed should be the number of sub-buffers newly consumed,
349 * not the total consumed.
350 *
351 * NOTE: kernel clients don't need to call this function if the channel
352 * mode is 'overwrite'.
353 */
354void relay_subbufs_consumed(struct rchan *chan,
355 unsigned int cpu,
356 size_t subbufs_consumed)
357{
358 struct rchan_buf *buf;
359
360 if (!chan)
361 return;
362
363 if (cpu >= NR_CPUS || !chan->buf[cpu])
364 return;
365
366 buf = chan->buf[cpu];
367 buf->subbufs_consumed += subbufs_consumed;
368 if (buf->subbufs_consumed > buf->subbufs_produced)
369 buf->subbufs_consumed = buf->subbufs_produced;
370}
371
372/**
373 * relay_destroy_channel - free the channel struct
374 *
375 * Should only be called from kref_put().
376 */
377void relay_destroy_channel(struct kref *kref)
378{
379 struct rchan *chan = container_of(kref, struct rchan, kref);
380 kfree(chan);
381}
382
383/**
384 * relay_close - close the channel
385 * @chan: the channel
386 *
387 * Closes all channel buffers and frees the channel.
388 */
389void relay_close(struct rchan *chan)
390{
391 unsigned int i;
392
393 if (!chan)
394 return;
395
396 for (i = 0; i < NR_CPUS; i++) {
397 if (!chan->buf[i])
398 continue;
399 relay_close_buf(chan->buf[i]);
400 }
401
402 kref_put(&chan->kref, relay_destroy_channel);
403}
404
405/**
406 * relay_flush - close the channel
407 * @chan: the channel
408 *
409 * Flushes all channel buffers i.e. forces buffer switch.
410 */
411void relay_flush(struct rchan *chan)
412{
413 unsigned int i;
414
415 if (!chan)
416 return;
417
418 for (i = 0; i < NR_CPUS; i++) {
419 if (!chan->buf[i])
420 continue;
421 relay_switch_subbuf(chan->buf[i], 0);
422 }
423}
424
/* Public relay API, available to GPL-compatible modules. */
EXPORT_SYMBOL_GPL(relay_buf_full);
EXPORT_SYMBOL_GPL(relay_close);
EXPORT_SYMBOL_GPL(relay_flush);
EXPORT_SYMBOL_GPL(relay_open);
EXPORT_SYMBOL_GPL(relay_reset);
EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
EXPORT_SYMBOL_GPL(relay_switch_subbuf);
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
new file mode 100644
index 000000000000..703503fa22b6
--- /dev/null
+++ b/fs/relayfs/relay.h
@@ -0,0 +1,12 @@
#ifndef _RELAY_H
#define _RELAY_H

/*
 * Internal declarations shared between the relayfs source files.
 *
 * Only pointers to these types are used below, so forward declarations
 * are enough and the header can be included on its own.
 */
struct dentry;
struct kref;
struct rchan;
struct rchan_buf;

/* Create the relayfs file backing one channel buffer of @chan. */
extern struct dentry *relayfs_create_file(const char *name,
					  struct dentry *parent,
					  int mode,
					  struct rchan *chan);
extern int relayfs_remove(struct dentry *dentry);
/* Returns 1 if the buffer is empty, 0 otherwise. */
extern int relay_buf_empty(struct rchan_buf *buf);
/* kref release function for a channel; only for use with kref_put(). */
extern void relay_destroy_channel(struct kref *kref);

#endif /* _RELAY_H */