/* * proc/fs/generic.c --- generic routines for the proc-fs * * This file contains generic proc-fs routines for handling * directories and files. * * Copyright (C) 1991, 1992 Linus Torvalds. * Copyright (C) 1997 Theodore Ts'o */ #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/idr.h> #include <linux/namei.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <asm/uaccess.h> #include "internal.h" DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(int len, const char *name, struct proc_dir_entry *de) { if (de->namelen != len) return 0; return !memcmp(name, de->name, len); } /* buffer size is one page but our output routines use some slack for overruns */ #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) static ssize_t proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct inode * inode = file->f_path.dentry->d_inode; char *page; ssize_t retval=0; int eof=0; ssize_t n, count; char *start; struct proc_dir_entry * dp; unsigned long long pos; /* * Gaah, please just use "seq_file" instead. The legacy /proc * interfaces cut loff_t down to off_t for reads, and ignore * the offset entirely for writes.. */ pos = *ppos; if (pos > MAX_NON_LFS) return 0; if (nbytes > MAX_NON_LFS - pos) nbytes = MAX_NON_LFS - pos; dp = PDE(inode); if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) return -ENOMEM; while ((nbytes > 0) && !eof) { count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); start = NULL; if (dp->read_proc) { /* * How to be a proc read function * ------------------------------ * Prototype: * int f(char *buffer, char **start, off_t offset, * int count, int *peof, void *dat) * * Assume that the buffer is "count" bytes in size. * * If you know you have supplied all the data you * have, set *peof. * * You have three ways to return data: * 0) Leave *start = NULL. (This is the default.) * Put the data of the requested offset at that * offset within the buffer. Return the number (n) * of bytes there are from the beginning of the * buffer up to the last byte of data. If the * number of supplied bytes (= n - offset) is * greater than zero and you didn't signal eof * and the reader is prepared to take more data * you will be called again with the requested * offset advanced by the number of bytes * absorbed. This interface is useful for files * no larger than the buffer. * 1) Set *start = an unsigned long value less than * the buffer address but greater than zero. * Put the data of the requested offset at the * beginning of the buffer. Return the number of * bytes of data placed there. If this number is * greater than zero and you didn't signal eof * and the reader is prepared to take more data * you will be called again with the requested * offset advanced by *start. This interface is * useful when you have a large file consisting * of a series of blocks which you want to count * and return as wholes. * (Hack by Paul.Russell@rustcorp.com.au) * 2) Set *start = an address within the buffer. * Put the data of the requested offset at *start. * Return the number of bytes of data placed there. * If this number is greater than zero and you * didn't signal eof and the reader is prepared to * take more data you will be called again with the * requested offset advanced by the number of bytes * absorbed. */ n = dp->read_proc(page, &start, *ppos, count, &eof, dp->data); } else break; if (n == 0) /* end of file */ break; if (n < 0) { /* error */ if (retval == 0) retval = n; break; } if (start == NULL) { if (n > PAGE_SIZE) { printk(KERN_ERR "proc_file_read: Apparent buffer overflow!\n"); n = PAGE_SIZE; } n -= *ppos; if (n <= 0) break; if (n > count) n = count; start = page + *ppos; } else if (start < page) { if (n > PAGE_SIZE) { printk(KERN_ERR "proc_file_read: Apparent buffer overflow!\n"); n = PAGE_SIZE; } if (n > count) { /* * Don't reduce n because doing so might * cut off part of a data block. */ printk(KERN_WARNING "proc_file_read: Read count exceeded\n"); } } else /* start >= page */ { unsigned long startoff = (unsigned long)(start - page); if (n > (PAGE_SIZE - startoff)) { printk(KERN_ERR "proc_file_read: Apparent buffer overflow!\n"); n = PAGE_SIZE - startoff; } if (n > count) n = count; } n -= copy_to_user(buf, start < page ? page : start, n); if (n == 0) { if (retval == 0) retval = -EFAULT; break; } *ppos += start < page ? (unsigned long)start : n; nbytes -= n; buf += n; retval += n; } free_page((unsigned long) page); return retval; } static ssize_t proc_file_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry * dp; dp = PDE(inode); if (!dp->write_proc) return -EIO; /* FIXME: does this routine need ppos? probably... */ return dp->write_proc(file, buffer, count, dp->data); } static loff_t proc_file_lseek(struct file *file, loff_t offset, int orig) { loff_t retval = -EINVAL; switch (orig) { case 1: offset += file->f_pos; /* fallthrough */ case 0: if (offset < 0 || offset > MAX_NON_LFS) break; file->f_pos = retval = offset; } return retval; } static const struct file_operations proc_file_operations = { .llseek = proc_file_lseek, .read = proc_file_read, .write = proc_file_write, }; static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; struct proc_dir_entry *de = PDE(inode); int error; error = inode_change_ok(inode, iattr); if (error) goto out; error = inode_setattr(inode, iattr); if (error) goto out; de->uid = inode->i_uid; de->gid = inode->i_gid; de->mode = inode->i_mode; out: return error; } static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct proc_dir_entry *de = PROC_I(inode)->pde; if (de && de->nlink) inode->i_nlink = de->nlink; generic_fillattr(inode, stat); return 0; } static const struct inode_operations proc_file_inode_operations = { .setattr = proc_notify_change, }; /* * This function parses a name such as "tty/driver/serial", and * returns the struct proc_dir_entry for "/proc/tty/driver", and * returns "serial" in residual. */ static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, const char **residual) { const char *cp = name, *next; struct proc_dir_entry *de; int len; int rtn = 0; de = *ret; if (!de) de = &proc_root; spin_lock(&proc_subdir_lock); while (1) { next = strchr(cp, '/'); if (!next) break; len = next - cp; for (de = de->subdir; de ; de = de->next) { if (proc_match(len, cp, de)) break; } if (!de) { rtn = -ENOENT; goto out; } cp += len + 1; } *residual = cp; *ret = de; out: spin_unlock(&proc_subdir_lock); return rtn; } static DEFINE_IDR(proc_inum_idr); static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ #define PROC_DYNAMIC_FIRST 0xF0000000UL /* * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. */ static unsigned int get_inode_number(void) { int i, inum = 0; int error; retry: if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) return 0; spin_lock(&proc_inum_lock); error = idr_get_new(&proc_inum_idr, NULL, &i); spin_unlock(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return 0; inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; /* inum will never be more than 0xf0ffffff, so no check * for overflow. */ return inum; } static void release_inode_number(unsigned int inum) { int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; spin_lock(&proc_inum_lock); idr_remove(&proc_inum_idr, id); spin_unlock(&proc_inum_lock); } static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) { nd_set_link(nd, PDE(dentry->d_inode)->data); return NULL; } static const struct inode_operations proc_link_inode_operations = { .readlink = generic_readlink, .follow_link = proc_follow_link, }; /* * As some entries in /proc are volatile, we want to * get rid of unused dentries. This could be made * smarter: we could keep a "volatile" flag in the * inode to indicate which ones to keep. */ static int proc_delete_dentry(struct dentry * dentry) { return 1; } static struct dentry_operations proc_dentry_operations = { .d_delete = proc_delete_dentry, }; /* * Don't create negative dentries here, return -ENOENT by hand * instead. */ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct dentry *dentry) { struct inode *inode = NULL; int error = -ENOENT; lock_kernel(); spin_lock(&proc_subdir_lock); for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino; ino = de->low_ino; de_get(de); spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); goto out_unlock; } } spin_unlock(&proc_subdir_lock); out_unlock: unlock_kernel(); if (inode) { dentry->d_op = &proc_dentry_operations; d_add(dentry, inode); return NULL; } if (de) de_put(de); return ERR_PTR(error); } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { return proc_lookup_de(PDE(dir), dir, dentry); } /* * This returns non-zero if at EOF, so that the /proc * root directory can use this and check if it should * continue with the <pid> entries.. * * Note that the VFS-layer doesn't care about the return * value of the readdir() call, as long as it's non-negative * for success.. */ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir) { unsigned int ino; int i; struct inode *inode = filp->f_path.dentry->d_inode; int ret = 0; lock_kernel(); ino = inode->i_ino; i = filp->f_pos; switch (i) { case 0: if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) goto out; i++; filp->f_pos++; /* fall through */ case 1: if (filldir(dirent, "..", 2, i, parent_ino(filp->f_path.dentry), DT_DIR) < 0) goto out; i++; filp->f_pos++; /* fall through */ default: spin_lock(&proc_subdir_lock); de = de->subdir; i -= 2; for (;;) { if (!de) { ret = 1; spin_unlock(&proc_subdir_lock); goto out; } if (!i) break; de = de->next; i--; } do { struct proc_dir_entry *next; /* filldir passes info to user space */ de_get(de); spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) { de_put(de); goto out; } spin_lock(&proc_subdir_lock); filp->f_pos++; next = de->next; de_put(de); de = next; } while (de); spin_unlock(&proc_subdir_lock); } ret = 1; out: unlock_kernel(); return ret; } int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; return proc_readdir_de(PDE(inode), filp, dirent, filldir); } /* * These are the generic /proc directory operations. They * use the in-memory "struct proc_dir_entry" tree to parse * the /proc directory. */ static const struct file_operations proc_dir_operations = { .read = generic_read_dir, .readdir = proc_readdir, }; /* * proc directories can do almost nothing.. */ static const struct inode_operations proc_dir_inode_operations = { .lookup = proc_lookup, .getattr = proc_getattr, .setattr = proc_notify_change, }; static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { unsigned int i; struct proc_dir_entry *tmp; i = get_inode_number(); if (i == 0) return -EAGAIN; dp->low_ino = i; if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { dp->proc_fops = &proc_dir_operations; dp->proc_iops = &proc_dir_inode_operations; } dir->nlink++; } else if (S_ISLNK(dp->mode)) { if (dp->proc_iops == NULL) dp->proc_iops = &proc_link_inode_operations; } else if (S_ISREG(dp->mode)) { if (dp->proc_fops == NULL) dp->proc_fops = &proc_file_operations; if (dp->proc_iops == NULL) dp->proc_iops = &proc_file_inode_operations; } spin_lock(&proc_subdir_lock); for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { printk(KERN_WARNING "proc_dir_entry '%s' already " "registered\n", dp->name); dump_stack(); break; } dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; spin_unlock(&proc_subdir_lock); return 0; } static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, const char *name, mode_t mode, nlink_t nlink) { struct proc_dir_entry *ent = NULL; const char *fn = name; int len; /* make sure name is valid */ if (!name || !strlen(name)) goto out; if (xlate_proc_name(name, parent, &fn) != 0) goto out; /* At this point there must not be any '/' characters beyond *fn */ if (strchr(fn, '/')) goto out; len = strlen(fn); ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); if (!ent) goto out; memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); ent->name = ((char *) ent) + sizeof(*ent); ent->namelen = len; ent->mode = mode; ent->nlink = nlink; atomic_set(&ent->count, 1); ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); ent->pde_unload_completion = NULL; out: return ent; } struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent, const char *dest) { struct proc_dir_entry *ent; ent = __proc_create(&parent, name, (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); if (ent) { ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); if (ent->data) { strcpy((char*)ent->data,dest); if (proc_register(parent, ent) < 0) { kfree(ent->data); kfree(ent); ent = NULL; } } else { kfree(ent); ent = NULL; } } return ent; } struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, struct proc_dir_entry *parent) { struct proc_dir_entry *ent; ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); ent = NULL; } } return ent; } struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); } struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { struct proc_dir_entry *ent; nlink_t nlink; if (S_ISDIR(mode)) { if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO | S_IXUGO; nlink = 2; } else { if ((mode & S_IFMT) == 0) mode |= S_IFREG; if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; nlink = 1; } ent = __proc_create(&parent, name, mode, nlink); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); ent = NULL; } } return ent; } struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, void *data) { struct proc_dir_entry *pde; nlink_t nlink; if (S_ISDIR(mode)) { if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO | S_IXUGO; nlink = 2; } else { if ((mode & S_IFMT) == 0) mode |= S_IFREG; if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; nlink = 1; } pde = __proc_create(&parent, name, mode, nlink); if (!pde) goto out; pde->proc_fops = proc_fops; pde->data = data; if (proc_register(parent, pde) < 0) goto out_free; return pde; out_free: kfree(pde); out: return NULL; } void free_proc_entry(struct proc_dir_entry *de) { unsigned int ino = de->low_ino; if (ino < PROC_DYNAMIC_FIRST) return; release_inode_number(ino); if (S_ISLNK(de->mode)) kfree(de->data); kfree(de); } /* * Remove a /proc entry and free it if it's not currently in use. */ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry **p; struct proc_dir_entry *de = NULL; const char *fn = name; int len; if (xlate_proc_name(name, &parent, &fn) != 0) return; len = strlen(fn); spin_lock(&proc_subdir_lock); for (p = &parent->subdir; *p; p=&(*p)->next ) { if (proc_match(len, fn, *p)) { de = *p; *p = de->next; de->next = NULL; break; } } spin_unlock(&proc_subdir_lock); if (!de) return; spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're * dynamically allocating ->proc_fops, save a pointer somewhere. */ de->proc_fops = NULL; /* Wait until all existing callers into module are done. */ if (de->pde_users > 0) { DECLARE_COMPLETION_ONSTACK(c); if (!de->pde_unload_completion) de->pde_unload_completion = &c; spin_unlock(&de->pde_unload_lock); wait_for_completion(de->pde_unload_completion); goto continue_removing; } spin_unlock(&de->pde_unload_lock); continue_removing: if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; if (de->subdir) { printk(KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, de->subdir->name); WARN_ON(1); } if (atomic_dec_and_test(&de->count)) free_proc_entry(de); }