From a18a4da0233492c15bb7b62a329061cf7dcce7a2 Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Thu, 17 Dec 2009 21:33:26 -0500 Subject: Add File Descriptor Attached Shared Objects (FDSO) infrastructure --- fs/exec.c | 13 ++- fs/inode.c | 2 + include/linux/fs.h | 21 ++-- include/linux/sched.h | 10 +- include/litmus/fdso.h | 69 ++++++++++++ kernel/exit.c | 4 + litmus/Makefile | 1 + litmus/fdso.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 386 insertions(+), 17 deletions(-) create mode 100644 include/litmus/fdso.h create mode 100644 litmus/fdso.c diff --git a/fs/exec.c b/fs/exec.c index ba112bd4a339..606cf96828d5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -57,6 +57,8 @@ #include #include +#include + #include #include #include @@ -80,7 +82,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); - return 0; + return 0; } EXPORT_SYMBOL(__register_binfmt); @@ -1006,7 +1008,7 @@ int flush_old_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; - + flush_signal_handlers(current, 0); flush_old_files(current->files); @@ -1102,8 +1104,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) return res; } -/* - * Fill the binprm structure from the inode. +/* + * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes * * This may be called multiple times for binary chains (scripts for example). 
@@ -1318,6 +1320,7 @@ int do_execve(char * filename, goto out_unmark; sched_exec(); + litmus_exec(); bprm->file = file; bprm->filename = filename; diff --git a/fs/inode.c b/fs/inode.c index 4d8e3be55976..de80bc2bf713 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -282,6 +282,8 @@ void inode_init_once(struct inode *inode) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); #endif + INIT_LIST_HEAD(&inode->i_obj_list); + mutex_init(&inode->i_obj_mutex); } EXPORT_SYMBOL(inode_init_once); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2620a8c63571..5c7e0ff370ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -15,8 +15,8 @@ * nr_file rlimit, so it's safe to set up a ridiculously high absolute * upper limit on files-per-process. * - * Some programs (notably those using select()) may have to be - * recompiled to take full advantage of the new limits.. + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. */ /* Fixed constants first: */ @@ -169,7 +169,7 @@ struct inodes_stat_t { #define SEL_EX 4 /* public flags for file_system_type */ -#define FS_REQUIRES_DEV 1 +#define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ @@ -466,7 +466,7 @@ struct iattr { */ #include -/** +/** * enum positive_aop_returns - aop return codes with specific semantics * * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has @@ -476,7 +476,7 @@ struct iattr { * be a candidate for writeback again in the near * future. Other callers must be careful to unlock * the page if they get this return. Returned by - * writepage(); + * writepage(); * * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has * unlocked it and the page might have been truncated. 
@@ -715,6 +715,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +struct inode_obj_id_table; struct inode { struct hlist_node i_hash; @@ -783,6 +784,8 @@ struct inode { struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif + struct list_head i_obj_list; + struct mutex i_obj_mutex; void *i_private; /* fs or device private pointer */ }; @@ -995,10 +998,10 @@ static inline int file_check_writeable(struct file *filp) #define MAX_NON_LFS ((1UL<<31) - 1) -/* Page cache limit. The filesystems should put that into their s_maxbytes - limits, otherwise bad things can happen in VM. */ +/* Page cache limit. The filesystems should put that into their s_maxbytes + limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL #endif @@ -2139,7 +2142,7 @@ extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); - + /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); extern ino_t find_inode_number(struct dentry *, struct qstr *); diff --git a/include/linux/sched.h b/include/linux/sched.h index bb046c0adf99..724814191fe9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1214,6 +1214,7 @@ struct sched_rt_entity { }; struct rcu_node; +struct od_table_entry; struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ @@ -1296,9 +1297,9 @@ struct task_struct { unsigned long stack_canary; #endif - /* + /* * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. 
(p->father can be replaced with + * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ struct task_struct *real_parent; /* real parent process */ @@ -1512,6 +1513,9 @@ struct task_struct { /* LITMUS RT parameters and state */ struct rt_param rt_param; + /* references to PI semaphores, etc. */ + struct od_table_entry *od_table; + #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; @@ -2051,7 +2055,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return ret; -} +} extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h new file mode 100644 index 000000000000..286e10f86de0 --- /dev/null +++ b/include/litmus/fdso.h @@ -0,0 +1,69 @@ +/* fdso.h - file descriptor attached shared objects + * + * (c) 2007 B. Brandenburg, LITMUS^RT project + */ + +#ifndef _LINUX_FDSO_H_ +#define _LINUX_FDSO_H_ + +#include +#include + +#include + +#define MAX_OBJECT_DESCRIPTORS 32 + +typedef enum { + MIN_OBJ_TYPE = 0, + + FMLP_SEM = 0, + SRP_SEM = 1, + + MAX_OBJ_TYPE = 1 +} obj_type_t; + +struct inode_obj_id { + struct list_head list; + atomic_t count; + struct inode* inode; + + obj_type_t type; + void* obj; + unsigned int id; +}; + + +struct od_table_entry { + unsigned int used; + + struct inode_obj_id* obj; + void* extra; +}; + +struct fdso_ops { + void* (*create) (void); + void (*destroy)(void*); + int (*open) (struct od_table_entry*, void* __user); + int (*close) (struct od_table_entry*); +}; + +/* translate a userspace supplied od into the raw table entry + * returns NULL if od is invalid + */ +struct od_table_entry* __od_lookup(int od); + +/* translate a userspace supplied od into the associated object + * returns NULL if od is invalid + */ +static inline void* od_lookup(int od, obj_type_t type) +{ + struct 
od_table_entry* e = __od_lookup(od); + return e && e->obj->type == type ? e->obj->obj : NULL; +} + +#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) +#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) +#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) + + +#endif diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc1..3da04257aeaf 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,6 +56,8 @@ #include #include "cred-internals.h" +extern void exit_od_table(struct task_struct *t); + static void exit_mm(struct task_struct * tsk); static void __unhash_process(struct task_struct *p) @@ -954,6 +956,8 @@ NORET_TYPE void do_exit(long code) if (unlikely(tsk->audit_context)) audit_free(tsk); + exit_od_table(tsk); + tsk->exit_code = code; taskstats_exit(tsk, group_dead); diff --git a/litmus/Makefile b/litmus/Makefile index 59f61cbc7f10..3d18cff62cee 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -7,6 +7,7 @@ obj-y = sched_plugin.o litmus.o \ sync.o \ rt_domain.o \ edf_common.o \ + fdso.o \ heap.o obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..323efac17a47 --- /dev/null +++ b/litmus/fdso.c @@ -0,0 +1,283 @@ +/* fdso.c - file descriptor attached shared objects + * + * (c) 2007 B. Brandenburg, LITMUS^RT project + * + * Notes: + * - objects descriptor (OD) tables are not cloned during a fork. + * - objects are created on-demand, and freed after the last reference + * is dropped. + * - for now, object types are hard coded. + * - As long as we have live objects, we keep a reference to the inode. 
+ */
+
+/* NOTE(review): the header names on the #include lines were missing from the
+ * copy of this patch that was reviewed; the list below is reconstructed from
+ * the symbols this file uses - confirm against the tree before applying.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+#include <litmus/fdso.h>
+
+/* Placeholder class: all callbacks are NULL, so fdso_create() returns NULL
+ * for it and no object of such a type is ever instantiated.
+ */
+static const struct fdso_ops dummy_ops = {
+	.create = NULL
+};
+
+/* Per-type callback table, indexed by obj_type_t (FMLP_SEM, SRP_SEM).
+ * sys_od_open() rejects types outside [MIN_OBJ_TYPE, MAX_OBJ_TYPE] before
+ * this table is indexed.
+ */
+static const struct fdso_ops* fdso_ops[] = {
+	&dummy_ops,
+	&dummy_ops,
+};
+
+/* Instantiate a raw object of the given type.
+ * Returns NULL if the class has no create() callback or if create()
+ * itself returns NULL.
+ */
+static void* fdso_create(obj_type_t type)
+{
+	const struct fdso_ops* ops = fdso_ops[type];
+
+	return ops->create ? ops->create() : NULL;
+}
+
+/* Hand a raw object back to its class for destruction.
+ * A class without a destroy() callback is skipped, like the other
+ * optional callbacks, instead of calling through a NULL pointer.
+ */
+static void fdso_destroy(obj_type_t type, void* obj)
+{
+	const struct fdso_ops* ops = fdso_ops[type];
+
+	if (ops->destroy)
+		ops->destroy(obj);
+}
+
+/* Let the class inspect/accept a freshly set up descriptor.
+ * entry->obj must be valid. Returns the class' result, or 0 if the class
+ * has no open() callback.
+ */
+static int fdso_open(struct od_table_entry* entry, void* __user config)
+{
+	const struct fdso_ops* ops = fdso_ops[entry->obj->type];
+
+	return ops->open ? ops->open(entry, config) : 0;
+}
+
+/* Ask the class whether the descriptor may be closed.
+ * Returns the class' result, or 0 if the class has no close() callback.
+ */
+static int fdso_close(struct od_table_entry* entry)
+{
+	const struct fdso_ops* ops = fdso_ops[entry->obj->type];
+
+	return ops->close ? ops->close(entry) : 0;
+}
+
+/* Create a new object of the given type/id and link it into the inode's
+ * object list. The new object starts with a reference count of one and
+ * raises the inode's i_count for as long as it lives.
+ *
+ * Returns NULL if the class cannot create the object or if the
+ * bookkeeping allocation fails.
+ *
+ * The caller must hold inode->i_obj_mutex.
+ */
+static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
+					    obj_type_t type,
+					    unsigned int id)
+{
+	struct inode_obj_id* obj;
+	void* raw_obj;
+
+	raw_obj = fdso_create(type);
+	if (!raw_obj)
+		return NULL;
+
+	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj) {
+		/* do not leak the raw object we just created */
+		fdso_destroy(type, raw_obj);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&obj->list);
+	atomic_set(&obj->count, 1);
+	obj->type  = type;
+	obj->id    = id;
+	obj->obj   = raw_obj;
+	obj->inode = inode;
+
+	list_add(&obj->list, &inode->i_obj_list);
+	atomic_inc(&inode->i_count);
+
+	printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
+	return obj;
+}
+
+/* Look up the object with the given type/id in the inode's object list.
+ * On success the object's reference count is incremented and the object
+ * is returned; otherwise NULL is returned.
+ *
+ * The caller must hold inode->i_obj_mutex.
+ */
+static struct inode_obj_id* get_inode_obj(struct inode* inode,
+					  obj_type_t type,
+					  unsigned int id)
+{
+	struct inode_obj_id* obj;
+
+	list_for_each_entry(obj, &inode->i_obj_list, list) {
+		if (obj->id == id && obj->type == type) {
+			atomic_inc(&obj->count);
+			return obj;
+		}
+	}
+	printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
+	return NULL;
+}
+
+/* Drop one reference to obj. The last reference unlinks the object from
+ * its inode, destroys the raw object, frees the bookkeeping structure and
+ * releases the inode reference taken in alloc_inode_obj().
+ *
+ * Must be called without inode->i_obj_mutex held.
+ */
+static void put_inode_obj(struct inode_obj_id* obj)
+{
+	struct inode* inode = obj->inode;
+
+	/* The count may only reach zero while i_obj_mutex is held. Dropping
+	 * it to zero first and locking afterwards would let a concurrent
+	 * get_inode_obj()/put_inode_obj() pair free the object before we
+	 * look at it again.
+	 */
+	if (!atomic_dec_and_mutex_lock(&obj->count, &inode->i_obj_mutex))
+		return;
+
+	/* no new references can be obtained */
+	list_del(&obj->list);
+	fdso_destroy(obj->type, obj->obj);
+	kfree(obj);
+	mutex_unlock(&inode->i_obj_mutex);
+
+	iput(inode);
+}
+
+/* Reserve a free slot in t's object descriptor table; the zeroed table is
+ * allocated on first use. Returns NULL if the table cannot be allocated
+ * or if all MAX_OBJECT_DESCRIPTORS slots are in use.
+ */
+static struct od_table_entry* get_od_entry(struct task_struct* t)
+{
+	struct od_table_entry* table = t->od_table;
+	int i;
+
+	if (!table) {
+		table = kcalloc(MAX_OBJECT_DESCRIPTORS, sizeof(*table),
+				GFP_KERNEL);
+		if (!table)
+			return NULL;
+		t->od_table = table;
+	}
+
+	for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) {
+		if (!table[i].used) {
+			table[i].used = 1;
+			return table + i;
+		}
+	}
+	return NULL;
+}
+
+/* Release the object reference held by a descriptor and mark the slot as
+ * free. Always returns 0.
+ */
+static int put_od_entry(struct od_table_entry* od)
+{
+	put_inode_obj(od->obj);
+	od->used = 0;
+	return 0;
+}
+
+/* Drop every descriptor that t still holds and free its table.
+ * The class close() callbacks are not consulted here.
+ */
+void exit_od_table(struct task_struct* t)
+{
+	int i;
+
+	if (!t->od_table)
+		return;
+
+	for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) {
+		if (t->od_table[i].used)
+			put_od_entry(t->od_table + i);
+	}
+	kfree(t->od_table);
+	t->od_table = NULL;
+}
+
+/* Attach the object (type, id) of the file's inode to a new descriptor of
+ * the current task, creating the object if it does not exist yet.
+ *
+ * Returns the descriptor (index into current->od_table) on success,
+ * -ENOMEM if no descriptor slot or no object could be obtained, or the
+ * negative value returned by the class' open() callback.
+ */
+static int do_sys_od_open(struct file* file, obj_type_t type, int id,
+			  void* __user config)
+{
+	struct inode* inode = file->f_dentry->d_inode;
+	struct inode_obj_id* obj;
+	struct od_table_entry* entry;
+	int idx, err;
+
+	entry = get_od_entry(current);
+	if (!entry)
+		return -ENOMEM;
+
+	mutex_lock(&inode->i_obj_mutex);
+	obj = get_inode_obj(inode, type, id);
+	if (!obj)
+		obj = alloc_inode_obj(inode, type, id);
+	mutex_unlock(&inode->i_obj_mutex);
+
+	if (!obj) {
+		/* Nothing to open: give the slot back and stop here.
+		 * Calling fdso_open() would dereference entry->obj.
+		 */
+		entry->used = 0;
+		return -ENOMEM;
+	}
+
+	entry->obj   = obj;
+	entry->extra = NULL;
+	idx = entry - current->od_table;
+
+	err = fdso_open(entry, config);
+	if (err < 0) {
+		/* The class rejected the open call.
+		 * We need to clean up and tell user space.
+		 */
+		put_od_entry(entry);
+		idx = err;
+	}
+
+	return idx;
+}
+
+/* translate a userspace supplied od into the raw table entry of the
+ * current task; returns NULL if od is out of range or not in use
+ */
+struct od_table_entry* __od_lookup(int od)
+{
+	struct task_struct *t = current;
+
+	if (!t->od_table)
+		return NULL;
+	if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
+		return NULL;
+	if (!t->od_table[od].used)
+		return NULL;
+	return t->od_table + od;
+}
+
+/* Open object obj_id of the given type on the inode behind fd.
+ *
+ *  1) reject unknown object types
+ *  2) get file from fd, get inode from file
+ *  3) reserve a slot in the caller's od_table
+ *  4) lock the inode's object list
+ *  5) look up the object (takes a reference); if not present, create
+ *     and enqueue it, which also takes an inode reference
+ *  6) unlock the inode's object list
+ *  7) set up the od_table_entry and let the class accept/reject it
+ *  8) return offset in od_table as OD
+ *
+ * Returns the OD, -EINVAL for a bad type, -EBADF for a bad fd, or the
+ * error reported by do_sys_od_open().
+ */
+asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
+{
+	struct file* file;
+	int ret;
+
+	if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE)
+		return -EINVAL;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+
+	ret = do_sys_od_open(file, type, obj_id, config);
+
+	fput(file);
+	return ret;
+}
+
+/* Close descriptor od of the current task.
+ *
+ * Returns -EINVAL if od does not name a descriptor in use, the class'
+ * close() result if it is non-zero, and 0 once the descriptor has been
+ * released.
+ */
+asmlinkage int sys_od_close(int od)
+{
+	struct od_table_entry* entry = __od_lookup(od);
+	int ret;
+
+	if (!entry)
+		return -EINVAL;
+
+	/* give the class a chance to reject the close */
+	ret = fdso_close(entry);
+	if (ret == 0)
+		ret = put_od_entry(entry);
+
+	return ret;
+}
-- 
cgit v1.2.2