aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andrea Bastoni <bastoni@cs.unc.edu> 2009-12-17 21:33:26 -0500
committer: Andrea Bastoni <bastoni@cs.unc.edu> 2010-05-29 17:16:27 -0400
commit: fa3c94fc9cd1619fe0dd6081a1a980c09ef3e119 (patch)
tree: 2e389b77431e55e8b81f4f256b93f23137dd4e2f
parent: f5936ecf0cff0b94419b6768efba3e15622beeb6 (diff)
[ported from 2008.3] Add File Descriptor Attached Shared Objects (FDSO) infrastructure
-rw-r--r-- fs/exec.c 13
-rw-r--r-- fs/inode.c 2
-rw-r--r-- include/linux/fs.h 21
-rw-r--r-- include/linux/sched.h 10
-rw-r--r-- include/litmus/fdso.h 69
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- litmus/Makefile 1
-rw-r--r-- litmus/fdso.c 283
8 files changed, 386 insertions, 17 deletions
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd4a339..606cf96828d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
19 * current->executable is only used by the procfs. This allows a dispatch 19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep 20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary 21 * trying until we recognize the file or we run out of supported binary
22 * formats. 22 * formats.
23 */ 23 */
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
@@ -57,6 +57,8 @@
57#include <linux/fs_struct.h> 57#include <linux/fs_struct.h>
58#include <linux/pipe_fs_i.h> 58#include <linux/pipe_fs_i.h>
59 59
60#include <litmus/litmus.h>
61
60#include <asm/uaccess.h> 62#include <asm/uaccess.h>
61#include <asm/mmu_context.h> 63#include <asm/mmu_context.h>
62#include <asm/tlb.h> 64#include <asm/tlb.h>
@@ -80,7 +82,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
80 insert ? list_add(&fmt->lh, &formats) : 82 insert ? list_add(&fmt->lh, &formats) :
81 list_add_tail(&fmt->lh, &formats); 83 list_add_tail(&fmt->lh, &formats);
82 write_unlock(&binfmt_lock); 84 write_unlock(&binfmt_lock);
83 return 0; 85 return 0;
84} 86}
85 87
86EXPORT_SYMBOL(__register_binfmt); 88EXPORT_SYMBOL(__register_binfmt);
@@ -1006,7 +1008,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1006 group */ 1008 group */
1007 1009
1008 current->self_exec_id++; 1010 current->self_exec_id++;
1009 1011
1010 flush_signal_handlers(current, 0); 1012 flush_signal_handlers(current, 0);
1011 flush_old_files(current->files); 1013 flush_old_files(current->files);
1012 1014
@@ -1102,8 +1104,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1102 return res; 1104 return res;
1103} 1105}
1104 1106
1105/* 1107/*
1106 * Fill the binprm structure from the inode. 1108 * Fill the binprm structure from the inode.
1107 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1109 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
1108 * 1110 *
1109 * This may be called multiple times for binary chains (scripts for example). 1111 * This may be called multiple times for binary chains (scripts for example).
@@ -1318,6 +1320,7 @@ int do_execve(char * filename,
1318 goto out_unmark; 1320 goto out_unmark;
1319 1321
1320 sched_exec(); 1322 sched_exec();
1323 litmus_exec();
1321 1324
1322 bprm->file = file; 1325 bprm->file = file;
1323 bprm->filename = filename; 1326 bprm->filename = filename;
diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be55976..de80bc2bf713 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -282,6 +282,8 @@ void inode_init_once(struct inode *inode)
282#ifdef CONFIG_FSNOTIFY 282#ifdef CONFIG_FSNOTIFY
283 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); 283 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
284#endif 284#endif
285 INIT_LIST_HEAD(&inode->i_obj_list);
286 mutex_init(&inode->i_obj_mutex);
285} 287}
286EXPORT_SYMBOL(inode_init_once); 288EXPORT_SYMBOL(inode_init_once);
287 289
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c63571..5c7e0ff370ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -15,8 +15,8 @@
15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
16 * upper limit on files-per-process. 16 * upper limit on files-per-process.
17 * 17 *
18 * Some programs (notably those using select()) may have to be 18 * Some programs (notably those using select()) may have to be
19 * recompiled to take full advantage of the new limits.. 19 * recompiled to take full advantage of the new limits..
20 */ 20 */
21 21
22/* Fixed constants first: */ 22/* Fixed constants first: */
@@ -169,7 +169,7 @@ struct inodes_stat_t {
169#define SEL_EX 4 169#define SEL_EX 4
170 170
171/* public flags for file_system_type */ 171/* public flags for file_system_type */
172#define FS_REQUIRES_DEV 1 172#define FS_REQUIRES_DEV 1
173#define FS_BINARY_MOUNTDATA 2 173#define FS_BINARY_MOUNTDATA 2
174#define FS_HAS_SUBTYPE 4 174#define FS_HAS_SUBTYPE 4
175#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 175#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
@@ -466,7 +466,7 @@ struct iattr {
466 */ 466 */
467#include <linux/quota.h> 467#include <linux/quota.h>
468 468
469/** 469/**
470 * enum positive_aop_returns - aop return codes with specific semantics 470 * enum positive_aop_returns - aop return codes with specific semantics
471 * 471 *
472 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 472 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
@@ -476,7 +476,7 @@ struct iattr {
476 * be a candidate for writeback again in the near 476 * be a candidate for writeback again in the near
477 * future. Other callers must be careful to unlock 477 * future. Other callers must be careful to unlock
478 * the page if they get this return. Returned by 478 * the page if they get this return. Returned by
479 * writepage(); 479 * writepage();
480 * 480 *
481 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 481 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
482 * unlocked it and the page might have been truncated. 482 * unlocked it and the page might have been truncated.
@@ -715,6 +715,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
715 715
716struct posix_acl; 716struct posix_acl;
717#define ACL_NOT_CACHED ((void *)(-1)) 717#define ACL_NOT_CACHED ((void *)(-1))
718struct inode_obj_id_table;
718 719
719struct inode { 720struct inode {
720 struct hlist_node i_hash; 721 struct hlist_node i_hash;
@@ -783,6 +784,8 @@ struct inode {
783 struct posix_acl *i_acl; 784 struct posix_acl *i_acl;
784 struct posix_acl *i_default_acl; 785 struct posix_acl *i_default_acl;
785#endif 786#endif
787 struct list_head i_obj_list;
788 struct mutex i_obj_mutex;
786 void *i_private; /* fs or device private pointer */ 789 void *i_private; /* fs or device private pointer */
787}; 790};
788 791
@@ -995,10 +998,10 @@ static inline int file_check_writeable(struct file *filp)
995 998
996#define MAX_NON_LFS ((1UL<<31) - 1) 999#define MAX_NON_LFS ((1UL<<31) - 1)
997 1000
998/* Page cache limit. The filesystems should put that into their s_maxbytes 1001/* Page cache limit. The filesystems should put that into their s_maxbytes
999 limits, otherwise bad things can happen in VM. */ 1002 limits, otherwise bad things can happen in VM. */
1000#if BITS_PER_LONG==32 1003#if BITS_PER_LONG==32
1001#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1004#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1002#elif BITS_PER_LONG==64 1005#elif BITS_PER_LONG==64
1003#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1006#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
1004#endif 1007#endif
@@ -2139,7 +2142,7 @@ extern int may_open(struct path *, int, int);
2139 2142
2140extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2143extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2141extern struct file * open_exec(const char *); 2144extern struct file * open_exec(const char *);
2142 2145
2143/* fs/dcache.c -- generic fs support functions */ 2146/* fs/dcache.c -- generic fs support functions */
2144extern int is_subdir(struct dentry *, struct dentry *); 2147extern int is_subdir(struct dentry *, struct dentry *);
2145extern ino_t find_inode_number(struct dentry *, struct qstr *); 2148extern ino_t find_inode_number(struct dentry *, struct qstr *);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bb046c0adf99..724814191fe9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1214,6 +1214,7 @@ struct sched_rt_entity {
1214}; 1214};
1215 1215
1216struct rcu_node; 1216struct rcu_node;
1217struct od_table_entry;
1217 1218
1218struct task_struct { 1219struct task_struct {
1219 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1220 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
@@ -1296,9 +1297,9 @@ struct task_struct {
1296 unsigned long stack_canary; 1297 unsigned long stack_canary;
1297#endif 1298#endif
1298 1299
1299 /* 1300 /*
1300 * pointers to (original) parent process, youngest child, younger sibling, 1301 * pointers to (original) parent process, youngest child, younger sibling,
1301 * older sibling, respectively. (p->father can be replaced with 1302 * older sibling, respectively. (p->father can be replaced with
1302 * p->real_parent->pid) 1303 * p->real_parent->pid)
1303 */ 1304 */
1304 struct task_struct *real_parent; /* real parent process */ 1305 struct task_struct *real_parent; /* real parent process */
@@ -1512,6 +1513,9 @@ struct task_struct {
1512 /* LITMUS RT parameters and state */ 1513 /* LITMUS RT parameters and state */
1513 struct rt_param rt_param; 1514 struct rt_param rt_param;
1514 1515
1516 /* references to PI semaphores, etc. */
1517 struct od_table_entry *od_table;
1518
1515#ifdef CONFIG_LATENCYTOP 1519#ifdef CONFIG_LATENCYTOP
1516 int latency_record_count; 1520 int latency_record_count;
1517 struct latency_record latency_record[LT_SAVECOUNT]; 1521 struct latency_record latency_record[LT_SAVECOUNT];
@@ -2051,7 +2055,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
2051 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2055 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
2052 2056
2053 return ret; 2057 return ret;
2054} 2058}
2055 2059
2056extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2060extern void block_all_signals(int (*notifier)(void *priv), void *priv,
2057 sigset_t *mask); 2061 sigset_t *mask);
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..286e10f86de0
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,69 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13
14#define MAX_OBJECT_DESCRIPTORS 32
15
/* Classes of shared objects that can be attached to an inode.
 *
 * MIN_OBJ_TYPE and MAX_OBJ_TYPE bracket the valid range (inclusive);
 * sys_od_open() rejects any type outside of it.
 */
typedef enum {
	MIN_OBJ_TYPE	= 0,

	FMLP_SEM	= 0,
	SRP_SEM		= 1,

	MAX_OBJ_TYPE	= 1
} obj_type_t;
24
25struct inode_obj_id {
26 struct list_head list;
27 atomic_t count;
28 struct inode* inode;
29
30 obj_type_t type;
31 void* obj;
32 unsigned int id;
33};
34
35
/* One slot of a task's object descriptor (OD) table. */
struct od_table_entry {
	unsigned int		used;	/* non-zero while the slot is allocated */

	struct inode_obj_id*	obj;	/* object this descriptor refers to */
	void*			extra;	/* reset to NULL when the slot is opened */
};
42
/* Callbacks implemented by an object class.
 *
 * create:  allocate a new object; NULL is treated as failure.
 * destroy: free an object returned by create.
 * open:    called for each new descriptor with the user-supplied config;
 *          a negative return rejects the open.
 * close:   called before a descriptor is released; a non-zero return
 *          rejects the close.
 */
struct fdso_ops {
	void*	(*create)(void);
	void	(*destroy)(void*);
	int	(*open)(struct od_table_entry*, void* __user);
	int	(*close)(struct od_table_entry*);
};
49
/* Map an object descriptor supplied by userspace to the raw table entry
 * of the current task.
 * Returns NULL if od is invalid.
 */
struct od_table_entry* __od_lookup(int od);
54
55/* translate a userspace supplied od into the associated object
56 * returns NULL if od is invalid
57 */
58static inline void* od_lookup(int od, obj_type_t type)
59{
60 struct od_table_entry* e = __od_lookup(od);
61 return e && e->obj->type == type ? e->obj->obj : NULL;
62}
63
/* Typed convenience wrappers around od_lookup().
 * NOTE(review): ICS_ID is not an enumerator of obj_type_t in this header,
 * so lookup_ics() only compiles where ICS_ID is defined elsewhere.
 */
#define lookup_fmlp_sem(od) ((struct pi_semaphore*)  od_lookup(od, FMLP_SEM))
#define lookup_srp_sem(od)  ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
#define lookup_ics(od)      ((struct ics*)           od_lookup(od, ICS_ID))
67
68
69#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3da04257aeaf 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,6 +56,8 @@
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include "cred-internals.h" 57#include "cred-internals.h"
58 58
59extern void exit_od_table(struct task_struct *t);
60
59static void exit_mm(struct task_struct * tsk); 61static void exit_mm(struct task_struct * tsk);
60 62
61static void __unhash_process(struct task_struct *p) 63static void __unhash_process(struct task_struct *p)
@@ -954,6 +956,8 @@ NORET_TYPE void do_exit(long code)
954 if (unlikely(tsk->audit_context)) 956 if (unlikely(tsk->audit_context))
955 audit_free(tsk); 957 audit_free(tsk);
956 958
959 exit_od_table(tsk);
960
957 tsk->exit_code = code; 961 tsk->exit_code = code;
958 taskstats_exit(tsk, group_dead); 962 taskstats_exit(tsk, group_dead);
959 963
diff --git a/litmus/Makefile b/litmus/Makefile
index 59f61cbc7f10..3d18cff62cee 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -7,6 +7,7 @@ obj-y = sched_plugin.o litmus.o \
7 sync.o \ 7 sync.o \
8 rt_domain.o \ 8 rt_domain.o \
9 edf_common.o \ 9 edf_common.o \
10 fdso.o \
10 heap.o 11 heap.o
11 12
12obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 13obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..323efac17a47
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,283 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21
22static struct fdso_ops dummy_ops = {
23 .create = NULL
24};
25
26static const struct fdso_ops* fdso_ops[] = {
27 &dummy_ops,
28 &dummy_ops,
29};
30
31static void* fdso_create(obj_type_t type)
32{
33 if (fdso_ops[type]->create)
34 return fdso_ops[type]->create();
35 else
36 return NULL;
37}
38
39static void fdso_destroy(obj_type_t type, void* obj)
40{
41 fdso_ops[type]->destroy(obj);
42}
43
44static int fdso_open(struct od_table_entry* entry, void* __user config)
45{
46 if (fdso_ops[entry->obj->type]->open)
47 return fdso_ops[entry->obj->type]->open(entry, config);
48 else
49 return 0;
50}
51
52static int fdso_close(struct od_table_entry* entry)
53{
54 if (fdso_ops[entry->obj->type]->close)
55 return fdso_ops[entry->obj->type]->close(entry);
56 else
57 return 0;
58}
59
60/* inode must be locked already */
61static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
62 obj_type_t type,
63 unsigned int id)
64{
65 struct inode_obj_id* obj;
66 void* raw_obj;
67
68 raw_obj = fdso_create(type);
69 if (!raw_obj)
70 return NULL;
71
72 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
73 if (!obj)
74 return NULL;
75 INIT_LIST_HEAD(&obj->list);
76 atomic_set(&obj->count, 1);
77 obj->type = type;
78 obj->id = id;
79 obj->obj = raw_obj;
80 obj->inode = inode;
81
82 list_add(&obj->list, &inode->i_obj_list);
83 atomic_inc(&inode->i_count);
84
85 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
86 return obj;
87}
88
89/* inode must be locked already */
90static struct inode_obj_id* get_inode_obj(struct inode* inode,
91 obj_type_t type,
92 unsigned int id)
93{
94 struct list_head* pos;
95 struct inode_obj_id* obj = NULL;
96
97 list_for_each(pos, &inode->i_obj_list) {
98 obj = list_entry(pos, struct inode_obj_id, list);
99 if (obj->id == id && obj->type == type) {
100 atomic_inc(&obj->count);
101 return obj;
102 }
103 }
104 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
105 return NULL;
106}
107
108
109static void put_inode_obj(struct inode_obj_id* obj)
110{
111 struct inode* inode;
112 int let_go = 0;
113
114 inode = obj->inode;
115 if (atomic_dec_and_test(&obj->count)) {
116
117 mutex_lock(&inode->i_obj_mutex);
118 /* no new references can be obtained */
119 if (!atomic_read(&obj->count)) {
120 list_del(&obj->list);
121 fdso_destroy(obj->type, obj->obj);
122 kfree(obj);
123 let_go = 1;
124 }
125 mutex_unlock(&inode->i_obj_mutex);
126 if (let_go)
127 iput(inode);
128 }
129}
130
131static struct od_table_entry* get_od_entry(struct task_struct* t)
132{
133 struct od_table_entry* table;
134 int i;
135
136
137 table = t->od_table;
138 if (!table) {
139 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
140 GFP_KERNEL);
141 t->od_table = table;
142 }
143
144 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
145 if (!table[i].used) {
146 table[i].used = 1;
147 return table + i;
148 }
149 return NULL;
150}
151
152static int put_od_entry(struct od_table_entry* od)
153{
154 put_inode_obj(od->obj);
155 od->used = 0;
156 return 0;
157}
158
159void exit_od_table(struct task_struct* t)
160{
161 int i;
162
163 if (t->od_table) {
164 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
165 if (t->od_table[i].used)
166 put_od_entry(t->od_table + i);
167 kfree(t->od_table);
168 t->od_table = NULL;
169 }
170}
171
172static int do_sys_od_open(struct file* file, obj_type_t type, int id,
173 void* __user config)
174{
175 int idx = 0, err;
176 struct inode* inode;
177 struct inode_obj_id* obj = NULL;
178 struct od_table_entry* entry;
179
180 inode = file->f_dentry->d_inode;
181
182 entry = get_od_entry(current);
183 if (!entry)
184 return -ENOMEM;
185
186 mutex_lock(&inode->i_obj_mutex);
187 obj = get_inode_obj(inode, type, id);
188 if (!obj)
189 obj = alloc_inode_obj(inode, type, id);
190 if (!obj) {
191 idx = -ENOMEM;
192 entry->used = 0;
193 } else {
194 entry->obj = obj;
195 entry->extra = NULL;
196 idx = entry - current->od_table;
197 }
198
199 mutex_unlock(&inode->i_obj_mutex);
200
201 err = fdso_open(entry, config);
202 if (err < 0) {
203 /* The class rejected the open call.
204 * We need to clean up and tell user space.
205 */
206 put_od_entry(entry);
207 idx = err;
208 }
209
210 return idx;
211}
212
213
214struct od_table_entry* __od_lookup(int od)
215{
216 struct task_struct *t = current;
217
218 if (!t->od_table)
219 return NULL;
220 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
221 return NULL;
222 if (!t->od_table[od].used)
223 return NULL;
224 return t->od_table + od;
225}
226
227
228asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
229{
230 int ret = 0;
231 struct file* file;
232
233 /*
234 1) get file from fd, get inode from file
235 2) lock inode
236 3) try to lookup object
237 4) if not present create and enqueue object, inc inode refcnt
238 5) increment refcnt of object
239 6) alloc od_table_entry, setup ptrs
240 7) unlock inode
241 8) return offset in od_table as OD
242 */
243
244 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
245 ret = -EINVAL;
246 goto out;
247 }
248
249 file = fget(fd);
250 if (!file) {
251 ret = -EBADF;
252 goto out;
253 }
254
255 ret = do_sys_od_open(file, type, obj_id, config);
256
257 fput(file);
258
259out:
260 return ret;
261}
262
263
264asmlinkage int sys_od_close(int od)
265{
266 int ret = -EINVAL;
267 struct task_struct *t = current;
268
269 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
270 return ret;
271
272 if (!t->od_table || !t->od_table[od].used)
273 return ret;
274
275
276 /* give the class a chance to reject the close
277 */
278 ret = fdso_close(t->od_table + od);
279 if (ret == 0)
280 ret = put_od_entry(t->od_table + od);
281
282 return ret;
283}