diff options
author | Zach Brown <zach.brown@oracle.com> | 2005-11-07 03:59:31 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-07 10:53:38 -0500 |
commit | d55b5fdaf40846221d543937b786956e27837fda (patch) | |
tree | ecdf572d7d2b08b4f7b31d804542a920934b6d9a | |
parent | 0f6ed7c2641fe4cea83cd09c21928ca30c0983ec (diff) |
[PATCH] aio: remove aio_max_nr accounting race
AIO was adding a new context's max requests to the global total before
testing if that resulting total was over the global limit. This let
innocent tasks get their new limit tested along with a racing guilty task
that was crossing the limit. This serializes the _nr accounting with a
spinlock. It also switches to using unsigned long for the global totals.
Individual contexts are still limited to an unsigned int's worth of
requests by the syscall interface.
The problem and fix were verified with a simple program that spun creating
and destroying a context while holding on to another long lived context.
Before the patch, a task creating a tiny context could get a spurious EAGAIN
if it raced with a task creating a very large context that overran the
limit.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/aio.c | 31 | ||||
-rw-r--r-- | include/linux/aio.h | 5 | ||||
-rw-r--r-- | kernel/sysctl.c | 4 |
3 files changed, 26 insertions, 14 deletions
@@ -42,8 +42,9 @@ | |||
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | /*------ sysctl variables----*/ | 44 | /*------ sysctl variables----*/ |
45 | atomic_t aio_nr = ATOMIC_INIT(0); /* current system wide number of aio requests */ | 45 | static DEFINE_SPINLOCK(aio_nr_lock); |
46 | unsigned aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ | 46 | unsigned long aio_nr; /* current system wide number of aio requests */ |
47 | unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ | ||
47 | /*----end sysctl variables---*/ | 48 | /*----end sysctl variables---*/ |
48 | 49 | ||
49 | static kmem_cache_t *kiocb_cachep; | 50 | static kmem_cache_t *kiocb_cachep; |
@@ -208,7 +209,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
208 | return ERR_PTR(-EINVAL); | 209 | return ERR_PTR(-EINVAL); |
209 | } | 210 | } |
210 | 211 | ||
211 | if (nr_events > aio_max_nr) | 212 | if ((unsigned long)nr_events > aio_max_nr) |
212 | return ERR_PTR(-EAGAIN); | 213 | return ERR_PTR(-EAGAIN); |
213 | 214 | ||
214 | ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); | 215 | ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); |
@@ -233,8 +234,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
233 | goto out_freectx; | 234 | goto out_freectx; |
234 | 235 | ||
235 | /* limit the number of system wide aios */ | 236 | /* limit the number of system wide aios */ |
236 | atomic_add(ctx->max_reqs, &aio_nr); /* undone by __put_ioctx */ | 237 | spin_lock(&aio_nr_lock); |
237 | if (unlikely(atomic_read(&aio_nr) > aio_max_nr)) | 238 | if (aio_nr + ctx->max_reqs > aio_max_nr || |
239 | aio_nr + ctx->max_reqs < aio_nr) | ||
240 | ctx->max_reqs = 0; | ||
241 | else | ||
242 | aio_nr += ctx->max_reqs; | ||
243 | spin_unlock(&aio_nr_lock); | ||
244 | if (ctx->max_reqs == 0) | ||
238 | goto out_cleanup; | 245 | goto out_cleanup; |
239 | 246 | ||
240 | /* now link into global list. kludge. FIXME */ | 247 | /* now link into global list. kludge. FIXME */ |
@@ -248,8 +255,6 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
248 | return ctx; | 255 | return ctx; |
249 | 256 | ||
250 | out_cleanup: | 257 | out_cleanup: |
251 | atomic_sub(ctx->max_reqs, &aio_nr); | ||
252 | ctx->max_reqs = 0; /* prevent __put_ioctx from sub'ing aio_nr */ | ||
253 | __put_ioctx(ctx); | 258 | __put_ioctx(ctx); |
254 | return ERR_PTR(-EAGAIN); | 259 | return ERR_PTR(-EAGAIN); |
255 | 260 | ||
@@ -374,7 +379,12 @@ void fastcall __put_ioctx(struct kioctx *ctx) | |||
374 | pr_debug("__put_ioctx: freeing %p\n", ctx); | 379 | pr_debug("__put_ioctx: freeing %p\n", ctx); |
375 | kmem_cache_free(kioctx_cachep, ctx); | 380 | kmem_cache_free(kioctx_cachep, ctx); |
376 | 381 | ||
377 | atomic_sub(nr_events, &aio_nr); | 382 | if (nr_events) { |
383 | spin_lock(&aio_nr_lock); | ||
384 | BUG_ON(aio_nr - nr_events > aio_nr); | ||
385 | aio_nr -= nr_events; | ||
386 | spin_unlock(&aio_nr_lock); | ||
387 | } | ||
378 | } | 388 | } |
379 | 389 | ||
380 | /* aio_get_req | 390 | /* aio_get_req |
@@ -1258,8 +1268,9 @@ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) | |||
1258 | goto out; | 1268 | goto out; |
1259 | 1269 | ||
1260 | ret = -EINVAL; | 1270 | ret = -EINVAL; |
1261 | if (unlikely(ctx || (int)nr_events <= 0)) { | 1271 | if (unlikely(ctx || nr_events == 0)) { |
1262 | pr_debug("EINVAL: io_setup: ctx or nr_events > max\n"); | 1272 | pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", |
1273 | ctx, nr_events); | ||
1263 | goto out; | 1274 | goto out; |
1264 | } | 1275 | } |
1265 | 1276 | ||
diff --git a/include/linux/aio.h b/include/linux/aio.h index 0decf66117c1..403d71dcb7c8 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h | |||
@@ -183,6 +183,7 @@ struct kioctx { | |||
183 | struct list_head active_reqs; /* used for cancellation */ | 183 | struct list_head active_reqs; /* used for cancellation */ |
184 | struct list_head run_list; /* used for kicked reqs */ | 184 | struct list_head run_list; /* used for kicked reqs */ |
185 | 185 | ||
186 | /* sys_io_setup currently limits this to an unsigned int */ | ||
186 | unsigned max_reqs; | 187 | unsigned max_reqs; |
187 | 188 | ||
188 | struct aio_ring_info ring_info; | 189 | struct aio_ring_info ring_info; |
@@ -234,7 +235,7 @@ static inline struct kiocb *list_kiocb(struct list_head *h) | |||
234 | } | 235 | } |
235 | 236 | ||
236 | /* for sysctl: */ | 237 | /* for sysctl: */ |
237 | extern atomic_t aio_nr; | 238 | extern unsigned long aio_nr; |
238 | extern unsigned aio_max_nr; | 239 | extern unsigned long aio_max_nr; |
239 | 240 | ||
240 | #endif /* __LINUX__AIO_H */ | 241 | #endif /* __LINUX__AIO_H */ |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e2495542..e1351200ce85 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -952,7 +952,7 @@ static ctl_table fs_table[] = { | |||
952 | .data = &aio_nr, | 952 | .data = &aio_nr, |
953 | .maxlen = sizeof(aio_nr), | 953 | .maxlen = sizeof(aio_nr), |
954 | .mode = 0444, | 954 | .mode = 0444, |
955 | .proc_handler = &proc_dointvec, | 955 | .proc_handler = &proc_doulongvec_minmax, |
956 | }, | 956 | }, |
957 | { | 957 | { |
958 | .ctl_name = FS_AIO_MAX_NR, | 958 | .ctl_name = FS_AIO_MAX_NR, |
@@ -960,7 +960,7 @@ static ctl_table fs_table[] = { | |||
960 | .data = &aio_max_nr, | 960 | .data = &aio_max_nr, |
961 | .maxlen = sizeof(aio_max_nr), | 961 | .maxlen = sizeof(aio_max_nr), |
962 | .mode = 0644, | 962 | .mode = 0644, |
963 | .proc_handler = &proc_dointvec, | 963 | .proc_handler = &proc_doulongvec_minmax, |
964 | }, | 964 | }, |
965 | #ifdef CONFIG_INOTIFY | 965 | #ifdef CONFIG_INOTIFY |
966 | { | 966 | { |