aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZach Brown <zach.brown@oracle.com>2005-11-07 03:59:31 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-07 10:53:38 -0500
commitd55b5fdaf40846221d543937b786956e27837fda (patch)
treeecdf572d7d2b08b4f7b31d804542a920934b6d9a
parent0f6ed7c2641fe4cea83cd09c21928ca30c0983ec (diff)
[PATCH] aio: remove aio_max_nr accounting race
AIO was adding a new context's max requests to the global total before testing if that resulting total was over the global limit. This let innocent tasks get their new limit tested along with a racing guilty task that was crossing the limit. This serializes the _nr accounting with a spinlock It also switches to using unsigned long for the global totals. Individual contexts are still limited to an unsigned int's worth of requests by the syscall interface. The problem and fix were verified with a simple program that spun creating and destroying a context while holding on to another long lived context. Before the patch a task creating a tiny context could get a spurious EAGAIN if it raced with a task creating a very large context that overran the limit. Signed-off-by: Zach Brown <zach.brown@oracle.com> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/aio.c31
-rw-r--r--include/linux/aio.h5
-rw-r--r--kernel/sysctl.c4
3 files changed, 26 insertions, 14 deletions
diff --git a/fs/aio.c b/fs/aio.c
index edfca5b75535..20bb919eb195 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -42,8 +42,9 @@
42#endif 42#endif
43 43
44/*------ sysctl variables----*/ 44/*------ sysctl variables----*/
45atomic_t aio_nr = ATOMIC_INIT(0); /* current system wide number of aio requests */ 45static DEFINE_SPINLOCK(aio_nr_lock);
46unsigned aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ 46unsigned long aio_nr; /* current system wide number of aio requests */
47unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
47/*----end sysctl variables---*/ 48/*----end sysctl variables---*/
48 49
49static kmem_cache_t *kiocb_cachep; 50static kmem_cache_t *kiocb_cachep;
@@ -208,7 +209,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
208 return ERR_PTR(-EINVAL); 209 return ERR_PTR(-EINVAL);
209 } 210 }
210 211
211 if (nr_events > aio_max_nr) 212 if ((unsigned long)nr_events > aio_max_nr)
212 return ERR_PTR(-EAGAIN); 213 return ERR_PTR(-EAGAIN);
213 214
214 ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); 215 ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
@@ -233,8 +234,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
233 goto out_freectx; 234 goto out_freectx;
234 235
235 /* limit the number of system wide aios */ 236 /* limit the number of system wide aios */
236 atomic_add(ctx->max_reqs, &aio_nr); /* undone by __put_ioctx */ 237 spin_lock(&aio_nr_lock);
237 if (unlikely(atomic_read(&aio_nr) > aio_max_nr)) 238 if (aio_nr + ctx->max_reqs > aio_max_nr ||
239 aio_nr + ctx->max_reqs < aio_nr)
240 ctx->max_reqs = 0;
241 else
242 aio_nr += ctx->max_reqs;
243 spin_unlock(&aio_nr_lock);
244 if (ctx->max_reqs == 0)
238 goto out_cleanup; 245 goto out_cleanup;
239 246
240 /* now link into global list. kludge. FIXME */ 247 /* now link into global list. kludge. FIXME */
@@ -248,8 +255,6 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
248 return ctx; 255 return ctx;
249 256
250out_cleanup: 257out_cleanup:
251 atomic_sub(ctx->max_reqs, &aio_nr);
252 ctx->max_reqs = 0; /* prevent __put_ioctx from sub'ing aio_nr */
253 __put_ioctx(ctx); 258 __put_ioctx(ctx);
254 return ERR_PTR(-EAGAIN); 259 return ERR_PTR(-EAGAIN);
255 260
@@ -374,7 +379,12 @@ void fastcall __put_ioctx(struct kioctx *ctx)
374 pr_debug("__put_ioctx: freeing %p\n", ctx); 379 pr_debug("__put_ioctx: freeing %p\n", ctx);
375 kmem_cache_free(kioctx_cachep, ctx); 380 kmem_cache_free(kioctx_cachep, ctx);
376 381
377 atomic_sub(nr_events, &aio_nr); 382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
378} 388}
379 389
380/* aio_get_req 390/* aio_get_req
@@ -1258,8 +1268,9 @@ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
1258 goto out; 1268 goto out;
1259 1269
1260 ret = -EINVAL; 1270 ret = -EINVAL;
1261 if (unlikely(ctx || (int)nr_events <= 0)) { 1271 if (unlikely(ctx || nr_events == 0)) {
1262 pr_debug("EINVAL: io_setup: ctx or nr_events > max\n"); 1272 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
1273 ctx, nr_events);
1263 goto out; 1274 goto out;
1264 } 1275 }
1265 1276
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 0decf66117c1..403d71dcb7c8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -183,6 +183,7 @@ struct kioctx {
183 struct list_head active_reqs; /* used for cancellation */ 183 struct list_head active_reqs; /* used for cancellation */
184 struct list_head run_list; /* used for kicked reqs */ 184 struct list_head run_list; /* used for kicked reqs */
185 185
186 /* sys_io_setup currently limits this to an unsigned int */
186 unsigned max_reqs; 187 unsigned max_reqs;
187 188
188 struct aio_ring_info ring_info; 189 struct aio_ring_info ring_info;
@@ -234,7 +235,7 @@ static inline struct kiocb *list_kiocb(struct list_head *h)
234} 235}
235 236
236/* for sysctl: */ 237/* for sysctl: */
237extern atomic_t aio_nr; 238extern unsigned long aio_nr;
238extern unsigned aio_max_nr; 239extern unsigned long aio_max_nr;
239 240
240#endif /* __LINUX__AIO_H */ 241#endif /* __LINUX__AIO_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8e56e2495542..e1351200ce85 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -952,7 +952,7 @@ static ctl_table fs_table[] = {
952 .data = &aio_nr, 952 .data = &aio_nr,
953 .maxlen = sizeof(aio_nr), 953 .maxlen = sizeof(aio_nr),
954 .mode = 0444, 954 .mode = 0444,
955 .proc_handler = &proc_dointvec, 955 .proc_handler = &proc_doulongvec_minmax,
956 }, 956 },
957 { 957 {
958 .ctl_name = FS_AIO_MAX_NR, 958 .ctl_name = FS_AIO_MAX_NR,
@@ -960,7 +960,7 @@ static ctl_table fs_table[] = {
960 .data = &aio_max_nr, 960 .data = &aio_max_nr,
961 .maxlen = sizeof(aio_max_nr), 961 .maxlen = sizeof(aio_max_nr),
962 .mode = 0644, 962 .mode = 0644,
963 .proc_handler = &proc_dointvec, 963 .proc_handler = &proc_doulongvec_minmax,
964 }, 964 },
965#ifdef CONFIG_INOTIFY 965#ifdef CONFIG_INOTIFY
966 { 966 {