aboutsummaryrefslogtreecommitdiffstats
path: root/mm/readahead.c
diff options
context:
space:
mode:
author Fengguang Wu <wfg@mail.ustc.edu.cn> 2007-10-16 04:24:34 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-16 12:42:52 -0400
commit 6b10c6c9fbfe754e8482efb8c8b84f8e40c0f2eb (patch)
tree 08f275b1e8d2e9c93bb46367611c43ab88f8f8dc /mm/readahead.c
parent 6df8ba4f8a4c4abca9ccad10441d0dddbdff301c (diff)
readahead: basic support of interleaved reads
This is a simplified version of the pagecache context based readahead. It handles the case of multiple threads reading on the same fd and invalidating each other's readahead state. It does the trick by scanning the pagecache and recovering the current read stream's readahead status.

The algorithm works in an opportunistic way, in that it does not try to detect interleaved reads _actively_, which would require a probe into the page cache (which means a little more overhead for random reads). It only tries to handle a previously started sequential readahead whose state was overwritten by another concurrent stream, and it can do this job pretty well.

Negative and positive examples (or what you can expect from it):

1) It cannot detect and serve perfect request-by-request interleaved reads right:

	time	stream 1	stream 2
	0	1
	1			1001
	2	2
	3			1002
	4	3
	5			1003
	6	4
	7			1004
	8	5
	9			1005

Here no single readahead will be carried out.

2) However, if there are two concurrent reads by two threads, the chance of the initial sequential readahead being started is huge. Once the first sequential readahead is started for a stream, this patch will ensure that the readahead window continues to ramp up and won't be disturbed by other streams.

	time	stream 1	stream 2
	0	1
	1	2
	2			1001
	3	3
	4			1002
	5			1003
	6	4
	7	5
	8			1004
	9	6
	10			1005
	11	7
	12			1006
	13			1007

Here stream 1 will start a readahead at page 2, and stream 2 will start its first readahead at page 1003. From then on the two streams will be served right.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/readahead.c')
-rw-r--r-- mm/readahead.c 33
1 files changed, 23 insertions, 10 deletions
diff --git a/mm/readahead.c b/mm/readahead.c
index 4a58befbde4a..fd588ffc5086 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -380,6 +380,29 @@ ondemand_readahead(struct address_space *mapping,
380 } 380 }
381 381
382 /* 382 /*
383 * Hit a marked page without valid readahead state.
384 * E.g. interleaved reads.
385 * Query the pagecache for async_size, which normally equals to
386 * readahead size. Ramp it up and use it as the new readahead size.
387 */
388 if (hit_readahead_marker) {
389 pgoff_t start;
390
391 read_lock_irq(&mapping->tree_lock);
392 start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
393 read_unlock_irq(&mapping->tree_lock);
394
395 if (!start || start - offset > max)
396 return 0;
397
398 ra->start = start;
399 ra->size = start - offset; /* old async_size */
400 ra->size = get_next_ra_size(ra, max);
401 ra->async_size = ra->size;
402 goto readit;
403 }
404
405 /*
383 * It may be one of 406 * It may be one of
384 * - first read on start of file 407 * - first read on start of file
385 * - sequential cache miss 408 * - sequential cache miss
@@ -390,16 +413,6 @@ ondemand_readahead(struct address_space *mapping,
390 ra->size = get_init_ra_size(req_size, max); 413 ra->size = get_init_ra_size(req_size, max);
391 ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; 414 ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
392 415
393 /*
394 * Hit on a marked page without valid readahead state.
395 * E.g. interleaved reads.
396 * Not knowing its readahead pos/size, bet on the minimal possible one.
397 */
398 if (hit_readahead_marker) {
399 ra->start++;
400 ra->size = get_next_ra_size(ra, max);
401 }
402
403readit: 416readit:
404 return ra_submit(ra, mapping, filp); 417 return ra_submit(ra, mapping, filp);
405} 418}