Diffstat (limited to 'mm/readahead.c')
-rw-r--r--	mm/readahead.c	145
1 file changed, 101 insertions, 44 deletions
diff --git a/mm/readahead.c b/mm/readahead.c
index 133b6d525513..aa1aa2345235 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -133,15 +133,12 @@ out:
 }
 
 /*
- * do_page_cache_readahead actually reads a chunk of disk. It allocates all
+ * __do_page_cache_readahead() actually reads a chunk of disk. It allocates all
  * the pages first, then submits them all for I/O. This avoids the very bad
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages requested, or the maximum amount of I/O allowed.
- *
- * do_page_cache_readahead() returns -1 if it encountered request queue
- * congestion.
  */
 static int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
@@ -210,6 +207,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
 		return -EINVAL;
 
+	nr_to_read = max_sane_readahead(nr_to_read);
 	while (nr_to_read) {
 		int err;
 
@@ -231,22 +229,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 }
 
 /*
- * This version skips the IO if the queue is read-congested, and will tell the
- * block layer to abandon the readahead if request allocation would block.
- *
- * force_page_cache_readahead() will ignore queue congestion and will block on
- * request queues.
- */
-int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			pgoff_t offset, unsigned long nr_to_read)
-{
-	if (bdi_read_congested(mapping->backing_dev_info))
-		return -1;
-
-	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
-}
-
-/*
  * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
  * sensible upper limit.
  */
@@ -259,7 +241,7 @@ unsigned long max_sane_readahead(unsigned long nr)
 /*
  * Submit IO for the read-ahead request in file_ra_state.
  */
-static unsigned long ra_submit(struct file_ra_state *ra,
+unsigned long ra_submit(struct file_ra_state *ra,
 		       struct address_space *mapping, struct file *filp)
 {
 	int actual;
@@ -348,6 +330,59 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
  */
 
 /*
+ * Count contiguously cached pages from @offset-1 to @offset-@max,
+ * this count is a conservative estimation of
+ * 	- length of the sequential read sequence, or
+ * 	- thrashing threshold in memory tight systems
+ */
+static pgoff_t count_history_pages(struct address_space *mapping,
+				   struct file_ra_state *ra,
+				   pgoff_t offset, unsigned long max)
+{
+	pgoff_t head;
+
+	rcu_read_lock();
+	head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
+	rcu_read_unlock();
+
+	return offset - 1 - head;
+}
+
+/*
+ * page cache context based read-ahead
+ */
+static int try_context_readahead(struct address_space *mapping,
+				 struct file_ra_state *ra,
+				 pgoff_t offset,
+				 unsigned long req_size,
+				 unsigned long max)
+{
+	pgoff_t size;
+
+	size = count_history_pages(mapping, ra, offset, max);
+
+	/*
+	 * no history pages:
+	 * it could be a random read
+	 */
+	if (!size)
+		return 0;
+
+	/*
+	 * starts from beginning of file:
+	 * it is a strong indication of long-run stream (or whole-file-read)
+	 */
+	if (size >= offset)
+		size *= 2;
+
+	ra->start = offset;
+	ra->size = get_init_ra_size(size + req_size, max);
+	ra->async_size = ra->size;
+
+	return 1;
+}
+
+/*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static unsigned long
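
Note: the two helpers added above are simple enough to model outside the kernel. The sketch below is a hypothetical user-space rendering, not part of the patch: a boolean array stands in for the page-cache radix tree, prev_hole() plays the role of radix_tree_prev_hole(), and init_ra_size() is a trivial clamp standing in for get_init_ra_size(), whose real definition is not shown in this diff. All names and numbers in it are made up for illustration; it only shows how the count of contiguously cached history pages seeds the new readahead window.

/* Hypothetical user-space model of the context readahead helpers above. */
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 256

static bool cached[NR_PAGES];		/* stand-in for the page cache */

/* Rough stand-in for radix_tree_prev_hole(): scan back at most max slots. */
static long prev_hole(long index, unsigned long max)
{
	long stop = index - (long)max;

	for (; index > stop && index >= 0; index--)
		if (!cached[index])
			return index;
	return index;	/* no hole in range: index is just before the scan */
}

/* Count pages cached contiguously just below @offset (at most @max). */
static unsigned long count_history_pages(unsigned long offset, unsigned long max)
{
	return offset - 1 - prev_hole((long)offset - 1, max);
}

/* Trivial clamp standing in for get_init_ra_size(), which this diff omits. */
static unsigned long init_ra_size(unsigned long size, unsigned long max)
{
	return size < max ? size : max;
}

int main(void)
{
	unsigned long offset = 100, req_size = 8, max = 32, i;
	unsigned long history, size;

	/* Pretend an earlier stream already read (and cached) pages 80..99. */
	for (i = 80; i < 100; i++)
		cached[i] = true;

	history = count_history_pages(offset, max);
	if (!history) {
		puts("no history pages: likely a random read, no readahead state");
		return 0;
	}
	size = history;
	if (size >= offset)		/* stream apparently runs from file start */
		size *= 2;

	size = init_ra_size(size + req_size, max);
	printf("history=%lu -> ra.start=%lu ra.size=%lu ra.async_size=%lu\n",
	       history, offset, size, size);
	return 0;
}

With 20 cached history pages the sketch prints a window of 28 pages starting at offset 100, entirely asynchronous, which is exactly the shape of state try_context_readahead() leaves behind.
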
@@ -356,34 +391,26 @@ ondemand_readahead(struct address_space *mapping,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
-	int max = ra->ra_pages;	/* max readahead pages */
-	pgoff_t prev_offset;
-	int sequential;
+	unsigned long max = max_sane_readahead(ra->ra_pages);
+
+	/*
+	 * start of file
+	 */
+	if (!offset)
+		goto initial_readahead;
 
 	/*
 	 * It's the expected callback offset, assume sequential access.
 	 * Ramp up sizes, and push forward the readahead window.
 	 */
-	if (offset && (offset == (ra->start + ra->size - ra->async_size) ||
+	if ((offset == (ra->start + ra->size - ra->async_size) ||
 			offset == (ra->start + ra->size))) {
 		ra->start += ra->size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
 		goto readit;
 	}
 
-	prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT;
-	sequential = offset - prev_offset <= 1UL || req_size > max;
-
-	/*
-	 * Standalone, small read.
-	 * Read as is, and do not pollute the readahead state.
-	 */
-	if (!hit_readahead_marker && !sequential) {
-		return __do_page_cache_readahead(mapping, filp,
-						offset, req_size, 0);
-	}
-
 	/*
 	 * Hit a marked page without valid readahead state.
 	 * E.g. interleaved reads.
@@ -394,7 +421,7 @@ ondemand_readahead(struct address_space *mapping,
 		pgoff_t start;
 
 		rcu_read_lock();
-		start = radix_tree_next_hole(&mapping->page_tree, offset,max+1);
+		start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
 		rcu_read_unlock();
 
 		if (!start || start - offset > max)
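
Note: this radix_tree_next_hole() change appears to be an optimization rather than a behaviour change. The branch runs when a readahead-marked page was hit, so the slot at offset is already occupied, and starting the hole search at offset+1 with a budget of max slots covers the same candidate range as the old call scanning max+1 slots from offset. A hypothetical check of that equivalence, with a boolean array in place of the radix tree and next_hole() as a made-up stand-in:

/* Hypothetical check that the two calls agree when slot[offset] is present. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_SLOTS 128

static bool present[NR_SLOTS];

/* Rough stand-in for radix_tree_next_hole(): scan at most max_scan slots. */
static unsigned long next_hole(unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan && index < NR_SLOTS; i++, index++)
		if (!present[index])
			break;
	return index;
}

int main(void)
{
	unsigned long offset = 40, max = 32, i;

	/* Marked page at offset plus a few readahead pages behind it. */
	for (i = offset; i < offset + 10; i++)
		present[i] = true;

	/* The old call also scanned the known-present slot at offset. */
	assert(next_hole(offset, max + 1) == next_hole(offset + 1, max));
	printf("first hole after %lu: %lu\n", offset, next_hole(offset + 1, max));
	return 0;
}
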
@@ -402,23 +429,53 @@ ondemand_readahead(struct address_space *mapping,
 
 		ra->start = start;
 		ra->size = start - offset;	/* old async_size */
+		ra->size += req_size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
 		goto readit;
 	}
 
 	/*
-	 * It may be one of
-	 * - first read on start of file
-	 * - sequential cache miss
-	 * - oversize random read
-	 * Start readahead for it.
+	 * oversize read
+	 */
+	if (req_size > max)
+		goto initial_readahead;
+
+	/*
+	 * sequential cache miss
+	 */
+	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
+		goto initial_readahead;
+
+	/*
+	 * Query the page cache and look for the traces(cached history pages)
+	 * that a sequential stream would leave behind.
+	 */
+	if (try_context_readahead(mapping, ra, offset, req_size, max))
+		goto readit;
+
+	/*
+	 * standalone, small random read
+	 * Read as is, and do not pollute the readahead state.
 	 */
+	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
+
+initial_readahead:
 	ra->start = offset;
 	ra->size = get_init_ra_size(req_size, max);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
 readit:
+	/*
+	 * Will this read hit the readahead marker made by itself?
+	 * If so, trigger the readahead marker hit now, and merge
+	 * the resulted next readahead window into the current one.
+	 */
+	if (offset == ra->start && ra->size == ra->async_size) {
+		ra->async_size = get_next_ra_size(ra, max);
+		ra->size += ra->async_size;
+	}
+
 	return ra_submit(ra, mapping, filp);
 }
 
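
Note: the new block at readit: handles windows that are entirely asynchronous, which is exactly what try_context_readahead() produces (it sets ra->async_size = ra->size, so the marker lands on the window's first page, ra->start + ra->size - ra->async_size == offset). Without the merge, the current read would submit the window and then immediately re-enter readahead on touching that marked first page; with it, the follow-up window is computed now and folded into a single submission. A hypothetical model of the arithmetic, assuming for illustration that get_next_ra_size() doubles the window up to max (its real definition is outside this diff; all values below are invented):

#include <stdio.h>

struct file_ra_state { unsigned long start, size, async_size; };

/* Illustrative stand-in; the real get_next_ra_size() is not in this diff. */
static unsigned long get_next_ra_size(struct file_ra_state *ra, unsigned long max)
{
	unsigned long next = ra->size * 2;

	return next < max ? next : max;
}

int main(void)
{
	/* State as left by try_context_readahead(): fully-async window. */
	struct file_ra_state ra = { .start = 100, .size = 16, .async_size = 16 };
	unsigned long offset = 100, max = 32;

	if (offset == ra.start && ra.size == ra.async_size) {
		ra.async_size = get_next_ra_size(&ra, max);
		ra.size += ra.async_size;
	}
	/*
	 * One submission now covers pages 100..147 with the marker moved to
	 * page 116, instead of 100..115 with the marker on page 100 itself,
	 * which the current read would trigger straight away.
	 */
	printf("start=%lu size=%lu async_size=%lu marker=%lu\n",
	       ra.start, ra.size, ra.async_size,
	       ra.start + ra.size - ra.async_size);
	return 0;
}
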