aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-06-16 18:31:25 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-06-16 22:47:29 -0400
commit ef00e08e26dd5d84271ef706262506b82195e752 (patch)
tree 5f6cf72cf9bf0574ecfbd73f4ee5378d89298dd7
parent 51daa88ebd8e0d437289f589af29d4b39379ea76 (diff)
readahead: clean up and simplify the code for filemap page fault readahead
This shouldn't really change behavior all that much, but the single rather complex function, with read-ahead inside a loop etc., is broken up into more manageable pieces.

The behaviour is also less subtle, with the read-ahead being done up-front rather than inside some subtle loop, thus avoiding the now unnecessary extra state variables (i.e. "did_readaround" is gone).

Fengguang: the code split in fact fixed a bug reported by Pavel Levshin: the PGMAJFAULT accounting used to be bypassed when MADV_RANDOM is set, in which case the original code would jump directly to the no_cached_page read path.

Cc: Pavel Levshin <lpk@581.spb.su>
Cc: <wli@movementarian.org>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/filemap.c 156
1 files changed, 89 insertions, 67 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index dcef9fd6b92..82753648559 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1456,6 +1456,68 @@ static int page_cache_read(struct file *file, pgoff_t offset)
1456 1456
1457#define MMAP_LOTSAMISS (100) 1457#define MMAP_LOTSAMISS (100)
1458 1458
1459/*
1460 * Synchronous readahead happens when we don't even find
1461 * a page in the page cache at all.
1462 */
1463static void do_sync_mmap_readahead(struct vm_area_struct *vma,
1464 struct file_ra_state *ra,
1465 struct file *file,
1466 pgoff_t offset)
1467{
1468 unsigned long ra_pages;
1469 struct address_space *mapping = file->f_mapping;
1470
1471 /* If we don't want any read-ahead, don't bother */
1472 if (VM_RandomReadHint(vma))
1473 return;
1474
1475 if (VM_SequentialReadHint(vma)) {
1476 page_cache_sync_readahead(mapping, ra, file, offset, 1);
1477 return;
1478 }
1479
1480 if (ra->mmap_miss < INT_MAX)
1481 ra->mmap_miss++;
1482
1483 /*
1484 * Do we miss much more than hit in this file? If so,
1485 * stop bothering with read-ahead. It will only hurt.
1486 */
1487 if (ra->mmap_miss > MMAP_LOTSAMISS)
1488 return;
1489
1490 ra_pages = max_sane_readahead(ra->ra_pages);
1491 if (ra_pages) {
1492 pgoff_t start = 0;
1493
1494 if (offset > ra_pages / 2)
1495 start = offset - ra_pages / 2;
1496 do_page_cache_readahead(mapping, file, start, ra_pages);
1497 }
1498}
1499
1500/*
1501 * Asynchronous readahead happens when we find the page and PG_readahead,
1502 * so we want to possibly extend the readahead further..
1503 */
1504static void do_async_mmap_readahead(struct vm_area_struct *vma,
1505 struct file_ra_state *ra,
1506 struct file *file,
1507 struct page *page,
1508 pgoff_t offset)
1509{
1510 struct address_space *mapping = file->f_mapping;
1511
1512 /* If we don't want any read-ahead, don't bother */
1513 if (VM_RandomReadHint(vma))
1514 return;
1515 if (ra->mmap_miss > 0)
1516 ra->mmap_miss--;
1517 if (PageReadahead(page))
1518 page_cache_async_readahead(mapping, ra, file, page, offset, 1);
1519}
1520
1459/** 1521/**
1460 * filemap_fault - read in file data for page fault handling 1522 * filemap_fault - read in file data for page fault handling
1461 * @vma: vma in which the fault was taken 1523 * @vma: vma in which the fault was taken
@@ -1475,78 +1537,44 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1475 struct address_space *mapping = file->f_mapping; 1537 struct address_space *mapping = file->f_mapping;
1476 struct file_ra_state *ra = &file->f_ra; 1538 struct file_ra_state *ra = &file->f_ra;
1477 struct inode *inode = mapping->host; 1539 struct inode *inode = mapping->host;
1540 pgoff_t offset = vmf->pgoff;
1478 struct page *page; 1541 struct page *page;
1479 pgoff_t size; 1542 pgoff_t size;
1480 int did_readaround = 0;
1481 int ret = 0; 1543 int ret = 0;
1482 1544
1483 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1545 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1484 if (vmf->pgoff >= size) 1546 if (offset >= size)
1485 return VM_FAULT_SIGBUS; 1547 return VM_FAULT_SIGBUS;
1486 1548
1487 /* If we don't want any read-ahead, don't bother */
1488 if (VM_RandomReadHint(vma))
1489 goto no_cached_page;
1490
1491 /* 1549 /*
1492 * Do we have something in the page cache already? 1550 * Do we have something in the page cache already?
1493 */ 1551 */
1494retry_find: 1552 page = find_get_page(mapping, offset);
1495 page = find_lock_page(mapping, vmf->pgoff); 1553 if (likely(page)) {
1496 /*
1497 * For sequential accesses, we use the generic readahead logic.
1498 */
1499 if (VM_SequentialReadHint(vma)) {
1500 if (!page) {
1501 page_cache_sync_readahead(mapping, ra, file,
1502 vmf->pgoff, 1);
1503 page = find_lock_page(mapping, vmf->pgoff);
1504 if (!page)
1505 goto no_cached_page;
1506 }
1507 if (PageReadahead(page)) {
1508 page_cache_async_readahead(mapping, ra, file, page,
1509 vmf->pgoff, 1);
1510 }
1511 }
1512
1513 if (!page) {
1514 unsigned long ra_pages;
1515
1516 ra->mmap_miss++;
1517
1518 /* 1554 /*
1519 * Do we miss much more than hit in this file? If so, 1555 * We found the page, so try async readahead before
1520 * stop bothering with read-ahead. It will only hurt. 1556 * waiting for the lock.
1521 */ 1557 */
1522 if (ra->mmap_miss > MMAP_LOTSAMISS) 1558 do_async_mmap_readahead(vma, ra, file, page, offset);
1523 goto no_cached_page; 1559 lock_page(page);
1524 1560
1525 /* 1561 /* Did it get truncated? */
1526 * To keep the pgmajfault counter straight, we need to 1562 if (unlikely(page->mapping != mapping)) {
1527 * check did_readaround, as this is an inner loop. 1563 unlock_page(page);
1528 */ 1564 put_page(page);
1529 if (!did_readaround) { 1565 goto no_cached_page;
1530 ret = VM_FAULT_MAJOR;
1531 count_vm_event(PGMAJFAULT);
1532 }
1533 did_readaround = 1;
1534 ra_pages = max_sane_readahead(file->f_ra.ra_pages);
1535 if (ra_pages) {
1536 pgoff_t start = 0;
1537
1538 if (vmf->pgoff > ra_pages / 2)
1539 start = vmf->pgoff - ra_pages / 2;
1540 do_page_cache_readahead(mapping, file, start, ra_pages);
1541 } 1566 }
1542 page = find_lock_page(mapping, vmf->pgoff); 1567 } else {
1568 /* No page in the page cache at all */
1569 do_sync_mmap_readahead(vma, ra, file, offset);
1570 count_vm_event(PGMAJFAULT);
1571 ret = VM_FAULT_MAJOR;
1572retry_find:
1573 page = find_lock_page(mapping, offset);
1543 if (!page) 1574 if (!page)
1544 goto no_cached_page; 1575 goto no_cached_page;
1545 } 1576 }
1546 1577
1547 if (!did_readaround)
1548 ra->mmap_miss--;
1549
1550 /* 1578 /*
1551 * We have a locked page in the page cache, now we need to check 1579 * We have a locked page in the page cache, now we need to check
1552 * that it's up-to-date. If not, it is going to be due to an error. 1580 * that it's up-to-date. If not, it is going to be due to an error.
@@ -1554,18 +1582,18 @@ retry_find:
1554 if (unlikely(!PageUptodate(page))) 1582 if (unlikely(!PageUptodate(page)))
1555 goto page_not_uptodate; 1583 goto page_not_uptodate;
1556 1584
1557 /* Must recheck i_size under page lock */ 1585 /*
1586 * Found the page and have a reference on it.
1587 * We must recheck i_size under page lock.
1588 */
1558 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1589 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1559 if (unlikely(vmf->pgoff >= size)) { 1590 if (unlikely(offset >= size)) {
1560 unlock_page(page); 1591 unlock_page(page);
1561 page_cache_release(page); 1592 page_cache_release(page);
1562 return VM_FAULT_SIGBUS; 1593 return VM_FAULT_SIGBUS;
1563 } 1594 }
1564 1595
1565 /* 1596 ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
1566 * Found the page and have a reference on it.
1567 */
1568 ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
1569 vmf->page = page; 1597 vmf->page = page;
1570 return ret | VM_FAULT_LOCKED; 1598 return ret | VM_FAULT_LOCKED;
1571 1599
@@ -1574,7 +1602,7 @@ no_cached_page:
1574 * We're only likely to ever get here if MADV_RANDOM is in 1602 * We're only likely to ever get here if MADV_RANDOM is in
1575 * effect. 1603 * effect.
1576 */ 1604 */
1577 error = page_cache_read(file, vmf->pgoff); 1605 error = page_cache_read(file, offset);
1578 1606
1579 /* 1607 /*
1580 * The page we want has now been added to the page cache. 1608 * The page we want has now been added to the page cache.
@@ -1594,12 +1622,6 @@ no_cached_page:
1594 return VM_FAULT_SIGBUS; 1622 return VM_FAULT_SIGBUS;
1595 1623
1596page_not_uptodate: 1624page_not_uptodate:
1597 /* IO error path */
1598 if (!did_readaround) {
1599 ret = VM_FAULT_MAJOR;
1600 count_vm_event(PGMAJFAULT);
1601 }
1602
1603 /* 1625 /*
1604 * Umm, take care of errors if the page isn't up-to-date. 1626 * Umm, take care of errors if the page isn't up-to-date.
1605 * Try to re-read it _once_. We do this synchronously, 1627 * Try to re-read it _once_. We do this synchronously,