diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-16 18:31:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-16 22:47:29 -0400 |
commit | ef00e08e26dd5d84271ef706262506b82195e752 (patch) | |
tree | 5f6cf72cf9bf0574ecfbd73f4ee5378d89298dd7 | |
parent | 51daa88ebd8e0d437289f589af29d4b39379ea76 (diff) |
readahead: clean up and simplify the code for filemap page fault readahead
This shouldn't really change behavior all that much, but the single, rather
complex function (with read-ahead done inside a loop, etc.) is broken up into
more manageable pieces.
The behavior is also less subtle: the read-ahead is done up-front rather than
inside some subtle loop, which avoids the now-unnecessary extra state
variables (i.e. "did_readaround" is gone).
Fengguang: the code split in fact fixed a bug reported by Pavel Levshin:
the PGMAJFAULT accounting used to be bypassed when MADV_RANDOM was set,
because in that case the original code jumped directly to the
no_cached_page read path.
Cc: Pavel Levshin <lpk@581.spb.su>
Cc: <wli@movementarian.org>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/filemap.c | 156 |
1 file changed, 89 insertions, 67 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index dcef9fd6b92..82753648559 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1456,6 +1456,68 @@ static int page_cache_read(struct file *file, pgoff_t offset) | |||
1456 | 1456 | ||
1457 | #define MMAP_LOTSAMISS (100) | 1457 | #define MMAP_LOTSAMISS (100) |
1458 | 1458 | ||
1459 | /* | ||
1460 | * Synchronous readahead happens when we don't even find | ||
1461 | * a page in the page cache at all. | ||
1462 | */ | ||
1463 | static void do_sync_mmap_readahead(struct vm_area_struct *vma, | ||
1464 | struct file_ra_state *ra, | ||
1465 | struct file *file, | ||
1466 | pgoff_t offset) | ||
1467 | { | ||
1468 | unsigned long ra_pages; | ||
1469 | struct address_space *mapping = file->f_mapping; | ||
1470 | |||
1471 | /* If we don't want any read-ahead, don't bother */ | ||
1472 | if (VM_RandomReadHint(vma)) | ||
1473 | return; | ||
1474 | |||
1475 | if (VM_SequentialReadHint(vma)) { | ||
1476 | page_cache_sync_readahead(mapping, ra, file, offset, 1); | ||
1477 | return; | ||
1478 | } | ||
1479 | |||
1480 | if (ra->mmap_miss < INT_MAX) | ||
1481 | ra->mmap_miss++; | ||
1482 | |||
1483 | /* | ||
1484 | * Do we miss much more than hit in this file? If so, | ||
1485 | * stop bothering with read-ahead. It will only hurt. | ||
1486 | */ | ||
1487 | if (ra->mmap_miss > MMAP_LOTSAMISS) | ||
1488 | return; | ||
1489 | |||
1490 | ra_pages = max_sane_readahead(ra->ra_pages); | ||
1491 | if (ra_pages) { | ||
1492 | pgoff_t start = 0; | ||
1493 | |||
1494 | if (offset > ra_pages / 2) | ||
1495 | start = offset - ra_pages / 2; | ||
1496 | do_page_cache_readahead(mapping, file, start, ra_pages); | ||
1497 | } | ||
1498 | } | ||
1499 | |||
1500 | /* | ||
1501 | * Asynchronous readahead happens when we find the page and PG_readahead, | ||
1502 | * so we want to possibly extend the readahead further.. | ||
1503 | */ | ||
1504 | static void do_async_mmap_readahead(struct vm_area_struct *vma, | ||
1505 | struct file_ra_state *ra, | ||
1506 | struct file *file, | ||
1507 | struct page *page, | ||
1508 | pgoff_t offset) | ||
1509 | { | ||
1510 | struct address_space *mapping = file->f_mapping; | ||
1511 | |||
1512 | /* If we don't want any read-ahead, don't bother */ | ||
1513 | if (VM_RandomReadHint(vma)) | ||
1514 | return; | ||
1515 | if (ra->mmap_miss > 0) | ||
1516 | ra->mmap_miss--; | ||
1517 | if (PageReadahead(page)) | ||
1518 | page_cache_async_readahead(mapping, ra, file, page, offset, 1); | ||
1519 | } | ||
1520 | |||
1459 | /** | 1521 | /** |
1460 | * filemap_fault - read in file data for page fault handling | 1522 | * filemap_fault - read in file data for page fault handling |
1461 | * @vma: vma in which the fault was taken | 1523 | * @vma: vma in which the fault was taken |
@@ -1475,78 +1537,44 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1475 | struct address_space *mapping = file->f_mapping; | 1537 | struct address_space *mapping = file->f_mapping; |
1476 | struct file_ra_state *ra = &file->f_ra; | 1538 | struct file_ra_state *ra = &file->f_ra; |
1477 | struct inode *inode = mapping->host; | 1539 | struct inode *inode = mapping->host; |
1540 | pgoff_t offset = vmf->pgoff; | ||
1478 | struct page *page; | 1541 | struct page *page; |
1479 | pgoff_t size; | 1542 | pgoff_t size; |
1480 | int did_readaround = 0; | ||
1481 | int ret = 0; | 1543 | int ret = 0; |
1482 | 1544 | ||
1483 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1545 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1484 | if (vmf->pgoff >= size) | 1546 | if (offset >= size) |
1485 | return VM_FAULT_SIGBUS; | 1547 | return VM_FAULT_SIGBUS; |
1486 | 1548 | ||
1487 | /* If we don't want any read-ahead, don't bother */ | ||
1488 | if (VM_RandomReadHint(vma)) | ||
1489 | goto no_cached_page; | ||
1490 | |||
1491 | /* | 1549 | /* |
1492 | * Do we have something in the page cache already? | 1550 | * Do we have something in the page cache already? |
1493 | */ | 1551 | */ |
1494 | retry_find: | 1552 | page = find_get_page(mapping, offset); |
1495 | page = find_lock_page(mapping, vmf->pgoff); | 1553 | if (likely(page)) { |
1496 | /* | ||
1497 | * For sequential accesses, we use the generic readahead logic. | ||
1498 | */ | ||
1499 | if (VM_SequentialReadHint(vma)) { | ||
1500 | if (!page) { | ||
1501 | page_cache_sync_readahead(mapping, ra, file, | ||
1502 | vmf->pgoff, 1); | ||
1503 | page = find_lock_page(mapping, vmf->pgoff); | ||
1504 | if (!page) | ||
1505 | goto no_cached_page; | ||
1506 | } | ||
1507 | if (PageReadahead(page)) { | ||
1508 | page_cache_async_readahead(mapping, ra, file, page, | ||
1509 | vmf->pgoff, 1); | ||
1510 | } | ||
1511 | } | ||
1512 | |||
1513 | if (!page) { | ||
1514 | unsigned long ra_pages; | ||
1515 | |||
1516 | ra->mmap_miss++; | ||
1517 | |||
1518 | /* | 1554 | /* |
1519 | * Do we miss much more than hit in this file? If so, | 1555 | * We found the page, so try async readahead before |
1520 | * stop bothering with read-ahead. It will only hurt. | 1556 | * waiting for the lock. |
1521 | */ | 1557 | */ |
1522 | if (ra->mmap_miss > MMAP_LOTSAMISS) | 1558 | do_async_mmap_readahead(vma, ra, file, page, offset); |
1523 | goto no_cached_page; | 1559 | lock_page(page); |
1524 | 1560 | ||
1525 | /* | 1561 | /* Did it get truncated? */ |
1526 | * To keep the pgmajfault counter straight, we need to | 1562 | if (unlikely(page->mapping != mapping)) { |
1527 | * check did_readaround, as this is an inner loop. | 1563 | unlock_page(page); |
1528 | */ | 1564 | put_page(page); |
1529 | if (!did_readaround) { | 1565 | goto no_cached_page; |
1530 | ret = VM_FAULT_MAJOR; | ||
1531 | count_vm_event(PGMAJFAULT); | ||
1532 | } | ||
1533 | did_readaround = 1; | ||
1534 | ra_pages = max_sane_readahead(file->f_ra.ra_pages); | ||
1535 | if (ra_pages) { | ||
1536 | pgoff_t start = 0; | ||
1537 | |||
1538 | if (vmf->pgoff > ra_pages / 2) | ||
1539 | start = vmf->pgoff - ra_pages / 2; | ||
1540 | do_page_cache_readahead(mapping, file, start, ra_pages); | ||
1541 | } | 1566 | } |
1542 | page = find_lock_page(mapping, vmf->pgoff); | 1567 | } else { |
1568 | /* No page in the page cache at all */ | ||
1569 | do_sync_mmap_readahead(vma, ra, file, offset); | ||
1570 | count_vm_event(PGMAJFAULT); | ||
1571 | ret = VM_FAULT_MAJOR; | ||
1572 | retry_find: | ||
1573 | page = find_lock_page(mapping, offset); | ||
1543 | if (!page) | 1574 | if (!page) |
1544 | goto no_cached_page; | 1575 | goto no_cached_page; |
1545 | } | 1576 | } |
1546 | 1577 | ||
1547 | if (!did_readaround) | ||
1548 | ra->mmap_miss--; | ||
1549 | |||
1550 | /* | 1578 | /* |
1551 | * We have a locked page in the page cache, now we need to check | 1579 | * We have a locked page in the page cache, now we need to check |
1552 | * that it's up-to-date. If not, it is going to be due to an error. | 1580 | * that it's up-to-date. If not, it is going to be due to an error. |
@@ -1554,18 +1582,18 @@ retry_find: | |||
1554 | if (unlikely(!PageUptodate(page))) | 1582 | if (unlikely(!PageUptodate(page))) |
1555 | goto page_not_uptodate; | 1583 | goto page_not_uptodate; |
1556 | 1584 | ||
1557 | /* Must recheck i_size under page lock */ | 1585 | /* |
1586 | * Found the page and have a reference on it. | ||
1587 | * We must recheck i_size under page lock. | ||
1588 | */ | ||
1558 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1589 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1559 | if (unlikely(vmf->pgoff >= size)) { | 1590 | if (unlikely(offset >= size)) { |
1560 | unlock_page(page); | 1591 | unlock_page(page); |
1561 | page_cache_release(page); | 1592 | page_cache_release(page); |
1562 | return VM_FAULT_SIGBUS; | 1593 | return VM_FAULT_SIGBUS; |
1563 | } | 1594 | } |
1564 | 1595 | ||
1565 | /* | 1596 | ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT; |
1566 | * Found the page and have a reference on it. | ||
1567 | */ | ||
1568 | ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
1569 | vmf->page = page; | 1597 | vmf->page = page; |
1570 | return ret | VM_FAULT_LOCKED; | 1598 | return ret | VM_FAULT_LOCKED; |
1571 | 1599 | ||
@@ -1574,7 +1602,7 @@ no_cached_page: | |||
1574 | * We're only likely to ever get here if MADV_RANDOM is in | 1602 | * We're only likely to ever get here if MADV_RANDOM is in |
1575 | * effect. | 1603 | * effect. |
1576 | */ | 1604 | */ |
1577 | error = page_cache_read(file, vmf->pgoff); | 1605 | error = page_cache_read(file, offset); |
1578 | 1606 | ||
1579 | /* | 1607 | /* |
1580 | * The page we want has now been added to the page cache. | 1608 | * The page we want has now been added to the page cache. |
@@ -1594,12 +1622,6 @@ no_cached_page: | |||
1594 | return VM_FAULT_SIGBUS; | 1622 | return VM_FAULT_SIGBUS; |
1595 | 1623 | ||
1596 | page_not_uptodate: | 1624 | page_not_uptodate: |
1597 | /* IO error path */ | ||
1598 | if (!did_readaround) { | ||
1599 | ret = VM_FAULT_MAJOR; | ||
1600 | count_vm_event(PGMAJFAULT); | ||
1601 | } | ||
1602 | |||
1603 | /* | 1625 | /* |
1604 | * Umm, take care of errors if the page isn't up-to-date. | 1626 | * Umm, take care of errors if the page isn't up-to-date. |
1605 | * Try to re-read it _once_. We do this synchronously, | 1627 | * Try to re-read it _once_. We do this synchronously, |