diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 169 |
1 files changed, 96 insertions, 73 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 1b60f30cebfa..22396713feb9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -521,7 +521,7 @@ struct page *__page_cache_alloc(gfp_t gfp) | |||
521 | { | 521 | { |
522 | if (cpuset_do_page_mem_spread()) { | 522 | if (cpuset_do_page_mem_spread()) { |
523 | int n = cpuset_mem_spread_node(); | 523 | int n = cpuset_mem_spread_node(); |
524 | return alloc_pages_node(n, gfp, 0); | 524 | return alloc_pages_exact_node(n, gfp, 0); |
525 | } | 525 | } |
526 | return alloc_pages(gfp, 0); | 526 | return alloc_pages(gfp, 0); |
527 | } | 527 | } |
@@ -1004,9 +1004,6 @@ EXPORT_SYMBOL(grab_cache_page_nowait); | |||
1004 | static void shrink_readahead_size_eio(struct file *filp, | 1004 | static void shrink_readahead_size_eio(struct file *filp, |
1005 | struct file_ra_state *ra) | 1005 | struct file_ra_state *ra) |
1006 | { | 1006 | { |
1007 | if (!ra->ra_pages) | ||
1008 | return; | ||
1009 | |||
1010 | ra->ra_pages /= 4; | 1007 | ra->ra_pages /= 4; |
1011 | } | 1008 | } |
1012 | 1009 | ||
@@ -1390,8 +1387,7 @@ do_readahead(struct address_space *mapping, struct file *filp, | |||
1390 | if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) | 1387 | if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) |
1391 | return -EINVAL; | 1388 | return -EINVAL; |
1392 | 1389 | ||
1393 | force_page_cache_readahead(mapping, filp, index, | 1390 | force_page_cache_readahead(mapping, filp, index, nr); |
1394 | max_sane_readahead(nr)); | ||
1395 | return 0; | 1391 | return 0; |
1396 | } | 1392 | } |
1397 | 1393 | ||
@@ -1457,6 +1453,73 @@ static int page_cache_read(struct file *file, pgoff_t offset) | |||
1457 | 1453 | ||
1458 | #define MMAP_LOTSAMISS (100) | 1454 | #define MMAP_LOTSAMISS (100) |
1459 | 1455 | ||
1456 | /* | ||
1457 | * Synchronous readahead happens when we don't even find | ||
1458 | * a page in the page cache at all. | ||
1459 | */ | ||
1460 | static void do_sync_mmap_readahead(struct vm_area_struct *vma, | ||
1461 | struct file_ra_state *ra, | ||
1462 | struct file *file, | ||
1463 | pgoff_t offset) | ||
1464 | { | ||
1465 | unsigned long ra_pages; | ||
1466 | struct address_space *mapping = file->f_mapping; | ||
1467 | |||
1468 | /* If we don't want any read-ahead, don't bother */ | ||
1469 | if (VM_RandomReadHint(vma)) | ||
1470 | return; | ||
1471 | |||
1472 | if (VM_SequentialReadHint(vma) || | ||
1473 | offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) { | ||
1474 | page_cache_sync_readahead(mapping, ra, file, offset, | ||
1475 | ra->ra_pages); | ||
1476 | return; | ||
1477 | } | ||
1478 | |||
1479 | if (ra->mmap_miss < INT_MAX) | ||
1480 | ra->mmap_miss++; | ||
1481 | |||
1482 | /* | ||
1483 | * Do we miss much more than hit in this file? If so, | ||
1484 | * stop bothering with read-ahead. It will only hurt. | ||
1485 | */ | ||
1486 | if (ra->mmap_miss > MMAP_LOTSAMISS) | ||
1487 | return; | ||
1488 | |||
1489 | /* | ||
1490 | * mmap read-around | ||
1491 | */ | ||
1492 | ra_pages = max_sane_readahead(ra->ra_pages); | ||
1493 | if (ra_pages) { | ||
1494 | ra->start = max_t(long, 0, offset - ra_pages/2); | ||
1495 | ra->size = ra_pages; | ||
1496 | ra->async_size = 0; | ||
1497 | ra_submit(ra, mapping, file); | ||
1498 | } | ||
1499 | } | ||
1500 | |||
1501 | /* | ||
1502 | * Asynchronous readahead happens when we find the page and PG_readahead, | ||
1503 | * so we want to possibly extend the readahead further.. | ||
1504 | */ | ||
1505 | static void do_async_mmap_readahead(struct vm_area_struct *vma, | ||
1506 | struct file_ra_state *ra, | ||
1507 | struct file *file, | ||
1508 | struct page *page, | ||
1509 | pgoff_t offset) | ||
1510 | { | ||
1511 | struct address_space *mapping = file->f_mapping; | ||
1512 | |||
1513 | /* If we don't want any read-ahead, don't bother */ | ||
1514 | if (VM_RandomReadHint(vma)) | ||
1515 | return; | ||
1516 | if (ra->mmap_miss > 0) | ||
1517 | ra->mmap_miss--; | ||
1518 | if (PageReadahead(page)) | ||
1519 | page_cache_async_readahead(mapping, ra, file, | ||
1520 | page, offset, ra->ra_pages); | ||
1521 | } | ||
1522 | |||
1460 | /** | 1523 | /** |
1461 | * filemap_fault - read in file data for page fault handling | 1524 | * filemap_fault - read in file data for page fault handling |
1462 | * @vma: vma in which the fault was taken | 1525 | * @vma: vma in which the fault was taken |
@@ -1476,78 +1539,44 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1476 | struct address_space *mapping = file->f_mapping; | 1539 | struct address_space *mapping = file->f_mapping; |
1477 | struct file_ra_state *ra = &file->f_ra; | 1540 | struct file_ra_state *ra = &file->f_ra; |
1478 | struct inode *inode = mapping->host; | 1541 | struct inode *inode = mapping->host; |
1542 | pgoff_t offset = vmf->pgoff; | ||
1479 | struct page *page; | 1543 | struct page *page; |
1480 | pgoff_t size; | 1544 | pgoff_t size; |
1481 | int did_readaround = 0; | ||
1482 | int ret = 0; | 1545 | int ret = 0; |
1483 | 1546 | ||
1484 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1547 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1485 | if (vmf->pgoff >= size) | 1548 | if (offset >= size) |
1486 | return VM_FAULT_SIGBUS; | 1549 | return VM_FAULT_SIGBUS; |
1487 | 1550 | ||
1488 | /* If we don't want any read-ahead, don't bother */ | ||
1489 | if (VM_RandomReadHint(vma)) | ||
1490 | goto no_cached_page; | ||
1491 | |||
1492 | /* | 1551 | /* |
1493 | * Do we have something in the page cache already? | 1552 | * Do we have something in the page cache already? |
1494 | */ | 1553 | */ |
1495 | retry_find: | 1554 | page = find_get_page(mapping, offset); |
1496 | page = find_lock_page(mapping, vmf->pgoff); | 1555 | if (likely(page)) { |
1497 | /* | ||
1498 | * For sequential accesses, we use the generic readahead logic. | ||
1499 | */ | ||
1500 | if (VM_SequentialReadHint(vma)) { | ||
1501 | if (!page) { | ||
1502 | page_cache_sync_readahead(mapping, ra, file, | ||
1503 | vmf->pgoff, 1); | ||
1504 | page = find_lock_page(mapping, vmf->pgoff); | ||
1505 | if (!page) | ||
1506 | goto no_cached_page; | ||
1507 | } | ||
1508 | if (PageReadahead(page)) { | ||
1509 | page_cache_async_readahead(mapping, ra, file, page, | ||
1510 | vmf->pgoff, 1); | ||
1511 | } | ||
1512 | } | ||
1513 | |||
1514 | if (!page) { | ||
1515 | unsigned long ra_pages; | ||
1516 | |||
1517 | ra->mmap_miss++; | ||
1518 | |||
1519 | /* | 1556 | /* |
1520 | * Do we miss much more than hit in this file? If so, | 1557 | * We found the page, so try async readahead before |
1521 | * stop bothering with read-ahead. It will only hurt. | 1558 | * waiting for the lock. |
1522 | */ | 1559 | */ |
1523 | if (ra->mmap_miss > MMAP_LOTSAMISS) | 1560 | do_async_mmap_readahead(vma, ra, file, page, offset); |
1524 | goto no_cached_page; | 1561 | lock_page(page); |
1525 | 1562 | ||
1526 | /* | 1563 | /* Did it get truncated? */ |
1527 | * To keep the pgmajfault counter straight, we need to | 1564 | if (unlikely(page->mapping != mapping)) { |
1528 | * check did_readaround, as this is an inner loop. | 1565 | unlock_page(page); |
1529 | */ | 1566 | put_page(page); |
1530 | if (!did_readaround) { | 1567 | goto no_cached_page; |
1531 | ret = VM_FAULT_MAJOR; | ||
1532 | count_vm_event(PGMAJFAULT); | ||
1533 | } | ||
1534 | did_readaround = 1; | ||
1535 | ra_pages = max_sane_readahead(file->f_ra.ra_pages); | ||
1536 | if (ra_pages) { | ||
1537 | pgoff_t start = 0; | ||
1538 | |||
1539 | if (vmf->pgoff > ra_pages / 2) | ||
1540 | start = vmf->pgoff - ra_pages / 2; | ||
1541 | do_page_cache_readahead(mapping, file, start, ra_pages); | ||
1542 | } | 1568 | } |
1543 | page = find_lock_page(mapping, vmf->pgoff); | 1569 | } else { |
1570 | /* No page in the page cache at all */ | ||
1571 | do_sync_mmap_readahead(vma, ra, file, offset); | ||
1572 | count_vm_event(PGMAJFAULT); | ||
1573 | ret = VM_FAULT_MAJOR; | ||
1574 | retry_find: | ||
1575 | page = find_lock_page(mapping, offset); | ||
1544 | if (!page) | 1576 | if (!page) |
1545 | goto no_cached_page; | 1577 | goto no_cached_page; |
1546 | } | 1578 | } |
1547 | 1579 | ||
1548 | if (!did_readaround) | ||
1549 | ra->mmap_miss--; | ||
1550 | |||
1551 | /* | 1580 | /* |
1552 | * We have a locked page in the page cache, now we need to check | 1581 | * We have a locked page in the page cache, now we need to check |
1553 | * that it's up-to-date. If not, it is going to be due to an error. | 1582 | * that it's up-to-date. If not, it is going to be due to an error. |
@@ -1555,18 +1584,18 @@ retry_find: | |||
1555 | if (unlikely(!PageUptodate(page))) | 1584 | if (unlikely(!PageUptodate(page))) |
1556 | goto page_not_uptodate; | 1585 | goto page_not_uptodate; |
1557 | 1586 | ||
1558 | /* Must recheck i_size under page lock */ | 1587 | /* |
1588 | * Found the page and have a reference on it. | ||
1589 | * We must recheck i_size under page lock. | ||
1590 | */ | ||
1559 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1591 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1560 | if (unlikely(vmf->pgoff >= size)) { | 1592 | if (unlikely(offset >= size)) { |
1561 | unlock_page(page); | 1593 | unlock_page(page); |
1562 | page_cache_release(page); | 1594 | page_cache_release(page); |
1563 | return VM_FAULT_SIGBUS; | 1595 | return VM_FAULT_SIGBUS; |
1564 | } | 1596 | } |
1565 | 1597 | ||
1566 | /* | 1598 | ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT; |
1567 | * Found the page and have a reference on it. | ||
1568 | */ | ||
1569 | ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
1570 | vmf->page = page; | 1599 | vmf->page = page; |
1571 | return ret | VM_FAULT_LOCKED; | 1600 | return ret | VM_FAULT_LOCKED; |
1572 | 1601 | ||
@@ -1575,7 +1604,7 @@ no_cached_page: | |||
1575 | * We're only likely to ever get here if MADV_RANDOM is in | 1604 | * We're only likely to ever get here if MADV_RANDOM is in |
1576 | * effect. | 1605 | * effect. |
1577 | */ | 1606 | */ |
1578 | error = page_cache_read(file, vmf->pgoff); | 1607 | error = page_cache_read(file, offset); |
1579 | 1608 | ||
1580 | /* | 1609 | /* |
1581 | * The page we want has now been added to the page cache. | 1610 | * The page we want has now been added to the page cache. |
@@ -1595,12 +1624,6 @@ no_cached_page: | |||
1595 | return VM_FAULT_SIGBUS; | 1624 | return VM_FAULT_SIGBUS; |
1596 | 1625 | ||
1597 | page_not_uptodate: | 1626 | page_not_uptodate: |
1598 | /* IO error path */ | ||
1599 | if (!did_readaround) { | ||
1600 | ret = VM_FAULT_MAJOR; | ||
1601 | count_vm_event(PGMAJFAULT); | ||
1602 | } | ||
1603 | |||
1604 | /* | 1627 | /* |
1605 | * Umm, take care of errors if the page isn't up-to-date. | 1628 | * Umm, take care of errors if the page isn't up-to-date. |
1606 | * Try to re-read it _once_. We do this synchronously, | 1629 | * Try to re-read it _once_. We do this synchronously, |