author      Nick Piggin <npiggin@suse.de>                            2007-07-19 04:47:03 -0400
committer   Linus Torvalds <torvalds@woody.linux-foundation.org>     2007-07-19 13:04:41 -0400
commit      d0217ac04ca6591841e5665f518e38064f4e65bd (patch)
tree        d3309094bb734d34773f97d642593e298a5cfcfc /mm/filemap.c
parent      ed2f2f9b3ff8debdf512f7687b232c3c1d7d60d7 (diff)
mm: fault feedback #1
Change the ->fault prototype. We now return an int, which carries a
VM_FAULT_xxx code in the low byte and a FAULT_RET_xxx code in the next byte.
The FAULT_RET_xxx code tells the VM whether a page was found, whether it has
been locked, and potentially other things. This is not quite the way Linus
wanted it yet, but that is changed in the next patch (which requires changes
to arch code).
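
Roughly, a caller can separate the two halves as in the following sketch. This
is illustrative only: the 0xff mask and the helper names are assumptions based
on the "low byte / next byte" description above, not the literal mm.h
definitions.

	/* Illustrative sketch, not part of this patch. */
	static inline int fault_type(int fault_ret)
	{
		return fault_ret & 0xff;	/* VM_FAULT_MINOR, VM_FAULT_MAJOR, VM_FAULT_SIGBUS, ... */
	}

	static inline int fault_flags(int fault_ret)
	{
		return fault_ret & ~0xff;	/* FAULT_RET_xxx bits, e.g. FAULT_RET_LOCKED */
	}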
This means we no longer set VM_CAN_INVALIDATE in the vma to indicate that a
returned page is locked, which requires filemap_nopage to go away (we can no
longer remain backward compatible without that flag), but we were going to
remove it anyway.
struct fault_data is renamed to struct vm_fault, as Linus asked. The address
is now a void __user * that we should firmly discourage drivers from using
without a really good reason.
The page is now returned via a page pointer in the vm_fault struct.
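
For illustration, a minimal ->fault handler under the new prototype might look
like the sketch below. This is hypothetical driver code modeled on the
reworked filemap_fault in the diff; example_find_locked_page() is a made-up
helper, not a real API.

	static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct page *page;

		/* Look up (and lock) the backing page for vmf->pgoff. */
		page = example_find_locked_page(vma->vm_file, vmf->pgoff);
		if (!page)
			return VM_FAULT_SIGBUS;

		/* Hand the page back through the struct instead of the return value. */
		vmf->page = page;
		return VM_FAULT_MINOR | FAULT_RET_LOCKED;
	}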
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--   mm/filemap.c   249
1 file changed, 25 insertions(+), 224 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 26b992d169e5..0876cc57255f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 
 /**
  * filemap_fault - read in file data for page fault handling
- * @vma: user vma (not used)
- * @fdata: the applicable fault_data
+ * @vma: vma in which the fault was taken
+ * @vmf: struct vm_fault containing details of the fault
  *
  * filemap_fault() is invoked via the vma operations vector for a
  * mapped memory region to read in file data during a page fault.
@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int error;
 	struct file *file = vma->vm_file;
@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 	struct page *page;
 	unsigned long size;
 	int did_readaround = 0;
+	int ret;
 
-	fdata->type = VM_FAULT_MINOR;
-
-	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+	ret = VM_FAULT_MINOR;
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (fdata->pgoff >= size)
+	if (vmf->pgoff >= size)
 		goto outside_data_content;
 
 	/* If we don't want any read-ahead, don't bother */
@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
 	if (VM_SequentialReadHint(vma))
-		page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
+		page_cache_readahead(mapping, ra, file, vmf->pgoff, 1);
 
 	/*
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
-	page = find_lock_page(mapping, fdata->pgoff);
+	page = find_lock_page(mapping, vmf->pgoff);
 	if (!page) {
 		unsigned long ra_pages;
 
 		if (VM_SequentialReadHint(vma)) {
-			handle_ra_miss(mapping, ra, fdata->pgoff);
+			handle_ra_miss(mapping, ra, vmf->pgoff);
 			goto no_cached_page;
 		}
 		ra->mmap_miss++;
@@ -1370,7 +1369,7 @@ retry_find:
 	 * check did_readaround, as this is an inner loop.
 	 */
 	if (!did_readaround) {
-		fdata->type = VM_FAULT_MAJOR;
+		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	}
 	did_readaround = 1;
@@ -1378,11 +1377,11 @@ retry_find:
 	if (ra_pages) {
 		pgoff_t start = 0;
 
-		if (fdata->pgoff > ra_pages / 2)
-			start = fdata->pgoff - ra_pages / 2;
+		if (vmf->pgoff > ra_pages / 2)
+			start = vmf->pgoff - ra_pages / 2;
 		do_page_cache_readahead(mapping, file, start, ra_pages);
 	}
-	page = find_lock_page(mapping, fdata->pgoff);
+	page = find_lock_page(mapping, vmf->pgoff);
 	if (!page)
 		goto no_cached_page;
 	}
@@ -1399,7 +1398,7 @@ retry_find:
 
 	/* Must recheck i_size under page lock */
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(fdata->pgoff >= size)) {
+	if (unlikely(vmf->pgoff >= size)) {
 		unlock_page(page);
 		goto outside_data_content;
 	}
@@ -1408,24 +1407,24 @@ retry_find:
 	 * Found the page and have a reference on it.
 	 */
 	mark_page_accessed(page);
-	return page;
+	vmf->page = page;
+	return ret | FAULT_RET_LOCKED;
 
 outside_data_content:
 	/*
 	 * An external ptracer can access pages that normally aren't
 	 * accessible..
 	 */
-	if (vma->vm_mm == current->mm) {
-		fdata->type = VM_FAULT_SIGBUS;
-		return NULL;
-	}
+	if (vma->vm_mm == current->mm)
+		return VM_FAULT_SIGBUS;
+
 	/* Fall through to the non-read-ahead case */
 no_cached_page:
 	/*
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, fdata->pgoff);
+	error = page_cache_read(file, vmf->pgoff);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1441,15 +1440,13 @@ no_cached_page:
 	 * to schedule I/O.
 	 */
 	if (error == -ENOMEM)
-		fdata->type = VM_FAULT_OOM;
-	else
-		fdata->type = VM_FAULT_SIGBUS;
-	return NULL;
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
 
 page_not_uptodate:
 	/* IO error path */
 	if (!did_readaround) {
-		fdata->type = VM_FAULT_MAJOR;
+		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	}
 
@@ -1468,206 +1465,10 @@ page_not_uptodate:
 
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
-	fdata->type = VM_FAULT_SIGBUS;
-	return NULL;
+	return VM_FAULT_SIGBUS;
 }
 EXPORT_SYMBOL(filemap_fault);
 
-/*
- * filemap_nopage and filemap_populate are legacy exports that are not used
- * in tree. Scheduled for removal.
- */
-struct page *filemap_nopage(struct vm_area_struct *area,
-				unsigned long address, int *type)
-{
-	struct page *page;
-	struct fault_data fdata;
-	fdata.address = address;
-	fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
-			+ area->vm_pgoff;
-	fdata.flags = 0;
-
-	page = filemap_fault(area, &fdata);
-	if (type)
-		*type = fdata.type;
-
-	return page;
-}
-EXPORT_SYMBOL(filemap_nopage);
-
-static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
-					int nonblock)
-{
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int error;
-
-	/*
-	 * Do we have something in the page cache already?
-	 */
-retry_find:
-	page = find_get_page(mapping, pgoff);
-	if (!page) {
-		if (nonblock)
-			return NULL;
-		goto no_cached_page;
-	}
-
-	/*
-	 * Ok, found a page in the page cache, now we need to check
-	 * that it's up-to-date.
-	 */
-	if (!PageUptodate(page)) {
-		if (nonblock) {
-			page_cache_release(page);
-			return NULL;
-		}
-		goto page_not_uptodate;
-	}
-
-success:
-	/*
-	 * Found the page and have a reference on it.
-	 */
-	mark_page_accessed(page);
-	return page;
-
-no_cached_page:
-	error = page_cache_read(file, pgoff);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	return NULL;
-
-page_not_uptodate:
-	lock_page(page);
-
-	/* Did it get truncated while we waited for it? */
-	if (!page->mapping) {
-		unlock_page(page);
-		goto err;
-	}
-
-	/* Did somebody else get it up-to-date? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		goto success;
-	}
-
-	error = mapping->a_ops->readpage(file, page);
-	if (!error) {
-		wait_on_page_locked(page);
-		if (PageUptodate(page))
-			goto success;
-	} else if (error == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry_find;
-	}
-
-	/*
-	 * Umm, take care of errors if the page isn't up-to-date.
-	 * Try to re-read it _once_. We do this synchronously,
-	 * because there really aren't any performance issues here
-	 * and we need to check for errors.
-	 */
-	lock_page(page);
-
-	/* Somebody truncated the page on us? */
-	if (!page->mapping) {
-		unlock_page(page);
-		goto err;
-	}
-	/* Somebody else successfully read it in? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		goto success;
-	}
-
-	ClearPageError(page);
-	error = mapping->a_ops->readpage(file, page);
-	if (!error) {
-		wait_on_page_locked(page);
-		if (PageUptodate(page))
-			goto success;
-	} else if (error == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry_find;
-	}
-
-	/*
-	 * Things didn't work out. Return zero to tell the
-	 * mm layer so, possibly freeing the page cache page first.
-	 */
-err:
-	page_cache_release(page);
-
-	return NULL;
-}
-
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long len, pgprot_t prot, unsigned long pgoff,
-		int nonblock)
-{
-	struct file *file = vma->vm_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	unsigned long size;
-	struct mm_struct *mm = vma->vm_mm;
-	struct page *page;
-	int err;
-
-	if (!nonblock)
-		force_page_cache_readahead(mapping, vma->vm_file,
-					pgoff, len >> PAGE_CACHE_SHIFT);
-
-repeat:
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
-		return -EINVAL;
-
-	page = filemap_getpage(file, pgoff, nonblock);
-
-	/* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
-	 * done in shmem_populate calling shmem_getpage */
-	if (!page && !nonblock)
-		return -ENOMEM;
-
-	if (page) {
-		err = install_page(mm, vma, addr, page, prot);
-		if (err) {
-			page_cache_release(page);
-			return err;
-		}
-	} else if (vma->vm_flags & VM_NONLINEAR) {
-		/* No page was found just because we can't read it in now (being
-		 * here implies nonblock != 0), but the page may exist, so set
-		 * the PTE to fault it in later. */
-		err = install_file_pte(mm, vma, addr, pgoff, prot);
-		if (err)
-			return err;
-	}
-
-	len -= PAGE_SIZE;
-	addr += PAGE_SIZE;
-	pgoff++;
-	if (len)
-		goto repeat;
-
-	return 0;
-}
-EXPORT_SYMBOL(filemap_populate);
-
 struct vm_operations_struct generic_file_vm_ops = {
 	.fault = filemap_fault,
 };
@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 		return -ENOEXEC;
 	file_accessed(file);
 	vma->vm_ops = &generic_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 	return 0;
 }
 