author		Nick Piggin <npiggin@suse.de>	2007-07-19 04:47:03 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-19 13:04:41 -0400
commit		d0217ac04ca6591841e5665f518e38064f4e65bd (patch)
tree		d3309094bb734d34773f97d642593e298a5cfcfc /mm
parent		ed2f2f9b3ff8debdf512f7687b232c3c1d7d60d7 (diff)
mm: fault feedback #1
Change ->fault prototype. We now return an int, which contains VM_FAULT_xxx code in the low byte, and FAULT_RET_xxx code in the next byte. FAULT_RET_ code tells the VM whether a page was found, whether it has been locked, and potentially other things. This is not quite the way he wanted it yet, but that's changed in the next patch (which requires changes to arch code).

This means we no longer set VM_CAN_INVALIDATE in the vma in order to say that a page is locked which requires filemap_nopage to go away (because we can no longer remain backward compatible without that flag), but we were going to do that anyway.

struct fault_data is renamed to struct vm_fault as Linus asked. address is now a void __user * that we should firmly encourage drivers not to use without really good reason.

The page is now returned via a page pointer in the vm_fault struct.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
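For a driver converting from ->nopage to the new interface, the handler pattern this patch establishes (compare the filemap_fault() and shmem_fault() hunks below) looks roughly like the following sketch; my_fault() and my_get_page() are hypothetical names used only for illustration and are not part of this patch:

	static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct page *page;

		/* the fault offset now arrives as vmf->pgoff rather than a virtual address */
		page = my_get_page(vma->vm_file, vmf->pgoff);	/* hypothetical page lookup */
		if (!page)
			return VM_FAULT_SIGBUS;			/* error: VM_FAULT_xxx code only */

		lock_page(page);
		vmf->page = page;				/* the found page goes back via vm_fault */
		return VM_FAULT_MINOR | FAULT_RET_LOCKED;	/* status byte | "page is locked" bit */
	}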
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c       249
-rw-r--r--  mm/filemap_xip.c    37
-rw-r--r--  mm/fremap.c         85
-rw-r--r--  mm/hugetlb.c         7
-rw-r--r--  mm/memory.c        109
-rw-r--r--  mm/nommu.c           4
-rw-r--r--  mm/shmem.c          29
7 files changed, 122 insertions(+), 398 deletions(-)
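On the consuming side, the combined return value is decoded by __do_fault() in mm/memory.c; an abridged excerpt from that hunk (see below) shows the intent:

	ret = vma->vm_ops->fault(vma, &vmf);
	if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE)))
		return (ret & VM_FAULT_MASK);	/* strip FAULT_RET_ bits before returning to arch fault code */

	/* make the faulted page always locked for the rest of __do_fault() */
	if (unlikely(!(ret & FAULT_RET_LOCKED)))
		lock_page(vmf.page);
	else
		VM_BUG_ON(!PageLocked(vmf.page));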
diff --git a/mm/filemap.c b/mm/filemap.c
index 26b992d169e5..0876cc57255f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1302 1302
1303/** 1303/**
1304 * filemap_fault - read in file data for page fault handling 1304 * filemap_fault - read in file data for page fault handling
1305 * @vma: user vma (not used) 1305 * @vma: vma in which the fault was taken
1306 * @fdata: the applicable fault_data 1306 * @vmf: struct vm_fault containing details of the fault
1307 * 1307 *
1308 * filemap_fault() is invoked via the vma operations vector for a 1308 * filemap_fault() is invoked via the vma operations vector for a
1309 * mapped memory region to read in file data during a page fault. 1309 * mapped memory region to read in file data during a page fault.
@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1312 * it in the page cache, and handles the special cases reasonably without 1312 * it in the page cache, and handles the special cases reasonably without
1313 * having a lot of duplicated code. 1313 * having a lot of duplicated code.
1314 */ 1314 */
1315struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) 1315int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1316{ 1316{
1317 int error; 1317 int error;
1318 struct file *file = vma->vm_file; 1318 struct file *file = vma->vm_file;
@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1322 struct page *page; 1322 struct page *page;
1323 unsigned long size; 1323 unsigned long size;
1324 int did_readaround = 0; 1324 int did_readaround = 0;
1325 int ret;
1325 1326
1326 fdata->type = VM_FAULT_MINOR; 1327 ret = VM_FAULT_MINOR;
1327
1328 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1329 1328
1330 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1329 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1331 if (fdata->pgoff >= size) 1330 if (vmf->pgoff >= size)
1332 goto outside_data_content; 1331 goto outside_data_content;
1333 1332
1334 /* If we don't want any read-ahead, don't bother */ 1333 /* If we don't want any read-ahead, don't bother */
@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1342 * For sequential accesses, we use the generic readahead logic. 1341 * For sequential accesses, we use the generic readahead logic.
1343 */ 1342 */
1344 if (VM_SequentialReadHint(vma)) 1343 if (VM_SequentialReadHint(vma))
1345 page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); 1344 page_cache_readahead(mapping, ra, file, vmf->pgoff, 1);
1346 1345
1347 /* 1346 /*
1348 * Do we have something in the page cache already? 1347 * Do we have something in the page cache already?
1349 */ 1348 */
1350retry_find: 1349retry_find:
1351 page = find_lock_page(mapping, fdata->pgoff); 1350 page = find_lock_page(mapping, vmf->pgoff);
1352 if (!page) { 1351 if (!page) {
1353 unsigned long ra_pages; 1352 unsigned long ra_pages;
1354 1353
1355 if (VM_SequentialReadHint(vma)) { 1354 if (VM_SequentialReadHint(vma)) {
1356 handle_ra_miss(mapping, ra, fdata->pgoff); 1355 handle_ra_miss(mapping, ra, vmf->pgoff);
1357 goto no_cached_page; 1356 goto no_cached_page;
1358 } 1357 }
1359 ra->mmap_miss++; 1358 ra->mmap_miss++;
@@ -1370,7 +1369,7 @@ retry_find:
1370 * check did_readaround, as this is an inner loop. 1369 * check did_readaround, as this is an inner loop.
1371 */ 1370 */
1372 if (!did_readaround) { 1371 if (!did_readaround) {
1373 fdata->type = VM_FAULT_MAJOR; 1372 ret = VM_FAULT_MAJOR;
1374 count_vm_event(PGMAJFAULT); 1373 count_vm_event(PGMAJFAULT);
1375 } 1374 }
1376 did_readaround = 1; 1375 did_readaround = 1;
@@ -1378,11 +1377,11 @@ retry_find:
1378 if (ra_pages) { 1377 if (ra_pages) {
1379 pgoff_t start = 0; 1378 pgoff_t start = 0;
1380 1379
1381 if (fdata->pgoff > ra_pages / 2) 1380 if (vmf->pgoff > ra_pages / 2)
1382 start = fdata->pgoff - ra_pages / 2; 1381 start = vmf->pgoff - ra_pages / 2;
1383 do_page_cache_readahead(mapping, file, start, ra_pages); 1382 do_page_cache_readahead(mapping, file, start, ra_pages);
1384 } 1383 }
1385 page = find_lock_page(mapping, fdata->pgoff); 1384 page = find_lock_page(mapping, vmf->pgoff);
1386 if (!page) 1385 if (!page)
1387 goto no_cached_page; 1386 goto no_cached_page;
1388 } 1387 }
@@ -1399,7 +1398,7 @@ retry_find:
1399 1398
1400 /* Must recheck i_size under page lock */ 1399 /* Must recheck i_size under page lock */
1401 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1400 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1402 if (unlikely(fdata->pgoff >= size)) { 1401 if (unlikely(vmf->pgoff >= size)) {
1403 unlock_page(page); 1402 unlock_page(page);
1404 goto outside_data_content; 1403 goto outside_data_content;
1405 } 1404 }
@@ -1408,24 +1407,24 @@ retry_find:
1408 * Found the page and have a reference on it. 1407 * Found the page and have a reference on it.
1409 */ 1408 */
1410 mark_page_accessed(page); 1409 mark_page_accessed(page);
1411 return page; 1410 vmf->page = page;
1411 return ret | FAULT_RET_LOCKED;
1412 1412
1413outside_data_content: 1413outside_data_content:
1414 /* 1414 /*
1415 * An external ptracer can access pages that normally aren't 1415 * An external ptracer can access pages that normally aren't
1416 * accessible.. 1416 * accessible..
1417 */ 1417 */
1418 if (vma->vm_mm == current->mm) { 1418 if (vma->vm_mm == current->mm)
1419 fdata->type = VM_FAULT_SIGBUS; 1419 return VM_FAULT_SIGBUS;
1420 return NULL; 1420
1421 }
1422 /* Fall through to the non-read-ahead case */ 1421 /* Fall through to the non-read-ahead case */
1423no_cached_page: 1422no_cached_page:
1424 /* 1423 /*
1425 * We're only likely to ever get here if MADV_RANDOM is in 1424 * We're only likely to ever get here if MADV_RANDOM is in
1426 * effect. 1425 * effect.
1427 */ 1426 */
1428 error = page_cache_read(file, fdata->pgoff); 1427 error = page_cache_read(file, vmf->pgoff);
1429 1428
1430 /* 1429 /*
1431 * The page we want has now been added to the page cache. 1430 * The page we want has now been added to the page cache.
@@ -1441,15 +1440,13 @@ no_cached_page:
1441 * to schedule I/O. 1440 * to schedule I/O.
1442 */ 1441 */
1443 if (error == -ENOMEM) 1442 if (error == -ENOMEM)
1444 fdata->type = VM_FAULT_OOM; 1443 return VM_FAULT_OOM;
1445 else 1444 return VM_FAULT_SIGBUS;
1446 fdata->type = VM_FAULT_SIGBUS;
1447 return NULL;
1448 1445
1449page_not_uptodate: 1446page_not_uptodate:
1450 /* IO error path */ 1447 /* IO error path */
1451 if (!did_readaround) { 1448 if (!did_readaround) {
1452 fdata->type = VM_FAULT_MAJOR; 1449 ret = VM_FAULT_MAJOR;
1453 count_vm_event(PGMAJFAULT); 1450 count_vm_event(PGMAJFAULT);
1454 } 1451 }
1455 1452
@@ -1468,206 +1465,10 @@ page_not_uptodate:
1468 1465
1469 /* Things didn't work out. Return zero to tell the mm layer so. */ 1466 /* Things didn't work out. Return zero to tell the mm layer so. */
1470 shrink_readahead_size_eio(file, ra); 1467 shrink_readahead_size_eio(file, ra);
1471 fdata->type = VM_FAULT_SIGBUS; 1468 return VM_FAULT_SIGBUS;
1472 return NULL;
1473} 1469}
1474EXPORT_SYMBOL(filemap_fault); 1470EXPORT_SYMBOL(filemap_fault);
1475 1471
1476/*
1477 * filemap_nopage and filemap_populate are legacy exports that are not used
1478 * in tree. Scheduled for removal.
1479 */
1480struct page *filemap_nopage(struct vm_area_struct *area,
1481 unsigned long address, int *type)
1482{
1483 struct page *page;
1484 struct fault_data fdata;
1485 fdata.address = address;
1486 fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
1487 + area->vm_pgoff;
1488 fdata.flags = 0;
1489
1490 page = filemap_fault(area, &fdata);
1491 if (type)
1492 *type = fdata.type;
1493
1494 return page;
1495}
1496EXPORT_SYMBOL(filemap_nopage);
1497
1498static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
1499 int nonblock)
1500{
1501 struct address_space *mapping = file->f_mapping;
1502 struct page *page;
1503 int error;
1504
1505 /*
1506 * Do we have something in the page cache already?
1507 */
1508retry_find:
1509 page = find_get_page(mapping, pgoff);
1510 if (!page) {
1511 if (nonblock)
1512 return NULL;
1513 goto no_cached_page;
1514 }
1515
1516 /*
1517 * Ok, found a page in the page cache, now we need to check
1518 * that it's up-to-date.
1519 */
1520 if (!PageUptodate(page)) {
1521 if (nonblock) {
1522 page_cache_release(page);
1523 return NULL;
1524 }
1525 goto page_not_uptodate;
1526 }
1527
1528success:
1529 /*
1530 * Found the page and have a reference on it.
1531 */
1532 mark_page_accessed(page);
1533 return page;
1534
1535no_cached_page:
1536 error = page_cache_read(file, pgoff);
1537
1538 /*
1539 * The page we want has now been added to the page cache.
1540 * In the unlikely event that someone removed it in the
1541 * meantime, we'll just come back here and read it again.
1542 */
1543 if (error >= 0)
1544 goto retry_find;
1545
1546 /*
1547 * An error return from page_cache_read can result if the
1548 * system is low on memory, or a problem occurs while trying
1549 * to schedule I/O.
1550 */
1551 return NULL;
1552
1553page_not_uptodate:
1554 lock_page(page);
1555
1556 /* Did it get truncated while we waited for it? */
1557 if (!page->mapping) {
1558 unlock_page(page);
1559 goto err;
1560 }
1561
1562 /* Did somebody else get it up-to-date? */
1563 if (PageUptodate(page)) {
1564 unlock_page(page);
1565 goto success;
1566 }
1567
1568 error = mapping->a_ops->readpage(file, page);
1569 if (!error) {
1570 wait_on_page_locked(page);
1571 if (PageUptodate(page))
1572 goto success;
1573 } else if (error == AOP_TRUNCATED_PAGE) {
1574 page_cache_release(page);
1575 goto retry_find;
1576 }
1577
1578 /*
1579 * Umm, take care of errors if the page isn't up-to-date.
1580 * Try to re-read it _once_. We do this synchronously,
1581 * because there really aren't any performance issues here
1582 * and we need to check for errors.
1583 */
1584 lock_page(page);
1585
1586 /* Somebody truncated the page on us? */
1587 if (!page->mapping) {
1588 unlock_page(page);
1589 goto err;
1590 }
1591 /* Somebody else successfully read it in? */
1592 if (PageUptodate(page)) {
1593 unlock_page(page);
1594 goto success;
1595 }
1596
1597 ClearPageError(page);
1598 error = mapping->a_ops->readpage(file, page);
1599 if (!error) {
1600 wait_on_page_locked(page);
1601 if (PageUptodate(page))
1602 goto success;
1603 } else if (error == AOP_TRUNCATED_PAGE) {
1604 page_cache_release(page);
1605 goto retry_find;
1606 }
1607
1608 /*
1609 * Things didn't work out. Return zero to tell the
1610 * mm layer so, possibly freeing the page cache page first.
1611 */
1612err:
1613 page_cache_release(page);
1614
1615 return NULL;
1616}
1617
1618int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
1619 unsigned long len, pgprot_t prot, unsigned long pgoff,
1620 int nonblock)
1621{
1622 struct file *file = vma->vm_file;
1623 struct address_space *mapping = file->f_mapping;
1624 struct inode *inode = mapping->host;
1625 unsigned long size;
1626 struct mm_struct *mm = vma->vm_mm;
1627 struct page *page;
1628 int err;
1629
1630 if (!nonblock)
1631 force_page_cache_readahead(mapping, vma->vm_file,
1632 pgoff, len >> PAGE_CACHE_SHIFT);
1633
1634repeat:
1635 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1636 if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
1637 return -EINVAL;
1638
1639 page = filemap_getpage(file, pgoff, nonblock);
1640
1641 /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
1642 * done in shmem_populate calling shmem_getpage */
1643 if (!page && !nonblock)
1644 return -ENOMEM;
1645
1646 if (page) {
1647 err = install_page(mm, vma, addr, page, prot);
1648 if (err) {
1649 page_cache_release(page);
1650 return err;
1651 }
1652 } else if (vma->vm_flags & VM_NONLINEAR) {
1653 /* No page was found just because we can't read it in now (being
1654 * here implies nonblock != 0), but the page may exist, so set
1655 * the PTE to fault it in later. */
1656 err = install_file_pte(mm, vma, addr, pgoff, prot);
1657 if (err)
1658 return err;
1659 }
1660
1661 len -= PAGE_SIZE;
1662 addr += PAGE_SIZE;
1663 pgoff++;
1664 if (len)
1665 goto repeat;
1666
1667 return 0;
1668}
1669EXPORT_SYMBOL(filemap_populate);
1670
1671struct vm_operations_struct generic_file_vm_ops = { 1472struct vm_operations_struct generic_file_vm_ops = {
1672 .fault = filemap_fault, 1473 .fault = filemap_fault,
1673}; 1474};
@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
1682 return -ENOEXEC; 1483 return -ENOEXEC;
1683 file_accessed(file); 1484 file_accessed(file);
1684 vma->vm_ops = &generic_file_vm_ops; 1485 vma->vm_ops = &generic_file_vm_ops;
1685 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; 1486 vma->vm_flags |= VM_CAN_NONLINEAR;
1686 return 0; 1487 return 0;
1687} 1488}
1688 1489
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 82f4b8e9834e..847d5d78163e 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -210,8 +210,7 @@ __xip_unmap (struct address_space * mapping,
210 * 210 *
211 * This function is derived from filemap_fault, but used for execute in place 211 * This function is derived from filemap_fault, but used for execute in place
212 */ 212 */
213static struct page *xip_file_fault(struct vm_area_struct *area, 213static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
214 struct fault_data *fdata)
215{ 214{
216 struct file *file = area->vm_file; 215 struct file *file = area->vm_file;
217 struct address_space *mapping = file->f_mapping; 216 struct address_space *mapping = file->f_mapping;
@@ -222,19 +221,15 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
222 /* XXX: are VM_FAULT_ codes OK? */ 221 /* XXX: are VM_FAULT_ codes OK? */
223 222
224 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 223 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
225 if (fdata->pgoff >= size) { 224 if (vmf->pgoff >= size)
226 fdata->type = VM_FAULT_SIGBUS; 225 return VM_FAULT_SIGBUS;
227 return NULL;
228 }
229 226
230 page = mapping->a_ops->get_xip_page(mapping, 227 page = mapping->a_ops->get_xip_page(mapping,
231 fdata->pgoff*(PAGE_SIZE/512), 0); 228 vmf->pgoff*(PAGE_SIZE/512), 0);
232 if (!IS_ERR(page)) 229 if (!IS_ERR(page))
233 goto out; 230 goto out;
234 if (PTR_ERR(page) != -ENODATA) { 231 if (PTR_ERR(page) != -ENODATA)
235 fdata->type = VM_FAULT_OOM; 232 return VM_FAULT_OOM;
236 return NULL;
237 }
238 233
239 /* sparse block */ 234 /* sparse block */
240 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 235 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -242,26 +237,22 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
242 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { 237 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
243 /* maybe shared writable, allocate new block */ 238 /* maybe shared writable, allocate new block */
244 page = mapping->a_ops->get_xip_page(mapping, 239 page = mapping->a_ops->get_xip_page(mapping,
245 fdata->pgoff*(PAGE_SIZE/512), 1); 240 vmf->pgoff*(PAGE_SIZE/512), 1);
246 if (IS_ERR(page)) { 241 if (IS_ERR(page))
247 fdata->type = VM_FAULT_SIGBUS; 242 return VM_FAULT_SIGBUS;
248 return NULL;
249 }
250 /* unmap page at pgoff from all other vmas */ 243 /* unmap page at pgoff from all other vmas */
251 __xip_unmap(mapping, fdata->pgoff); 244 __xip_unmap(mapping, vmf->pgoff);
252 } else { 245 } else {
253 /* not shared and writable, use xip_sparse_page() */ 246 /* not shared and writable, use xip_sparse_page() */
254 page = xip_sparse_page(); 247 page = xip_sparse_page();
255 if (!page) { 248 if (!page)
256 fdata->type = VM_FAULT_OOM; 249 return VM_FAULT_OOM;
257 return NULL;
258 }
259 } 250 }
260 251
261out: 252out:
262 fdata->type = VM_FAULT_MINOR;
263 page_cache_get(page); 253 page_cache_get(page);
264 return page; 254 vmf->page = page;
255 return VM_FAULT_MINOR;
265} 256}
266 257
267static struct vm_operations_struct xip_file_vm_ops = { 258static struct vm_operations_struct xip_file_vm_ops = {
diff --git a/mm/fremap.c b/mm/fremap.c
index 01e51f01b84e..5f50d736a037 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -20,13 +20,14 @@
20#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
21#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
22 22
23static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, 23static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
24 unsigned long addr, pte_t *ptep) 24 unsigned long addr, pte_t *ptep)
25{ 25{
26 pte_t pte = *ptep; 26 pte_t pte = *ptep;
27 struct page *page = NULL;
28 27
29 if (pte_present(pte)) { 28 if (pte_present(pte)) {
29 struct page *page;
30
30 flush_cache_page(vma, addr, pte_pfn(pte)); 31 flush_cache_page(vma, addr, pte_pfn(pte));
31 pte = ptep_clear_flush(vma, addr, ptep); 32 pte = ptep_clear_flush(vma, addr, ptep);
32 page = vm_normal_page(vma, addr, pte); 33 page = vm_normal_page(vma, addr, pte);
@@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
35 set_page_dirty(page); 36 set_page_dirty(page);
36 page_remove_rmap(page, vma); 37 page_remove_rmap(page, vma);
37 page_cache_release(page); 38 page_cache_release(page);
39 update_hiwater_rss(mm);
40 dec_mm_counter(mm, file_rss);
38 } 41 }
39 } else { 42 } else {
40 if (!pte_file(pte)) 43 if (!pte_file(pte))
41 free_swap_and_cache(pte_to_swp_entry(pte)); 44 free_swap_and_cache(pte_to_swp_entry(pte));
42 pte_clear_not_present_full(mm, addr, ptep, 0); 45 pte_clear_not_present_full(mm, addr, ptep, 0);
43 } 46 }
44 return !!page;
45} 47}
46 48
47/* 49/*
48 * Install a file page to a given virtual memory address, release any
49 * previously existing mapping.
50 */
51int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
52 unsigned long addr, struct page *page, pgprot_t prot)
53{
54 struct inode *inode;
55 pgoff_t size;
56 int err = -ENOMEM;
57 pte_t *pte;
58 pte_t pte_val;
59 spinlock_t *ptl;
60
61 pte = get_locked_pte(mm, addr, &ptl);
62 if (!pte)
63 goto out;
64
65 /*
66 * This page may have been truncated. Tell the
67 * caller about it.
68 */
69 err = -EINVAL;
70 inode = vma->vm_file->f_mapping->host;
71 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
72 if (!page->mapping || page->index >= size)
73 goto unlock;
74 err = -ENOMEM;
75 if (page_mapcount(page) > INT_MAX/2)
76 goto unlock;
77
78 if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
79 inc_mm_counter(mm, file_rss);
80
81 flush_icache_page(vma, page);
82 pte_val = mk_pte(page, prot);
83 set_pte_at(mm, addr, pte, pte_val);
84 page_add_file_rmap(page);
85 update_mmu_cache(vma, addr, pte_val);
86 lazy_mmu_prot_update(pte_val);
87 err = 0;
88unlock:
89 pte_unmap_unlock(pte, ptl);
90out:
91 return err;
92}
93EXPORT_SYMBOL(install_page);
94
95/*
96 * Install a file pte to a given virtual memory address, release any 50 * Install a file pte to a given virtual memory address, release any
97 * previously existing mapping. 51 * previously existing mapping.
98 */ 52 */
99int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, 53static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
100 unsigned long addr, unsigned long pgoff, pgprot_t prot) 54 unsigned long addr, unsigned long pgoff, pgprot_t prot)
101{ 55{
102 int err = -ENOMEM; 56 int err = -ENOMEM;
@@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
107 if (!pte) 61 if (!pte)
108 goto out; 62 goto out;
109 63
110 if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) { 64 if (!pte_none(*pte))
111 update_hiwater_rss(mm); 65 zap_pte(mm, vma, addr, pte);
112 dec_mm_counter(mm, file_rss);
113 }
114 66
115 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); 67 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
116 /* 68 /*
@@ -208,8 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
208 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) 160 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
209 goto out; 161 goto out;
210 162
211 if ((!vma->vm_ops || !vma->vm_ops->populate) && 163 if (!vma->vm_flags & VM_CAN_NONLINEAR)
212 !(vma->vm_flags & VM_CAN_NONLINEAR))
213 goto out; 164 goto out;
214 165
215 if (end <= start || start < vma->vm_start || end > vma->vm_end) 166 if (end <= start || start < vma->vm_start || end > vma->vm_end)
@@ -239,18 +190,14 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
239 spin_unlock(&mapping->i_mmap_lock); 190 spin_unlock(&mapping->i_mmap_lock);
240 } 191 }
241 192
242 if (vma->vm_flags & VM_CAN_NONLINEAR) { 193 err = populate_range(mm, vma, start, size, pgoff);
243 err = populate_range(mm, vma, start, size, pgoff); 194 if (!err && !(flags & MAP_NONBLOCK)) {
244 if (!err && !(flags & MAP_NONBLOCK)) { 195 if (unlikely(has_write_lock)) {
245 if (unlikely(has_write_lock)) { 196 downgrade_write(&mm->mmap_sem);
246 downgrade_write(&mm->mmap_sem); 197 has_write_lock = 0;
247 has_write_lock = 0;
248 }
249 make_pages_present(start, start+size);
250 } 198 }
251 } else 199 make_pages_present(start, start+size);
252 err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, 200 }
253 pgoff, flags & MAP_NONBLOCK);
254 201
255 /* 202 /*
256 * We can't clear VM_NONLINEAR because we'd have to do 203 * We can't clear VM_NONLINEAR because we'd have to do
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6912bbf33faa..aaa7c1a682d9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,15 +316,14 @@ unsigned long hugetlb_total_pages(void)
316 * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get 316 * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get
317 * this far. 317 * this far.
318 */ 318 */
319static struct page *hugetlb_nopage(struct vm_area_struct *vma, 319static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
320 unsigned long address, int *unused)
321{ 320{
322 BUG(); 321 BUG();
323 return NULL; 322 return 0;
324} 323}
325 324
326struct vm_operations_struct hugetlb_vm_ops = { 325struct vm_operations_struct hugetlb_vm_ops = {
327 .nopage = hugetlb_nopage, 326 .fault = hugetlb_vm_op_fault,
328}; 327};
329 328
330static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, 329static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
diff --git a/mm/memory.c b/mm/memory.c
index 7abd3899848b..23c870479b3e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
1834 1834
1835 /* 1835 /*
1836 * files that support invalidating or truncating portions of the 1836 * files that support invalidating or truncating portions of the
1837 * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and 1837 * file from under mmaped areas must have their ->fault function
1838 * have their .nopage function return the page locked. 1838 * return a locked page (and FAULT_RET_LOCKED code). This provides
1839 * synchronisation against concurrent unmapping here.
1839 */ 1840 */
1840 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1841 1841
1842again: 1842again:
1843 restart_addr = vma->vm_truncate_count; 1843 restart_addr = vma->vm_truncate_count;
@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2306 pgoff_t pgoff, unsigned int flags, pte_t orig_pte) 2306 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2307{ 2307{
2308 spinlock_t *ptl; 2308 spinlock_t *ptl;
2309 struct page *page, *faulted_page; 2309 struct page *page;
2310 pte_t entry; 2310 pte_t entry;
2311 int anon = 0; 2311 int anon = 0;
2312 struct page *dirty_page = NULL; 2312 struct page *dirty_page = NULL;
2313 struct fault_data fdata; 2313 struct vm_fault vmf;
2314 int ret;
2314 2315
2315 fdata.address = address & PAGE_MASK; 2316 vmf.virtual_address = (void __user *)(address & PAGE_MASK);
2316 fdata.pgoff = pgoff; 2317 vmf.pgoff = pgoff;
2317 fdata.flags = flags; 2318 vmf.flags = flags;
2319 vmf.page = NULL;
2318 2320
2319 pte_unmap(page_table); 2321 pte_unmap(page_table);
2320 BUG_ON(vma->vm_flags & VM_PFNMAP); 2322 BUG_ON(vma->vm_flags & VM_PFNMAP);
2321 2323
2322 if (likely(vma->vm_ops->fault)) { 2324 if (likely(vma->vm_ops->fault)) {
2323 fdata.type = -1; 2325 ret = vma->vm_ops->fault(vma, &vmf);
2324 faulted_page = vma->vm_ops->fault(vma, &fdata); 2326 if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE)))
2325 WARN_ON(fdata.type == -1); 2327 return (ret & VM_FAULT_MASK);
2326 if (unlikely(!faulted_page))
2327 return fdata.type;
2328 } else { 2328 } else {
2329 /* Legacy ->nopage path */ 2329 /* Legacy ->nopage path */
2330 fdata.type = VM_FAULT_MINOR; 2330 ret = VM_FAULT_MINOR;
2331 faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 2331 vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
2332 &fdata.type);
2333 /* no page was available -- either SIGBUS or OOM */ 2332 /* no page was available -- either SIGBUS or OOM */
2334 if (unlikely(faulted_page == NOPAGE_SIGBUS)) 2333 if (unlikely(vmf.page == NOPAGE_SIGBUS))
2335 return VM_FAULT_SIGBUS; 2334 return VM_FAULT_SIGBUS;
2336 else if (unlikely(faulted_page == NOPAGE_OOM)) 2335 else if (unlikely(vmf.page == NOPAGE_OOM))
2337 return VM_FAULT_OOM; 2336 return VM_FAULT_OOM;
2338 } 2337 }
2339 2338
2340 /* 2339 /*
2341 * For consistency in subsequent calls, make the faulted_page always 2340 * For consistency in subsequent calls, make the faulted page always
2342 * locked. 2341 * locked.
2343 */ 2342 */
2344 if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) 2343 if (unlikely(!(ret & FAULT_RET_LOCKED)))
2345 lock_page(faulted_page); 2344 lock_page(vmf.page);
2346 else 2345 else
2347 BUG_ON(!PageLocked(faulted_page)); 2346 VM_BUG_ON(!PageLocked(vmf.page));
2348 2347
2349 /* 2348 /*
2350 * Should we do an early C-O-W break? 2349 * Should we do an early C-O-W break?
2351 */ 2350 */
2352 page = faulted_page; 2351 page = vmf.page;
2353 if (flags & FAULT_FLAG_WRITE) { 2352 if (flags & FAULT_FLAG_WRITE) {
2354 if (!(vma->vm_flags & VM_SHARED)) { 2353 if (!(vma->vm_flags & VM_SHARED)) {
2355 anon = 1; 2354 anon = 1;
2356 if (unlikely(anon_vma_prepare(vma))) { 2355 if (unlikely(anon_vma_prepare(vma))) {
2357 fdata.type = VM_FAULT_OOM; 2356 ret = VM_FAULT_OOM;
2358 goto out; 2357 goto out;
2359 } 2358 }
2360 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2359 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2361 if (!page) { 2360 if (!page) {
2362 fdata.type = VM_FAULT_OOM; 2361 ret = VM_FAULT_OOM;
2363 goto out; 2362 goto out;
2364 } 2363 }
2365 copy_user_highpage(page, faulted_page, address, vma); 2364 copy_user_highpage(page, vmf.page, address, vma);
2366 } else { 2365 } else {
2367 /* 2366 /*
2368 * If the page will be shareable, see if the backing 2367 * If the page will be shareable, see if the backing
@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2372 if (vma->vm_ops->page_mkwrite) { 2371 if (vma->vm_ops->page_mkwrite) {
2373 unlock_page(page); 2372 unlock_page(page);
2374 if (vma->vm_ops->page_mkwrite(vma, page) < 0) { 2373 if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
2375 fdata.type = VM_FAULT_SIGBUS; 2374 ret = VM_FAULT_SIGBUS;
2376 anon = 1; /* no anon but release faulted_page */ 2375 anon = 1; /* no anon but release vmf.page */
2377 goto out_unlocked; 2376 goto out_unlocked;
2378 } 2377 }
2379 lock_page(page); 2378 lock_page(page);
2379 /*
2380 * XXX: this is not quite right (racy vs
2381 * invalidate) to unlock and relock the page
2382 * like this, however a better fix requires
2383 * reworking page_mkwrite locking API, which
2384 * is better done later.
2385 */
2386 if (!page->mapping) {
2387 ret = VM_FAULT_MINOR;
2388 anon = 1; /* no anon but release vmf.page */
2389 goto out;
2390 }
2380 } 2391 }
2381 } 2392 }
2382 2393
@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2427 pte_unmap_unlock(page_table, ptl); 2438 pte_unmap_unlock(page_table, ptl);
2428 2439
2429out: 2440out:
2430 unlock_page(faulted_page); 2441 unlock_page(vmf.page);
2431out_unlocked: 2442out_unlocked:
2432 if (anon) 2443 if (anon)
2433 page_cache_release(faulted_page); 2444 page_cache_release(vmf.page);
2434 else if (dirty_page) { 2445 else if (dirty_page) {
2435 set_page_dirty_balance(dirty_page); 2446 set_page_dirty_balance(dirty_page);
2436 put_page(dirty_page); 2447 put_page(dirty_page);
2437 } 2448 }
2438 2449
2439 return fdata.type; 2450 return (ret & VM_FAULT_MASK);
2440} 2451}
2441 2452
2442static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 2453static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2447 - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; 2458 - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
2448 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); 2459 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
2449 2460
2450 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); 2461 return __do_fault(mm, vma, address, page_table, pmd, pgoff,
2462 flags, orig_pte);
2451} 2463}
2452 2464
2453static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2454 unsigned long address, pte_t *page_table, pmd_t *pmd,
2455 int write_access, pgoff_t pgoff, pte_t orig_pte)
2456{
2457 unsigned int flags = FAULT_FLAG_NONLINEAR |
2458 (write_access ? FAULT_FLAG_WRITE : 0);
2459
2460 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
2461}
2462 2465
2463/* 2466/*
2464 * do_no_pfn() tries to create a new page mapping for a page without 2467 * do_no_pfn() tries to create a new page mapping for a page without
@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2519 * but allow concurrent faults), and pte mapped but not yet locked. 2522 * but allow concurrent faults), and pte mapped but not yet locked.
2520 * We return with mmap_sem still held, but pte unmapped and unlocked. 2523 * We return with mmap_sem still held, but pte unmapped and unlocked.
2521 */ 2524 */
2522static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, 2525static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2523 unsigned long address, pte_t *page_table, pmd_t *pmd, 2526 unsigned long address, pte_t *page_table, pmd_t *pmd,
2524 int write_access, pte_t orig_pte) 2527 int write_access, pte_t orig_pte)
2525{ 2528{
2529 unsigned int flags = FAULT_FLAG_NONLINEAR |
2530 (write_access ? FAULT_FLAG_WRITE : 0);
2526 pgoff_t pgoff; 2531 pgoff_t pgoff;
2527 int err;
2528 2532
2529 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2533 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2530 return VM_FAULT_MINOR; 2534 return VM_FAULT_MINOR;
2531 2535
2532 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { 2536 if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
2537 !(vma->vm_flags & VM_CAN_NONLINEAR))) {
2533 /* 2538 /*
2534 * Page table corrupted: show pte and kill process. 2539 * Page table corrupted: show pte and kill process.
2535 */ 2540 */
@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
2539 2544
2540 pgoff = pte_to_pgoff(orig_pte); 2545 pgoff = pte_to_pgoff(orig_pte);
2541 2546
2542 if (vma->vm_ops && vma->vm_ops->fault) 2547 return __do_fault(mm, vma, address, page_table, pmd, pgoff,
2543 return do_nonlinear_fault(mm, vma, address, page_table, pmd, 2548 flags, orig_pte);
2544 write_access, pgoff, orig_pte);
2545
2546 /* We can then assume vm->vm_ops && vma->vm_ops->populate */
2547 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
2548 vma->vm_page_prot, pgoff, 0);
2549 if (err == -ENOMEM)
2550 return VM_FAULT_OOM;
2551 if (err)
2552 return VM_FAULT_SIGBUS;
2553 return VM_FAULT_MAJOR;
2554} 2549}
2555 2550
2556/* 2551/*
@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2588 pte, pmd, write_access); 2583 pte, pmd, write_access);
2589 } 2584 }
2590 if (pte_file(entry)) 2585 if (pte_file(entry))
2591 return do_file_page(mm, vma, address, 2586 return do_nonlinear_fault(mm, vma, address,
2592 pte, pmd, write_access, entry); 2587 pte, pmd, write_access, entry);
2593 return do_swap_page(mm, vma, address, 2588 return do_swap_page(mm, vma, address,
2594 pte, pmd, write_access, entry); 2589 pte, pmd, write_access, entry);
diff --git a/mm/nommu.c b/mm/nommu.c
index aee0e1b0ebe7..1b105d28949f 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1341,10 +1341,10 @@ int in_gate_area_no_task(unsigned long addr)
1341 return 0; 1341 return 0;
1342} 1342}
1343 1343
1344struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) 1344int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1345{ 1345{
1346 BUG(); 1346 BUG();
1347 return NULL; 1347 return 0;
1348} 1348}
1349 1349
1350/* 1350/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 6b44440f1b24..0a555af8733d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1309,29 +1309,21 @@ failed:
1309 return error; 1309 return error;
1310} 1310}
1311 1311
1312static struct page *shmem_fault(struct vm_area_struct *vma, 1312static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1313 struct fault_data *fdata)
1314{ 1313{
1315 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1314 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1316 struct page *page = NULL;
1317 int error; 1315 int error;
1316 int ret;
1318 1317
1319 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); 1318 if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1319 return VM_FAULT_SIGBUS;
1320 1320
1321 if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1321 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
1322 fdata->type = VM_FAULT_SIGBUS; 1322 if (error)
1323 return NULL; 1323 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1324 }
1325
1326 error = shmem_getpage(inode, fdata->pgoff, &page,
1327 SGP_FAULT, &fdata->type);
1328 if (error) {
1329 fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS);
1330 return NULL;
1331 }
1332 1324
1333 mark_page_accessed(page); 1325 mark_page_accessed(vmf->page);
1334 return page; 1326 return ret | FAULT_RET_LOCKED;
1335} 1327}
1336 1328
1337#ifdef CONFIG_NUMA 1329#ifdef CONFIG_NUMA
@@ -1378,7 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1378{ 1370{
1379 file_accessed(file); 1371 file_accessed(file);
1380 vma->vm_ops = &shmem_vm_ops; 1372 vma->vm_ops = &shmem_vm_ops;
1381 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; 1373 vma->vm_flags |= VM_CAN_NONLINEAR;
1382 return 0; 1374 return 0;
1383} 1375}
1384 1376
@@ -2560,6 +2552,5 @@ int shmem_zero_setup(struct vm_area_struct *vma)
2560 fput(vma->vm_file); 2552 fput(vma->vm_file);
2561 vma->vm_file = file; 2553 vma->vm_file = file;
2562 vma->vm_ops = &shmem_vm_ops; 2554 vma->vm_ops = &shmem_vm_ops;
2563 vma->vm_flags |= VM_CAN_INVALIDATE;
2564 return 0; 2555 return 0;
2565} 2556}