Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c       249
-rw-r--r--  mm/filemap_xip.c    37
-rw-r--r--  mm/fremap.c         85
-rw-r--r--  mm/hugetlb.c         7
-rw-r--r--  mm/memory.c        109
-rw-r--r--  mm/nommu.c           4
-rw-r--r--  mm/shmem.c          29
7 files changed, 122 insertions, 398 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 26b992d169e5..0876cc57255f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1302 1302
1303/** 1303/**
1304 * filemap_fault - read in file data for page fault handling 1304 * filemap_fault - read in file data for page fault handling
1305 * @vma: user vma (not used) 1305 * @vma: vma in which the fault was taken
1306 * @fdata: the applicable fault_data 1306 * @vmf: struct vm_fault containing details of the fault
1307 * 1307 *
1308 * filemap_fault() is invoked via the vma operations vector for a 1308 * filemap_fault() is invoked via the vma operations vector for a
1309 * mapped memory region to read in file data during a page fault. 1309 * mapped memory region to read in file data during a page fault.
@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1312 * it in the page cache, and handles the special cases reasonably without 1312 * it in the page cache, and handles the special cases reasonably without
1313 * having a lot of duplicated code. 1313 * having a lot of duplicated code.
1314 */ 1314 */
1315struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) 1315int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1316{ 1316{
1317 int error; 1317 int error;
1318 struct file *file = vma->vm_file; 1318 struct file *file = vma->vm_file;
@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1322 struct page *page; 1322 struct page *page;
1323 unsigned long size; 1323 unsigned long size;
1324 int did_readaround = 0; 1324 int did_readaround = 0;
1325 int ret;
1325 1326
1326 fdata->type = VM_FAULT_MINOR; 1327 ret = VM_FAULT_MINOR;
1327
1328 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1329 1328
1330 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1329 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1331 if (fdata->pgoff >= size) 1330 if (vmf->pgoff >= size)
1332 goto outside_data_content; 1331 goto outside_data_content;
1333 1332
1334 /* If we don't want any read-ahead, don't bother */ 1333 /* If we don't want any read-ahead, don't bother */
@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1342 * For sequential accesses, we use the generic readahead logic. 1341 * For sequential accesses, we use the generic readahead logic.
1343 */ 1342 */
1344 if (VM_SequentialReadHint(vma)) 1343 if (VM_SequentialReadHint(vma))
1345 page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); 1344 page_cache_readahead(mapping, ra, file, vmf->pgoff, 1);
1346 1345
1347 /* 1346 /*
1348 * Do we have something in the page cache already? 1347 * Do we have something in the page cache already?
1349 */ 1348 */
1350retry_find: 1349retry_find:
1351 page = find_lock_page(mapping, fdata->pgoff); 1350 page = find_lock_page(mapping, vmf->pgoff);
1352 if (!page) { 1351 if (!page) {
1353 unsigned long ra_pages; 1352 unsigned long ra_pages;
1354 1353
1355 if (VM_SequentialReadHint(vma)) { 1354 if (VM_SequentialReadHint(vma)) {
1356 handle_ra_miss(mapping, ra, fdata->pgoff); 1355 handle_ra_miss(mapping, ra, vmf->pgoff);
1357 goto no_cached_page; 1356 goto no_cached_page;
1358 } 1357 }
1359 ra->mmap_miss++; 1358 ra->mmap_miss++;
@@ -1370,7 +1369,7 @@ retry_find:
1370 * check did_readaround, as this is an inner loop. 1369 * check did_readaround, as this is an inner loop.
1371 */ 1370 */
1372 if (!did_readaround) { 1371 if (!did_readaround) {
1373 fdata->type = VM_FAULT_MAJOR; 1372 ret = VM_FAULT_MAJOR;
1374 count_vm_event(PGMAJFAULT); 1373 count_vm_event(PGMAJFAULT);
1375 } 1374 }
1376 did_readaround = 1; 1375 did_readaround = 1;
@@ -1378,11 +1377,11 @@ retry_find:
1378 if (ra_pages) { 1377 if (ra_pages) {
1379 pgoff_t start = 0; 1378 pgoff_t start = 0;
1380 1379
1381 if (fdata->pgoff > ra_pages / 2) 1380 if (vmf->pgoff > ra_pages / 2)
1382 start = fdata->pgoff - ra_pages / 2; 1381 start = vmf->pgoff - ra_pages / 2;
1383 do_page_cache_readahead(mapping, file, start, ra_pages); 1382 do_page_cache_readahead(mapping, file, start, ra_pages);
1384 } 1383 }
1385 page = find_lock_page(mapping, fdata->pgoff); 1384 page = find_lock_page(mapping, vmf->pgoff);
1386 if (!page) 1385 if (!page)
1387 goto no_cached_page; 1386 goto no_cached_page;
1388 } 1387 }
@@ -1399,7 +1398,7 @@ retry_find:
1399 1398
1400 /* Must recheck i_size under page lock */ 1399 /* Must recheck i_size under page lock */
1401 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1400 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1402 if (unlikely(fdata->pgoff >= size)) { 1401 if (unlikely(vmf->pgoff >= size)) {
1403 unlock_page(page); 1402 unlock_page(page);
1404 goto outside_data_content; 1403 goto outside_data_content;
1405 } 1404 }
@@ -1408,24 +1407,24 @@ retry_find:
1408 * Found the page and have a reference on it. 1407 * Found the page and have a reference on it.
1409 */ 1408 */
1410 mark_page_accessed(page); 1409 mark_page_accessed(page);
1411 return page; 1410 vmf->page = page;
1411 return ret | FAULT_RET_LOCKED;
1412 1412
1413outside_data_content: 1413outside_data_content:
1414 /* 1414 /*
1415 * An external ptracer can access pages that normally aren't 1415 * An external ptracer can access pages that normally aren't
1416 * accessible.. 1416 * accessible..
1417 */ 1417 */
1418 if (vma->vm_mm == current->mm) { 1418 if (vma->vm_mm == current->mm)
1419 fdata->type = VM_FAULT_SIGBUS; 1419 return VM_FAULT_SIGBUS;
1420 return NULL; 1420
1421 }
1422 /* Fall through to the non-read-ahead case */ 1421 /* Fall through to the non-read-ahead case */
1423no_cached_page: 1422no_cached_page:
1424 /* 1423 /*
1425 * We're only likely to ever get here if MADV_RANDOM is in 1424 * We're only likely to ever get here if MADV_RANDOM is in
1426 * effect. 1425 * effect.
1427 */ 1426 */
1428 error = page_cache_read(file, fdata->pgoff); 1427 error = page_cache_read(file, vmf->pgoff);
1429 1428
1430 /* 1429 /*
1431 * The page we want has now been added to the page cache. 1430 * The page we want has now been added to the page cache.
@@ -1441,15 +1440,13 @@ no_cached_page:
1441 * to schedule I/O. 1440 * to schedule I/O.
1442 */ 1441 */
1443 if (error == -ENOMEM) 1442 if (error == -ENOMEM)
1444 fdata->type = VM_FAULT_OOM; 1443 return VM_FAULT_OOM;
1445 else 1444 return VM_FAULT_SIGBUS;
1446 fdata->type = VM_FAULT_SIGBUS;
1447 return NULL;
1448 1445
1449page_not_uptodate: 1446page_not_uptodate:
1450 /* IO error path */ 1447 /* IO error path */
1451 if (!did_readaround) { 1448 if (!did_readaround) {
1452 fdata->type = VM_FAULT_MAJOR; 1449 ret = VM_FAULT_MAJOR;
1453 count_vm_event(PGMAJFAULT); 1450 count_vm_event(PGMAJFAULT);
1454 } 1451 }
1455 1452
@@ -1468,206 +1465,10 @@ page_not_uptodate:
1468 1465
1469 /* Things didn't work out. Return zero to tell the mm layer so. */ 1466 /* Things didn't work out. Return zero to tell the mm layer so. */
1470 shrink_readahead_size_eio(file, ra); 1467 shrink_readahead_size_eio(file, ra);
1471 fdata->type = VM_FAULT_SIGBUS; 1468 return VM_FAULT_SIGBUS;
1472 return NULL;
1473} 1469}
1474EXPORT_SYMBOL(filemap_fault); 1470EXPORT_SYMBOL(filemap_fault);
1475 1471
1476/*
1477 * filemap_nopage and filemap_populate are legacy exports that are not used
1478 * in tree. Scheduled for removal.
1479 */
1480struct page *filemap_nopage(struct vm_area_struct *area,
1481 unsigned long address, int *type)
1482{
1483 struct page *page;
1484 struct fault_data fdata;
1485 fdata.address = address;
1486 fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
1487 + area->vm_pgoff;
1488 fdata.flags = 0;
1489
1490 page = filemap_fault(area, &fdata);
1491 if (type)
1492 *type = fdata.type;
1493
1494 return page;
1495}
1496EXPORT_SYMBOL(filemap_nopage);
1497
1498static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
1499 int nonblock)
1500{
1501 struct address_space *mapping = file->f_mapping;
1502 struct page *page;
1503 int error;
1504
1505 /*
1506 * Do we have something in the page cache already?
1507 */
1508retry_find:
1509 page = find_get_page(mapping, pgoff);
1510 if (!page) {
1511 if (nonblock)
1512 return NULL;
1513 goto no_cached_page;
1514 }
1515
1516 /*
1517 * Ok, found a page in the page cache, now we need to check
1518 * that it's up-to-date.
1519 */
1520 if (!PageUptodate(page)) {
1521 if (nonblock) {
1522 page_cache_release(page);
1523 return NULL;
1524 }
1525 goto page_not_uptodate;
1526 }
1527
1528success:
1529 /*
1530 * Found the page and have a reference on it.
1531 */
1532 mark_page_accessed(page);
1533 return page;
1534
1535no_cached_page:
1536 error = page_cache_read(file, pgoff);
1537
1538 /*
1539 * The page we want has now been added to the page cache.
1540 * In the unlikely event that someone removed it in the
1541 * meantime, we'll just come back here and read it again.
1542 */
1543 if (error >= 0)
1544 goto retry_find;
1545
1546 /*
1547 * An error return from page_cache_read can result if the
1548 * system is low on memory, or a problem occurs while trying
1549 * to schedule I/O.
1550 */
1551 return NULL;
1552
1553page_not_uptodate:
1554 lock_page(page);
1555
1556 /* Did it get truncated while we waited for it? */
1557 if (!page->mapping) {
1558 unlock_page(page);
1559 goto err;
1560 }
1561
1562 /* Did somebody else get it up-to-date? */
1563 if (PageUptodate(page)) {
1564 unlock_page(page);
1565 goto success;
1566 }
1567
1568 error = mapping->a_ops->readpage(file, page);
1569 if (!error) {
1570 wait_on_page_locked(page);
1571 if (PageUptodate(page))
1572 goto success;
1573 } else if (error == AOP_TRUNCATED_PAGE) {
1574 page_cache_release(page);
1575 goto retry_find;
1576 }
1577
1578 /*
1579 * Umm, take care of errors if the page isn't up-to-date.
1580 * Try to re-read it _once_. We do this synchronously,
1581 * because there really aren't any performance issues here
1582 * and we need to check for errors.
1583 */
1584 lock_page(page);
1585
1586 /* Somebody truncated the page on us? */
1587 if (!page->mapping) {
1588 unlock_page(page);
1589 goto err;
1590 }
1591 /* Somebody else successfully read it in? */
1592 if (PageUptodate(page)) {
1593 unlock_page(page);
1594 goto success;
1595 }
1596
1597 ClearPageError(page);
1598 error = mapping->a_ops->readpage(file, page);
1599 if (!error) {
1600 wait_on_page_locked(page);
1601 if (PageUptodate(page))
1602 goto success;
1603 } else if (error == AOP_TRUNCATED_PAGE) {
1604 page_cache_release(page);
1605 goto retry_find;
1606 }
1607
1608 /*
1609 * Things didn't work out. Return zero to tell the
1610 * mm layer so, possibly freeing the page cache page first.
1611 */
1612err:
1613 page_cache_release(page);
1614
1615 return NULL;
1616}
1617
1618int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
1619 unsigned long len, pgprot_t prot, unsigned long pgoff,
1620 int nonblock)
1621{
1622 struct file *file = vma->vm_file;
1623 struct address_space *mapping = file->f_mapping;
1624 struct inode *inode = mapping->host;
1625 unsigned long size;
1626 struct mm_struct *mm = vma->vm_mm;
1627 struct page *page;
1628 int err;
1629
1630 if (!nonblock)
1631 force_page_cache_readahead(mapping, vma->vm_file,
1632 pgoff, len >> PAGE_CACHE_SHIFT);
1633
1634repeat:
1635 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1636 if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
1637 return -EINVAL;
1638
1639 page = filemap_getpage(file, pgoff, nonblock);
1640
1641 /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
1642 * done in shmem_populate calling shmem_getpage */
1643 if (!page && !nonblock)
1644 return -ENOMEM;
1645
1646 if (page) {
1647 err = install_page(mm, vma, addr, page, prot);
1648 if (err) {
1649 page_cache_release(page);
1650 return err;
1651 }
1652 } else if (vma->vm_flags & VM_NONLINEAR) {
1653 /* No page was found just because we can't read it in now (being
1654 * here implies nonblock != 0), but the page may exist, so set
1655 * the PTE to fault it in later. */
1656 err = install_file_pte(mm, vma, addr, pgoff, prot);
1657 if (err)
1658 return err;
1659 }
1660
1661 len -= PAGE_SIZE;
1662 addr += PAGE_SIZE;
1663 pgoff++;
1664 if (len)
1665 goto repeat;
1666
1667 return 0;
1668}
1669EXPORT_SYMBOL(filemap_populate);
1670
1671struct vm_operations_struct generic_file_vm_ops = { 1472struct vm_operations_struct generic_file_vm_ops = {
1672 .fault = filemap_fault, 1473 .fault = filemap_fault,
1673}; 1474};
@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
1682 return -ENOEXEC; 1483 return -ENOEXEC;
1683 file_accessed(file); 1484 file_accessed(file);
1684 vma->vm_ops = &generic_file_vm_ops; 1485 vma->vm_ops = &generic_file_vm_ops;
1685 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; 1486 vma->vm_flags |= VM_CAN_NONLINEAR;
1686 return 0; 1487 return 0;
1687} 1488}
1688 1489
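
For orientation before the remaining files, here is a minimal sketch of a handler written against the ->fault() convention that filemap_fault() now follows. The myfs_fault() and myfs_find_page() names are placeholders for illustration, not functions added by this patch; the vm_fault fields and the VM_FAULT_*/FAULT_RET_* codes are the ones used in the hunks above (kernel headers such as <linux/mm.h> and <linux/pagemap.h> assumed).

static int myfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page;

	/* Look up (or read in) the page backing vmf->pgoff; placeholder helper. */
	page = myfs_find_page(vma->vm_file, vmf->pgoff);
	if (!page)
		return VM_FAULT_SIGBUS;	/* or VM_FAULT_OOM on allocation failure */

	lock_page(page);
	mark_page_accessed(page);
	vmf->page = page;	/* hand the page back through the fault descriptor */
	return VM_FAULT_MINOR | FAULT_RET_LOCKED;	/* page is returned locked */
}
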
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 82f4b8e9834e..847d5d78163e 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -210,8 +210,7 @@ __xip_unmap (struct address_space * mapping,
210 * 210 *
211 * This function is derived from filemap_fault, but used for execute in place 211 * This function is derived from filemap_fault, but used for execute in place
212 */ 212 */
213static struct page *xip_file_fault(struct vm_area_struct *area, 213static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
214 struct fault_data *fdata)
215{ 214{
216 struct file *file = area->vm_file; 215 struct file *file = area->vm_file;
217 struct address_space *mapping = file->f_mapping; 216 struct address_space *mapping = file->f_mapping;
@@ -222,19 +221,15 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
222 /* XXX: are VM_FAULT_ codes OK? */ 221 /* XXX: are VM_FAULT_ codes OK? */
223 222
224 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 223 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
225 if (fdata->pgoff >= size) { 224 if (vmf->pgoff >= size)
226 fdata->type = VM_FAULT_SIGBUS; 225 return VM_FAULT_SIGBUS;
227 return NULL;
228 }
229 226
230 page = mapping->a_ops->get_xip_page(mapping, 227 page = mapping->a_ops->get_xip_page(mapping,
231 fdata->pgoff*(PAGE_SIZE/512), 0); 228 vmf->pgoff*(PAGE_SIZE/512), 0);
232 if (!IS_ERR(page)) 229 if (!IS_ERR(page))
233 goto out; 230 goto out;
234 if (PTR_ERR(page) != -ENODATA) { 231 if (PTR_ERR(page) != -ENODATA)
235 fdata->type = VM_FAULT_OOM; 232 return VM_FAULT_OOM;
236 return NULL;
237 }
238 233
239 /* sparse block */ 234 /* sparse block */
240 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 235 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -242,26 +237,22 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
242 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { 237 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
243 /* maybe shared writable, allocate new block */ 238 /* maybe shared writable, allocate new block */
244 page = mapping->a_ops->get_xip_page(mapping, 239 page = mapping->a_ops->get_xip_page(mapping,
245 fdata->pgoff*(PAGE_SIZE/512), 1); 240 vmf->pgoff*(PAGE_SIZE/512), 1);
246 if (IS_ERR(page)) { 241 if (IS_ERR(page))
247 fdata->type = VM_FAULT_SIGBUS; 242 return VM_FAULT_SIGBUS;
248 return NULL;
249 }
250 /* unmap page at pgoff from all other vmas */ 243 /* unmap page at pgoff from all other vmas */
251 __xip_unmap(mapping, fdata->pgoff); 244 __xip_unmap(mapping, vmf->pgoff);
252 } else { 245 } else {
253 /* not shared and writable, use xip_sparse_page() */ 246 /* not shared and writable, use xip_sparse_page() */
254 page = xip_sparse_page(); 247 page = xip_sparse_page();
255 if (!page) { 248 if (!page)
256 fdata->type = VM_FAULT_OOM; 249 return VM_FAULT_OOM;
257 return NULL;
258 }
259 } 250 }
260 251
261out: 252out:
262 fdata->type = VM_FAULT_MINOR;
263 page_cache_get(page); 253 page_cache_get(page);
264 return page; 254 vmf->page = page;
255 return VM_FAULT_MINOR;
265} 256}
266 257
267static struct vm_operations_struct xip_file_vm_ops = { 258static struct vm_operations_struct xip_file_vm_ops = {
diff --git a/mm/fremap.c b/mm/fremap.c
index 01e51f01b84e..5f50d736a037 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -20,13 +20,14 @@
20#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
21#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
22 22
23static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, 23static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
24 unsigned long addr, pte_t *ptep) 24 unsigned long addr, pte_t *ptep)
25{ 25{
26 pte_t pte = *ptep; 26 pte_t pte = *ptep;
27 struct page *page = NULL;
28 27
29 if (pte_present(pte)) { 28 if (pte_present(pte)) {
29 struct page *page;
30
30 flush_cache_page(vma, addr, pte_pfn(pte)); 31 flush_cache_page(vma, addr, pte_pfn(pte));
31 pte = ptep_clear_flush(vma, addr, ptep); 32 pte = ptep_clear_flush(vma, addr, ptep);
32 page = vm_normal_page(vma, addr, pte); 33 page = vm_normal_page(vma, addr, pte);
@@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
35 set_page_dirty(page); 36 set_page_dirty(page);
36 page_remove_rmap(page, vma); 37 page_remove_rmap(page, vma);
37 page_cache_release(page); 38 page_cache_release(page);
39 update_hiwater_rss(mm);
40 dec_mm_counter(mm, file_rss);
38 } 41 }
39 } else { 42 } else {
40 if (!pte_file(pte)) 43 if (!pte_file(pte))
41 free_swap_and_cache(pte_to_swp_entry(pte)); 44 free_swap_and_cache(pte_to_swp_entry(pte));
42 pte_clear_not_present_full(mm, addr, ptep, 0); 45 pte_clear_not_present_full(mm, addr, ptep, 0);
43 } 46 }
44 return !!page;
45} 47}
46 48
47/* 49/*
48 * Install a file page to a given virtual memory address, release any
49 * previously existing mapping.
50 */
51int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
52 unsigned long addr, struct page *page, pgprot_t prot)
53{
54 struct inode *inode;
55 pgoff_t size;
56 int err = -ENOMEM;
57 pte_t *pte;
58 pte_t pte_val;
59 spinlock_t *ptl;
60
61 pte = get_locked_pte(mm, addr, &ptl);
62 if (!pte)
63 goto out;
64
65 /*
66 * This page may have been truncated. Tell the
67 * caller about it.
68 */
69 err = -EINVAL;
70 inode = vma->vm_file->f_mapping->host;
71 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
72 if (!page->mapping || page->index >= size)
73 goto unlock;
74 err = -ENOMEM;
75 if (page_mapcount(page) > INT_MAX/2)
76 goto unlock;
77
78 if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
79 inc_mm_counter(mm, file_rss);
80
81 flush_icache_page(vma, page);
82 pte_val = mk_pte(page, prot);
83 set_pte_at(mm, addr, pte, pte_val);
84 page_add_file_rmap(page);
85 update_mmu_cache(vma, addr, pte_val);
86 lazy_mmu_prot_update(pte_val);
87 err = 0;
88unlock:
89 pte_unmap_unlock(pte, ptl);
90out:
91 return err;
92}
93EXPORT_SYMBOL(install_page);
94
95/*
96 * Install a file pte to a given virtual memory address, release any 50 * Install a file pte to a given virtual memory address, release any
97 * previously existing mapping. 51 * previously existing mapping.
98 */ 52 */
99int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, 53static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
100 unsigned long addr, unsigned long pgoff, pgprot_t prot) 54 unsigned long addr, unsigned long pgoff, pgprot_t prot)
101{ 55{
102 int err = -ENOMEM; 56 int err = -ENOMEM;
@@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
107 if (!pte) 61 if (!pte)
108 goto out; 62 goto out;
109 63
110 if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) { 64 if (!pte_none(*pte))
111 update_hiwater_rss(mm); 65 zap_pte(mm, vma, addr, pte);
112 dec_mm_counter(mm, file_rss);
113 }
114 66
115 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); 67 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
116 /* 68 /*
@@ -208,8 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
208 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) 160 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
209 goto out; 161 goto out;
210 162
211 if ((!vma->vm_ops || !vma->vm_ops->populate) && 163 if (!(vma->vm_flags & VM_CAN_NONLINEAR))
212 !(vma->vm_flags & VM_CAN_NONLINEAR))
213 goto out; 164 goto out;
214 165
215 if (end <= start || start < vma->vm_start || end > vma->vm_end) 166 if (end <= start || start < vma->vm_start || end > vma->vm_end)
@@ -239,18 +190,14 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
239 spin_unlock(&mapping->i_mmap_lock); 190 spin_unlock(&mapping->i_mmap_lock);
240 } 191 }
241 192
242 if (vma->vm_flags & VM_CAN_NONLINEAR) { 193 err = populate_range(mm, vma, start, size, pgoff);
243 err = populate_range(mm, vma, start, size, pgoff); 194 if (!err && !(flags & MAP_NONBLOCK)) {
244 if (!err && !(flags & MAP_NONBLOCK)) { 195 if (unlikely(has_write_lock)) {
245 if (unlikely(has_write_lock)) { 196 downgrade_write(&mm->mmap_sem);
246 downgrade_write(&mm->mmap_sem); 197 has_write_lock = 0;
247 has_write_lock = 0;
248 }
249 make_pages_present(start, start+size);
250 } 198 }
251 } else 199 make_pages_present(start, start+size);
252 err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, 200 }
253 pgoff, flags & MAP_NONBLOCK);
254 201
255 /* 202 /*
256 * We can't clear VM_NONLINEAR because we'd have to do 203 * We can't clear VM_NONLINEAR because we'd have to do
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6912bbf33faa..aaa7c1a682d9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,15 +316,14 @@ unsigned long hugetlb_total_pages(void)
316 * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get 316 * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get
317 * this far. 317 * this far.
318 */ 318 */
319static struct page *hugetlb_nopage(struct vm_area_struct *vma, 319static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
320 unsigned long address, int *unused)
321{ 320{
322 BUG(); 321 BUG();
323 return NULL; 322 return 0;
324} 323}
325 324
326struct vm_operations_struct hugetlb_vm_ops = { 325struct vm_operations_struct hugetlb_vm_ops = {
327 .nopage = hugetlb_nopage, 326 .fault = hugetlb_vm_op_fault,
328}; 327};
329 328
330static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, 329static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
diff --git a/mm/memory.c b/mm/memory.c
index 7abd3899848b..23c870479b3e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
1834 1834
1835 /* 1835 /*
1836 * files that support invalidating or truncating portions of the 1836 * files that support invalidating or truncating portions of the
1837 * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and 1837 * file from under mmaped areas must have their ->fault function
1838 * have their .nopage function return the page locked. 1838 * return a locked page (and FAULT_RET_LOCKED code). This provides
1839 * synchronisation against concurrent unmapping here.
1839 */ 1840 */
1840 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1841 1841
1842again: 1842again:
1843 restart_addr = vma->vm_truncate_count; 1843 restart_addr = vma->vm_truncate_count;
@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2306 pgoff_t pgoff, unsigned int flags, pte_t orig_pte) 2306 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2307{ 2307{
2308 spinlock_t *ptl; 2308 spinlock_t *ptl;
2309 struct page *page, *faulted_page; 2309 struct page *page;
2310 pte_t entry; 2310 pte_t entry;
2311 int anon = 0; 2311 int anon = 0;
2312 struct page *dirty_page = NULL; 2312 struct page *dirty_page = NULL;
2313 struct fault_data fdata; 2313 struct vm_fault vmf;
2314 int ret;
2314 2315
2315 fdata.address = address & PAGE_MASK; 2316 vmf.virtual_address = (void __user *)(address & PAGE_MASK);
2316 fdata.pgoff = pgoff; 2317 vmf.pgoff = pgoff;
2317 fdata.flags = flags; 2318 vmf.flags = flags;
2319 vmf.page = NULL;
2318 2320
2319 pte_unmap(page_table); 2321 pte_unmap(page_table);
2320 BUG_ON(vma->vm_flags & VM_PFNMAP); 2322 BUG_ON(vma->vm_flags & VM_PFNMAP);
2321 2323
2322 if (likely(vma->vm_ops->fault)) { 2324 if (likely(vma->vm_ops->fault)) {
2323 fdata.type = -1; 2325 ret = vma->vm_ops->fault(vma, &vmf);
2324 faulted_page = vma->vm_ops->fault(vma, &fdata); 2326 if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE)))
2325 WARN_ON(fdata.type == -1); 2327 return (ret & VM_FAULT_MASK);
2326 if (unlikely(!faulted_page))
2327 return fdata.type;
2328 } else { 2328 } else {
2329 /* Legacy ->nopage path */ 2329 /* Legacy ->nopage path */
2330 fdata.type = VM_FAULT_MINOR; 2330 ret = VM_FAULT_MINOR;
2331 faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 2331 vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
2332 &fdata.type);
2333 /* no page was available -- either SIGBUS or OOM */ 2332 /* no page was available -- either SIGBUS or OOM */
2334 if (unlikely(faulted_page == NOPAGE_SIGBUS)) 2333 if (unlikely(vmf.page == NOPAGE_SIGBUS))
2335 return VM_FAULT_SIGBUS; 2334 return VM_FAULT_SIGBUS;
2336 else if (unlikely(faulted_page == NOPAGE_OOM)) 2335 else if (unlikely(vmf.page == NOPAGE_OOM))
2337 return VM_FAULT_OOM; 2336 return VM_FAULT_OOM;
2338 } 2337 }
2339 2338
2340 /* 2339 /*
2341 * For consistency in subsequent calls, make the faulted_page always 2340 * For consistency in subsequent calls, make the faulted page always
2342 * locked. 2341 * locked.
2343 */ 2342 */
2344 if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) 2343 if (unlikely(!(ret & FAULT_RET_LOCKED)))
2345 lock_page(faulted_page); 2344 lock_page(vmf.page);
2346 else 2345 else
2347 BUG_ON(!PageLocked(faulted_page)); 2346 VM_BUG_ON(!PageLocked(vmf.page));
2348 2347
2349 /* 2348 /*
2350 * Should we do an early C-O-W break? 2349 * Should we do an early C-O-W break?
2351 */ 2350 */
2352 page = faulted_page; 2351 page = vmf.page;
2353 if (flags & FAULT_FLAG_WRITE) { 2352 if (flags & FAULT_FLAG_WRITE) {
2354 if (!(vma->vm_flags & VM_SHARED)) { 2353 if (!(vma->vm_flags & VM_SHARED)) {
2355 anon = 1; 2354 anon = 1;
2356 if (unlikely(anon_vma_prepare(vma))) { 2355 if (unlikely(anon_vma_prepare(vma))) {
2357 fdata.type = VM_FAULT_OOM; 2356 ret = VM_FAULT_OOM;
2358 goto out; 2357 goto out;
2359 } 2358 }
2360 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2359 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2361 if (!page) { 2360 if (!page) {
2362 fdata.type = VM_FAULT_OOM; 2361 ret = VM_FAULT_OOM;
2363 goto out; 2362 goto out;
2364 } 2363 }
2365 copy_user_highpage(page, faulted_page, address, vma); 2364 copy_user_highpage(page, vmf.page, address, vma);
2366 } else { 2365 } else {
2367 /* 2366 /*
2368 * If the page will be shareable, see if the backing 2367 * If the page will be shareable, see if the backing
@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2372 if (vma->vm_ops->page_mkwrite) { 2371 if (vma->vm_ops->page_mkwrite) {
2373 unlock_page(page); 2372 unlock_page(page);
2374 if (vma->vm_ops->page_mkwrite(vma, page) < 0) { 2373 if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
2375 fdata.type = VM_FAULT_SIGBUS; 2374 ret = VM_FAULT_SIGBUS;
2376 anon = 1; /* no anon but release faulted_page */ 2375 anon = 1; /* no anon but release vmf.page */
2377 goto out_unlocked; 2376 goto out_unlocked;
2378 } 2377 }
2379 lock_page(page); 2378 lock_page(page);
2379 /*
2380 * XXX: this is not quite right (racy vs
2381 * invalidate) to unlock and relock the page
2382 * like this, however a better fix requires
2383 * reworking page_mkwrite locking API, which
2384 * is better done later.
2385 */
2386 if (!page->mapping) {
2387 ret = VM_FAULT_MINOR;
2388 anon = 1; /* no anon but release vmf.page */
2389 goto out;
2390 }
2380 } 2391 }
2381 } 2392 }
2382 2393
@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2427 pte_unmap_unlock(page_table, ptl); 2438 pte_unmap_unlock(page_table, ptl);
2428 2439
2429out: 2440out:
2430 unlock_page(faulted_page); 2441 unlock_page(vmf.page);
2431out_unlocked: 2442out_unlocked:
2432 if (anon) 2443 if (anon)
2433 page_cache_release(faulted_page); 2444 page_cache_release(vmf.page);
2434 else if (dirty_page) { 2445 else if (dirty_page) {
2435 set_page_dirty_balance(dirty_page); 2446 set_page_dirty_balance(dirty_page);
2436 put_page(dirty_page); 2447 put_page(dirty_page);
2437 } 2448 }
2438 2449
2439 return fdata.type; 2450 return (ret & VM_FAULT_MASK);
2440} 2451}
2441 2452
2442static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 2453static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2447 - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; 2458 - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
2448 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); 2459 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
2449 2460
2450 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); 2461 return __do_fault(mm, vma, address, page_table, pmd, pgoff,
2462 flags, orig_pte);
2451} 2463}
2452 2464
2453static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2454 unsigned long address, pte_t *page_table, pmd_t *pmd,
2455 int write_access, pgoff_t pgoff, pte_t orig_pte)
2456{
2457 unsigned int flags = FAULT_FLAG_NONLINEAR |
2458 (write_access ? FAULT_FLAG_WRITE : 0);
2459
2460 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
2461}
2462 2465
2463/* 2466/*
2464 * do_no_pfn() tries to create a new page mapping for a page without 2467 * do_no_pfn() tries to create a new page mapping for a page without
@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2519 * but allow concurrent faults), and pte mapped but not yet locked. 2522 * but allow concurrent faults), and pte mapped but not yet locked.
2520 * We return with mmap_sem still held, but pte unmapped and unlocked. 2523 * We return with mmap_sem still held, but pte unmapped and unlocked.
2521 */ 2524 */
2522static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, 2525static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2523 unsigned long address, pte_t *page_table, pmd_t *pmd, 2526 unsigned long address, pte_t *page_table, pmd_t *pmd,
2524 int write_access, pte_t orig_pte) 2527 int write_access, pte_t orig_pte)
2525{ 2528{
2529 unsigned int flags = FAULT_FLAG_NONLINEAR |
2530 (write_access ? FAULT_FLAG_WRITE : 0);
2526 pgoff_t pgoff; 2531 pgoff_t pgoff;
2527 int err;
2528 2532
2529 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2533 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2530 return VM_FAULT_MINOR; 2534 return VM_FAULT_MINOR;
2531 2535
2532 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { 2536 if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
2537 !(vma->vm_flags & VM_CAN_NONLINEAR))) {
2533 /* 2538 /*
2534 * Page table corrupted: show pte and kill process. 2539 * Page table corrupted: show pte and kill process.
2535 */ 2540 */
@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
2539 2544
2540 pgoff = pte_to_pgoff(orig_pte); 2545 pgoff = pte_to_pgoff(orig_pte);
2541 2546
2542 if (vma->vm_ops && vma->vm_ops->fault) 2547 return __do_fault(mm, vma, address, page_table, pmd, pgoff,
2543 return do_nonlinear_fault(mm, vma, address, page_table, pmd, 2548 flags, orig_pte);
2544 write_access, pgoff, orig_pte);
2545
2546 /* We can then assume vm->vm_ops && vma->vm_ops->populate */
2547 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
2548 vma->vm_page_prot, pgoff, 0);
2549 if (err == -ENOMEM)
2550 return VM_FAULT_OOM;
2551 if (err)
2552 return VM_FAULT_SIGBUS;
2553 return VM_FAULT_MAJOR;
2554} 2549}
2555 2550
2556/* 2551/*
@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2588 pte, pmd, write_access); 2583 pte, pmd, write_access);
2589 } 2584 }
2590 if (pte_file(entry)) 2585 if (pte_file(entry))
2591 return do_file_page(mm, vma, address, 2586 return do_nonlinear_fault(mm, vma, address,
2592 pte, pmd, write_access, entry); 2587 pte, pmd, write_access, entry);
2593 return do_swap_page(mm, vma, address, 2588 return do_swap_page(mm, vma, address,
2594 pte, pmd, write_access, entry); 2589 pte, pmd, write_access, entry);
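
Pulled together from the memory.c hunks above, a rough sketch (fragment, not a complete function) of how the calling side is expected to decompose the combined value returned by ->fault(); the exact bit layout of VM_FAULT_MASK, VM_FAULT_ERROR and the FAULT_RET_* flags lives in the header changes accompanying this patch and is assumed here.

	struct vm_fault vmf;
	int ret;

	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
	vmf.pgoff = pgoff;
	vmf.flags = flags;
	vmf.page = NULL;

	ret = vma->vm_ops->fault(vma, &vmf);
	if (ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))
		return ret & VM_FAULT_MASK;	/* error, or no struct page to install */

	if (!(ret & FAULT_RET_LOCKED))
		lock_page(vmf.page);	/* handler returned its page unlocked */
	else
		VM_BUG_ON(!PageLocked(vmf.page));
	/* ... install vmf.page into the page tables, then return ret & VM_FAULT_MASK */
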
diff --git a/mm/nommu.c b/mm/nommu.c
index aee0e1b0ebe7..1b105d28949f 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1341,10 +1341,10 @@ int in_gate_area_no_task(unsigned long addr)
1341 return 0; 1341 return 0;
1342} 1342}
1343 1343
1344struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) 1344int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1345{ 1345{
1346 BUG(); 1346 BUG();
1347 return NULL; 1347 return 0;
1348} 1348}
1349 1349
1350/* 1350/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 6b44440f1b24..0a555af8733d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1309,29 +1309,21 @@ failed:
1309 return error; 1309 return error;
1310} 1310}
1311 1311
1312static struct page *shmem_fault(struct vm_area_struct *vma, 1312static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1313 struct fault_data *fdata)
1314{ 1313{
1315 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1314 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1316 struct page *page = NULL;
1317 int error; 1315 int error;
1316 int ret;
1318 1317
1319 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); 1318 if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1319 return VM_FAULT_SIGBUS;
1320 1320
1321 if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1321 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
1322 fdata->type = VM_FAULT_SIGBUS; 1322 if (error)
1323 return NULL; 1323 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1324 }
1325
1326 error = shmem_getpage(inode, fdata->pgoff, &page,
1327 SGP_FAULT, &fdata->type);
1328 if (error) {
1329 fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS);
1330 return NULL;
1331 }
1332 1324
1333 mark_page_accessed(page); 1325 mark_page_accessed(vmf->page);
1334 return page; 1326 return ret | FAULT_RET_LOCKED;
1335} 1327}
1336 1328
1337#ifdef CONFIG_NUMA 1329#ifdef CONFIG_NUMA
@@ -1378,7 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1378{ 1370{
1379 file_accessed(file); 1371 file_accessed(file);
1380 vma->vm_ops = &shmem_vm_ops; 1372 vma->vm_ops = &shmem_vm_ops;
1381 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; 1373 vma->vm_flags |= VM_CAN_NONLINEAR;
1382 return 0; 1374 return 0;
1383} 1375}
1384 1376
@@ -2560,6 +2552,5 @@ int shmem_zero_setup(struct vm_area_struct *vma)
2560 fput(vma->vm_file); 2552 fput(vma->vm_file);
2561 vma->vm_file = file; 2553 vma->vm_file = file;
2562 vma->vm_ops = &shmem_vm_ops; 2554 vma->vm_ops = &shmem_vm_ops;
2563 vma->vm_flags |= VM_CAN_INVALIDATE;
2564 return 0; 2555 return 0;
2565} 2556}
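
Finally, a sketch of how a filesystem opts in to the new fault path from its mmap file operation, mirroring the generic_file_mmap() and shmem_mmap() changes above; the myfs_* names are placeholders.

static struct vm_operations_struct myfs_file_vm_ops = {
	.fault	= myfs_fault,	/* replaces the old .nopage/.populate pair */
};

static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &myfs_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;	/* ->fault can back nonlinear mappings */
	return 0;
}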