aboutsummaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2007-07-19 04:46:59 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 13:04:41 -0400
commit54cb8821de07f2ffcd28c380ce9b93d5784b40d7 (patch)
tree1de676534963d96af42863b20191bc9f80060dea /mm/filemap.c
parentd00806b183152af6d24f46f0c33f14162ca1262a (diff)
mm: merge populate and nopage into fault (fixes nonlinear)
Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes the virtual address -> file offset differently from linear mappings. ->populate is a layering violation because the filesystem/pagecache code shouldn't need to know anything about the virtual memory mapping. The hitch here is that the ->nopage handler didn't pass down enough information (ie. pgoff). But it is more logical to pass pgoff rather than have the ->nopage function calculate it itself anyway (because that's a similar layering violation). Having the populate handler install the pte itself is likewise a nasty thing to be doing. This patch introduces a new fault handler that replaces ->nopage and ->populate and (later) ->nopfn. Most of the old mechanism is still in place so there is a lot of duplication and nice cleanups that can be removed if everyone switches over. The rationale for doing this in the first place is that nonlinear mappings are subject to the pagefault vs invalidate/truncate race too, and it seemed stupid to duplicate the synchronisation logic rather than just consolidate the two. After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in pagecache. Seems like a fringe functionality anyway. NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no users have hit mainline yet. [akpm@linux-foundation.org: cleanup] [randy.dunlap@oracle.com: doc. fixes for readahead] [akpm@linux-foundation.org: build fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Cc: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c94
1 file changed, 58 insertions, 36 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 462cda58a18e..26b992d169e5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1301,40 +1301,38 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1301#define MMAP_LOTSAMISS (100) 1301#define MMAP_LOTSAMISS (100)
1302 1302
1303/** 1303/**
1304 * filemap_nopage - read in file data for page fault handling 1304 * filemap_fault - read in file data for page fault handling
1305 * @area: the applicable vm_area 1305 * @vma: user vma (not used)
1306 * @address: target address to read in 1306 * @fdata: the applicable fault_data
1307 * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
1308 * 1307 *
1309 * filemap_nopage() is invoked via the vma operations vector for a 1308 * filemap_fault() is invoked via the vma operations vector for a
1310 * mapped memory region to read in file data during a page fault. 1309 * mapped memory region to read in file data during a page fault.
1311 * 1310 *
1312 * The goto's are kind of ugly, but this streamlines the normal case of having 1311 * The goto's are kind of ugly, but this streamlines the normal case of having
1313 * it in the page cache, and handles the special cases reasonably without 1312 * it in the page cache, and handles the special cases reasonably without
1314 * having a lot of duplicated code. 1313 * having a lot of duplicated code.
1315 */ 1314 */
1316struct page *filemap_nopage(struct vm_area_struct *area, 1315struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1317 unsigned long address, int *type)
1318{ 1316{
1319 int error; 1317 int error;
1320 struct file *file = area->vm_file; 1318 struct file *file = vma->vm_file;
1321 struct address_space *mapping = file->f_mapping; 1319 struct address_space *mapping = file->f_mapping;
1322 struct file_ra_state *ra = &file->f_ra; 1320 struct file_ra_state *ra = &file->f_ra;
1323 struct inode *inode = mapping->host; 1321 struct inode *inode = mapping->host;
1324 struct page *page; 1322 struct page *page;
1325 unsigned long size, pgoff; 1323 unsigned long size;
1326 int did_readaround = 0, majmin = VM_FAULT_MINOR; 1324 int did_readaround = 0;
1327 1325
1328 BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE)); 1326 fdata->type = VM_FAULT_MINOR;
1329 1327
1330 pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; 1328 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1331 1329
1332 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1330 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1333 if (pgoff >= size) 1331 if (fdata->pgoff >= size)
1334 goto outside_data_content; 1332 goto outside_data_content;
1335 1333
1336 /* If we don't want any read-ahead, don't bother */ 1334 /* If we don't want any read-ahead, don't bother */
1337 if (VM_RandomReadHint(area)) 1335 if (VM_RandomReadHint(vma))
1338 goto no_cached_page; 1336 goto no_cached_page;
1339 1337
1340 /* 1338 /*
@@ -1343,19 +1341,19 @@ struct page *filemap_nopage(struct vm_area_struct *area,
1343 * 1341 *
1344 * For sequential accesses, we use the generic readahead logic. 1342 * For sequential accesses, we use the generic readahead logic.
1345 */ 1343 */
1346 if (VM_SequentialReadHint(area)) 1344 if (VM_SequentialReadHint(vma))
1347 page_cache_readahead(mapping, ra, file, pgoff, 1); 1345 page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
1348 1346
1349 /* 1347 /*
1350 * Do we have something in the page cache already? 1348 * Do we have something in the page cache already?
1351 */ 1349 */
1352retry_find: 1350retry_find:
1353 page = find_lock_page(mapping, pgoff); 1351 page = find_lock_page(mapping, fdata->pgoff);
1354 if (!page) { 1352 if (!page) {
1355 unsigned long ra_pages; 1353 unsigned long ra_pages;
1356 1354
1357 if (VM_SequentialReadHint(area)) { 1355 if (VM_SequentialReadHint(vma)) {
1358 handle_ra_miss(mapping, ra, pgoff); 1356 handle_ra_miss(mapping, ra, fdata->pgoff);
1359 goto no_cached_page; 1357 goto no_cached_page;
1360 } 1358 }
1361 ra->mmap_miss++; 1359 ra->mmap_miss++;
@@ -1372,7 +1370,7 @@ retry_find:
1372 * check did_readaround, as this is an inner loop. 1370 * check did_readaround, as this is an inner loop.
1373 */ 1371 */
1374 if (!did_readaround) { 1372 if (!did_readaround) {
1375 majmin = VM_FAULT_MAJOR; 1373 fdata->type = VM_FAULT_MAJOR;
1376 count_vm_event(PGMAJFAULT); 1374 count_vm_event(PGMAJFAULT);
1377 } 1375 }
1378 did_readaround = 1; 1376 did_readaround = 1;
@@ -1380,11 +1378,11 @@ retry_find:
1380 if (ra_pages) { 1378 if (ra_pages) {
1381 pgoff_t start = 0; 1379 pgoff_t start = 0;
1382 1380
1383 if (pgoff > ra_pages / 2) 1381 if (fdata->pgoff > ra_pages / 2)
1384 start = pgoff - ra_pages / 2; 1382 start = fdata->pgoff - ra_pages / 2;
1385 do_page_cache_readahead(mapping, file, start, ra_pages); 1383 do_page_cache_readahead(mapping, file, start, ra_pages);
1386 } 1384 }
1387 page = find_lock_page(mapping, pgoff); 1385 page = find_lock_page(mapping, fdata->pgoff);
1388 if (!page) 1386 if (!page)
1389 goto no_cached_page; 1387 goto no_cached_page;
1390 } 1388 }
@@ -1401,7 +1399,7 @@ retry_find:
1401 1399
1402 /* Must recheck i_size under page lock */ 1400 /* Must recheck i_size under page lock */
1403 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1401 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1404 if (unlikely(pgoff >= size)) { 1402 if (unlikely(fdata->pgoff >= size)) {
1405 unlock_page(page); 1403 unlock_page(page);
1406 goto outside_data_content; 1404 goto outside_data_content;
1407 } 1405 }
@@ -1410,8 +1408,6 @@ retry_find:
1410 * Found the page and have a reference on it. 1408 * Found the page and have a reference on it.
1411 */ 1409 */
1412 mark_page_accessed(page); 1410 mark_page_accessed(page);
1413 if (type)
1414 *type = majmin;
1415 return page; 1411 return page;
1416 1412
1417outside_data_content: 1413outside_data_content:
@@ -1419,15 +1415,17 @@ outside_data_content:
1419 * An external ptracer can access pages that normally aren't 1415 * An external ptracer can access pages that normally aren't
1420 * accessible.. 1416 * accessible..
1421 */ 1417 */
1422 if (area->vm_mm == current->mm) 1418 if (vma->vm_mm == current->mm) {
1423 return NOPAGE_SIGBUS; 1419 fdata->type = VM_FAULT_SIGBUS;
1420 return NULL;
1421 }
1424 /* Fall through to the non-read-ahead case */ 1422 /* Fall through to the non-read-ahead case */
1425no_cached_page: 1423no_cached_page:
1426 /* 1424 /*
1427 * We're only likely to ever get here if MADV_RANDOM is in 1425 * We're only likely to ever get here if MADV_RANDOM is in
1428 * effect. 1426 * effect.
1429 */ 1427 */
1430 error = page_cache_read(file, pgoff); 1428 error = page_cache_read(file, fdata->pgoff);
1431 1429
1432 /* 1430 /*
1433 * The page we want has now been added to the page cache. 1431 * The page we want has now been added to the page cache.
@@ -1443,13 +1441,15 @@ no_cached_page:
1443 * to schedule I/O. 1441 * to schedule I/O.
1444 */ 1442 */
1445 if (error == -ENOMEM) 1443 if (error == -ENOMEM)
1446 return NOPAGE_OOM; 1444 fdata->type = VM_FAULT_OOM;
1447 return NOPAGE_SIGBUS; 1445 else
1446 fdata->type = VM_FAULT_SIGBUS;
1447 return NULL;
1448 1448
1449page_not_uptodate: 1449page_not_uptodate:
1450 /* IO error path */ 1450 /* IO error path */
1451 if (!did_readaround) { 1451 if (!did_readaround) {
1452 majmin = VM_FAULT_MAJOR; 1452 fdata->type = VM_FAULT_MAJOR;
1453 count_vm_event(PGMAJFAULT); 1453 count_vm_event(PGMAJFAULT);
1454 } 1454 }
1455 1455
@@ -1468,7 +1468,30 @@ page_not_uptodate:
1468 1468
1469 /* Things didn't work out. Return zero to tell the mm layer so. */ 1469 /* Things didn't work out. Return zero to tell the mm layer so. */
1470 shrink_readahead_size_eio(file, ra); 1470 shrink_readahead_size_eio(file, ra);
1471 return NOPAGE_SIGBUS; 1471 fdata->type = VM_FAULT_SIGBUS;
1472 return NULL;
1473}
1474EXPORT_SYMBOL(filemap_fault);
1475
1476/*
1477 * filemap_nopage and filemap_populate are legacy exports that are not used
1478 * in tree. Scheduled for removal.
1479 */
1480struct page *filemap_nopage(struct vm_area_struct *area,
1481 unsigned long address, int *type)
1482{
1483 struct page *page;
1484 struct fault_data fdata;
1485 fdata.address = address;
1486 fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
1487 + area->vm_pgoff;
1488 fdata.flags = 0;
1489
1490 page = filemap_fault(area, &fdata);
1491 if (type)
1492 *type = fdata.type;
1493
1494 return page;
1472} 1495}
1473EXPORT_SYMBOL(filemap_nopage); 1496EXPORT_SYMBOL(filemap_nopage);
1474 1497
@@ -1646,8 +1669,7 @@ repeat:
1646EXPORT_SYMBOL(filemap_populate); 1669EXPORT_SYMBOL(filemap_populate);
1647 1670
1648struct vm_operations_struct generic_file_vm_ops = { 1671struct vm_operations_struct generic_file_vm_ops = {
1649 .nopage = filemap_nopage, 1672 .fault = filemap_fault,
1650 .populate = filemap_populate,
1651}; 1673};
1652 1674
1653/* This is used for a general mmap of a disk file */ 1675/* This is used for a general mmap of a disk file */
@@ -1660,7 +1682,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
1660 return -ENOEXEC; 1682 return -ENOEXEC;
1661 file_accessed(file); 1683 file_accessed(file);
1662 vma->vm_ops = &generic_file_vm_ops; 1684 vma->vm_ops = &generic_file_vm_ops;
1663 vma->vm_flags |= VM_CAN_INVALIDATE; 1685 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
1664 return 0; 1686 return 0;
1665} 1687}
1666 1688