author     Nick Piggin <npiggin@suse.de>                          2007-07-19 04:46:59 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-07-19 13:04:41 -0400
commit     54cb8821de07f2ffcd28c380ce9b93d5784b40d7 (patch)
tree       1de676534963d96af42863b20191bc9f80060dea /mm/filemap.c
parent     d00806b183152af6d24f46f0c33f14162ca1262a (diff)
mm: merge populate and nopage into fault (fixes nonlinear)
Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes
the virtual address -> file offset mapping differently from linear mappings.
->populate is a layering violation because the filesystem/pagecache code
should not need to know anything about the virtual memory mapping. The hitch
here is that the ->nopage handler didn't pass down enough information (i.e. the pgoff).
But it is more logical to pass pgoff rather than have the ->nopage function
calculate it itself anyway (because that's a similar layering violation).
Having the populate handler install the pte itself is likewise a nasty thing
to be doing.
This patch introduces a new fault handler that replaces ->nopage and
->populate and (later) ->nopfn. Most of the old mechanism is still in place,
so there is a lot of duplication; some nice cleanups become possible once
everyone has switched over.
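
For orientation, here is a rough sketch of the new interface as it can be inferred from the mm/filemap.c hunks below (the filemap_nopage() compatibility wrapper fills in address, pgoff and flags, and reads back type). The exact struct fault_data definition lives in include/linux/mm.h and may contain more than shown; the example_dev driver and its handler are purely hypothetical, for illustration only:

/*
 * Sketch of the ->fault interface introduced by this patch, inferred from
 * the mm/filemap.c hunks below. The real definition is in include/linux/mm.h
 * and may differ in detail.
 */
struct fault_data {
        unsigned long address;  /* faulting virtual address */
        pgoff_t pgoff;          /* fault offset into the file, in pages */
        unsigned int flags;
        int type;               /* out: VM_FAULT_{MINOR,MAJOR,SIGBUS,OOM} */
};

/* Hypothetical device with a single backing page. */
struct example_dev {
        struct page *page;
};

/*
 * A minimal ->fault handler in the style of filemap_fault(): set
 * fdata->type and return the page (with a reference held, as with
 * ->nopage), or NULL on error.
 */
static struct page *example_fault(struct vm_area_struct *vma,
                                  struct fault_data *fdata)
{
        struct example_dev *dev = vma->vm_private_data;

        if (fdata->pgoff > 0) {         /* only page 0 is backed */
                fdata->type = VM_FAULT_SIGBUS;
                return NULL;
        }
        fdata->type = VM_FAULT_MINOR;
        get_page(dev->page);
        return dev->page;
}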
The rationale for doing this in the first place is that nonlinear mappings are
subject to the pagefault vs invalidate/truncate race too, and it seemed stupid
to duplicate the synchronisation logic rather than just consolidate the two.
After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in
pagecache. This seems like fringe functionality anyway.
NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no
users have hit mainline yet.
[akpm@linux-foundation.org: cleanup]
[randy.dunlap@oracle.com: doc. fixes for readahead]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  94
1 file changed, 58 insertions, 36 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 462cda58a18e..26b992d169e5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1301,40 +1301,38 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 #define MMAP_LOTSAMISS  (100)
 
 /**
- * filemap_nopage - read in file data for page fault handling
- * @area: the applicable vm_area
- * @address: target address to read in
- * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ * filemap_fault - read in file data for page fault handling
+ * @vma: user vma (not used)
+ * @fdata: the applicable fault_data
  *
- * filemap_nopage() is invoked via the vma operations vector for a
+ * filemap_fault() is invoked via the vma operations vector for a
  * mapped memory region to read in file data during a page fault.
  *
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_nopage(struct vm_area_struct *area,
-                unsigned long address, int *type)
+struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 {
         int error;
-        struct file *file = area->vm_file;
+        struct file *file = vma->vm_file;
         struct address_space *mapping = file->f_mapping;
         struct file_ra_state *ra = &file->f_ra;
         struct inode *inode = mapping->host;
         struct page *page;
-        unsigned long size, pgoff;
-        int did_readaround = 0, majmin = VM_FAULT_MINOR;
+        unsigned long size;
+        int did_readaround = 0;
 
-        BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE));
+        fdata->type = VM_FAULT_MINOR;
 
-        pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+        BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
 
         size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (pgoff >= size)
+        if (fdata->pgoff >= size)
                 goto outside_data_content;
 
         /* If we don't want any read-ahead, don't bother */
-        if (VM_RandomReadHint(area))
+        if (VM_RandomReadHint(vma))
                 goto no_cached_page;
 
         /*
@@ -1343,19 +1341,19 @@ struct page *filemap_nopage(struct vm_area_struct *area,
          *
          * For sequential accesses, we use the generic readahead logic.
          */
-        if (VM_SequentialReadHint(area))
-                page_cache_readahead(mapping, ra, file, pgoff, 1);
+        if (VM_SequentialReadHint(vma))
+                page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
 
         /*
          * Do we have something in the page cache already?
          */
 retry_find:
-        page = find_lock_page(mapping, pgoff);
+        page = find_lock_page(mapping, fdata->pgoff);
         if (!page) {
                 unsigned long ra_pages;
 
-                if (VM_SequentialReadHint(area)) {
-                        handle_ra_miss(mapping, ra, pgoff);
+                if (VM_SequentialReadHint(vma)) {
+                        handle_ra_miss(mapping, ra, fdata->pgoff);
                         goto no_cached_page;
                 }
                 ra->mmap_miss++;
@@ -1372,7 +1370,7 @@ retry_find:
                  * check did_readaround, as this is an inner loop.
                  */
                 if (!did_readaround) {
-                        majmin = VM_FAULT_MAJOR;
+                        fdata->type = VM_FAULT_MAJOR;
                         count_vm_event(PGMAJFAULT);
                 }
                 did_readaround = 1;
@@ -1380,11 +1378,11 @@ retry_find:
                 if (ra_pages) {
                         pgoff_t start = 0;
 
-                        if (pgoff > ra_pages / 2)
-                                start = pgoff - ra_pages / 2;
+                        if (fdata->pgoff > ra_pages / 2)
+                                start = fdata->pgoff - ra_pages / 2;
                         do_page_cache_readahead(mapping, file, start, ra_pages);
                 }
-                page = find_lock_page(mapping, pgoff);
+                page = find_lock_page(mapping, fdata->pgoff);
                 if (!page)
                         goto no_cached_page;
         }
@@ -1401,7 +1399,7 @@ retry_find:
 
         /* Must recheck i_size under page lock */
         size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (unlikely(pgoff >= size)) {
+        if (unlikely(fdata->pgoff >= size)) {
                 unlock_page(page);
                 goto outside_data_content;
         }
@@ -1410,8 +1408,6 @@ retry_find:
          * Found the page and have a reference on it.
          */
         mark_page_accessed(page);
-        if (type)
-                *type = majmin;
         return page;
 
 outside_data_content:
@@ -1419,15 +1415,17 @@ outside_data_content:
          * An external ptracer can access pages that normally aren't
          * accessible..
          */
-        if (area->vm_mm == current->mm)
-                return NOPAGE_SIGBUS;
+        if (vma->vm_mm == current->mm) {
+                fdata->type = VM_FAULT_SIGBUS;
+                return NULL;
+        }
         /* Fall through to the non-read-ahead case */
 no_cached_page:
         /*
          * We're only likely to ever get here if MADV_RANDOM is in
          * effect.
          */
-        error = page_cache_read(file, pgoff);
+        error = page_cache_read(file, fdata->pgoff);
 
         /*
          * The page we want has now been added to the page cache.
@@ -1443,13 +1441,15 @@ no_cached_page:
          * to schedule I/O.
          */
         if (error == -ENOMEM)
-                return NOPAGE_OOM;
-        return NOPAGE_SIGBUS;
+                fdata->type = VM_FAULT_OOM;
+        else
+                fdata->type = VM_FAULT_SIGBUS;
+        return NULL;
 
 page_not_uptodate:
         /* IO error path */
         if (!did_readaround) {
-                majmin = VM_FAULT_MAJOR;
+                fdata->type = VM_FAULT_MAJOR;
                 count_vm_event(PGMAJFAULT);
         }
 
@@ -1468,7 +1468,30 @@ page_not_uptodate:
 
         /* Things didn't work out. Return zero to tell the mm layer so. */
         shrink_readahead_size_eio(file, ra);
-        return NOPAGE_SIGBUS;
+        fdata->type = VM_FAULT_SIGBUS;
+        return NULL;
+}
+EXPORT_SYMBOL(filemap_fault);
+
+/*
+ * filemap_nopage and filemap_populate are legacy exports that are not used
+ * in tree. Scheduled for removal.
+ */
+struct page *filemap_nopage(struct vm_area_struct *area,
+                unsigned long address, int *type)
+{
+        struct page *page;
+        struct fault_data fdata;
+        fdata.address = address;
+        fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+                        + area->vm_pgoff;
+        fdata.flags = 0;
+
+        page = filemap_fault(area, &fdata);
+        if (type)
+                *type = fdata.type;
+
+        return page;
 }
 EXPORT_SYMBOL(filemap_nopage);
 
@@ -1646,8 +1669,7 @@ repeat:
 EXPORT_SYMBOL(filemap_populate);
 
 struct vm_operations_struct generic_file_vm_ops = {
-        .nopage         = filemap_nopage,
-        .populate       = filemap_populate,
+        .fault          = filemap_fault,
 };
 
 /* This is used for a general mmap of a disk file */
@@ -1660,7 +1682,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
                 return -ENOEXEC;
         file_accessed(file);
         vma->vm_ops = &generic_file_vm_ops;
-        vma->vm_flags |= VM_CAN_INVALIDATE;
+        vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
         return 0;
 }
 
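
For reference, converting another filesystem or driver to the new interface follows the same pattern the hunks above apply to generic_file_vm_ops and generic_file_mmap(). A hedged sketch for a hypothetical "foofs" (the foofs names are invented for illustration; the ops and flags are the ones shown in the diff):

/* was: .nopage = filemap_nopage, .populate = filemap_populate */
static struct vm_operations_struct foofs_file_vm_ops = {
        .fault          = filemap_fault,
};

static int foofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        file_accessed(file);
        vma->vm_ops = &foofs_file_vm_ops;
        /* Required by the new fault path; mirrors generic_file_mmap() above. */
        vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
        return 0;
}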