Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 564
1 file changed, 460 insertions, 104 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index f99ff3e50bd6..bd106361be4b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt; | |||
53 | #include <linux/blkdev.h> | 53 | #include <linux/blkdev.h> |
54 | #include <linux/pagevec.h> | 54 | #include <linux/pagevec.h> |
55 | #include <linux/percpu_counter.h> | 55 | #include <linux/percpu_counter.h> |
56 | #include <linux/falloc.h> | ||
56 | #include <linux/splice.h> | 57 | #include <linux/splice.h> |
57 | #include <linux/security.h> | 58 | #include <linux/security.h> |
58 | #include <linux/swapops.h> | 59 | #include <linux/swapops.h> |
@@ -83,12 +84,25 @@ struct shmem_xattr { | |||
83 | char value[0]; | 84 | char value[0]; |
84 | }; | 85 | }; |
85 | 86 | ||
87 | /* | ||
88 | * shmem_fallocate and shmem_writepage communicate via inode->i_private | ||
89 | * (with i_mutex making sure that it has only one user at a time): | ||
90 | * we would prefer not to enlarge the shmem inode just for that. | ||
91 | */ | ||
92 | struct shmem_falloc { | ||
93 | pgoff_t start; /* start of range currently being fallocated */ | ||
94 | pgoff_t next; /* the next page offset to be fallocated */ | ||
95 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ | ||
96 | pgoff_t nr_unswapped; /* how often writepage refused to swap out */ | ||
97 | }; | ||
98 | |||
86 | /* Flag allocation requirements to shmem_getpage */ | 99 | /* Flag allocation requirements to shmem_getpage */ |
87 | enum sgp_type { | 100 | enum sgp_type { |
88 | SGP_READ, /* don't exceed i_size, don't allocate page */ | 101 | SGP_READ, /* don't exceed i_size, don't allocate page */ |
89 | SGP_CACHE, /* don't exceed i_size, may allocate page */ | 102 | SGP_CACHE, /* don't exceed i_size, may allocate page */ |
90 | SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ | 103 | SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ |
91 | SGP_WRITE, /* may exceed i_size, may allocate page */ | 104 | SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ |
105 | SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ | ||
92 | }; | 106 | }; |
93 | 107 | ||
94 | #ifdef CONFIG_TMPFS | 108 | #ifdef CONFIG_TMPFS |
@@ -103,6 +117,9 @@ static unsigned long shmem_default_max_inodes(void) | |||
103 | } | 117 | } |
104 | #endif | 118 | #endif |
105 | 119 | ||
120 | static bool shmem_should_replace_page(struct page *page, gfp_t gfp); | ||
121 | static int shmem_replace_page(struct page **pagep, gfp_t gfp, | ||
122 | struct shmem_inode_info *info, pgoff_t index); | ||
106 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, | 123 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
107 | struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); | 124 | struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); |
108 | 125 | ||
@@ -247,46 +264,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping, | |||
247 | } | 264 | } |
248 | 265 | ||
249 | /* | 266 | /* |
267 | * Sometimes, before we decide whether to proceed or to fail, we must check | ||
268 | * that an entry was not already brought back from swap by a racing thread. | ||
269 | * | ||
270 | * Checking page is not enough: by the time a SwapCache page is locked, it | ||
271 | * might be reused, and again be SwapCache, using the same swap as before. | ||
272 | */ | ||
273 | static bool shmem_confirm_swap(struct address_space *mapping, | ||
274 | pgoff_t index, swp_entry_t swap) | ||
275 | { | ||
276 | void *item; | ||
277 | |||
278 | rcu_read_lock(); | ||
279 | item = radix_tree_lookup(&mapping->page_tree, index); | ||
280 | rcu_read_unlock(); | ||
281 | return item == swp_to_radix_entry(swap); | ||
282 | } | ||
283 | |||
284 | /* | ||
250 | * Like add_to_page_cache_locked, but error if expected item has gone. | 285 | * Like add_to_page_cache_locked, but error if expected item has gone. |
251 | */ | 286 | */ |
252 | static int shmem_add_to_page_cache(struct page *page, | 287 | static int shmem_add_to_page_cache(struct page *page, |
253 | struct address_space *mapping, | 288 | struct address_space *mapping, |
254 | pgoff_t index, gfp_t gfp, void *expected) | 289 | pgoff_t index, gfp_t gfp, void *expected) |
255 | { | 290 | { |
256 | int error = 0; | 291 | int error; |
257 | 292 | ||
258 | VM_BUG_ON(!PageLocked(page)); | 293 | VM_BUG_ON(!PageLocked(page)); |
259 | VM_BUG_ON(!PageSwapBacked(page)); | 294 | VM_BUG_ON(!PageSwapBacked(page)); |
260 | 295 | ||
296 | page_cache_get(page); | ||
297 | page->mapping = mapping; | ||
298 | page->index = index; | ||
299 | |||
300 | spin_lock_irq(&mapping->tree_lock); | ||
261 | if (!expected) | 301 | if (!expected) |
262 | error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); | 302 | error = radix_tree_insert(&mapping->page_tree, index, page); |
303 | else | ||
304 | error = shmem_radix_tree_replace(mapping, index, expected, | ||
305 | page); | ||
263 | if (!error) { | 306 | if (!error) { |
264 | page_cache_get(page); | 307 | mapping->nrpages++; |
265 | page->mapping = mapping; | 308 | __inc_zone_page_state(page, NR_FILE_PAGES); |
266 | page->index = index; | 309 | __inc_zone_page_state(page, NR_SHMEM); |
267 | 310 | spin_unlock_irq(&mapping->tree_lock); | |
268 | spin_lock_irq(&mapping->tree_lock); | 311 | } else { |
269 | if (!expected) | 312 | page->mapping = NULL; |
270 | error = radix_tree_insert(&mapping->page_tree, | 313 | spin_unlock_irq(&mapping->tree_lock); |
271 | index, page); | 314 | page_cache_release(page); |
272 | else | ||
273 | error = shmem_radix_tree_replace(mapping, index, | ||
274 | expected, page); | ||
275 | if (!error) { | ||
276 | mapping->nrpages++; | ||
277 | __inc_zone_page_state(page, NR_FILE_PAGES); | ||
278 | __inc_zone_page_state(page, NR_SHMEM); | ||
279 | spin_unlock_irq(&mapping->tree_lock); | ||
280 | } else { | ||
281 | page->mapping = NULL; | ||
282 | spin_unlock_irq(&mapping->tree_lock); | ||
283 | page_cache_release(page); | ||
284 | } | ||
285 | if (!expected) | ||
286 | radix_tree_preload_end(); | ||
287 | } | 315 | } |
288 | if (error) | ||
289 | mem_cgroup_uncharge_cache_page(page); | ||
290 | return error; | 316 | return error; |
291 | } | 317 | } |
292 | 318 | ||
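The reworked shmem_add_to_page_cache() above no longer preloads the radix tree and no longer uncharges the memcg page when insertion fails: both duties move to the callers, which is what lets the swapin path insert under GFP_NOWAIT without a preload. A minimal sketch of the caller-side contract this implies, mirroring the later hunk in shmem_getpage_gfp():

	/* Charge before taking tree_lock, while the caller may still sleep. */
	error = mem_cgroup_cache_charge(page, current->mm, gfp & GFP_RECLAIM_MASK);
	if (!error) {
		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);	/* fresh insert only */
		if (!error) {
			error = shmem_add_to_page_cache(page, mapping, index,
							gfp, NULL);
			radix_tree_preload_end();
		}
		if (error)
			mem_cgroup_uncharge_cache_page(page);	/* callee no longer does this */
	}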
@@ -423,27 +449,31 @@ void shmem_unlock_mapping(struct address_space *mapping) | |||
423 | 449 | ||
424 | /* | 450 | /* |
425 | * Remove range of pages and swap entries from radix tree, and free them. | 451 | * Remove range of pages and swap entries from radix tree, and free them. |
452 | * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. | ||
426 | */ | 453 | */ |
427 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | 454 | static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, |
455 | bool unfalloc) | ||
428 | { | 456 | { |
429 | struct address_space *mapping = inode->i_mapping; | 457 | struct address_space *mapping = inode->i_mapping; |
430 | struct shmem_inode_info *info = SHMEM_I(inode); | 458 | struct shmem_inode_info *info = SHMEM_I(inode); |
431 | pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 459 | pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
432 | unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); | 460 | pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT; |
433 | pgoff_t end = (lend >> PAGE_CACHE_SHIFT); | 461 | unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1); |
462 | unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1); | ||
434 | struct pagevec pvec; | 463 | struct pagevec pvec; |
435 | pgoff_t indices[PAGEVEC_SIZE]; | 464 | pgoff_t indices[PAGEVEC_SIZE]; |
436 | long nr_swaps_freed = 0; | 465 | long nr_swaps_freed = 0; |
437 | pgoff_t index; | 466 | pgoff_t index; |
438 | int i; | 467 | int i; |
439 | 468 | ||
440 | BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); | 469 | if (lend == -1) |
470 | end = -1; /* unsigned, so actually very big */ | ||
441 | 471 | ||
442 | pagevec_init(&pvec, 0); | 472 | pagevec_init(&pvec, 0); |
443 | index = start; | 473 | index = start; |
444 | while (index <= end) { | 474 | while (index < end) { |
445 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, | 475 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, |
446 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | 476 | min(end - index, (pgoff_t)PAGEVEC_SIZE), |
447 | pvec.pages, indices); | 477 | pvec.pages, indices); |
448 | if (!pvec.nr) | 478 | if (!pvec.nr) |
449 | break; | 479 | break; |
@@ -452,10 +482,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
452 | struct page *page = pvec.pages[i]; | 482 | struct page *page = pvec.pages[i]; |
453 | 483 | ||
454 | index = indices[i]; | 484 | index = indices[i]; |
455 | if (index > end) | 485 | if (index >= end) |
456 | break; | 486 | break; |
457 | 487 | ||
458 | if (radix_tree_exceptional_entry(page)) { | 488 | if (radix_tree_exceptional_entry(page)) { |
489 | if (unfalloc) | ||
490 | continue; | ||
459 | nr_swaps_freed += !shmem_free_swap(mapping, | 491 | nr_swaps_freed += !shmem_free_swap(mapping, |
460 | index, page); | 492 | index, page); |
461 | continue; | 493 | continue; |
@@ -463,9 +495,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
463 | 495 | ||
464 | if (!trylock_page(page)) | 496 | if (!trylock_page(page)) |
465 | continue; | 497 | continue; |
466 | if (page->mapping == mapping) { | 498 | if (!unfalloc || !PageUptodate(page)) { |
467 | VM_BUG_ON(PageWriteback(page)); | 499 | if (page->mapping == mapping) { |
468 | truncate_inode_page(mapping, page); | 500 | VM_BUG_ON(PageWriteback(page)); |
501 | truncate_inode_page(mapping, page); | ||
502 | } | ||
469 | } | 503 | } |
470 | unlock_page(page); | 504 | unlock_page(page); |
471 | } | 505 | } |
@@ -476,30 +510,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
476 | index++; | 510 | index++; |
477 | } | 511 | } |
478 | 512 | ||
479 | if (partial) { | 513 | if (partial_start) { |
480 | struct page *page = NULL; | 514 | struct page *page = NULL; |
481 | shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); | 515 | shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); |
482 | if (page) { | 516 | if (page) { |
483 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | 517 | unsigned int top = PAGE_CACHE_SIZE; |
518 | if (start > end) { | ||
519 | top = partial_end; | ||
520 | partial_end = 0; | ||
521 | } | ||
522 | zero_user_segment(page, partial_start, top); | ||
484 | set_page_dirty(page); | 523 | set_page_dirty(page); |
485 | unlock_page(page); | 524 | unlock_page(page); |
486 | page_cache_release(page); | 525 | page_cache_release(page); |
487 | } | 526 | } |
488 | } | 527 | } |
528 | if (partial_end) { | ||
529 | struct page *page = NULL; | ||
530 | shmem_getpage(inode, end, &page, SGP_READ, NULL); | ||
531 | if (page) { | ||
532 | zero_user_segment(page, 0, partial_end); | ||
533 | set_page_dirty(page); | ||
534 | unlock_page(page); | ||
535 | page_cache_release(page); | ||
536 | } | ||
537 | } | ||
538 | if (start >= end) | ||
539 | return; | ||
489 | 540 | ||
490 | index = start; | 541 | index = start; |
491 | for ( ; ; ) { | 542 | for ( ; ; ) { |
492 | cond_resched(); | 543 | cond_resched(); |
493 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, | 544 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, |
494 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | 545 | min(end - index, (pgoff_t)PAGEVEC_SIZE), |
495 | pvec.pages, indices); | 546 | pvec.pages, indices); |
496 | if (!pvec.nr) { | 547 | if (!pvec.nr) { |
497 | if (index == start) | 548 | if (index == start || unfalloc) |
498 | break; | 549 | break; |
499 | index = start; | 550 | index = start; |
500 | continue; | 551 | continue; |
501 | } | 552 | } |
502 | if (index == start && indices[0] > end) { | 553 | if ((index == start || unfalloc) && indices[0] >= end) { |
503 | shmem_deswap_pagevec(&pvec); | 554 | shmem_deswap_pagevec(&pvec); |
504 | pagevec_release(&pvec); | 555 | pagevec_release(&pvec); |
505 | break; | 556 | break; |
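shmem_undo_range() above now accepts an arbitrary byte range (lend == -1 meaning to end of file) and zeroes the partial pages at either edge of a punched hole while the whole pages in between are removed. A small standalone illustration of the index arithmetic, assuming 4096-byte pages; the kernel uses shifts and masks with PAGE_CACHE_SHIFT, and the division and modulo below are the same computation. (When the hole fits inside one page, start > end and only the first zeroing runs, capped at partial_end.)

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		unsigned long lstart = 1000, lend = 9999;	/* punch bytes 1000..9999 inclusive */

		unsigned long start = (lstart + PAGE_SIZE - 1) / PAGE_SIZE;	/* first whole page removed: 1 */
		unsigned long end = (lend + 1) / PAGE_SIZE;			/* first page kept:          2 */
		unsigned long partial_start = lstart % PAGE_SIZE;		/* 1000 */
		unsigned long partial_end = (lend + 1) % PAGE_SIZE;		/* 1808 */

		printf("whole pages removed: [%lu, %lu)\n", start, end);
		printf("page %lu: zeroed from byte %lu to end\n", lstart / PAGE_SIZE, partial_start);
		printf("page %lu: first %lu bytes zeroed\n", end, partial_end);
		return 0;
	}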
@@ -509,19 +560,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
509 | struct page *page = pvec.pages[i]; | 560 | struct page *page = pvec.pages[i]; |
510 | 561 | ||
511 | index = indices[i]; | 562 | index = indices[i]; |
512 | if (index > end) | 563 | if (index >= end) |
513 | break; | 564 | break; |
514 | 565 | ||
515 | if (radix_tree_exceptional_entry(page)) { | 566 | if (radix_tree_exceptional_entry(page)) { |
567 | if (unfalloc) | ||
568 | continue; | ||
516 | nr_swaps_freed += !shmem_free_swap(mapping, | 569 | nr_swaps_freed += !shmem_free_swap(mapping, |
517 | index, page); | 570 | index, page); |
518 | continue; | 571 | continue; |
519 | } | 572 | } |
520 | 573 | ||
521 | lock_page(page); | 574 | lock_page(page); |
522 | if (page->mapping == mapping) { | 575 | if (!unfalloc || !PageUptodate(page)) { |
523 | VM_BUG_ON(PageWriteback(page)); | 576 | if (page->mapping == mapping) { |
524 | truncate_inode_page(mapping, page); | 577 | VM_BUG_ON(PageWriteback(page)); |
578 | truncate_inode_page(mapping, page); | ||
579 | } | ||
525 | } | 580 | } |
526 | unlock_page(page); | 581 | unlock_page(page); |
527 | } | 582 | } |
@@ -535,7 +590,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
535 | info->swapped -= nr_swaps_freed; | 590 | info->swapped -= nr_swaps_freed; |
536 | shmem_recalc_inode(inode); | 591 | shmem_recalc_inode(inode); |
537 | spin_unlock(&info->lock); | 592 | spin_unlock(&info->lock); |
593 | } | ||
538 | 594 | ||
595 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | ||
596 | { | ||
597 | shmem_undo_range(inode, lstart, lend, false); | ||
539 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 598 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
540 | } | 599 | } |
541 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | 600 | EXPORT_SYMBOL_GPL(shmem_truncate_range); |
@@ -597,19 +656,20 @@ static void shmem_evict_inode(struct inode *inode) | |||
597 | } | 656 | } |
598 | BUG_ON(inode->i_blocks); | 657 | BUG_ON(inode->i_blocks); |
599 | shmem_free_inode(inode->i_sb); | 658 | shmem_free_inode(inode->i_sb); |
600 | end_writeback(inode); | 659 | clear_inode(inode); |
601 | } | 660 | } |
602 | 661 | ||
603 | /* | 662 | /* |
604 | * If swap found in inode, free it and move page from swapcache to filecache. | 663 | * If swap found in inode, free it and move page from swapcache to filecache. |
605 | */ | 664 | */ |
606 | static int shmem_unuse_inode(struct shmem_inode_info *info, | 665 | static int shmem_unuse_inode(struct shmem_inode_info *info, |
607 | swp_entry_t swap, struct page *page) | 666 | swp_entry_t swap, struct page **pagep) |
608 | { | 667 | { |
609 | struct address_space *mapping = info->vfs_inode.i_mapping; | 668 | struct address_space *mapping = info->vfs_inode.i_mapping; |
610 | void *radswap; | 669 | void *radswap; |
611 | pgoff_t index; | 670 | pgoff_t index; |
612 | int error; | 671 | gfp_t gfp; |
672 | int error = 0; | ||
613 | 673 | ||
614 | radswap = swp_to_radix_entry(swap); | 674 | radswap = swp_to_radix_entry(swap); |
615 | index = radix_tree_locate_item(&mapping->page_tree, radswap); | 675 | index = radix_tree_locate_item(&mapping->page_tree, radswap); |
@@ -625,22 +685,48 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, | |||
625 | if (shmem_swaplist.next != &info->swaplist) | 685 | if (shmem_swaplist.next != &info->swaplist) |
626 | list_move_tail(&shmem_swaplist, &info->swaplist); | 686 | list_move_tail(&shmem_swaplist, &info->swaplist); |
627 | 687 | ||
688 | gfp = mapping_gfp_mask(mapping); | ||
689 | if (shmem_should_replace_page(*pagep, gfp)) { | ||
690 | mutex_unlock(&shmem_swaplist_mutex); | ||
691 | error = shmem_replace_page(pagep, gfp, info, index); | ||
692 | mutex_lock(&shmem_swaplist_mutex); | ||
693 | /* | ||
694 | * We needed to drop mutex to make that restrictive page | ||
695 | * allocation, but the inode might have been freed while we | ||
696 | * dropped it: although a racing shmem_evict_inode() cannot | ||
697 | * complete without emptying the radix_tree, our page lock | ||
698 | * on this swapcache page is not enough to prevent that - | ||
699 | * free_swap_and_cache() of our swap entry will only | ||
700 | * trylock_page(), removing swap from radix_tree whatever. | ||
701 | * | ||
702 | * We must not proceed to shmem_add_to_page_cache() if the | ||
703 | * inode has been freed, but of course we cannot rely on | ||
704 | * inode or mapping or info to check that. However, we can | ||
705 | * safely check if our swap entry is still in use (and here | ||
706 | * it can't have got reused for another page): if it's still | ||
707 | * in use, then the inode cannot have been freed yet, and we | ||
708 | * can safely proceed (if it's no longer in use, that tells | ||
709 | * nothing about the inode, but we don't need to unuse swap). | ||
710 | */ | ||
711 | if (!page_swapcount(*pagep)) | ||
712 | error = -ENOENT; | ||
713 | } | ||
714 | |||
628 | /* | 715 | /* |
629 | * We rely on shmem_swaplist_mutex, not only to protect the swaplist, | 716 | * We rely on shmem_swaplist_mutex, not only to protect the swaplist, |
630 | * but also to hold up shmem_evict_inode(): so inode cannot be freed | 717 | * but also to hold up shmem_evict_inode(): so inode cannot be freed |
631 | * beneath us (pagelock doesn't help until the page is in pagecache). | 718 | * beneath us (pagelock doesn't help until the page is in pagecache). |
632 | */ | 719 | */ |
633 | error = shmem_add_to_page_cache(page, mapping, index, | 720 | if (!error) |
721 | error = shmem_add_to_page_cache(*pagep, mapping, index, | ||
634 | GFP_NOWAIT, radswap); | 722 | GFP_NOWAIT, radswap); |
635 | /* which does mem_cgroup_uncharge_cache_page on error */ | ||
636 | |||
637 | if (error != -ENOMEM) { | 723 | if (error != -ENOMEM) { |
638 | /* | 724 | /* |
639 | * Truncation and eviction use free_swap_and_cache(), which | 725 | * Truncation and eviction use free_swap_and_cache(), which |
640 | * only does trylock page: if we raced, best clean up here. | 726 | * only does trylock page: if we raced, best clean up here. |
641 | */ | 727 | */ |
642 | delete_from_swap_cache(page); | 728 | delete_from_swap_cache(*pagep); |
643 | set_page_dirty(page); | 729 | set_page_dirty(*pagep); |
644 | if (!error) { | 730 | if (!error) { |
645 | spin_lock(&info->lock); | 731 | spin_lock(&info->lock); |
646 | info->swapped--; | 732 | info->swapped--; |
@@ -660,7 +746,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page) | |||
660 | struct list_head *this, *next; | 746 | struct list_head *this, *next; |
661 | struct shmem_inode_info *info; | 747 | struct shmem_inode_info *info; |
662 | int found = 0; | 748 | int found = 0; |
663 | int error; | 749 | int error = 0; |
750 | |||
751 | /* | ||
752 | * There's a faint possibility that swap page was replaced before | ||
753 | * caller locked it: caller will come back later with the right page. | ||
754 | */ | ||
755 | if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val)) | ||
756 | goto out; | ||
664 | 757 | ||
665 | /* | 758 | /* |
666 | * Charge page using GFP_KERNEL while we can wait, before taking | 759 | * Charge page using GFP_KERNEL while we can wait, before taking |
@@ -676,7 +769,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) | |||
676 | list_for_each_safe(this, next, &shmem_swaplist) { | 769 | list_for_each_safe(this, next, &shmem_swaplist) { |
677 | info = list_entry(this, struct shmem_inode_info, swaplist); | 770 | info = list_entry(this, struct shmem_inode_info, swaplist); |
678 | if (info->swapped) | 771 | if (info->swapped) |
679 | found = shmem_unuse_inode(info, swap, page); | 772 | found = shmem_unuse_inode(info, swap, &page); |
680 | else | 773 | else |
681 | list_del_init(&info->swaplist); | 774 | list_del_init(&info->swaplist); |
682 | cond_resched(); | 775 | cond_resched(); |
@@ -685,8 +778,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page) | |||
685 | } | 778 | } |
686 | mutex_unlock(&shmem_swaplist_mutex); | 779 | mutex_unlock(&shmem_swaplist_mutex); |
687 | 780 | ||
688 | if (!found) | ||
689 | mem_cgroup_uncharge_cache_page(page); | ||
690 | if (found < 0) | 781 | if (found < 0) |
691 | error = found; | 782 | error = found; |
692 | out: | 783 | out: |
@@ -727,6 +818,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
727 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ | 818 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ |
728 | goto redirty; | 819 | goto redirty; |
729 | } | 820 | } |
821 | |||
822 | /* | ||
823 | * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC | ||
824 | * value into swapfile.c, the only way we can correctly account for a | ||
825 | * fallocated page arriving here is now to initialize it and write it. | ||
826 | * | ||
827 | * That's okay for a page already fallocated earlier, but if we have | ||
828 | * not yet completed the fallocation, then (a) we want to keep track | ||
829 | * of this page in case we have to undo it, and (b) it may not be a | ||
830 | * good idea to continue anyway, once we're pushing into swap. So | ||
831 | * reactivate the page, and let shmem_fallocate() quit when too many. | ||
832 | */ | ||
833 | if (!PageUptodate(page)) { | ||
834 | if (inode->i_private) { | ||
835 | struct shmem_falloc *shmem_falloc; | ||
836 | spin_lock(&inode->i_lock); | ||
837 | shmem_falloc = inode->i_private; | ||
838 | if (shmem_falloc && | ||
839 | index >= shmem_falloc->start && | ||
840 | index < shmem_falloc->next) | ||
841 | shmem_falloc->nr_unswapped++; | ||
842 | else | ||
843 | shmem_falloc = NULL; | ||
844 | spin_unlock(&inode->i_lock); | ||
845 | if (shmem_falloc) | ||
846 | goto redirty; | ||
847 | } | ||
848 | clear_highpage(page); | ||
849 | flush_dcache_page(page); | ||
850 | SetPageUptodate(page); | ||
851 | } | ||
852 | |||
730 | swap = get_swap_page(); | 853 | swap = get_swap_page(); |
731 | if (!swap.val) | 854 | if (!swap.val) |
732 | goto redirty; | 855 | goto redirty; |
@@ -856,6 +979,89 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) | |||
856 | #endif | 979 | #endif |
857 | 980 | ||
858 | /* | 981 | /* |
982 | * When a page is moved from swapcache to shmem filecache (either by the | ||
983 | * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of | ||
984 | * shmem_unuse_inode()), it may have been read in earlier from swap, in | ||
985 | * ignorance of the mapping it belongs to. If that mapping has special | ||
986 | * constraints (like the gma500 GEM driver, which requires RAM below 4GB), | ||
987 | * we may need to copy to a suitable page before moving to filecache. | ||
988 | * | ||
989 | * In a future release, this may well be extended to respect cpuset and | ||
990 | * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); | ||
991 | * but for now it is a simple matter of zone. | ||
992 | */ | ||
993 | static bool shmem_should_replace_page(struct page *page, gfp_t gfp) | ||
994 | { | ||
995 | return page_zonenum(page) > gfp_zone(gfp); | ||
996 | } | ||
997 | |||
998 | static int shmem_replace_page(struct page **pagep, gfp_t gfp, | ||
999 | struct shmem_inode_info *info, pgoff_t index) | ||
1000 | { | ||
1001 | struct page *oldpage, *newpage; | ||
1002 | struct address_space *swap_mapping; | ||
1003 | pgoff_t swap_index; | ||
1004 | int error; | ||
1005 | |||
1006 | oldpage = *pagep; | ||
1007 | swap_index = page_private(oldpage); | ||
1008 | swap_mapping = page_mapping(oldpage); | ||
1009 | |||
1010 | /* | ||
1011 | * We have arrived here because our zones are constrained, so don't | ||
1012 | * limit chance of success by further cpuset and node constraints. | ||
1013 | */ | ||
1014 | gfp &= ~GFP_CONSTRAINT_MASK; | ||
1015 | newpage = shmem_alloc_page(gfp, info, index); | ||
1016 | if (!newpage) | ||
1017 | return -ENOMEM; | ||
1018 | |||
1019 | page_cache_get(newpage); | ||
1020 | copy_highpage(newpage, oldpage); | ||
1021 | flush_dcache_page(newpage); | ||
1022 | |||
1023 | __set_page_locked(newpage); | ||
1024 | SetPageUptodate(newpage); | ||
1025 | SetPageSwapBacked(newpage); | ||
1026 | set_page_private(newpage, swap_index); | ||
1027 | SetPageSwapCache(newpage); | ||
1028 | |||
1029 | /* | ||
1030 | * Our caller will very soon move newpage out of swapcache, but it's | ||
1031 | * a nice clean interface for us to replace oldpage by newpage there. | ||
1032 | */ | ||
1033 | spin_lock_irq(&swap_mapping->tree_lock); | ||
1034 | error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, | ||
1035 | newpage); | ||
1036 | if (!error) { | ||
1037 | __inc_zone_page_state(newpage, NR_FILE_PAGES); | ||
1038 | __dec_zone_page_state(oldpage, NR_FILE_PAGES); | ||
1039 | } | ||
1040 | spin_unlock_irq(&swap_mapping->tree_lock); | ||
1041 | |||
1042 | if (unlikely(error)) { | ||
1043 | /* | ||
1044 | * Is this possible? I think not, now that our callers check | ||
1045 | * both PageSwapCache and page_private after getting page lock; | ||
1046 | * but be defensive. Reverse old to newpage for clear and free. | ||
1047 | */ | ||
1048 | oldpage = newpage; | ||
1049 | } else { | ||
1050 | mem_cgroup_replace_page_cache(oldpage, newpage); | ||
1051 | lru_cache_add_anon(newpage); | ||
1052 | *pagep = newpage; | ||
1053 | } | ||
1054 | |||
1055 | ClearPageSwapCache(oldpage); | ||
1056 | set_page_private(oldpage, 0); | ||
1057 | |||
1058 | unlock_page(oldpage); | ||
1059 | page_cache_release(oldpage); | ||
1060 | page_cache_release(oldpage); | ||
1061 | return error; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
859 | * shmem_getpage_gfp - find page in cache, or get from swap, or allocate | 1065 | * shmem_getpage_gfp - find page in cache, or get from swap, or allocate |
860 | * | 1066 | * |
861 | * If we allocate a new one we do not mark it dirty. That's up to the | 1067 | * If we allocate a new one we do not mark it dirty. That's up to the |
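shmem_should_replace_page()/shmem_replace_page() above exist for mappings whose gfp mask restricts the acceptable zones, as the gma500 example in the comment describes: a page read back from swap in ignorance of its mapping may land too high, so it is copied into a suitable page before entering the filecache. A hedged sketch of the consumer side, assuming the shmem_read_mapping_page_gfp() helper (outside these hunks) and an illustrative driver that needs pages below 4GB:

	#include <linux/shmem_fs.h>	/* shmem_read_mapping_page_gfp() */
	#include <linux/gfp.h>

	/* Illustrative helper: pin one shmem-backed page that must sit below 4GB. */
	static struct page *pin_lowmem_page(struct address_space *mapping, pgoff_t index)
	{
		/*
		 * __GFP_DMA32 narrows gfp_zone() to ZONE_DMA32, so if the swap
		 * read placed this page higher, shmem_should_replace_page()
		 * returns true and shmem_replace_page() copies it down before
		 * it is added to the filecache.
		 */
		return shmem_read_mapping_page_gfp(mapping, index,
						   GFP_KERNEL | __GFP_DMA32);
	}

A driver can also set the mapping's default with mapping_set_gfp_mask(), which is the mask the swapoff path above consults via mapping_gfp_mask().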
@@ -872,6 +1078,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, | |||
872 | swp_entry_t swap; | 1078 | swp_entry_t swap; |
873 | int error; | 1079 | int error; |
874 | int once = 0; | 1080 | int once = 0; |
1081 | int alloced = 0; | ||
875 | 1082 | ||
876 | if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) | 1083 | if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) |
877 | return -EFBIG; | 1084 | return -EFBIG; |
@@ -883,19 +1090,21 @@ repeat: | |||
883 | page = NULL; | 1090 | page = NULL; |
884 | } | 1091 | } |
885 | 1092 | ||
886 | if (sgp != SGP_WRITE && | 1093 | if (sgp != SGP_WRITE && sgp != SGP_FALLOC && |
887 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | 1094 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
888 | error = -EINVAL; | 1095 | error = -EINVAL; |
889 | goto failed; | 1096 | goto failed; |
890 | } | 1097 | } |
891 | 1098 | ||
1099 | /* fallocated page? */ | ||
1100 | if (page && !PageUptodate(page)) { | ||
1101 | if (sgp != SGP_READ) | ||
1102 | goto clear; | ||
1103 | unlock_page(page); | ||
1104 | page_cache_release(page); | ||
1105 | page = NULL; | ||
1106 | } | ||
892 | if (page || (sgp == SGP_READ && !swap.val)) { | 1107 | if (page || (sgp == SGP_READ && !swap.val)) { |
893 | /* | ||
894 | * Once we can get the page lock, it must be uptodate: | ||
895 | * if there were an error in reading back from swap, | ||
896 | * the page would not be inserted into the filecache. | ||
897 | */ | ||
898 | BUG_ON(page && !PageUptodate(page)); | ||
899 | *pagep = page; | 1108 | *pagep = page; |
900 | return 0; | 1109 | return 0; |
901 | } | 1110 | } |
@@ -923,26 +1132,31 @@ repeat: | |||
923 | 1132 | ||
924 | /* We have to do this with page locked to prevent races */ | 1133 | /* We have to do this with page locked to prevent races */ |
925 | lock_page(page); | 1134 | lock_page(page); |
1135 | if (!PageSwapCache(page) || page_private(page) != swap.val || | ||
1136 | !shmem_confirm_swap(mapping, index, swap)) { | ||
1137 | error = -EEXIST; /* try again */ | ||
1138 | goto unlock; | ||
1139 | } | ||
926 | if (!PageUptodate(page)) { | 1140 | if (!PageUptodate(page)) { |
927 | error = -EIO; | 1141 | error = -EIO; |
928 | goto failed; | 1142 | goto failed; |
929 | } | 1143 | } |
930 | wait_on_page_writeback(page); | 1144 | wait_on_page_writeback(page); |
931 | 1145 | ||
932 | /* Someone may have already done it for us */ | 1146 | if (shmem_should_replace_page(page, gfp)) { |
933 | if (page->mapping) { | 1147 | error = shmem_replace_page(&page, gfp, info, index); |
934 | if (page->mapping == mapping && | 1148 | if (error) |
935 | page->index == index) | 1149 | goto failed; |
936 | goto done; | ||
937 | error = -EEXIST; | ||
938 | goto failed; | ||
939 | } | 1150 | } |
940 | 1151 | ||
941 | error = mem_cgroup_cache_charge(page, current->mm, | 1152 | error = mem_cgroup_cache_charge(page, current->mm, |
942 | gfp & GFP_RECLAIM_MASK); | 1153 | gfp & GFP_RECLAIM_MASK); |
943 | if (!error) | 1154 | if (!error) { |
944 | error = shmem_add_to_page_cache(page, mapping, index, | 1155 | error = shmem_add_to_page_cache(page, mapping, index, |
945 | gfp, swp_to_radix_entry(swap)); | 1156 | gfp, swp_to_radix_entry(swap)); |
1157 | /* We already confirmed swap, and make no allocation */ | ||
1158 | VM_BUG_ON(error); | ||
1159 | } | ||
946 | if (error) | 1160 | if (error) |
947 | goto failed; | 1161 | goto failed; |
948 | 1162 | ||
@@ -979,11 +1193,18 @@ repeat: | |||
979 | __set_page_locked(page); | 1193 | __set_page_locked(page); |
980 | error = mem_cgroup_cache_charge(page, current->mm, | 1194 | error = mem_cgroup_cache_charge(page, current->mm, |
981 | gfp & GFP_RECLAIM_MASK); | 1195 | gfp & GFP_RECLAIM_MASK); |
982 | if (!error) | ||
983 | error = shmem_add_to_page_cache(page, mapping, index, | ||
984 | gfp, NULL); | ||
985 | if (error) | 1196 | if (error) |
986 | goto decused; | 1197 | goto decused; |
1198 | error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); | ||
1199 | if (!error) { | ||
1200 | error = shmem_add_to_page_cache(page, mapping, index, | ||
1201 | gfp, NULL); | ||
1202 | radix_tree_preload_end(); | ||
1203 | } | ||
1204 | if (error) { | ||
1205 | mem_cgroup_uncharge_cache_page(page); | ||
1206 | goto decused; | ||
1207 | } | ||
987 | lru_cache_add_anon(page); | 1208 | lru_cache_add_anon(page); |
988 | 1209 | ||
989 | spin_lock(&info->lock); | 1210 | spin_lock(&info->lock); |
@@ -991,19 +1212,36 @@ repeat: | |||
991 | inode->i_blocks += BLOCKS_PER_PAGE; | 1212 | inode->i_blocks += BLOCKS_PER_PAGE; |
992 | shmem_recalc_inode(inode); | 1213 | shmem_recalc_inode(inode); |
993 | spin_unlock(&info->lock); | 1214 | spin_unlock(&info->lock); |
1215 | alloced = true; | ||
994 | 1216 | ||
995 | clear_highpage(page); | 1217 | /* |
996 | flush_dcache_page(page); | 1218 | * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. |
997 | SetPageUptodate(page); | 1219 | */ |
1220 | if (sgp == SGP_FALLOC) | ||
1221 | sgp = SGP_WRITE; | ||
1222 | clear: | ||
1223 | /* | ||
1224 | * Let SGP_WRITE caller clear ends if write does not fill page; | ||
1225 | * but SGP_FALLOC on a page fallocated earlier must initialize | ||
1226 | * it now, lest undo on failure cancel our earlier guarantee. | ||
1227 | */ | ||
1228 | if (sgp != SGP_WRITE) { | ||
1229 | clear_highpage(page); | ||
1230 | flush_dcache_page(page); | ||
1231 | SetPageUptodate(page); | ||
1232 | } | ||
998 | if (sgp == SGP_DIRTY) | 1233 | if (sgp == SGP_DIRTY) |
999 | set_page_dirty(page); | 1234 | set_page_dirty(page); |
1000 | } | 1235 | } |
1001 | done: | 1236 | |
1002 | /* Perhaps the file has been truncated since we checked */ | 1237 | /* Perhaps the file has been truncated since we checked */ |
1003 | if (sgp != SGP_WRITE && | 1238 | if (sgp != SGP_WRITE && sgp != SGP_FALLOC && |
1004 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | 1239 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
1005 | error = -EINVAL; | 1240 | error = -EINVAL; |
1006 | goto trunc; | 1241 | if (alloced) |
1242 | goto trunc; | ||
1243 | else | ||
1244 | goto failed; | ||
1007 | } | 1245 | } |
1008 | *pagep = page; | 1246 | *pagep = page; |
1009 | return 0; | 1247 | return 0; |
@@ -1012,6 +1250,7 @@ done: | |||
1012 | * Error recovery. | 1250 | * Error recovery. |
1013 | */ | 1251 | */ |
1014 | trunc: | 1252 | trunc: |
1253 | info = SHMEM_I(inode); | ||
1015 | ClearPageDirty(page); | 1254 | ClearPageDirty(page); |
1016 | delete_from_page_cache(page); | 1255 | delete_from_page_cache(page); |
1017 | spin_lock(&info->lock); | 1256 | spin_lock(&info->lock); |
@@ -1019,19 +1258,16 @@ trunc: | |||
1019 | inode->i_blocks -= BLOCKS_PER_PAGE; | 1258 | inode->i_blocks -= BLOCKS_PER_PAGE; |
1020 | spin_unlock(&info->lock); | 1259 | spin_unlock(&info->lock); |
1021 | decused: | 1260 | decused: |
1261 | sbinfo = SHMEM_SB(inode->i_sb); | ||
1022 | if (sbinfo->max_blocks) | 1262 | if (sbinfo->max_blocks) |
1023 | percpu_counter_add(&sbinfo->used_blocks, -1); | 1263 | percpu_counter_add(&sbinfo->used_blocks, -1); |
1024 | unacct: | 1264 | unacct: |
1025 | shmem_unacct_blocks(info->flags, 1); | 1265 | shmem_unacct_blocks(info->flags, 1); |
1026 | failed: | 1266 | failed: |
1027 | if (swap.val && error != -EINVAL) { | 1267 | if (swap.val && error != -EINVAL && |
1028 | struct page *test = find_get_page(mapping, index); | 1268 | !shmem_confirm_swap(mapping, index, swap)) |
1029 | if (test && !radix_tree_exceptional_entry(test)) | 1269 | error = -EEXIST; |
1030 | page_cache_release(test); | 1270 | unlock: |
1031 | /* Have another try if the entry has changed */ | ||
1032 | if (test != swp_to_radix_entry(swap)) | ||
1033 | error = -EEXIST; | ||
1034 | } | ||
1035 | if (page) { | 1271 | if (page) { |
1036 | unlock_page(page); | 1272 | unlock_page(page); |
1037 | page_cache_release(page); | 1273 | page_cache_release(page); |
@@ -1043,7 +1279,7 @@ failed: | |||
1043 | spin_unlock(&info->lock); | 1279 | spin_unlock(&info->lock); |
1044 | goto repeat; | 1280 | goto repeat; |
1045 | } | 1281 | } |
1046 | if (error == -EEXIST) | 1282 | if (error == -EEXIST) /* from above or from radix_tree_insert */ |
1047 | goto repeat; | 1283 | goto repeat; |
1048 | return error; | 1284 | return error; |
1049 | } | 1285 | } |
@@ -1204,6 +1440,14 @@ shmem_write_end(struct file *file, struct address_space *mapping, | |||
1204 | if (pos + copied > inode->i_size) | 1440 | if (pos + copied > inode->i_size) |
1205 | i_size_write(inode, pos + copied); | 1441 | i_size_write(inode, pos + copied); |
1206 | 1442 | ||
1443 | if (!PageUptodate(page)) { | ||
1444 | if (copied < PAGE_CACHE_SIZE) { | ||
1445 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | ||
1446 | zero_user_segments(page, 0, from, | ||
1447 | from + copied, PAGE_CACHE_SIZE); | ||
1448 | } | ||
1449 | SetPageUptodate(page); | ||
1450 | } | ||
1207 | set_page_dirty(page); | 1451 | set_page_dirty(page); |
1208 | unlock_page(page); | 1452 | unlock_page(page); |
1209 | page_cache_release(page); | 1453 | page_cache_release(page); |
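shmem_write_end() must now cope with a write into a page that was fallocated but never initialized (!Uptodate): if the copy did not fill the page, the uncovered bytes are zeroed before SetPageUptodate(), since the allocator's old contents must not become readable. A tiny userspace model of the two zero_user_segments() ranges, with an assumed 4096-byte page and an assumed 50-byte write at offset 100:

	#include <stdio.h>
	#include <string.h>

	#define PAGE_SIZE 4096

	/* Zero everything outside [from, from + copied), as shmem_write_end() does. */
	static void zero_uncovered(char *page, size_t from, size_t copied)
	{
		memset(page, 0, from);				/* [0, from) */
		memset(page + from + copied, 0,
		       PAGE_SIZE - (from + copied));		/* [from+copied, PAGE_SIZE) */
	}

	int main(void)
	{
		char page[PAGE_SIZE];

		memset(page, 0xAA, sizeof(page));	/* stand-in for never-initialized contents */
		memset(page + 100, 'x', 50);		/* the bytes the write actually copied */
		zero_uncovered(page, 100, 50);
		printf("byte 99=%d byte 100=%c byte 150=%d\n", page[99], page[100], page[150]);
		return 0;
	}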
@@ -1365,6 +1609,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, | |||
1365 | struct splice_pipe_desc spd = { | 1609 | struct splice_pipe_desc spd = { |
1366 | .pages = pages, | 1610 | .pages = pages, |
1367 | .partial = partial, | 1611 | .partial = partial, |
1612 | .nr_pages_max = PIPE_DEF_BUFFERS, | ||
1368 | .flags = flags, | 1613 | .flags = flags, |
1369 | .ops = &page_cache_pipe_buf_ops, | 1614 | .ops = &page_cache_pipe_buf_ops, |
1370 | .spd_release = spd_release_page, | 1615 | .spd_release = spd_release_page, |
@@ -1453,7 +1698,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, | |||
1453 | if (spd.nr_pages) | 1698 | if (spd.nr_pages) |
1454 | error = splice_to_pipe(pipe, &spd); | 1699 | error = splice_to_pipe(pipe, &spd); |
1455 | 1700 | ||
1456 | splice_shrink_spd(pipe, &spd); | 1701 | splice_shrink_spd(&spd); |
1457 | 1702 | ||
1458 | if (error > 0) { | 1703 | if (error > 0) { |
1459 | *ppos += error; | 1704 | *ppos += error; |
@@ -1462,6 +1707,107 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, | |||
1462 | return error; | 1707 | return error; |
1463 | } | 1708 | } |
1464 | 1709 | ||
1710 | static long shmem_fallocate(struct file *file, int mode, loff_t offset, | ||
1711 | loff_t len) | ||
1712 | { | ||
1713 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1714 | struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); | ||
1715 | struct shmem_falloc shmem_falloc; | ||
1716 | pgoff_t start, index, end; | ||
1717 | int error; | ||
1718 | |||
1719 | mutex_lock(&inode->i_mutex); | ||
1720 | |||
1721 | if (mode & FALLOC_FL_PUNCH_HOLE) { | ||
1722 | struct address_space *mapping = file->f_mapping; | ||
1723 | loff_t unmap_start = round_up(offset, PAGE_SIZE); | ||
1724 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; | ||
1725 | |||
1726 | if ((u64)unmap_end > (u64)unmap_start) | ||
1727 | unmap_mapping_range(mapping, unmap_start, | ||
1728 | 1 + unmap_end - unmap_start, 0); | ||
1729 | shmem_truncate_range(inode, offset, offset + len - 1); | ||
1730 | /* No need to unmap again: hole-punching leaves COWed pages */ | ||
1731 | error = 0; | ||
1732 | goto out; | ||
1733 | } | ||
1734 | |||
1735 | /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ | ||
1736 | error = inode_newsize_ok(inode, offset + len); | ||
1737 | if (error) | ||
1738 | goto out; | ||
1739 | |||
1740 | start = offset >> PAGE_CACHE_SHIFT; | ||
1741 | end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1742 | /* Try to avoid a swapstorm if len is impossible to satisfy */ | ||
1743 | if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { | ||
1744 | error = -ENOSPC; | ||
1745 | goto out; | ||
1746 | } | ||
1747 | |||
1748 | shmem_falloc.start = start; | ||
1749 | shmem_falloc.next = start; | ||
1750 | shmem_falloc.nr_falloced = 0; | ||
1751 | shmem_falloc.nr_unswapped = 0; | ||
1752 | spin_lock(&inode->i_lock); | ||
1753 | inode->i_private = &shmem_falloc; | ||
1754 | spin_unlock(&inode->i_lock); | ||
1755 | |||
1756 | for (index = start; index < end; index++) { | ||
1757 | struct page *page; | ||
1758 | |||
1759 | /* | ||
1760 | * Good, the fallocate(2) manpage permits EINTR: we may have | ||
1761 | * been interrupted because we are using up too much memory. | ||
1762 | */ | ||
1763 | if (signal_pending(current)) | ||
1764 | error = -EINTR; | ||
1765 | else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) | ||
1766 | error = -ENOMEM; | ||
1767 | else | ||
1768 | error = shmem_getpage(inode, index, &page, SGP_FALLOC, | ||
1769 | NULL); | ||
1770 | if (error) { | ||
1771 | /* Remove the !PageUptodate pages we added */ | ||
1772 | shmem_undo_range(inode, | ||
1773 | (loff_t)start << PAGE_CACHE_SHIFT, | ||
1774 | (loff_t)index << PAGE_CACHE_SHIFT, true); | ||
1775 | goto undone; | ||
1776 | } | ||
1777 | |||
1778 | /* | ||
1779 | * Inform shmem_writepage() how far we have reached. | ||
1780 | * No need for lock or barrier: we have the page lock. | ||
1781 | */ | ||
1782 | shmem_falloc.next++; | ||
1783 | if (!PageUptodate(page)) | ||
1784 | shmem_falloc.nr_falloced++; | ||
1785 | |||
1786 | /* | ||
1787 | * If !PageUptodate, leave it that way so that freeable pages | ||
1788 | * can be recognized if we need to rollback on error later. | ||
1789 | * But set_page_dirty so that memory pressure will swap rather | ||
1790 | * than free the pages we are allocating (and SGP_CACHE pages | ||
1791 | * might still be clean: we now need to mark those dirty too). | ||
1792 | */ | ||
1793 | set_page_dirty(page); | ||
1794 | unlock_page(page); | ||
1795 | page_cache_release(page); | ||
1796 | cond_resched(); | ||
1797 | } | ||
1798 | |||
1799 | if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) | ||
1800 | i_size_write(inode, offset + len); | ||
1801 | inode->i_ctime = CURRENT_TIME; | ||
1802 | undone: | ||
1803 | spin_lock(&inode->i_lock); | ||
1804 | inode->i_private = NULL; | ||
1805 | spin_unlock(&inode->i_lock); | ||
1806 | out: | ||
1807 | mutex_unlock(&inode->i_mutex); | ||
1808 | return error; | ||
1809 | } | ||
1810 | |||
1465 | static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) | 1811 | static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) |
1466 | { | 1812 | { |
1467 | struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); | 1813 | struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); |
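With shmem_fallocate() above wired into the file operations below, tmpfs files gain real preallocation and hole-punching. A minimal usage example from userspace; the path is illustrative and assumes a writable tmpfs mount such as /dev/shm:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/shm/falloc-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* Preallocate 1MiB: blocks are reserved, pages left !Uptodate until used. */
		if (fallocate(fd, 0, 0, 1 << 20) < 0)
			perror("fallocate");

		/* Punch out the second 64KiB; those pages are freed again. */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      64 << 10, 64 << 10) < 0)
			perror("punch hole");

		close(fd);
		unlink("/dev/shm/falloc-demo");
		return 0;
	}

Note that FALLOC_FL_PUNCH_HOLE must be combined with FALLOC_FL_KEEP_SIZE; the VFS rejects the call otherwise, before shmem_fallocate() is reached.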
@@ -1665,6 +2011,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
1665 | kaddr = kmap_atomic(page); | 2011 | kaddr = kmap_atomic(page); |
1666 | memcpy(kaddr, symname, len); | 2012 | memcpy(kaddr, symname, len); |
1667 | kunmap_atomic(kaddr); | 2013 | kunmap_atomic(kaddr); |
2014 | SetPageUptodate(page); | ||
1668 | set_page_dirty(page); | 2015 | set_page_dirty(page); |
1669 | unlock_page(page); | 2016 | unlock_page(page); |
1670 | page_cache_release(page); | 2017 | page_cache_release(page); |
@@ -2033,11 +2380,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, | |||
2033 | return dentry; | 2380 | return dentry; |
2034 | } | 2381 | } |
2035 | 2382 | ||
2036 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | 2383 | static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, |
2037 | int connectable) | 2384 | struct inode *parent) |
2038 | { | 2385 | { |
2039 | struct inode *inode = dentry->d_inode; | ||
2040 | |||
2041 | if (*len < 3) { | 2386 | if (*len < 3) { |
2042 | *len = 3; | 2387 | *len = 3; |
2043 | return 255; | 2388 | return 255; |
@@ -2075,6 +2420,8 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, | |||
2075 | bool remount) | 2420 | bool remount) |
2076 | { | 2421 | { |
2077 | char *this_char, *value, *rest; | 2422 | char *this_char, *value, *rest; |
2423 | uid_t uid; | ||
2424 | gid_t gid; | ||
2078 | 2425 | ||
2079 | while (options != NULL) { | 2426 | while (options != NULL) { |
2080 | this_char = options; | 2427 | this_char = options; |
@@ -2134,15 +2481,21 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, | |||
2134 | } else if (!strcmp(this_char,"uid")) { | 2481 | } else if (!strcmp(this_char,"uid")) { |
2135 | if (remount) | 2482 | if (remount) |
2136 | continue; | 2483 | continue; |
2137 | sbinfo->uid = simple_strtoul(value, &rest, 0); | 2484 | uid = simple_strtoul(value, &rest, 0); |
2138 | if (*rest) | 2485 | if (*rest) |
2139 | goto bad_val; | 2486 | goto bad_val; |
2487 | sbinfo->uid = make_kuid(current_user_ns(), uid); | ||
2488 | if (!uid_valid(sbinfo->uid)) | ||
2489 | goto bad_val; | ||
2140 | } else if (!strcmp(this_char,"gid")) { | 2490 | } else if (!strcmp(this_char,"gid")) { |
2141 | if (remount) | 2491 | if (remount) |
2142 | continue; | 2492 | continue; |
2143 | sbinfo->gid = simple_strtoul(value, &rest, 0); | 2493 | gid = simple_strtoul(value, &rest, 0); |
2144 | if (*rest) | 2494 | if (*rest) |
2145 | goto bad_val; | 2495 | goto bad_val; |
2496 | sbinfo->gid = make_kgid(current_user_ns(), gid); | ||
2497 | if (!gid_valid(sbinfo->gid)) | ||
2498 | goto bad_val; | ||
2146 | } else if (!strcmp(this_char,"mpol")) { | 2499 | } else if (!strcmp(this_char,"mpol")) { |
2147 | if (mpol_parse_str(value, &sbinfo->mpol, 1)) | 2500 | if (mpol_parse_str(value, &sbinfo->mpol, 1)) |
2148 | goto bad_val; | 2501 | goto bad_val; |
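The uid= and gid= mount options are now converted through make_kuid()/make_kgid() against the mounter's user namespace and rejected if invalid, instead of being stored as raw integers. Nothing changes on the mounting side; a small example of passing these options programmatically (requires CAP_SYS_ADMIN; the target path is illustrative):

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Equivalent to: mount -t tmpfs -o size=64m,uid=1000,gid=1000 tmpfs /mnt/tmp */
		if (mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
			  "size=64m,uid=1000,gid=1000") < 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}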
@@ -2210,10 +2563,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) | |||
2210 | seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); | 2563 | seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); |
2211 | if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) | 2564 | if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) |
2212 | seq_printf(seq, ",mode=%03ho", sbinfo->mode); | 2565 | seq_printf(seq, ",mode=%03ho", sbinfo->mode); |
2213 | if (sbinfo->uid != 0) | 2566 | if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) |
2214 | seq_printf(seq, ",uid=%u", sbinfo->uid); | 2567 | seq_printf(seq, ",uid=%u", |
2215 | if (sbinfo->gid != 0) | 2568 | from_kuid_munged(&init_user_ns, sbinfo->uid)); |
2216 | seq_printf(seq, ",gid=%u", sbinfo->gid); | 2569 | if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) |
2570 | seq_printf(seq, ",gid=%u", | ||
2571 | from_kgid_munged(&init_user_ns, sbinfo->gid)); | ||
2217 | shmem_show_mpol(seq, sbinfo->mpol); | 2572 | shmem_show_mpol(seq, sbinfo->mpol); |
2218 | return 0; | 2573 | return 0; |
2219 | } | 2574 | } |
@@ -2260,6 +2615,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) | |||
2260 | } | 2615 | } |
2261 | } | 2616 | } |
2262 | sb->s_export_op = &shmem_export_ops; | 2617 | sb->s_export_op = &shmem_export_ops; |
2618 | sb->s_flags |= MS_NOSEC; | ||
2263 | #else | 2619 | #else |
2264 | sb->s_flags |= MS_NOUSER; | 2620 | sb->s_flags |= MS_NOUSER; |
2265 | #endif | 2621 | #endif |
@@ -2362,12 +2718,12 @@ static const struct file_operations shmem_file_operations = { | |||
2362 | .fsync = noop_fsync, | 2718 | .fsync = noop_fsync, |
2363 | .splice_read = shmem_file_splice_read, | 2719 | .splice_read = shmem_file_splice_read, |
2364 | .splice_write = generic_file_splice_write, | 2720 | .splice_write = generic_file_splice_write, |
2721 | .fallocate = shmem_fallocate, | ||
2365 | #endif | 2722 | #endif |
2366 | }; | 2723 | }; |
2367 | 2724 | ||
2368 | static const struct inode_operations shmem_inode_operations = { | 2725 | static const struct inode_operations shmem_inode_operations = { |
2369 | .setattr = shmem_setattr, | 2726 | .setattr = shmem_setattr, |
2370 | .truncate_range = shmem_truncate_range, | ||
2371 | #ifdef CONFIG_TMPFS_XATTR | 2727 | #ifdef CONFIG_TMPFS_XATTR |
2372 | .setxattr = shmem_setxattr, | 2728 | .setxattr = shmem_setxattr, |
2373 | .getxattr = shmem_getxattr, | 2729 | .getxattr = shmem_getxattr, |