Diffstat (limited to 'mm')
 mm/filemap_xip.c |  48
 mm/madvise.c     |  19
 mm/shmem.c       | 132
 3 files changed, 151 insertions(+), 48 deletions(-)
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9dd9fbb75139..cbb335813ec0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,29 @@
 #include "filemap.h"
 
 /*
+ * We do use our own empty page to avoid interference with other users
+ * of ZERO_PAGE(), such as /dev/zero
+ */
+static struct page *__xip_sparse_page;
+
+static struct page *xip_sparse_page(void)
+{
+        if (!__xip_sparse_page) {
+                unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
+                if (zeroes) {
+                        static DEFINE_SPINLOCK(xip_alloc_lock);
+                        spin_lock(&xip_alloc_lock);
+                        if (!__xip_sparse_page)
+                                __xip_sparse_page = virt_to_page(zeroes);
+                        else
+                                free_page(zeroes);
+                        spin_unlock(&xip_alloc_lock);
+                }
+        }
+        return __xip_sparse_page;
+}
+
+/*
  * This is a file read routine for execute in place files, and uses
  * the mapping->a_ops->get_xip_page() function for the actual low-level
  * stuff.
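
The xip_sparse_page() allocator added above is a double-checked initialization: an unlocked fast-path test of __xip_sparse_page, then a recheck under the spinlock so that the loser of an allocation race frees its duplicate page. A minimal userspace sketch of the same pattern, assuming pthreads and calloc in place of the kernel primitives (illustrative only, not part of the patch):

#include <pthread.h>
#include <stdlib.h>

static void *sparse_buf;
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

/* Lazily allocate one shared zeroed buffer; mirrors xip_sparse_page().
 * The unlocked first test is the same benign-race fast path the kernel
 * code uses: worst case, two threads allocate and one frees. */
static void *sparse_buffer(size_t size)
{
        if (!sparse_buf) {
                void *zeroes = calloc(1, size);
                if (zeroes) {
                        pthread_mutex_lock(&alloc_lock);
                        if (!sparse_buf)        /* recheck under the lock */
                                sparse_buf = zeroes;
                        else                    /* lost the race */
                                free(zeroes);
                        pthread_mutex_unlock(&alloc_lock);
                }
        }
        return sparse_buf;      /* may be NULL if allocation failed */
}
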
@@ -162,7 +185,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
  * xip_write
  *
  * This function walks all vmas of the address_space and unmaps the
- * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
+ * __xip_sparse_page when found at pgoff.
  */
 static void
 __xip_unmap (struct address_space * mapping,
@@ -177,13 +200,16 @@ __xip_unmap (struct address_space * mapping,
         spinlock_t *ptl;
         struct page *page;
 
+        page = __xip_sparse_page;
+        if (!page)
+                return;
+
         spin_lock(&mapping->i_mmap_lock);
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                 mm = vma->vm_mm;
                 address = vma->vm_start +
                         ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
                 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-                page = ZERO_PAGE(0);
                 pte = page_check_address(page, mm, address, &ptl);
                 if (pte) {
                         /* Nuke the page table entry. */
@@ -222,16 +248,14 @@ xip_file_nopage(struct vm_area_struct * area,
                 + area->vm_pgoff;
 
         size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (pgoff >= size) {
-                return NULL;
-        }
+        if (pgoff >= size)
+                return NOPAGE_SIGBUS;
 
         page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
-        if (!IS_ERR(page)) {
+        if (!IS_ERR(page))
                 goto out;
-        }
         if (PTR_ERR(page) != -ENODATA)
-                return NULL;
+                return NOPAGE_SIGBUS;
 
         /* sparse block */
         if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -241,12 +265,14 @@ xip_file_nopage(struct vm_area_struct * area,
                 page = mapping->a_ops->get_xip_page (mapping,
                         pgoff*(PAGE_SIZE/512), 1);
                 if (IS_ERR(page))
-                        return NULL;
+                        return NOPAGE_SIGBUS;
                 /* unmap page at pgoff from all other vmas */
                 __xip_unmap(mapping, pgoff);
         } else {
-                /* not shared and writable, use ZERO_PAGE() */
-                page = ZERO_PAGE(0);
+                /* not shared and writable, use xip_sparse_page() */
+                page = xip_sparse_page();
+                if (!page)
+                        return NOPAGE_OOM;
         }
 
 out:
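
Taken together, the xip_file_nopage() changes above replace bare NULL returns with explicit NOPAGE_SIGBUS and NOPAGE_OOM results, and route read faults on holes to the private sparse page. Reduced to control flow, the fault path now distinguishes four cases; the sketch below uses hypothetical helpers standing in for get_xip_page() and is not the kernel code:

#include <stdbool.h>

enum fault_result { FAULT_PAGE, FAULT_SIGBUS, FAULT_OOM };

/* hypothetical stand-ins for mapping->a_ops->get_xip_page() */
static bool block_present(unsigned long pgoff) { (void)pgoff; return false; }
static bool block_allocate(unsigned long pgoff) { (void)pgoff; return true; }
static bool sparse_page_available(void) { return true; }

static enum fault_result xip_fault(unsigned long pgoff, unsigned long size,
                                   bool shared_writable)
{
        if (pgoff >= size)              /* fault beyond end of file */
                return FAULT_SIGBUS;
        if (block_present(pgoff))       /* backing block already exists */
                return FAULT_PAGE;
        if (shared_writable)            /* write fault: allocate the block */
                return block_allocate(pgoff) ? FAULT_PAGE : FAULT_SIGBUS;
        /* read fault on a hole: map the shared sparse page instead */
        return sparse_page_available() ? FAULT_PAGE : FAULT_OOM;
}
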
diff --git a/mm/madvise.c b/mm/madvise.c
index 77916e9fc52b..603c5257ed6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -159,9 +159,10 @@ static long madvise_remove(struct vm_area_struct *vma,
                                 unsigned long start, unsigned long end)
 {
         struct address_space *mapping;
         loff_t offset, endoff;
+        int error;
 
-        *prev = vma;
+        *prev = NULL;        /* tell sys_madvise we drop mmap_sem */
 
         if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
                 return -EINVAL;
@@ -180,7 +181,12 @@ static long madvise_remove(struct vm_area_struct *vma,
                         + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
         endoff = (loff_t)(end - vma->vm_start - 1)
                         + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-        return vmtruncate_range(mapping->host, offset, endoff);
+
+        /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+        up_write(&current->mm->mmap_sem);
+        error = vmtruncate_range(mapping->host, offset, endoff);
+        down_write(&current->mm->mmap_sem);
+        return error;
 }
 
 static long
@@ -315,12 +321,15 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
                 if (error)
                         goto out;
                 start = tmp;
-                if (start < prev->vm_end)
+                if (prev && start < prev->vm_end)
                         start = prev->vm_end;
                 error = unmapped_error;
                 if (start >= end)
                         goto out;
-                vma = prev->vm_next;
+                if (prev)
+                        vma = prev->vm_next;
+                else        /* madvise_remove dropped mmap_sem */
+                        vma = find_vma(current->mm, start);
         }
 out:
         up_write(&current->mm->mmap_sem);
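
The madvise_remove() change is the familiar drop-and-revalidate pattern: vmtruncate_range() must sleep on i_mutex and i_alloc_sem, so mmap_sem cannot be held across it; after retaking mmap_sem, everything found under the old hold is stale, which is what *prev = NULL signals to sys_madvise and why it redoes find_vma(). A userspace sketch of the shape of this, with a pthread rwlock standing in for mmap_sem (illustrative, not the kernel code):

#include <pthread.h>

static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

/* hypothetical stand-in for vmtruncate_range(), which sleeps on
 * other locks and therefore must run without map_lock held */
static int truncate_backing(unsigned long start, unsigned long end)
{
        (void)start; (void)end;
        return 0;
}

/* caller holds map_lock for writing, like mmap_sem in sys_madvise */
static int remove_range(unsigned long start, unsigned long end)
{
        int error;

        pthread_rwlock_unlock(&map_lock);       /* drop across the sleep */
        error = truncate_backing(start, end);
        pthread_rwlock_wrlock(&map_lock);       /* retake before return */
        /*
         * Anything looked up under the old hold of map_lock is now
         * stale; the caller must redo its lookup, as sys_madvise
         * does with find_vma() when it sees prev == NULL.
         */
        return error;
}
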
diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
  * @dir:        pointer to the directory
  * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+                                                spinlock_t *punch_lock)
 {
+        spinlock_t *punch_unlock = NULL;
         swp_entry_t *ptr;
         int freed = 0;
 
         for (ptr = dir; ptr < edir; ptr++) {
                 if (ptr->val) {
+                        if (unlikely(punch_lock)) {
+                                punch_unlock = punch_lock;
+                                punch_lock = NULL;
+                                spin_lock(punch_unlock);
+                                if (!ptr->val)
+                                        continue;
+                        }
                         free_swap_and_cache(*ptr);
                         *ptr = (swp_entry_t){0};
                         freed++;
                 }
         }
+        if (punch_unlock)
+                spin_unlock(punch_unlock);
         return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-                int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+                int limit, struct page ***dir, spinlock_t *punch_lock)
 {
         swp_entry_t *ptr;
         int freed = 0;
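
The punch_lock argument added to shmem_free_swp() implements a one-shot lazy lock: while the scan sees only empty entries it takes nothing; the first live entry takes the lock once and rechecks itself under it (a racing truncation may have freed it meanwhile), and the lock is then held for the remainder of the scan. The protocol in isolation, with a mutex standing in for the kernel spinlock (hypothetical sketch, not the patch itself):

#include <pthread.h>

static int free_entries(unsigned long *p, unsigned long *end,
                        pthread_mutex_t *punch_lock)
{
        pthread_mutex_t *punch_unlock = NULL;
        int freed = 0;

        for (; p < end; p++) {
                if (*p) {
                        if (punch_lock) {       /* first live entry only */
                                punch_unlock = punch_lock;
                                punch_lock = NULL;
                                pthread_mutex_lock(punch_unlock);
                                if (!*p)        /* raced: already freed */
                                        continue;
                        }
                        *p = 0;                 /* free the entry */
                        freed++;
                }
        }
        if (punch_unlock)
                pthread_mutex_unlock(punch_unlock);
        return freed;
}
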
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
                 int size = limit - offset;
                 if (size > LATENCY_LIMIT)
                         size = LATENCY_LIMIT;
-                freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+                freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+                                                        punch_lock);
                 if (need_resched()) {
                         shmem_swp_unmap(ptr);
                         if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         long nr_swaps_freed = 0;
         int offset;
         int freed;
-        int punch_hole = 0;
+        int punch_hole;
+        spinlock_t *needs_lock;
+        spinlock_t *punch_lock;
+        unsigned long upper_limit;
 
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
         idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         info->flags |= SHMEM_TRUNCATE;
         if (likely(end == (loff_t) -1)) {
                 limit = info->next_index;
+                upper_limit = SHMEM_MAX_INDEX;
                 info->next_index = idx;
+                needs_lock = NULL;
+                punch_hole = 0;
         } else {
-                limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-                if (limit > info->next_index)
-                        limit = info->next_index;
+                if (end + 1 >= inode->i_size) { /* we may free a little more */
+                        limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+                                                        PAGE_CACHE_SHIFT;
+                        upper_limit = SHMEM_MAX_INDEX;
+                } else {
+                        limit = (end + 1) >> PAGE_CACHE_SHIFT;
+                        upper_limit = limit;
+                }
+                needs_lock = &info->lock;
                 punch_hole = 1;
         }
 
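
The limit/upper_limit pair computed above bounds how far whole pages may be freed: an interior hole must stop at the last page wholly inside [start, end] (upper_limit == limit), while a truncation or a hole reaching EOF may free a little more, up to SHMEM_MAX_INDEX. A worked example, assuming 4K pages (PAGE_CACHE_SHIFT == 12) and made-up sizes:

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
        unsigned long i_size = 10 * PAGE_CACHE_SIZE + 100; /* ~40K file */
        unsigned long end = 8 * PAGE_CACHE_SIZE - 1;  /* punch up to here */
        unsigned long limit, upper_limit;

        if (end + 1 >= i_size) {        /* hole reaches EOF: free more */
                limit = (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
                upper_limit = ~0UL;     /* stands in for SHMEM_MAX_INDEX */
        } else {                        /* interior hole: whole pages only */
                limit = (end + 1) >> PAGE_CACHE_SHIFT;
                upper_limit = limit;
        }
        printf("limit=%lu upper_limit=%lu\n", limit, upper_limit); /* 8 8 */
        return 0;
}
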
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 size = limit;
                 if (size > SHMEM_NR_DIRECT)
                         size = SHMEM_NR_DIRECT;
-                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
         }
 
         /*
          * If there are no indirect blocks or we are punching a hole
          * below indirect blocks, nothing to be done.
          */
-        if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+        if (!topdir || limit <= SHMEM_NR_DIRECT)
                 goto done2;
 
-        BUG_ON(limit <= SHMEM_NR_DIRECT);
+        /*
+         * The truncation case has already dropped info->lock, and we're safe
+         * because i_size and next_index have already been lowered, preventing
+         * access beyond. But in the punch_hole case, we still need to take
+         * the lock when updating the swap directory, because there might be
+         * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+         * shmem_writepage. However, whenever we find we can remove a whole
+         * directory page (not at the misaligned start or end of the range),
+         * we first NULLify its pointer in the level above, and then have no
+         * need to take the lock when updating its contents: needs_lock and
+         * punch_lock (either pointing to info->lock or NULL) manage this.
+         */
+
+        upper_limit -= SHMEM_NR_DIRECT;
         limit -= SHMEM_NR_DIRECT;
         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
         offset = idx % ENTRIES_PER_PAGE;
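
The comment block above is the heart of the locking scheme: once a directory page's pointer in the level above has been NULLified under info->lock, no racing lookup can reach it, so its contents can then be cleared with no lock held at all. Stripped to its essentials, with a mutex in place of the spinlock (hypothetical sketch):

#include <pthread.h>
#include <stdlib.h>

struct dirpage { unsigned long entry[512]; };

static void detach_and_clear(struct dirpage **slot, pthread_mutex_t *lock)
{
        struct dirpage *child;
        int i;

        pthread_mutex_lock(lock);
        child = *slot;
        *slot = NULL;           /* unreachable from this point on */
        pthread_mutex_unlock(lock);

        if (!child)
                return;
        for (i = 0; i < 512; i++)       /* no lock needed any more */
                child->entry[i] = 0;
        free(child);
}
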
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 if (*dir) {
                         diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-                        if (!diroff && !offset) {
-                                *dir = NULL;
+                        if (!diroff && !offset && upper_limit >= stage) {
+                                if (needs_lock) {
+                                        spin_lock(needs_lock);
+                                        *dir = NULL;
+                                        spin_unlock(needs_lock);
+                                        needs_lock = NULL;
+                                } else
+                                        *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
                         }
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         }
                         stage = idx + ENTRIES_PER_PAGEPAGE;
                         middir = *dir;
-                        *dir = NULL;
-                        nr_pages_to_free++;
-                        list_add(&middir->lru, &pages_to_free);
+                        if (punch_hole)
+                                needs_lock = &info->lock;
+                        if (upper_limit >= stage) {
+                                if (needs_lock) {
+                                        spin_lock(needs_lock);
+                                        *dir = NULL;
+                                        spin_unlock(needs_lock);
+                                        needs_lock = NULL;
+                                } else
+                                        *dir = NULL;
+                                nr_pages_to_free++;
+                                list_add(&middir->lru, &pages_to_free);
+                        }
                         shmem_dir_unmap(dir);
                         cond_resched();
                         dir = shmem_dir_map(middir);
                         diroff = 0;
                 }
+                punch_lock = needs_lock;
                 subdir = dir[diroff];
-                if (subdir && page_private(subdir)) {
+                if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+                        if (needs_lock) {
+                                spin_lock(needs_lock);
+                                dir[diroff] = NULL;
+                                spin_unlock(needs_lock);
+                                punch_lock = NULL;
+                        } else
+                                dir[diroff] = NULL;
+                        nr_pages_to_free++;
+                        list_add(&subdir->lru, &pages_to_free);
+                }
+                if (subdir && page_private(subdir) /* has swap entries */) {
                         size = limit - idx;
                         if (size > ENTRIES_PER_PAGE)
                                 size = ENTRIES_PER_PAGE;
                         freed = shmem_map_and_free_swp(subdir,
-                                        offset, size, &dir);
+                                        offset, size, &dir, punch_lock);
                         if (!dir)
                                 dir = shmem_dir_map(middir);
                         nr_swaps_freed += freed;
-                        if (offset)
+                        if (offset || punch_lock) {
                                 spin_lock(&info->lock);
-                        set_page_private(subdir, page_private(subdir) - freed);
-                        if (offset)
+                                set_page_private(subdir,
+                                        page_private(subdir) - freed);
                                 spin_unlock(&info->lock);
-                        if (!punch_hole)
-                                BUG_ON(page_private(subdir) > offset);
-                }
-                if (offset)
-                        offset = 0;
-                else if (subdir && !page_private(subdir)) {
-                        dir[diroff] = NULL;
-                        nr_pages_to_free++;
-                        list_add(&subdir->lru, &pages_to_free);
+                        } else
+                                BUG_ON(page_private(subdir) != freed);
                 }
+                offset = 0;
         }
 done1:
         shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
                  * generic_delete_inode did it, before we lowered next_index.
                  * Also, though shmem_getpage checks i_size before adding to
                  * cache, no recheck after: so fix the narrow window there too.
+                 *
+                 * Recalling truncate_inode_pages_range and unmap_mapping_range
+                 * every time for punch_hole (which never got a chance to clear
+                 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+                 * yet hardly ever necessary: try to optimize them out later.
                  */
                 truncate_inode_pages_range(inode->i_mapping, start, end);
+                if (punch_hole)
+                        unmap_mapping_range(inode->i_mapping, start,
+                                                end - start, 1);
         }
 
         spin_lock(&info->lock);