author    David Howells <dhowells@redhat.com>    2009-01-08 07:04:47 -0500
committer David Howells <dhowells@redhat.com>    2009-01-08 07:04:47 -0500
commit    8feae13110d60cc6287afabc2887366b0eb226c2 (patch)
tree      b3188986faab70e753e00ea8670a11ba8ec844c0 /mm
parent    41836382ebb415d68d3ebc4525e78e871fe58baf (diff)
NOMMU: Make VMAs per MM as for MMU-mode linux
Make VMAs per mm_struct as for MMU-mode linux. This solves two problems:

 (1) In SYSV SHM, where nattch for a segment does not reflect the number of
     shmat's (and forks) done.

 (2) In mmap(), where the VMA's vm_mm is set to point to the parent mm by an
     exec'ing process when VM_EXECUTABLE is specified, regardless of the fact
     that a VMA might be shared and already have its vm_mm assigned to another
     process or a dead process.

A new struct (vm_region) is introduced to track a mapped region and to
remember the circumstances under which it may be shared, and the
vm_list_struct structure is discarded as it's no longer required.

This patch makes the following additional changes:

 (1) Regions are now allocated with alloc_pages() rather than kmalloc() and
     with no recourse to __GFP_COMP, so the pages are not composite.  Instead,
     each page has a reference on it held by the region.  Anything else that
     is interested in such a page will have to get a reference on it to retain
     it.  When the pages are released due to unmapping, each page is passed to
     put_page() and will be freed when the page usage count reaches zero.

 (2) Excess pages are trimmed after an allocation as the allocation must be
     made as a power-of-2 quantity of pages.

 (3) VMAs are added to the parent MM's R/B tree and mmap lists.  As an MM may
     end up with overlapping VMAs within the tree, the VMA struct address is
     appended to the sort key.

 (4) Non-anonymous VMAs are now added to the backing inode's prio list.

 (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of
     the backing region.  The VMA and region structs will be split if
     necessary.

 (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory
     segment instead of all the attachments at that address.  Multiple
     shmat()'s return the same address under NOMMU-mode instead of different
     virtual addresses as under MMU-mode.

 (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode.

 (8) /proc/maps is now the global list of mapped regions, and may list bits
     that aren't actually mapped anywhere.

 (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount
     of RAM currently allocated by mmap to hold mappable regions that can't be
     mapped directly.  These are copies of the backing device or file if not
     anonymous.

These changes make NOMMU mode more similar to MMU mode.  The downside is that
NOMMU mode now needs some extra memory to track things, compared with NOMMU
mode without this patch (VMAs are no longer shared, and there are now region
structs).

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mike Frysinger <vapier.adi@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
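The sketch below illustrates change (5), punching a hole in an anonymous mapping with munmap(). It is an illustration only, not part of the patch; it assumes a NOMMU target where a MAP_PRIVATE|MAP_ANONYMOUS mmap() succeeds and that the hole being released is page-aligned, as the new do_munmap() requires.

	/* Illustration only: punch a page-sized hole out of the middle of an
	 * anonymous mapping.  With this patch applied, the partial unmap below
	 * should succeed on NOMMU and split the VMA/region in the kernel. */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long page = sysconf(_SC_PAGESIZE);
		size_t len = 4 * page;
		char *map;

		map = mmap(NULL, len, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (map == MAP_FAILED) {
			perror("mmap");
			return EXIT_FAILURE;
		}
		memset(map, 0xaa, len);

		/* release the second page only; the first and the last two
		 * pages remain mapped and usable */
		if (munmap(map + page, page) == -1) {
			perror("munmap (hole punch)");
			return EXIT_FAILURE;
		}

		map[0] = 1;		/* still valid */
		map[2 * page] = 2;	/* still valid */

		printf("hole punched at %p\n", (void *)(map + page));

		/* unmap the remaining pieces, each now a whole VMA */
		munmap(map, page);
		munmap(map + 2 * page, 2 * page);
		return EXIT_SUCCESS;
	}

Before this patch the partial munmap() above would have failed with -EINVAL on NOMMU, since the old do_munmap() only accepted an exact match on an existing mapping.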
Diffstat (limited to 'mm')
-rw-r--r--  mm/mmap.c    10
-rw-r--r--  mm/nommu.c  960
2 files changed, 674 insertions, 296 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index a910c045cfd4..749623196cb9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
2472 2472
2473 mutex_unlock(&mm_all_locks_mutex); 2473 mutex_unlock(&mm_all_locks_mutex);
2474} 2474}
2475
2476/*
2477 * initialise the VMA slab
2478 */
2479void __init mmap_init(void)
2480{
2481 vm_area_cachep = kmem_cache_create("vm_area_struct",
2482 sizeof(struct vm_area_struct), 0,
2483 SLAB_PANIC, NULL);
2484}
diff --git a/mm/nommu.c b/mm/nommu.c
index 23f355bbe262..0d363dfcf10e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -6,7 +6,7 @@
6 * 6 *
7 * See Documentation/nommu-mmap.txt 7 * See Documentation/nommu-mmap.txt
8 * 8 *
9 * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com> 9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
@@ -33,6 +33,28 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <asm/tlb.h> 34#include <asm/tlb.h>
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include "internal.h"
37
38static inline __attribute__((format(printf, 1, 2)))
39void no_printk(const char *fmt, ...)
40{
41}
42
43#if 0
44#define kenter(FMT, ...) \
45 printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
46#define kleave(FMT, ...) \
47 printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
48#define kdebug(FMT, ...) \
49 printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
50#else
51#define kenter(FMT, ...) \
52 no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
53#define kleave(FMT, ...) \
54 no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
55#define kdebug(FMT, ...) \
56 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
57#endif
36 58
37#include "internal.h" 59#include "internal.h"
38 60
@@ -46,12 +68,15 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
46int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 68int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
47int heap_stack_gap = 0; 69int heap_stack_gap = 0;
48 70
71atomic_t mmap_pages_allocated;
72
49EXPORT_SYMBOL(mem_map); 73EXPORT_SYMBOL(mem_map);
50EXPORT_SYMBOL(num_physpages); 74EXPORT_SYMBOL(num_physpages);
51 75
52/* list of shareable VMAs */ 76/* list of mapped, potentially shareable regions */
53struct rb_root nommu_vma_tree = RB_ROOT; 77static struct kmem_cache *vm_region_jar;
54DECLARE_RWSEM(nommu_vma_sem); 78struct rb_root nommu_region_tree = RB_ROOT;
79DECLARE_RWSEM(nommu_region_sem);
55 80
56struct vm_operations_struct generic_file_vm_ops = { 81struct vm_operations_struct generic_file_vm_ops = {
57}; 82};
@@ -400,129 +425,174 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
400 return mm->brk = brk; 425 return mm->brk = brk;
401} 426}
402 427
403#ifdef DEBUG 428/*
404static void show_process_blocks(void) 429 * initialise the VMA and region record slabs
430 */
431void __init mmap_init(void)
405{ 432{
406 struct vm_list_struct *vml; 433 vm_region_jar = kmem_cache_create("vm_region_jar",
407 434 sizeof(struct vm_region), 0,
408 printk("Process blocks %d:", current->pid); 435 SLAB_PANIC, NULL);
409 436 vm_area_cachep = kmem_cache_create("vm_area_struct",
410 for (vml = &current->mm->context.vmlist; vml; vml = vml->next) { 437 sizeof(struct vm_area_struct), 0,
411 printk(" %p: %p", vml, vml->vma); 438 SLAB_PANIC, NULL);
412 if (vml->vma)
413 printk(" (%d @%lx #%d)",
414 kobjsize((void *) vml->vma->vm_start),
415 vml->vma->vm_start,
416 atomic_read(&vml->vma->vm_usage));
417 printk(vml->next ? " ->" : ".\n");
418 }
419} 439}
420#endif /* DEBUG */
421 440
422/* 441/*
423 * add a VMA into a process's mm_struct in the appropriate place in the list 442 * validate the region tree
424 * - should be called with mm->mmap_sem held writelocked 443 * - the caller must hold the region lock
425 */ 444 */
426static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) 445#ifdef CONFIG_DEBUG_NOMMU_REGIONS
446static noinline void validate_nommu_regions(void)
427{ 447{
428 struct vm_list_struct **ppv; 448 struct vm_region *region, *last;
449 struct rb_node *p, *lastp;
429 450
430 for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) 451 lastp = rb_first(&nommu_region_tree);
431 if ((*ppv)->vma->vm_start > vml->vma->vm_start) 452 if (!lastp)
432 break; 453 return;
454
455 last = rb_entry(lastp, struct vm_region, vm_rb);
456 if (unlikely(last->vm_end <= last->vm_start))
457 BUG();
458
459 while ((p = rb_next(lastp))) {
460 region = rb_entry(p, struct vm_region, vm_rb);
461 last = rb_entry(lastp, struct vm_region, vm_rb);
462
463 if (unlikely(region->vm_end <= region->vm_start))
464 BUG();
465 if (unlikely(region->vm_start < last->vm_end))
466 BUG();
433 467
434 vml->next = *ppv; 468 lastp = p;
435 *ppv = vml; 469 }
436} 470}
471#else
472#define validate_nommu_regions() do {} while(0)
473#endif
437 474
438/* 475/*
439 * look up the first VMA in which addr resides, NULL if none 476 * add a region into the global tree
440 * - should be called with mm->mmap_sem at least held readlocked
441 */ 477 */
442struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 478static void add_nommu_region(struct vm_region *region)
443{ 479{
444 struct vm_list_struct *loop, *vml; 480 struct vm_region *pregion;
481 struct rb_node **p, *parent;
445 482
446 /* search the vm_start ordered list */ 483 validate_nommu_regions();
447 vml = NULL; 484
448 for (loop = mm->context.vmlist; loop; loop = loop->next) { 485 BUG_ON(region->vm_start & ~PAGE_MASK);
449 if (loop->vma->vm_start > addr) 486
450 break; 487 parent = NULL;
451 vml = loop; 488 p = &nommu_region_tree.rb_node;
489 while (*p) {
490 parent = *p;
491 pregion = rb_entry(parent, struct vm_region, vm_rb);
492 if (region->vm_start < pregion->vm_start)
493 p = &(*p)->rb_left;
494 else if (region->vm_start > pregion->vm_start)
495 p = &(*p)->rb_right;
496 else if (pregion == region)
497 return;
498 else
499 BUG();
452 } 500 }
453 501
454 if (vml && vml->vma->vm_end > addr) 502 rb_link_node(&region->vm_rb, parent, p);
455 return vml->vma; 503 rb_insert_color(&region->vm_rb, &nommu_region_tree);
456 504
457 return NULL; 505 validate_nommu_regions();
458} 506}
459EXPORT_SYMBOL(find_vma);
460 507
461/* 508/*
462 * find a VMA 509 * delete a region from the global tree
463 * - we don't extend stack VMAs under NOMMU conditions
464 */ 510 */
465struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 511static void delete_nommu_region(struct vm_region *region)
466{ 512{
467 return find_vma(mm, addr); 513 BUG_ON(!nommu_region_tree.rb_node);
468}
469 514
470int expand_stack(struct vm_area_struct *vma, unsigned long address) 515 validate_nommu_regions();
471{ 516 rb_erase(&region->vm_rb, &nommu_region_tree);
472 return -ENOMEM; 517 validate_nommu_regions();
473} 518}
474 519
475/* 520/*
476 * look up the first VMA exactly that exactly matches addr 521 * free a contiguous series of pages
477 * - should be called with mm->mmap_sem at least held readlocked
478 */ 522 */
479static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 523static void free_page_series(unsigned long from, unsigned long to)
480 unsigned long addr)
481{ 524{
482 struct vm_list_struct *vml; 525 for (; from < to; from += PAGE_SIZE) {
483 526 struct page *page = virt_to_page(from);
484 /* search the vm_start ordered list */ 527
485 for (vml = mm->context.vmlist; vml; vml = vml->next) { 528 kdebug("- free %lx", from);
486 if (vml->vma->vm_start == addr) 529 atomic_dec(&mmap_pages_allocated);
487 return vml->vma; 530 if (page_count(page) != 1)
488 if (vml->vma->vm_start > addr) 531 kdebug("free page %p [%d]", page, page_count(page));
489 break; 532 put_page(page);
490 } 533 }
491
492 return NULL;
493} 534}
494 535
495/* 536/*
496 * find a VMA in the global tree 537 * release a reference to a region
538 * - the caller must hold the region semaphore, which this releases
539 * - the region may not have been added to the tree yet, in which case vm_end
540 * will equal vm_start
497 */ 541 */
498static inline struct vm_area_struct *find_nommu_vma(unsigned long start) 542static void __put_nommu_region(struct vm_region *region)
543 __releases(nommu_region_sem)
499{ 544{
500 struct vm_area_struct *vma; 545 kenter("%p{%d}", region, atomic_read(&region->vm_usage));
501 struct rb_node *n = nommu_vma_tree.rb_node;
502 546
503 while (n) { 547 BUG_ON(!nommu_region_tree.rb_node);
504 vma = rb_entry(n, struct vm_area_struct, vm_rb);
505 548
506 if (start < vma->vm_start) 549 if (atomic_dec_and_test(&region->vm_usage)) {
507 n = n->rb_left; 550 if (region->vm_end > region->vm_start)
508 else if (start > vma->vm_start) 551 delete_nommu_region(region);
509 n = n->rb_right; 552 up_write(&nommu_region_sem);
510 else 553
511 return vma; 554 if (region->vm_file)
555 fput(region->vm_file);
556
557 /* IO memory and memory shared directly out of the pagecache
558 * from ramfs/tmpfs mustn't be released here */
559 if (region->vm_flags & VM_MAPPED_COPY) {
560 kdebug("free series");
561 free_page_series(region->vm_start, region->vm_end);
562 }
563 kmem_cache_free(vm_region_jar, region);
564 } else {
565 up_write(&nommu_region_sem);
512 } 566 }
567}
513 568
514 return NULL; 569/*
570 * release a reference to a region
571 */
572static void put_nommu_region(struct vm_region *region)
573{
574 down_write(&nommu_region_sem);
575 __put_nommu_region(region);
515} 576}
516 577
517/* 578/*
518 * add a VMA in the global tree 579 * add a VMA into a process's mm_struct in the appropriate place in the list
580 * and tree and add to the address space's page tree also if not an anonymous
581 * page
582 * - should be called with mm->mmap_sem held writelocked
519 */ 583 */
520static void add_nommu_vma(struct vm_area_struct *vma) 584static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
521{ 585{
522 struct vm_area_struct *pvma; 586 struct vm_area_struct *pvma, **pp;
523 struct address_space *mapping; 587 struct address_space *mapping;
524 struct rb_node **p = &nommu_vma_tree.rb_node; 588 struct rb_node **p, *parent;
525 struct rb_node *parent = NULL; 589
590 kenter(",%p", vma);
591
592 BUG_ON(!vma->vm_region);
593
594 mm->map_count++;
595 vma->vm_mm = mm;
526 596
527 /* add the VMA to the mapping */ 597 /* add the VMA to the mapping */
528 if (vma->vm_file) { 598 if (vma->vm_file) {
@@ -533,42 +603,62 @@ static void add_nommu_vma(struct vm_area_struct *vma)
533 flush_dcache_mmap_unlock(mapping); 603 flush_dcache_mmap_unlock(mapping);
534 } 604 }
535 605
536 /* add the VMA to the master list */ 606 /* add the VMA to the tree */
607 parent = NULL;
608 p = &mm->mm_rb.rb_node;
537 while (*p) { 609 while (*p) {
538 parent = *p; 610 parent = *p;
539 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 611 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
540 612
541 if (vma->vm_start < pvma->vm_start) { 613 /* sort by: start addr, end addr, VMA struct addr in that order
614 * (the latter is necessary as we may get identical VMAs) */
615 if (vma->vm_start < pvma->vm_start)
542 p = &(*p)->rb_left; 616 p = &(*p)->rb_left;
543 } 617 else if (vma->vm_start > pvma->vm_start)
544 else if (vma->vm_start > pvma->vm_start) {
545 p = &(*p)->rb_right; 618 p = &(*p)->rb_right;
546 } 619 else if (vma->vm_end < pvma->vm_end)
547 else { 620 p = &(*p)->rb_left;
548 /* mappings are at the same address - this can only 621 else if (vma->vm_end > pvma->vm_end)
549 * happen for shared-mem chardevs and shared file 622 p = &(*p)->rb_right;
550 * mappings backed by ramfs/tmpfs */ 623 else if (vma < pvma)
551 BUG_ON(!(pvma->vm_flags & VM_SHARED)); 624 p = &(*p)->rb_left;
552 625 else if (vma > pvma)
553 if (vma < pvma) 626 p = &(*p)->rb_right;
554 p = &(*p)->rb_left; 627 else
555 else if (vma > pvma) 628 BUG();
556 p = &(*p)->rb_right;
557 else
558 BUG();
559 }
560 } 629 }
561 630
562 rb_link_node(&vma->vm_rb, parent, p); 631 rb_link_node(&vma->vm_rb, parent, p);
563 rb_insert_color(&vma->vm_rb, &nommu_vma_tree); 632 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
633
634 /* add VMA to the VMA list also */
635 for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
636 if (pvma->vm_start > vma->vm_start)
637 break;
638 if (pvma->vm_start < vma->vm_start)
639 continue;
640 if (pvma->vm_end < vma->vm_end)
641 break;
642 }
643
644 vma->vm_next = *pp;
645 *pp = vma;
564} 646}
565 647
566/* 648/*
567 * delete a VMA from the global list 649 * delete a VMA from its owning mm_struct and address space
568 */ 650 */
569static void delete_nommu_vma(struct vm_area_struct *vma) 651static void delete_vma_from_mm(struct vm_area_struct *vma)
570{ 652{
653 struct vm_area_struct **pp;
571 struct address_space *mapping; 654 struct address_space *mapping;
655 struct mm_struct *mm = vma->vm_mm;
656
657 kenter("%p", vma);
658
659 mm->map_count--;
660 if (mm->mmap_cache == vma)
661 mm->mmap_cache = NULL;
572 662
573 /* remove the VMA from the mapping */ 663 /* remove the VMA from the mapping */
574 if (vma->vm_file) { 664 if (vma->vm_file) {
@@ -579,8 +669,115 @@ static void delete_nommu_vma(struct vm_area_struct *vma)
579 flush_dcache_mmap_unlock(mapping); 669 flush_dcache_mmap_unlock(mapping);
580 } 670 }
581 671
582 /* remove from the master list */ 672 /* remove from the MM's tree and list */
583 rb_erase(&vma->vm_rb, &nommu_vma_tree); 673 rb_erase(&vma->vm_rb, &mm->mm_rb);
674 for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
675 if (*pp == vma) {
676 *pp = vma->vm_next;
677 break;
678 }
679 }
680
681 vma->vm_mm = NULL;
682}
683
684/*
685 * destroy a VMA record
686 */
687static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
688{
689 kenter("%p", vma);
690 if (vma->vm_ops && vma->vm_ops->close)
691 vma->vm_ops->close(vma);
692 if (vma->vm_file) {
693 fput(vma->vm_file);
694 if (vma->vm_flags & VM_EXECUTABLE)
695 removed_exe_file_vma(mm);
696 }
697 put_nommu_region(vma->vm_region);
698 kmem_cache_free(vm_area_cachep, vma);
699}
700
701/*
702 * look up the first VMA in which addr resides, NULL if none
703 * - should be called with mm->mmap_sem at least held readlocked
704 */
705struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
706{
707 struct vm_area_struct *vma;
708 struct rb_node *n = mm->mm_rb.rb_node;
709
710 /* check the cache first */
711 vma = mm->mmap_cache;
712 if (vma && vma->vm_start <= addr && vma->vm_end > addr)
713 return vma;
714
715 /* trawl the tree (there may be multiple mappings in which addr
716 * resides) */
717 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
718 vma = rb_entry(n, struct vm_area_struct, vm_rb);
719 if (vma->vm_start > addr)
720 return NULL;
721 if (vma->vm_end > addr) {
722 mm->mmap_cache = vma;
723 return vma;
724 }
725 }
726
727 return NULL;
728}
729EXPORT_SYMBOL(find_vma);
730
731/*
732 * find a VMA
733 * - we don't extend stack VMAs under NOMMU conditions
734 */
735struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
736{
737 return find_vma(mm, addr);
738}
739
740/*
741 * expand a stack to a given address
742 * - not supported under NOMMU conditions
743 */
744int expand_stack(struct vm_area_struct *vma, unsigned long address)
745{
746 return -ENOMEM;
747}
748
749/*
750 * look up the first VMA exactly that exactly matches addr
751 * - should be called with mm->mmap_sem at least held readlocked
752 */
753static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
754 unsigned long addr,
755 unsigned long len)
756{
757 struct vm_area_struct *vma;
758 struct rb_node *n = mm->mm_rb.rb_node;
759 unsigned long end = addr + len;
760
761 /* check the cache first */
762 vma = mm->mmap_cache;
763 if (vma && vma->vm_start == addr && vma->vm_end == end)
764 return vma;
765
766 /* trawl the tree (there may be multiple mappings in which addr
767 * resides) */
768 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
769 vma = rb_entry(n, struct vm_area_struct, vm_rb);
770 if (vma->vm_start < addr)
771 continue;
772 if (vma->vm_start > addr)
773 return NULL;
774 if (vma->vm_end == end) {
775 mm->mmap_cache = vma;
776 return vma;
777 }
778 }
779
780 return NULL;
584} 781}
585 782
586/* 783/*
@@ -595,7 +792,7 @@ static int validate_mmap_request(struct file *file,
595 unsigned long pgoff, 792 unsigned long pgoff,
596 unsigned long *_capabilities) 793 unsigned long *_capabilities)
597{ 794{
598 unsigned long capabilities; 795 unsigned long capabilities, rlen;
599 unsigned long reqprot = prot; 796 unsigned long reqprot = prot;
600 int ret; 797 int ret;
601 798
@@ -615,12 +812,12 @@ static int validate_mmap_request(struct file *file,
615 return -EINVAL; 812 return -EINVAL;
616 813
617 /* Careful about overflows.. */ 814 /* Careful about overflows.. */
618 len = PAGE_ALIGN(len); 815 rlen = PAGE_ALIGN(len);
619 if (!len || len > TASK_SIZE) 816 if (!rlen || rlen > TASK_SIZE)
620 return -ENOMEM; 817 return -ENOMEM;
621 818
622 /* offset overflow? */ 819 /* offset overflow? */
623 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) 820 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
624 return -EOVERFLOW; 821 return -EOVERFLOW;
625 822
626 if (file) { 823 if (file) {
@@ -794,9 +991,10 @@ static unsigned long determine_vm_flags(struct file *file,
794} 991}
795 992
796/* 993/*
797 * set up a shared mapping on a file 994 * set up a shared mapping on a file (the driver or filesystem provides and
995 * pins the storage)
798 */ 996 */
799static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) 997static int do_mmap_shared_file(struct vm_area_struct *vma)
800{ 998{
801 int ret; 999 int ret;
802 1000
@@ -814,10 +1012,14 @@ static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
814/* 1012/*
815 * set up a private mapping or an anonymous shared mapping 1013 * set up a private mapping or an anonymous shared mapping
816 */ 1014 */
817static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) 1015static int do_mmap_private(struct vm_area_struct *vma,
1016 struct vm_region *region,
1017 unsigned long len)
818{ 1018{
1019 struct page *pages;
1020 unsigned long total, point, n, rlen;
819 void *base; 1021 void *base;
820 int ret; 1022 int ret, order;
821 1023
822 /* invoke the file's mapping function so that it can keep track of 1024 /* invoke the file's mapping function so that it can keep track of
823 * shared mappings on devices or memory 1025 * shared mappings on devices or memory
@@ -836,23 +1038,46 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
836 * make a private copy of the data and map that instead */ 1038 * make a private copy of the data and map that instead */
837 } 1039 }
838 1040
1041 rlen = PAGE_ALIGN(len);
1042
839 /* allocate some memory to hold the mapping 1043 /* allocate some memory to hold the mapping
840 * - note that this may not return a page-aligned address if the object 1044 * - note that this may not return a page-aligned address if the object
841 * we're allocating is smaller than a page 1045 * we're allocating is smaller than a page
842 */ 1046 */
843 base = kmalloc(len, GFP_KERNEL|__GFP_COMP); 1047 order = get_order(rlen);
844 if (!base) 1048 kdebug("alloc order %d for %lx", order, len);
1049
1050 pages = alloc_pages(GFP_KERNEL, order);
1051 if (!pages)
845 goto enomem; 1052 goto enomem;
846 1053
847 vma->vm_start = (unsigned long) base; 1054 /* we allocated a power-of-2 sized page set, so we need to trim off the
848 vma->vm_end = vma->vm_start + len; 1055 * excess */
849 vma->vm_flags |= VM_MAPPED_COPY; 1056 total = 1 << order;
1057 atomic_add(total, &mmap_pages_allocated);
1058
1059 point = rlen >> PAGE_SHIFT;
1060 while (total > point) {
1061 order = ilog2(total - point);
1062 n = 1 << order;
1063 kdebug("shave %lu/%lu @%lu", n, total - point, total);
1064 atomic_sub(n, &mmap_pages_allocated);
1065 total -= n;
1066 set_page_refcounted(pages + total);
1067 __free_pages(pages + total, order);
1068 }
1069
1070 total = rlen >> PAGE_SHIFT;
1071 for (point = 1; point < total; point++)
1072 set_page_refcounted(&pages[point]);
850 1073
851#ifdef WARN_ON_SLACK 1074 base = page_address(pages);
852 if (len + WARN_ON_SLACK <= kobjsize(result)) 1075 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
853 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", 1076 region->vm_start = (unsigned long) base;
854 len, current->pid, kobjsize(result) - len); 1077 region->vm_end = region->vm_start + rlen;
855#endif 1078
1079 vma->vm_start = region->vm_start;
1080 vma->vm_end = region->vm_start + len;
856 1081
857 if (vma->vm_file) { 1082 if (vma->vm_file) {
858 /* read the contents of a file into the copy */ 1083 /* read the contents of a file into the copy */
@@ -864,26 +1089,27 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
864 1089
865 old_fs = get_fs(); 1090 old_fs = get_fs();
866 set_fs(KERNEL_DS); 1091 set_fs(KERNEL_DS);
867 ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); 1092 ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
868 set_fs(old_fs); 1093 set_fs(old_fs);
869 1094
870 if (ret < 0) 1095 if (ret < 0)
871 goto error_free; 1096 goto error_free;
872 1097
873 /* clear the last little bit */ 1098 /* clear the last little bit */
874 if (ret < len) 1099 if (ret < rlen)
875 memset(base + ret, 0, len - ret); 1100 memset(base + ret, 0, rlen - ret);
876 1101
877 } else { 1102 } else {
878 /* if it's an anonymous mapping, then just clear it */ 1103 /* if it's an anonymous mapping, then just clear it */
879 memset(base, 0, len); 1104 memset(base, 0, rlen);
880 } 1105 }
881 1106
882 return 0; 1107 return 0;
883 1108
884error_free: 1109error_free:
885 kfree(base); 1110 free_page_series(region->vm_start, region->vm_end);
886 vma->vm_start = 0; 1111 region->vm_start = vma->vm_start = 0;
1112 region->vm_end = vma->vm_end = 0;
887 return ret; 1113 return ret;
888 1114
889enomem: 1115enomem:
@@ -903,13 +1129,14 @@ unsigned long do_mmap_pgoff(struct file *file,
903 unsigned long flags, 1129 unsigned long flags,
904 unsigned long pgoff) 1130 unsigned long pgoff)
905{ 1131{
906 struct vm_list_struct *vml = NULL; 1132 struct vm_area_struct *vma;
907 struct vm_area_struct *vma = NULL; 1133 struct vm_region *region;
908 struct rb_node *rb; 1134 struct rb_node *rb;
909 unsigned long capabilities, vm_flags; 1135 unsigned long capabilities, vm_flags, result;
910 void *result;
911 int ret; 1136 int ret;
912 1137
1138 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
1139
913 if (!(flags & MAP_FIXED)) 1140 if (!(flags & MAP_FIXED))
914 addr = round_hint_to_min(addr); 1141 addr = round_hint_to_min(addr);
915 1142
@@ -917,73 +1144,120 @@ unsigned long do_mmap_pgoff(struct file *file,
917 * mapping */ 1144 * mapping */
918 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1145 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
919 &capabilities); 1146 &capabilities);
920 if (ret < 0) 1147 if (ret < 0) {
1148 kleave(" = %d [val]", ret);
921 return ret; 1149 return ret;
1150 }
922 1151
923 /* we've determined that we can make the mapping, now translate what we 1152 /* we've determined that we can make the mapping, now translate what we
924 * now know into VMA flags */ 1153 * now know into VMA flags */
925 vm_flags = determine_vm_flags(file, prot, flags, capabilities); 1154 vm_flags = determine_vm_flags(file, prot, flags, capabilities);
926 1155
927 /* we're going to need to record the mapping if it works */ 1156 /* we're going to need to record the mapping */
928 vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); 1157 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
929 if (!vml) 1158 if (!region)
930 goto error_getting_vml; 1159 goto error_getting_region;
1160
1161 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1162 if (!vma)
1163 goto error_getting_vma;
1164
1165 atomic_set(&region->vm_usage, 1);
1166 region->vm_flags = vm_flags;
1167 region->vm_pgoff = pgoff;
931 1168
932 down_write(&nommu_vma_sem); 1169 INIT_LIST_HEAD(&vma->anon_vma_node);
1170 vma->vm_flags = vm_flags;
1171 vma->vm_pgoff = pgoff;
933 1172
934 /* if we want to share, we need to check for VMAs created by other 1173 if (file) {
1174 region->vm_file = file;
1175 get_file(file);
1176 vma->vm_file = file;
1177 get_file(file);
1178 if (vm_flags & VM_EXECUTABLE) {
1179 added_exe_file_vma(current->mm);
1180 vma->vm_mm = current->mm;
1181 }
1182 }
1183
1184 down_write(&nommu_region_sem);
1185
1186 /* if we want to share, we need to check for regions created by other
935 * mmap() calls that overlap with our proposed mapping 1187 * mmap() calls that overlap with our proposed mapping
936 * - we can only share with an exact match on most regular files 1188 * - we can only share with a superset match on most regular files
937 * - shared mappings on character devices and memory backed files are 1189 * - shared mappings on character devices and memory backed files are
938 * permitted to overlap inexactly as far as we are concerned for in 1190 * permitted to overlap inexactly as far as we are concerned for in
939 * these cases, sharing is handled in the driver or filesystem rather 1191 * these cases, sharing is handled in the driver or filesystem rather
940 * than here 1192 * than here
941 */ 1193 */
942 if (vm_flags & VM_MAYSHARE) { 1194 if (vm_flags & VM_MAYSHARE) {
943 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1195 struct vm_region *pregion;
944 unsigned long vmpglen; 1196 unsigned long pglen, rpglen, pgend, rpgend, start;
945 1197
946 /* suppress VMA sharing for shared regions */ 1198 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
947 if (vm_flags & VM_SHARED && 1199 pgend = pgoff + pglen;
948 capabilities & BDI_CAP_MAP_DIRECT)
949 goto dont_share_VMAs;
950 1200
951 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { 1201 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
952 vma = rb_entry(rb, struct vm_area_struct, vm_rb); 1202 pregion = rb_entry(rb, struct vm_region, vm_rb);
953 1203
954 if (!(vma->vm_flags & VM_MAYSHARE)) 1204 if (!(pregion->vm_flags & VM_MAYSHARE))
955 continue; 1205 continue;
956 1206
957 /* search for overlapping mappings on the same file */ 1207 /* search for overlapping mappings on the same file */
958 if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) 1208 if (pregion->vm_file->f_path.dentry->d_inode !=
1209 file->f_path.dentry->d_inode)
959 continue; 1210 continue;
960 1211
961 if (vma->vm_pgoff >= pgoff + pglen) 1212 if (pregion->vm_pgoff >= pgend)
962 continue; 1213 continue;
963 1214
964 vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; 1215 rpglen = pregion->vm_end - pregion->vm_start;
965 vmpglen >>= PAGE_SHIFT; 1216 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
966 if (pgoff >= vma->vm_pgoff + vmpglen) 1217 rpgend = pregion->vm_pgoff + rpglen;
1218 if (pgoff >= rpgend)
967 continue; 1219 continue;
968 1220
969 /* handle inexactly overlapping matches between mappings */ 1221 /* handle inexactly overlapping matches between
970 if (vma->vm_pgoff != pgoff || vmpglen != pglen) { 1222 * mappings */
1223 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
1224 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
1225 /* new mapping is not a subset of the region */
971 if (!(capabilities & BDI_CAP_MAP_DIRECT)) 1226 if (!(capabilities & BDI_CAP_MAP_DIRECT))
972 goto sharing_violation; 1227 goto sharing_violation;
973 continue; 1228 continue;
974 } 1229 }
975 1230
976 /* we've found a VMA we can share */ 1231 /* we've found a region we can share */
977 atomic_inc(&vma->vm_usage); 1232 atomic_inc(&pregion->vm_usage);
978 1233 vma->vm_region = pregion;
979 vml->vma = vma; 1234 start = pregion->vm_start;
980 result = (void *) vma->vm_start; 1235 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
981 goto shared; 1236 vma->vm_start = start;
1237 vma->vm_end = start + len;
1238
1239 if (pregion->vm_flags & VM_MAPPED_COPY) {
1240 kdebug("share copy");
1241 vma->vm_flags |= VM_MAPPED_COPY;
1242 } else {
1243 kdebug("share mmap");
1244 ret = do_mmap_shared_file(vma);
1245 if (ret < 0) {
1246 vma->vm_region = NULL;
1247 vma->vm_start = 0;
1248 vma->vm_end = 0;
1249 atomic_dec(&pregion->vm_usage);
1250 pregion = NULL;
1251 goto error_just_free;
1252 }
1253 }
1254 fput(region->vm_file);
1255 kmem_cache_free(vm_region_jar, region);
1256 region = pregion;
1257 result = start;
1258 goto share;
982 } 1259 }
983 1260
984 dont_share_VMAs:
985 vma = NULL;
986
987 /* obtain the address at which to make a shared mapping 1261 /* obtain the address at which to make a shared mapping
988 * - this is the hook for quasi-memory character devices to 1262 * - this is the hook for quasi-memory character devices to
989 * tell us the location of a shared mapping 1263 * tell us the location of a shared mapping
@@ -994,102 +1268,93 @@ unsigned long do_mmap_pgoff(struct file *file,
994 if (IS_ERR((void *) addr)) { 1268 if (IS_ERR((void *) addr)) {
995 ret = addr; 1269 ret = addr;
996 if (ret != (unsigned long) -ENOSYS) 1270 if (ret != (unsigned long) -ENOSYS)
997 goto error; 1271 goto error_just_free;
998 1272
999 /* the driver refused to tell us where to site 1273 /* the driver refused to tell us where to site
1000 * the mapping so we'll have to attempt to copy 1274 * the mapping so we'll have to attempt to copy
1001 * it */ 1275 * it */
1002 ret = (unsigned long) -ENODEV; 1276 ret = (unsigned long) -ENODEV;
1003 if (!(capabilities & BDI_CAP_MAP_COPY)) 1277 if (!(capabilities & BDI_CAP_MAP_COPY))
1004 goto error; 1278 goto error_just_free;
1005 1279
1006 capabilities &= ~BDI_CAP_MAP_DIRECT; 1280 capabilities &= ~BDI_CAP_MAP_DIRECT;
1281 } else {
1282 vma->vm_start = region->vm_start = addr;
1283 vma->vm_end = region->vm_end = addr + len;
1007 } 1284 }
1008 } 1285 }
1009 } 1286 }
1010 1287
1011 /* we're going to need a VMA struct as well */ 1288 vma->vm_region = region;
1012 vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
1013 if (!vma)
1014 goto error_getting_vma;
1015
1016 INIT_LIST_HEAD(&vma->anon_vma_node);
1017 atomic_set(&vma->vm_usage, 1);
1018 if (file) {
1019 get_file(file);
1020 if (vm_flags & VM_EXECUTABLE) {
1021 added_exe_file_vma(current->mm);
1022 vma->vm_mm = current->mm;
1023 }
1024 }
1025 vma->vm_file = file;
1026 vma->vm_flags = vm_flags;
1027 vma->vm_start = addr;
1028 vma->vm_end = addr + len;
1029 vma->vm_pgoff = pgoff;
1030
1031 vml->vma = vma;
1032 1289
1033 /* set up the mapping */ 1290 /* set up the mapping */
1034 if (file && vma->vm_flags & VM_SHARED) 1291 if (file && vma->vm_flags & VM_SHARED)
1035 ret = do_mmap_shared_file(vma, len); 1292 ret = do_mmap_shared_file(vma);
1036 else 1293 else
1037 ret = do_mmap_private(vma, len); 1294 ret = do_mmap_private(vma, region, len);
1038 if (ret < 0) 1295 if (ret < 0)
1039 goto error; 1296 goto error_put_region;
1297
1298 add_nommu_region(region);
1040 1299
1041 /* okay... we have a mapping; now we have to register it */ 1300 /* okay... we have a mapping; now we have to register it */
1042 result = (void *) vma->vm_start; 1301 result = vma->vm_start;
1043 1302
1044 current->mm->total_vm += len >> PAGE_SHIFT; 1303 current->mm->total_vm += len >> PAGE_SHIFT;
1045 1304
1046 add_nommu_vma(vma); 1305share:
1306 add_vma_to_mm(current->mm, vma);
1047 1307
1048 shared: 1308 up_write(&nommu_region_sem);
1049 add_vma_to_mm(current->mm, vml);
1050
1051 up_write(&nommu_vma_sem);
1052 1309
1053 if (prot & PROT_EXEC) 1310 if (prot & PROT_EXEC)
1054 flush_icache_range((unsigned long) result, 1311 flush_icache_range(result, result + len);
1055 (unsigned long) result + len);
1056 1312
1057#ifdef DEBUG 1313 kleave(" = %lx", result);
1058 printk("do_mmap:\n"); 1314 return result;
1059 show_process_blocks();
1060#endif
1061 1315
1062 return (unsigned long) result; 1316error_put_region:
1063 1317 __put_nommu_region(region);
1064 error:
1065 up_write(&nommu_vma_sem);
1066 kfree(vml);
1067 if (vma) { 1318 if (vma) {
1068 if (vma->vm_file) { 1319 if (vma->vm_file) {
1069 fput(vma->vm_file); 1320 fput(vma->vm_file);
1070 if (vma->vm_flags & VM_EXECUTABLE) 1321 if (vma->vm_flags & VM_EXECUTABLE)
1071 removed_exe_file_vma(vma->vm_mm); 1322 removed_exe_file_vma(vma->vm_mm);
1072 } 1323 }
1073 kfree(vma); 1324 kmem_cache_free(vm_area_cachep, vma);
1074 } 1325 }
1326 kleave(" = %d [pr]", ret);
1075 return ret; 1327 return ret;
1076 1328
1077 sharing_violation: 1329error_just_free:
1078 up_write(&nommu_vma_sem); 1330 up_write(&nommu_region_sem);
1079 printk("Attempt to share mismatched mappings\n"); 1331error:
1080 kfree(vml); 1332 fput(region->vm_file);
1081 return -EINVAL; 1333 kmem_cache_free(vm_region_jar, region);
1334 fput(vma->vm_file);
1335 if (vma->vm_flags & VM_EXECUTABLE)
1336 removed_exe_file_vma(vma->vm_mm);
1337 kmem_cache_free(vm_area_cachep, vma);
1338 kleave(" = %d", ret);
1339 return ret;
1340
1341sharing_violation:
1342 up_write(&nommu_region_sem);
1343 printk(KERN_WARNING "Attempt to share mismatched mappings\n");
1344 ret = -EINVAL;
1345 goto error;
1082 1346
1083 error_getting_vma: 1347error_getting_vma:
1084 up_write(&nommu_vma_sem); 1348 kmem_cache_free(vm_region_jar, region);
1085 kfree(vml); 1349 printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
1086 printk("Allocation of vma for %lu byte allocation from process %d failed\n", 1350 " from process %d failed\n",
1087 len, current->pid); 1351 len, current->pid);
1088 show_free_areas(); 1352 show_free_areas();
1089 return -ENOMEM; 1353 return -ENOMEM;
1090 1354
1091 error_getting_vml: 1355error_getting_region:
1092 printk("Allocation of vml for %lu byte allocation from process %d failed\n", 1356 printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
1357 " from process %d failed\n",
1093 len, current->pid); 1358 len, current->pid);
1094 show_free_areas(); 1359 show_free_areas();
1095 return -ENOMEM; 1360 return -ENOMEM;
@@ -1097,77 +1362,180 @@ unsigned long do_mmap_pgoff(struct file *file,
1097EXPORT_SYMBOL(do_mmap_pgoff); 1362EXPORT_SYMBOL(do_mmap_pgoff);
1098 1363
1099/* 1364/*
1100 * handle mapping disposal for uClinux 1365 * split a vma into two pieces at address 'addr', a new vma is allocated either
1366 * for the first part or the tail.
1101 */ 1367 */
1102static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) 1368int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
1369 unsigned long addr, int new_below)
1103{ 1370{
1104 if (vma) { 1371 struct vm_area_struct *new;
1105 down_write(&nommu_vma_sem); 1372 struct vm_region *region;
1373 unsigned long npages;
1106 1374
1107 if (atomic_dec_and_test(&vma->vm_usage)) { 1375 kenter("");
1108 delete_nommu_vma(vma);
1109 1376
1110 if (vma->vm_ops && vma->vm_ops->close) 1377 /* we're only permitted to split anonymous regions that have a single
1111 vma->vm_ops->close(vma); 1378 * owner */
1379 if (vma->vm_file ||
1380 atomic_read(&vma->vm_region->vm_usage) != 1)
1381 return -ENOMEM;
1112 1382
1113 /* IO memory and memory shared directly out of the pagecache from 1383 if (mm->map_count >= sysctl_max_map_count)
1114 * ramfs/tmpfs mustn't be released here */ 1384 return -ENOMEM;
1115 if (vma->vm_flags & VM_MAPPED_COPY)
1116 kfree((void *) vma->vm_start);
1117 1385
1118 if (vma->vm_file) { 1386 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
1119 fput(vma->vm_file); 1387 if (!region)
1120 if (vma->vm_flags & VM_EXECUTABLE) 1388 return -ENOMEM;
1121 removed_exe_file_vma(mm); 1389
1122 } 1390 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1123 kfree(vma); 1391 if (!new) {
1124 } 1392 kmem_cache_free(vm_region_jar, region);
1393 return -ENOMEM;
1394 }
1395
1396 /* most fields are the same, copy all, and then fixup */
1397 *new = *vma;
1398 *region = *vma->vm_region;
1399 new->vm_region = region;
1400
1401 npages = (addr - vma->vm_start) >> PAGE_SHIFT;
1402
1403 if (new_below) {
1404 region->vm_end = new->vm_end = addr;
1405 } else {
1406 region->vm_start = new->vm_start = addr;
1407 region->vm_pgoff = new->vm_pgoff += npages;
1408 }
1125 1409
1126 up_write(&nommu_vma_sem); 1410 if (new->vm_ops && new->vm_ops->open)
1411 new->vm_ops->open(new);
1412
1413 delete_vma_from_mm(vma);
1414 down_write(&nommu_region_sem);
1415 delete_nommu_region(vma->vm_region);
1416 if (new_below) {
1417 vma->vm_region->vm_start = vma->vm_start = addr;
1418 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
1419 } else {
1420 vma->vm_region->vm_end = vma->vm_end = addr;
1127 } 1421 }
1422 add_nommu_region(vma->vm_region);
1423 add_nommu_region(new->vm_region);
1424 up_write(&nommu_region_sem);
1425 add_vma_to_mm(mm, vma);
1426 add_vma_to_mm(mm, new);
1427 return 0;
1128} 1428}
1129 1429
1130/* 1430/*
1131 * release a mapping 1431 * shrink a VMA by removing the specified chunk from either the beginning or
1132 * - under NOMMU conditions the parameters must match exactly to the mapping to 1432 * the end
1133 * be removed
1134 */ 1433 */
1135int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) 1434static int shrink_vma(struct mm_struct *mm,
1435 struct vm_area_struct *vma,
1436 unsigned long from, unsigned long to)
1136{ 1437{
1137 struct vm_list_struct *vml, **parent; 1438 struct vm_region *region;
1138 unsigned long end = addr + len;
1139 1439
1140#ifdef DEBUG 1440 kenter("");
1141 printk("do_munmap:\n");
1142#endif
1143 1441
1144 for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { 1442 /* adjust the VMA's pointers, which may reposition it in the MM's tree
1145 if ((*parent)->vma->vm_start > addr) 1443 * and list */
1146 break; 1444 delete_vma_from_mm(vma);
1147 if ((*parent)->vma->vm_start == addr && 1445 if (from > vma->vm_start)
1148 ((len == 0) || ((*parent)->vma->vm_end == end))) 1446 vma->vm_end = from;
1149 goto found; 1447 else
1150 } 1448 vma->vm_start = to;
1449 add_vma_to_mm(mm, vma);
1151 1450
1152 printk("munmap of non-mmaped memory by process %d (%s): %p\n", 1451 /* cut the backing region down to size */
1153 current->pid, current->comm, (void *) addr); 1452 region = vma->vm_region;
1154 return -EINVAL; 1453 BUG_ON(atomic_read(&region->vm_usage) != 1);
1155 1454
1156 found: 1455 down_write(&nommu_region_sem);
1157 vml = *parent; 1456 delete_nommu_region(region);
1457 if (from > region->vm_start)
1458 region->vm_end = from;
1459 else
1460 region->vm_start = to;
1461 add_nommu_region(region);
1462 up_write(&nommu_region_sem);
1158 1463
1159 put_vma(mm, vml->vma); 1464 free_page_series(from, to);
1465 return 0;
1466}
1160 1467
1161 *parent = vml->next; 1468/*
1162 kfree(vml); 1469 * release a mapping
1470 * - under NOMMU conditions the chunk to be unmapped must be backed by a single
1471 * VMA, though it need not cover the whole VMA
1472 */
1473int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1474{
1475 struct vm_area_struct *vma;
1476 struct rb_node *rb;
1477 unsigned long end = start + len;
1478 int ret;
1163 1479
1164 update_hiwater_vm(mm); 1480 kenter(",%lx,%zx", start, len);
1165 mm->total_vm -= len >> PAGE_SHIFT;
1166 1481
1167#ifdef DEBUG 1482 if (len == 0)
1168 show_process_blocks(); 1483 return -EINVAL;
1169#endif 1484
1485 /* find the first potentially overlapping VMA */
1486 vma = find_vma(mm, start);
1487 if (!vma) {
1488 printk(KERN_WARNING
1489 "munmap of memory not mmapped by process %d (%s):"
1490 " 0x%lx-0x%lx\n",
1491 current->pid, current->comm, start, start + len - 1);
1492 return -EINVAL;
1493 }
1170 1494
1495 /* we're allowed to split an anonymous VMA but not a file-backed one */
1496 if (vma->vm_file) {
1497 do {
1498 if (start > vma->vm_start) {
1499 kleave(" = -EINVAL [miss]");
1500 return -EINVAL;
1501 }
1502 if (end == vma->vm_end)
1503 goto erase_whole_vma;
1504 rb = rb_next(&vma->vm_rb);
1505 vma = rb_entry(rb, struct vm_area_struct, vm_rb);
1506 } while (rb);
1507 kleave(" = -EINVAL [split file]");
1508 return -EINVAL;
1509 } else {
1510 /* the chunk must be a subset of the VMA found */
1511 if (start == vma->vm_start && end == vma->vm_end)
1512 goto erase_whole_vma;
1513 if (start < vma->vm_start || end > vma->vm_end) {
1514 kleave(" = -EINVAL [superset]");
1515 return -EINVAL;
1516 }
1517 if (start & ~PAGE_MASK) {
1518 kleave(" = -EINVAL [unaligned start]");
1519 return -EINVAL;
1520 }
1521 if (end != vma->vm_end && end & ~PAGE_MASK) {
1522 kleave(" = -EINVAL [unaligned split]");
1523 return -EINVAL;
1524 }
1525 if (start != vma->vm_start && end != vma->vm_end) {
1526 ret = split_vma(mm, vma, start, 1);
1527 if (ret < 0) {
1528 kleave(" = %d [split]", ret);
1529 return ret;
1530 }
1531 }
1532 return shrink_vma(mm, vma, start, end);
1533 }
1534
1535erase_whole_vma:
1536 delete_vma_from_mm(vma);
1537 delete_vma(mm, vma);
1538 kleave(" = 0");
1171 return 0; 1539 return 0;
1172} 1540}
1173EXPORT_SYMBOL(do_munmap); 1541EXPORT_SYMBOL(do_munmap);
@@ -1184,29 +1552,26 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
1184} 1552}
1185 1553
1186/* 1554/*
1187 * Release all mappings 1555 * release all the mappings made in a process's VM space
1188 */ 1556 */
1189void exit_mmap(struct mm_struct * mm) 1557void exit_mmap(struct mm_struct *mm)
1190{ 1558{
1191 struct vm_list_struct *tmp; 1559 struct vm_area_struct *vma;
1192 1560
1193 if (mm) { 1561 if (!mm)
1194#ifdef DEBUG 1562 return;
1195 printk("Exit_mmap:\n");
1196#endif
1197 1563
1198 mm->total_vm = 0; 1564 kenter("");
1199 1565
1200 while ((tmp = mm->context.vmlist)) { 1566 mm->total_vm = 0;
1201 mm->context.vmlist = tmp->next;
1202 put_vma(mm, tmp->vma);
1203 kfree(tmp);
1204 }
1205 1567
1206#ifdef DEBUG 1568 while ((vma = mm->mmap)) {
1207 show_process_blocks(); 1569 mm->mmap = vma->vm_next;
1208#endif 1570 delete_vma_from_mm(vma);
1571 delete_vma(mm, vma);
1209 } 1572 }
1573
1574 kleave("");
1210} 1575}
1211 1576
1212unsigned long do_brk(unsigned long addr, unsigned long len) 1577unsigned long do_brk(unsigned long addr, unsigned long len)
@@ -1219,8 +1584,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
1219 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1584 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1220 * 1585 *
1221 * under NOMMU conditions, we only permit changing a mapping's size, and only 1586 * under NOMMU conditions, we only permit changing a mapping's size, and only
1222 * as long as it stays within the hole allocated by the kmalloc() call in 1587 * as long as it stays within the region allocated by do_mmap_private() and the
1223 * do_mmap_pgoff() and the block is not shareable 1588 * block is not shareable
1224 * 1589 *
1225 * MREMAP_FIXED is not supported under NOMMU conditions 1590 * MREMAP_FIXED is not supported under NOMMU conditions
1226 */ 1591 */
@@ -1231,13 +1596,16 @@ unsigned long do_mremap(unsigned long addr,
1231 struct vm_area_struct *vma; 1596 struct vm_area_struct *vma;
1232 1597
1233 /* insanity checks first */ 1598 /* insanity checks first */
1234 if (new_len == 0) 1599 if (old_len == 0 || new_len == 0)
1235 return (unsigned long) -EINVAL; 1600 return (unsigned long) -EINVAL;
1236 1601
1602 if (addr & ~PAGE_MASK)
1603 return -EINVAL;
1604
1237 if (flags & MREMAP_FIXED && new_addr != addr) 1605 if (flags & MREMAP_FIXED && new_addr != addr)
1238 return (unsigned long) -EINVAL; 1606 return (unsigned long) -EINVAL;
1239 1607
1240 vma = find_vma_exact(current->mm, addr); 1608 vma = find_vma_exact(current->mm, addr, old_len);
1241 if (!vma) 1609 if (!vma)
1242 return (unsigned long) -EINVAL; 1610 return (unsigned long) -EINVAL;
1243 1611
@@ -1247,19 +1615,19 @@ unsigned long do_mremap(unsigned long addr,
1247 if (vma->vm_flags & VM_MAYSHARE) 1615 if (vma->vm_flags & VM_MAYSHARE)
1248 return (unsigned long) -EPERM; 1616 return (unsigned long) -EPERM;
1249 1617
1250 if (new_len > kobjsize((void *) addr)) 1618 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
1251 return (unsigned long) -ENOMEM; 1619 return (unsigned long) -ENOMEM;
1252 1620
1253 /* all checks complete - do it */ 1621 /* all checks complete - do it */
1254 vma->vm_end = vma->vm_start + new_len; 1622 vma->vm_end = vma->vm_start + new_len;
1255
1256 return vma->vm_start; 1623 return vma->vm_start;
1257} 1624}
1258EXPORT_SYMBOL(do_mremap); 1625EXPORT_SYMBOL(do_mremap);
1259 1626
1260asmlinkage unsigned long sys_mremap(unsigned long addr, 1627asmlinkage
1261 unsigned long old_len, unsigned long new_len, 1628unsigned long sys_mremap(unsigned long addr,
1262 unsigned long flags, unsigned long new_addr) 1629 unsigned long old_len, unsigned long new_len,
1630 unsigned long flags, unsigned long new_addr)
1263{ 1631{
1264 unsigned long ret; 1632 unsigned long ret;
1265 1633