diff options
Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r-- | fs/proc/task_mmu.c | 175 |
1 files changed, 98 insertions, 77 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 88717c0f941b..c492449f3b45 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -315,9 +315,9 @@ struct mem_size_stats { | |||
315 | }; | 315 | }; |
316 | 316 | ||
317 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 317 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
318 | void *private) | 318 | struct mm_walk *walk) |
319 | { | 319 | { |
320 | struct mem_size_stats *mss = private; | 320 | struct mem_size_stats *mss = walk->private; |
321 | struct vm_area_struct *vma = mss->vma; | 321 | struct vm_area_struct *vma = mss->vma; |
322 | pte_t *pte, ptent; | 322 | pte_t *pte, ptent; |
323 | spinlock_t *ptl; | 323 | spinlock_t *ptl; |
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
365 | return 0; | 365 | return 0; |
366 | } | 366 | } |
367 | 367 | ||
368 | static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; | ||
369 | |||
370 | static int show_smap(struct seq_file *m, void *v) | 368 | static int show_smap(struct seq_file *m, void *v) |
371 | { | 369 | { |
372 | struct vm_area_struct *vma = v; | 370 | struct vm_area_struct *vma = v; |
373 | struct mem_size_stats mss; | 371 | struct mem_size_stats mss; |
374 | int ret; | 372 | int ret; |
373 | struct mm_walk smaps_walk = { | ||
374 | .pmd_entry = smaps_pte_range, | ||
375 | .mm = vma->vm_mm, | ||
376 | .private = &mss, | ||
377 | }; | ||
375 | 378 | ||
376 | memset(&mss, 0, sizeof mss); | 379 | memset(&mss, 0, sizeof mss); |
377 | mss.vma = vma; | 380 | mss.vma = vma; |
378 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 381 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
379 | walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, | 382 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
380 | &smaps_walk, &mss); | ||
381 | 383 | ||
382 | ret = show_map(m, v); | 384 | ret = show_map(m, v); |
383 | if (ret) | 385 | if (ret) |
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = { | |||
426 | }; | 428 | }; |
427 | 429 | ||
428 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 430 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
429 | unsigned long end, void *private) | 431 | unsigned long end, struct mm_walk *walk) |
430 | { | 432 | { |
431 | struct vm_area_struct *vma = private; | 433 | struct vm_area_struct *vma = walk->private; |
432 | pte_t *pte, ptent; | 434 | pte_t *pte, ptent; |
433 | spinlock_t *ptl; | 435 | spinlock_t *ptl; |
434 | struct page *page; | 436 | struct page *page; |
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
452 | return 0; | 454 | return 0; |
453 | } | 455 | } |
454 | 456 | ||
455 | static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; | ||
456 | |||
457 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 457 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
458 | size_t count, loff_t *ppos) | 458 | size_t count, loff_t *ppos) |
459 | { | 459 | { |
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
476 | return -ESRCH; | 476 | return -ESRCH; |
477 | mm = get_task_mm(task); | 477 | mm = get_task_mm(task); |
478 | if (mm) { | 478 | if (mm) { |
479 | struct mm_walk clear_refs_walk = { | ||
480 | .pmd_entry = clear_refs_pte_range, | ||
481 | .mm = mm, | ||
482 | }; | ||
479 | down_read(&mm->mmap_sem); | 483 | down_read(&mm->mmap_sem); |
480 | for (vma = mm->mmap; vma; vma = vma->vm_next) | 484 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
485 | clear_refs_walk.private = vma; | ||
481 | if (!is_vm_hugetlb_page(vma)) | 486 | if (!is_vm_hugetlb_page(vma)) |
482 | walk_page_range(mm, vma->vm_start, vma->vm_end, | 487 | walk_page_range(vma->vm_start, vma->vm_end, |
483 | &clear_refs_walk, vma); | 488 | &clear_refs_walk); |
489 | } | ||
484 | flush_tlb_mm(mm); | 490 | flush_tlb_mm(mm); |
485 | up_read(&mm->mmap_sem); | 491 | up_read(&mm->mmap_sem); |
486 | mmput(mm); | 492 | mmput(mm); |
@@ -496,7 +502,7 @@ const struct file_operations proc_clear_refs_operations = { | |||
496 | }; | 502 | }; |
497 | 503 | ||
498 | struct pagemapread { | 504 | struct pagemapread { |
499 | char __user *out, *end; | 505 | u64 __user *out, *end; |
500 | }; | 506 | }; |
501 | 507 | ||
502 | #define PM_ENTRY_BYTES sizeof(u64) | 508 | #define PM_ENTRY_BYTES sizeof(u64) |
@@ -519,28 +525,18 @@ struct pagemapread { | |||
519 | static int add_to_pagemap(unsigned long addr, u64 pfn, | 525 | static int add_to_pagemap(unsigned long addr, u64 pfn, |
520 | struct pagemapread *pm) | 526 | struct pagemapread *pm) |
521 | { | 527 | { |
522 | /* | ||
523 | * Make sure there's room in the buffer for an | ||
524 | * entire entry. Otherwise, only copy part of | ||
525 | * the pfn. | ||
526 | */ | ||
527 | if (pm->out + PM_ENTRY_BYTES >= pm->end) { | ||
528 | if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) | ||
529 | return -EFAULT; | ||
530 | pm->out = pm->end; | ||
531 | return PM_END_OF_BUFFER; | ||
532 | } | ||
533 | |||
534 | if (put_user(pfn, pm->out)) | 528 | if (put_user(pfn, pm->out)) |
535 | return -EFAULT; | 529 | return -EFAULT; |
536 | pm->out += PM_ENTRY_BYTES; | 530 | pm->out++; |
531 | if (pm->out >= pm->end) | ||
532 | return PM_END_OF_BUFFER; | ||
537 | return 0; | 533 | return 0; |
538 | } | 534 | } |
539 | 535 | ||
540 | static int pagemap_pte_hole(unsigned long start, unsigned long end, | 536 | static int pagemap_pte_hole(unsigned long start, unsigned long end, |
541 | void *private) | 537 | struct mm_walk *walk) |
542 | { | 538 | { |
543 | struct pagemapread *pm = private; | 539 | struct pagemapread *pm = walk->private; |
544 | unsigned long addr; | 540 | unsigned long addr; |
545 | int err = 0; | 541 | int err = 0; |
546 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 542 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
@@ -557,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) | |||
557 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | 553 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
558 | } | 554 | } |
559 | 555 | ||
556 | static unsigned long pte_to_pagemap_entry(pte_t pte) | ||
557 | { | ||
558 | unsigned long pme = 0; | ||
559 | if (is_swap_pte(pte)) | ||
560 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | ||
561 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | ||
562 | else if (pte_present(pte)) | ||
563 | pme = PM_PFRAME(pte_pfn(pte)) | ||
564 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | ||
565 | return pme; | ||
566 | } | ||
567 | |||
560 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 568 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
561 | void *private) | 569 | struct mm_walk *walk) |
562 | { | 570 | { |
563 | struct pagemapread *pm = private; | 571 | struct vm_area_struct *vma; |
572 | struct pagemapread *pm = walk->private; | ||
564 | pte_t *pte; | 573 | pte_t *pte; |
565 | int err = 0; | 574 | int err = 0; |
566 | 575 | ||
576 | /* find the first VMA at or above 'addr' */ | ||
577 | vma = find_vma(walk->mm, addr); | ||
567 | for (; addr != end; addr += PAGE_SIZE) { | 578 | for (; addr != end; addr += PAGE_SIZE) { |
568 | u64 pfn = PM_NOT_PRESENT; | 579 | u64 pfn = PM_NOT_PRESENT; |
569 | pte = pte_offset_map(pmd, addr); | 580 | |
570 | if (is_swap_pte(*pte)) | 581 | /* check to see if we've left 'vma' behind |
571 | pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) | 582 | * and need a new, higher one */ |
572 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | 583 | if (vma && (addr >= vma->vm_end)) |
573 | else if (pte_present(*pte)) | 584 | vma = find_vma(walk->mm, addr); |
574 | pfn = PM_PFRAME(pte_pfn(*pte)) | 585 | |
575 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 586 | /* check that 'vma' actually covers this address, |
576 | /* unmap so we're not in atomic when we copy to userspace */ | 587 | * and that it isn't a huge page vma */ |
577 | pte_unmap(pte); | 588 | if (vma && (vma->vm_start <= addr) && |
589 | !is_vm_hugetlb_page(vma)) { | ||
590 | pte = pte_offset_map(pmd, addr); | ||
591 | pfn = pte_to_pagemap_entry(*pte); | ||
592 | /* unmap before userspace copy */ | ||
593 | pte_unmap(pte); | ||
594 | } | ||
578 | err = add_to_pagemap(addr, pfn, pm); | 595 | err = add_to_pagemap(addr, pfn, pm); |
579 | if (err) | 596 | if (err) |
580 | return err; | 597 | return err; |
@@ -585,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
585 | return err; | 602 | return err; |
586 | } | 603 | } |
587 | 604 | ||
588 | static struct mm_walk pagemap_walk = { | ||
589 | .pmd_entry = pagemap_pte_range, | ||
590 | .pte_hole = pagemap_pte_hole | ||
591 | }; | ||
592 | |||
593 | /* | 605 | /* |
594 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 606 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
595 | * | 607 | * |
@@ -624,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
624 | struct pagemapread pm; | 636 | struct pagemapread pm; |
625 | int pagecount; | 637 | int pagecount; |
626 | int ret = -ESRCH; | 638 | int ret = -ESRCH; |
639 | struct mm_walk pagemap_walk; | ||
640 | unsigned long src; | ||
641 | unsigned long svpfn; | ||
642 | unsigned long start_vaddr; | ||
643 | unsigned long end_vaddr; | ||
627 | 644 | ||
628 | if (!task) | 645 | if (!task) |
629 | goto out; | 646 | goto out; |
@@ -634,7 +651,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
634 | 651 | ||
635 | ret = -EINVAL; | 652 | ret = -EINVAL; |
636 | /* file position must be aligned */ | 653 | /* file position must be aligned */ |
637 | if (*ppos % PM_ENTRY_BYTES) | 654 | if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) |
638 | goto out_task; | 655 | goto out_task; |
639 | 656 | ||
640 | ret = 0; | 657 | ret = 0; |
@@ -642,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
642 | if (!mm) | 659 | if (!mm) |
643 | goto out_task; | 660 | goto out_task; |
644 | 661 | ||
645 | ret = -ENOMEM; | 662 | |
646 | uaddr = (unsigned long)buf & PAGE_MASK; | 663 | uaddr = (unsigned long)buf & PAGE_MASK; |
647 | uend = (unsigned long)(buf + count); | 664 | uend = (unsigned long)(buf + count); |
648 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | 665 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; |
649 | pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); | 666 | ret = 0; |
667 | if (pagecount == 0) | ||
668 | goto out_mm; | ||
669 | pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); | ||
670 | ret = -ENOMEM; | ||
650 | if (!pages) | 671 | if (!pages) |
651 | goto out_mm; | 672 | goto out_mm; |
652 | 673 | ||
@@ -664,36 +685,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
664 | goto out_pages; | 685 | goto out_pages; |
665 | } | 686 | } |
666 | 687 | ||
667 | pm.out = buf; | 688 | pm.out = (u64 *)buf; |
668 | pm.end = buf + count; | 689 | pm.end = (u64 *)(buf + count); |
669 | 690 | ||
670 | if (!ptrace_may_attach(task)) { | 691 | pagemap_walk.pmd_entry = pagemap_pte_range; |
671 | ret = -EIO; | 692 | pagemap_walk.pte_hole = pagemap_pte_hole; |
672 | } else { | 693 | pagemap_walk.mm = mm; |
673 | unsigned long src = *ppos; | 694 | pagemap_walk.private = ± |
674 | unsigned long svpfn = src / PM_ENTRY_BYTES; | 695 | |
675 | unsigned long start_vaddr = svpfn << PAGE_SHIFT; | 696 | src = *ppos; |
676 | unsigned long end_vaddr = TASK_SIZE_OF(task); | 697 | svpfn = src / PM_ENTRY_BYTES; |
677 | 698 | start_vaddr = svpfn << PAGE_SHIFT; | |
678 | /* watch out for wraparound */ | 699 | end_vaddr = TASK_SIZE_OF(task); |
679 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | 700 | |
680 | start_vaddr = end_vaddr; | 701 | /* watch out for wraparound */ |
681 | 702 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | |
682 | /* | 703 | start_vaddr = end_vaddr; |
683 | * The odds are that this will stop walking way | 704 | |
684 | * before end_vaddr, because the length of the | 705 | /* |
685 | * user buffer is tracked in "pm", and the walk | 706 | * The odds are that this will stop walking way |
686 | * will stop when we hit the end of the buffer. | 707 | * before end_vaddr, because the length of the |
687 | */ | 708 | * user buffer is tracked in "pm", and the walk |
688 | ret = walk_page_range(mm, start_vaddr, end_vaddr, | 709 | * will stop when we hit the end of the buffer. |
689 | &pagemap_walk, &pm); | 710 | */ |
690 | if (ret == PM_END_OF_BUFFER) | 711 | ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); |
691 | ret = 0; | 712 | if (ret == PM_END_OF_BUFFER) |
692 | /* don't need mmap_sem for these, but this looks cleaner */ | 713 | ret = 0; |
693 | *ppos += pm.out - buf; | 714 | /* don't need mmap_sem for these, but this looks cleaner */ |
694 | if (!ret) | 715 | *ppos += (char *)pm.out - buf; |
695 | ret = pm.out - buf; | 716 | if (!ret) |
696 | } | 717 | ret = (char *)pm.out - buf; |
697 | 718 | ||
698 | out_pages: | 719 | out_pages: |
699 | for (; pagecount; pagecount--) { | 720 | for (; pagecount; pagecount--) { |