diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-17 21:40:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-17 21:40:35 -0400 |
commit | ec0afc9311adcfb10b90e547c23250f63939f990 (patch) | |
tree | 2093d2668898a8a03f30acbfd5568e65b8c086b9 /mm | |
parent | 804f18536984939622ddca60ab6b25743e0ec68d (diff) | |
parent | 776e58ea3d3735f85678155398241d2513afa67a (diff) |
Merge branch 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (55 commits)
KVM: unbreak userspace that does not sets tss address
KVM: MMU: cleanup pte write path
KVM: MMU: introduce a common function to get no-dirty-logged slot
KVM: fix rcu usage in init_rmode_* functions
KVM: fix kvmclock regression due to missing clock update
KVM: emulator: Fix permission checking in io permission bitmap
KVM: emulator: Fix io permission checking for 64bit guest
KVM: SVM: Load %gs earlier if CONFIG_X86_32_LAZY_GS=n
KVM: x86: Remove useless regs_page pointer from kvm_lapic
KVM: improve comment on rcu use in irqfd_deassign
KVM: MMU: remove unused macros
KVM: MMU: cleanup page alloc and free
KVM: MMU: do not record gfn in kvm_mmu_pte_write
KVM: MMU: move mmu pages calculated out of mmu lock
KVM: MMU: set spte accessed bit properly
KVM: MMU: fix kvm_mmu_slot_remove_write_access dropping intermediate W bits
KVM: Start lock documentation
KVM: better readability of efer_reserved_bits
KVM: Clear async page fault hash after switching to real mode
KVM: VMX: Initialize vm86 TSS only once.
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/internal.h | 5 |
-rw-r--r-- | mm/memory-failure.c | 32 |
-rw-r--r-- | mm/memory.c | 63 |
3 files changed, 60 insertions(+), 40 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h index 69488205723d..3438dd43a062 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -245,11 +245,6 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, | |||
245 | } | 245 | } |
246 | #endif /* CONFIG_SPARSEMEM */ | 246 | #endif /* CONFIG_SPARSEMEM */ |
247 | 247 | ||
248 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
249 | unsigned long start, int len, unsigned int foll_flags, | ||
250 | struct page **pages, struct vm_area_struct **vmas, | ||
251 | int *nonblocking); | ||
252 | |||
253 | #define ZONE_RECLAIM_NOSCAN -2 | 248 | #define ZONE_RECLAIM_NOSCAN -2 |
254 | #define ZONE_RECLAIM_FULL -1 | 249 | #define ZONE_RECLAIM_FULL -1 |
255 | #define ZONE_RECLAIM_SOME 0 | 250 | #define ZONE_RECLAIM_SOME 0 |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0207c2f6f8bd..99ccb4472623 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -1487,35 +1487,3 @@ done: | |||
1487 | /* keep elevated page count for bad page */ | 1487 | /* keep elevated page count for bad page */ |
1488 | return ret; | 1488 | return ret; |
1489 | } | 1489 | } |
1490 | |||
1491 | /* | ||
1492 | * The caller must hold current->mm->mmap_sem in read mode. | ||
1493 | */ | ||
1494 | int is_hwpoison_address(unsigned long addr) | ||
1495 | { | ||
1496 | pgd_t *pgdp; | ||
1497 | pud_t pud, *pudp; | ||
1498 | pmd_t pmd, *pmdp; | ||
1499 | pte_t pte, *ptep; | ||
1500 | swp_entry_t entry; | ||
1501 | |||
1502 | pgdp = pgd_offset(current->mm, addr); | ||
1503 | if (!pgd_present(*pgdp)) | ||
1504 | return 0; | ||
1505 | pudp = pud_offset(pgdp, addr); | ||
1506 | pud = *pudp; | ||
1507 | if (!pud_present(pud) || pud_large(pud)) | ||
1508 | return 0; | ||
1509 | pmdp = pmd_offset(pudp, addr); | ||
1510 | pmd = *pmdp; | ||
1511 | if (!pmd_present(pmd) || pmd_large(pmd)) | ||
1512 | return 0; | ||
1513 | ptep = pte_offset_map(pmdp, addr); | ||
1514 | pte = *ptep; | ||
1515 | pte_unmap(ptep); | ||
1516 | if (!is_swap_pte(pte)) | ||
1517 | return 0; | ||
1518 | entry = pte_to_swp_entry(pte); | ||
1519 | return is_hwpoison_entry(entry); | ||
1520 | } | ||
1521 | EXPORT_SYMBOL_GPL(is_hwpoison_address); | ||
diff --git a/mm/memory.c b/mm/memory.c index 5823698c2b71..346ee7e041fd 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1410,6 +1410,55 @@ no_page_table: | |||
1410 | return page; | 1410 | return page; |
1411 | } | 1411 | } |
1412 | 1412 | ||
1413 | /** | ||
1414 | * __get_user_pages() - pin user pages in memory | ||
1415 | * @tsk: task_struct of target task | ||
1416 | * @mm: mm_struct of target mm | ||
1417 | * @start: starting user address | ||
1418 | * @nr_pages: number of pages from start to pin | ||
1419 | * @gup_flags: flags modifying pin behaviour | ||
1420 | * @pages: array that receives pointers to the pages pinned. | ||
1421 | * Should be at least nr_pages long. Or NULL, if caller | ||
1422 | * only intends to ensure the pages are faulted in. | ||
1423 | * @vmas: array of pointers to vmas corresponding to each page. | ||
1424 | * Or NULL if the caller does not require them. | ||
1425 | * @nonblocking: whether waiting for disk IO or mmap_sem contention | ||
1426 | * | ||
1427 | * Returns number of pages pinned. This may be fewer than the number | ||
1428 | * requested. If nr_pages is 0 or negative, returns 0. If no pages | ||
1429 | * were pinned, returns -errno. Each page returned must be released | ||
1430 | * with a put_page() call when it is finished with. vmas will only | ||
1431 | * remain valid while mmap_sem is held. | ||
1432 | * | ||
1433 | * Must be called with mmap_sem held for read or write. | ||
1434 | * | ||
1435 | * __get_user_pages walks a process's page tables and takes a reference to | ||
1436 | * each struct page that each user address corresponds to at a given | ||
1437 | * instant. That is, it takes the page that would be accessed if a user | ||
1438 | * thread accesses the given user virtual address at that instant. | ||
1439 | * | ||
1440 | * This does not guarantee that the page exists in the user mappings when | ||
1441 | * __get_user_pages returns, and there may even be a completely different | ||
1442 | * page there in some cases (eg. if mmapped pagecache has been invalidated | ||
1443 | * and subsequently re faulted). However it does guarantee that the page | ||
1444 | * won't be freed completely. And mostly callers simply care that the page | ||
1445 | * contains data that was valid *at some point in time*. Typically, an IO | ||
1446 | * or similar operation cannot guarantee anything stronger anyway because | ||
1447 | * locks can't be held over the syscall boundary. | ||
1448 | * | ||
1449 | * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If | ||
1450 | * the page is written to, set_page_dirty (or set_page_dirty_lock, as | ||
1451 | * appropriate) must be called after the page is finished with, and | ||
1452 | * before put_page is called. | ||
1453 | * | ||
1454 | * If @nonblocking != NULL, __get_user_pages will not wait for disk IO | ||
1455 | * or mmap_sem contention, and if waiting is needed to pin all pages, | ||
1456 | * *@nonblocking will be set to 0. | ||
1457 | * | ||
1458 | * In most cases, get_user_pages or get_user_pages_fast should be used | ||
1459 | * instead of __get_user_pages. __get_user_pages should be used only if | ||
1460 | * you need some special @gup_flags. | ||
1461 | */ | ||
1413 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1462 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1414 | unsigned long start, int nr_pages, unsigned int gup_flags, | 1463 | unsigned long start, int nr_pages, unsigned int gup_flags, |
1415 | struct page **pages, struct vm_area_struct **vmas, | 1464 | struct page **pages, struct vm_area_struct **vmas, |
@@ -1527,9 +1576,16 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1527 | if (ret & VM_FAULT_ERROR) { | 1576 | if (ret & VM_FAULT_ERROR) { |
1528 | if (ret & VM_FAULT_OOM) | 1577 | if (ret & VM_FAULT_OOM) |
1529 | return i ? i : -ENOMEM; | 1578 | return i ? i : -ENOMEM; |
1530 | if (ret & | 1579 | if (ret & (VM_FAULT_HWPOISON | |
1531 | (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE| | 1580 | VM_FAULT_HWPOISON_LARGE)) { |
1532 | VM_FAULT_SIGBUS)) | 1581 | if (i) |
1582 | return i; | ||
1583 | else if (gup_flags & FOLL_HWPOISON) | ||
1584 | return -EHWPOISON; | ||
1585 | else | ||
1586 | return -EFAULT; | ||
1587 | } | ||
1588 | if (ret & VM_FAULT_SIGBUS) | ||
1533 | return i ? i : -EFAULT; | 1589 | return i ? i : -EFAULT; |
1534 | BUG(); | 1590 | BUG(); |
1535 | } | 1591 | } |
@@ -1578,6 +1634,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1578 | } while (nr_pages); | 1634 | } while (nr_pages); |
1579 | return i; | 1635 | return i; |
1580 | } | 1636 | } |
1637 | EXPORT_SYMBOL(__get_user_pages); | ||
1581 | 1638 | ||
1582 | /** | 1639 | /** |
1583 | * get_user_pages() - pin user pages in memory | 1640 | * get_user_pages() - pin user pages in memory |