Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 128
1 file changed, 109 insertions(+), 19 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 4126dd16778c..d5d1653d60a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	return i;
 }
 
+/**
+ * get_user_pages() - pin user pages in memory
+ * @tsk:	task_struct of target task
+ * @mm:		mm_struct of target mm
+ * @start:	starting user address
+ * @len:	number of pages from start to pin
+ * @write:	whether pages will be written to by the caller
+ * @force:	whether to force write access even if user mapping is
+ *		readonly. This will result in the page being COWed even
+ *		in MAP_SHARED mappings. You do not want this.
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least @len long. Or NULL, if caller
+ *		only intends to ensure the pages are faulted in.
+ * @vmas:	array of pointers to vmas corresponding to each page.
+ *		Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If len is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * get_user_pages returns, and there may even be a completely different
+ * page there in some cases (e.g. if mmapped pagecache has been invalidated
+ * and subsequently re-faulted). However it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If write=0, the page must not be written to. If the page is written to,
+ * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
+ * after the page is finished with, and before put_page is called.
+ *
+ * get_user_pages is typically used for fewer-copy IO operations, to get a
+ * handle on the memory by some means other than accesses via the user
+ * virtual addresses. The pages may be submitted for DMA to devices or
+ * accessed via their kernel linear mapping (via the kmap APIs). Care
+ * should be taken to use the correct cache flushing APIs.
+ *
+ * See also get_user_pages_fast, for performance critical applications.
+ */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
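The kerneldoc added above describes a pin/use/dirty/release life cycle. As a reader aid, here is a minimal, hypothetical caller of this get_user_pages() signature (not part of the patch; the helper name and error handling are illustrative):

/*
 * Illustrative sketch only: pin one user page for writing, touch it
 * through its kernel mapping, then dirty and release it as the new
 * documentation prescribes.
 */
static int example_pin_and_touch(unsigned long addr)
{
	struct page *page;
	void *kaddr;
	int ret;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, addr,
			     1, 1, 0,		/* len=1 page, write=1, force=0 */
			     &page, NULL);	/* no vmas needed */
	up_read(&current->mm->mmap_sem);
	if (ret < 1)
		return ret < 0 ? ret : -EFAULT;

	kaddr = kmap(page);			/* access via kernel mapping */
	memset(kaddr, 0, PAGE_SIZE);
	kunmap(page);

	set_page_dirty_lock(page);		/* we wrote to it, so dirty it... */
	put_page(page);				/* ...before dropping our reference */
	return 0;
}

Note that after get_user_pages() returns, it is the page reference, not mmap_sem, that keeps the page from being freed, which is why the semaphore can be dropped before the page is used.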
@@ -3053,22 +3103,13 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
-#ifdef CONFIG_HAVE_IOREMAP_PROT
-int follow_phys(struct vm_area_struct *vma,
-		unsigned long address, unsigned int flags,
-		unsigned long *prot, resource_size_t *phys)
+static int follow_pte(struct mm_struct *mm, unsigned long address,
+		pte_t **ptepp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
-	resource_size_t phys_addr = 0;
-	struct mm_struct *mm = vma->vm_mm;
-	int ret = -EINVAL;
-
-	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-		goto out;
+	pte_t *ptep;
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
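The unchanged lines elided between this hunk and the next presumably continue the walk one level at a time, mirroring the pgd step above; a rough sketch of that descent, for orientation only:

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;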
@@ -3086,22 +3127,71 @@ int follow_phys(struct vm_area_struct *vma,
 	if (pmd_huge(*pmd))
 		goto out;
 
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
 	if (!ptep)
 		goto out;
+	if (!pte_present(*ptep))
+		goto unlock;
+	*ptepp = ptep;
+	return 0;
+unlock:
+	pte_unmap_unlock(ptep, *ptlp);
+out:
+	return -EINVAL;
+}
 
+/**
+ * follow_pfn - look up PFN at a user virtual address
+ * @vma: memory mapping
+ * @address: user virtual address
+ * @pfn: location to store found PFN
+ *
+ * Only IO mappings and raw PFN mappings are allowed.
+ *
+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ */
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+	unsigned long *pfn)
+{
+	int ret = -EINVAL;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return ret;
+
+	ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
+	if (ret)
+		return ret;
+	*pfn = pte_pfn(*ptep);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(follow_pfn);
+
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+int follow_phys(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags,
+		unsigned long *prot, resource_size_t *phys)
+{
+	int ret = -EINVAL;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;
+
+	if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+		goto out;
 	pte = *ptep;
-	if (!pte_present(pte))
-		goto unlock;
+
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
-	phys_addr = pte_pfn(pte);
-	phys_addr <<= PAGE_SHIFT;	/* Shift here to avoid overflow on PAE */
 
 	*prot = pgprot_val(pte_pgprot(pte));
-	*phys = phys_addr;
-	ret = 0;
+	*phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
 
+	ret = 0;
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
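Two remarks on the new code above. First, the rewritten *phys computation widens the PFN to resource_size_t before shifting: on 32-bit PAE, shifting an unsigned long PFN left by PAGE_SHIFT would truncate physical addresses above 4GB, the overflow the removed "Shift here to avoid overflow on PAE" comment guarded against. Second, a hypothetical caller of the newly exported follow_pfn() could look like the sketch below (illustrative only; the wrapper name and its error handling are assumptions, not part of the patch):

/*
 * Illustrative sketch only: translate a user virtual address inside an
 * IO/PFN mapping to a page frame number. mmap_sem is held across the
 * lookup so the VMA and its page tables stay stable.
 */
static int example_lookup_pfn(struct mm_struct *mm, unsigned long addr,
			      unsigned long *pfn)
{
	struct vm_area_struct *vma;
	int ret = -EFAULT;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (vma && addr >= vma->vm_start)
		ret = follow_pfn(vma, addr, pfn);	/* -EINVAL unless VM_IO/VM_PFNMAP */
	up_read(&mm->mmap_sem);
	return ret;
}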