Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c | 128
 1 files changed, 109 insertions, 19 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 4126dd16778c..d5d1653d60a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	return i;
 }
 
+/**
+ * get_user_pages() - pin user pages in memory
+ * @tsk:	task_struct of target task
+ * @mm:		mm_struct of target mm
+ * @start:	starting user address
+ * @len:	number of pages from start to pin
+ * @write:	whether pages will be written to by the caller
+ * @force:	whether to force write access even if user mapping is
+ *		readonly. This will result in the page being COWed even
+ *		in MAP_SHARED mappings. You do not want this.
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long. Or NULL, if caller
+ *		only intends to ensure the pages are faulted in.
+ * @vmas:	array of pointers to vmas corresponding to each page.
+ *		Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If len is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * get_user_pages returns, and there may even be a completely different
+ * page there in some cases (eg. if mmapped pagecache has been invalidated
+ * and subsequently re faulted). However it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If write=0, the page must not be written to. If the page is written to,
+ * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
+ * after the page is finished with, and before put_page is called.
+ *
+ * get_user_pages is typically used for fewer-copy IO operations, to get a
+ * handle on the memory by some means other than accesses via the user virtual
+ * addresses. The pages may be submitted for DMA to devices or accessed via
+ * their kernel linear mapping (via the kmap APIs). Care should be taken to
+ * use the correct cache flushing APIs.
+ *
+ * See also get_user_pages_fast, for performance critical applications.
+ */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
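For context on the usage rules spelled out in the new comment above (call with mmap_sem held, release every page with put_page(), and mark pages dirty before release if they were written), a minimal sketch of a typical caller against the eight-argument get_user_pages() signature in this hunk could look like the following. It is not part of the patch; pin_user_buffer() and its simplified error handling are hypothetical.

#include <linux/mm.h>
#include <linux/sched.h>

/*
 * Pin @nr_pages of the current task's address space starting at @uaddr.
 * Returns the number of pages actually pinned, or -errno.
 */
static int pin_user_buffer(unsigned long uaddr, int nr_pages, int write,
			   struct page **pages)
{
	int i, got;

	/* get_user_pages() must run under mmap_sem (read or write). */
	down_read(&current->mm->mmap_sem);
	got = get_user_pages(current, current->mm, uaddr, nr_pages,
			     write, 0 /* force */, pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (got < 0)
		return got;

	/* ... perform the IO against the pinned pages here ... */

	for (i = 0; i < got; i++) {
		if (write)
			set_page_dirty_lock(pages[i]);	/* before put_page() */
		put_page(pages[i]);
	}
	return got;
}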
@@ -3053,22 +3103,13 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
-#ifdef CONFIG_HAVE_IOREMAP_PROT
-int follow_phys(struct vm_area_struct *vma,
-		unsigned long address, unsigned int flags,
-		unsigned long *prot, resource_size_t *phys)
+static int follow_pte(struct mm_struct *mm, unsigned long address,
+		pte_t **ptepp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
-	resource_size_t phys_addr = 0;
-	struct mm_struct *mm = vma->vm_mm;
-	int ret = -EINVAL;
-
-	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-		goto out;
+	pte_t *ptep;
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
@@ -3086,22 +3127,71 @@ int follow_phys(struct vm_area_struct *vma,
 	if (pmd_huge(*pmd))
 		goto out;
 
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
 	if (!ptep)
 		goto out;
+	if (!pte_present(*ptep))
+		goto unlock;
+	*ptepp = ptep;
+	return 0;
+unlock:
+	pte_unmap_unlock(ptep, *ptlp);
+out:
+	return -EINVAL;
+}
 
+/**
+ * follow_pfn - look up PFN at a user virtual address
+ * @vma: memory mapping
+ * @address: user virtual address
+ * @pfn: location to store found PFN
+ *
+ * Only IO mappings and raw PFN mappings are allowed.
+ *
+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ */
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+	unsigned long *pfn)
+{
+	int ret = -EINVAL;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return ret;
+
+	ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
+	if (ret)
+		return ret;
+	*pfn = pte_pfn(*ptep);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(follow_pfn);
+
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+int follow_phys(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags,
+		unsigned long *prot, resource_size_t *phys)
+{
+	int ret = -EINVAL;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;
+
+	if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+		goto out;
 	pte = *ptep;
-	if (!pte_present(pte))
-		goto unlock;
+
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
-	phys_addr = pte_pfn(pte);
-	phys_addr <<= PAGE_SHIFT;	/* Shift here to avoid overflow on PAE */
 
 	*prot = pgprot_val(pte_pgprot(pte));
-	*phys = phys_addr;
-	ret = 0;
+	*phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
 
+	ret = 0;
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
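Since the new follow_pfn() helper is exported, modules can use it to resolve the PFN backing a user address in a VM_IO or VM_PFNMAP mapping. A minimal sketch of such a caller follows, assuming the mmap_sem/find_vma() conventions of this kernel; lookup_user_pfn() is a hypothetical helper, not part of the patch.

#include <linux/mm.h>
#include <linux/sched.h>

/*
 * Resolve the PFN that backs @uaddr in @mm.  follow_pfn() only accepts
 * VM_IO/VM_PFNMAP mappings and returns -EINVAL otherwise.
 */
static int lookup_user_pfn(struct mm_struct *mm, unsigned long uaddr,
			   unsigned long *pfn)
{
	struct vm_area_struct *vma;
	int ret = -EFAULT;

	down_read(&mm->mmap_sem);	/* keep the vma stable during lookup */
	vma = find_vma(mm, uaddr);
	if (vma && uaddr >= vma->vm_start)
		ret = follow_pfn(vma, uaddr, pfn);
	up_read(&mm->mmap_sem);

	return ret;
}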