diff options
-rw-r--r-- | include/linux/mm.h | 20 | ||||
-rw-r--r-- | mm/memory.c | 50 | ||||
-rw-r--r-- | mm/util.c | 16 |
3 files changed, 67 insertions, 19 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 33da7f538841..a880161a3854 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -824,8 +824,11 @@ static inline int handle_mm_fault(struct mm_struct *mm, | |||
824 | extern int make_pages_present(unsigned long addr, unsigned long end); | 824 | extern int make_pages_present(unsigned long addr, unsigned long end); |
825 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); | 825 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); |
826 | 826 | ||
827 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, | 827 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
828 | int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); | 828 | unsigned long start, int len, int write, int force, |
829 | struct page **pages, struct vm_area_struct **vmas); | ||
830 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
831 | struct page **pages); | ||
829 | 832 | ||
830 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); | 833 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); |
831 | extern void do_invalidatepage(struct page *page, unsigned long offset); | 834 | extern void do_invalidatepage(struct page *page, unsigned long offset); |
@@ -850,19 +853,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma, | |||
850 | unsigned long end, unsigned long newflags); | 853 | unsigned long end, unsigned long newflags); |
851 | 854 | ||
852 | /* | 855 | /* |
853 | * get_user_pages_fast provides equivalent functionality to get_user_pages, | ||
854 | * operating on current and current->mm (force=0 and doesn't return any vmas). | ||
855 | * | ||
856 | * get_user_pages_fast may take mmap_sem and page tables, so no assumptions | ||
857 | * can be made about locking. get_user_pages_fast is to be implemented in a | ||
858 | * way that is advantageous (vs get_user_pages()) when the user memory area is | ||
859 | * already faulted in and present in ptes. However if the pages have to be | ||
860 | * faulted in, it may turn out to be slightly slower). | ||
861 | */ | ||
862 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
863 | struct page **pages); | ||
864 | |||
865 | /* | ||
866 | * A callback you can register to apply pressure to ageable caches. | 856 | * A callback you can register to apply pressure to ageable caches. |
867 | * | 857 | * |
868 | * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should | 858 | * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should |
diff --git a/mm/memory.c b/mm/memory.c index 4126dd16778c..891bad0613f4 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1360 | return i; | 1360 | return i; |
1361 | } | 1361 | } |
1362 | 1362 | ||
1363 | /** | ||
1364 | * get_user_pages() - pin user pages in memory | ||
1365 | * @tsk: task_struct of target task | ||
1366 | * @mm: mm_struct of target mm | ||
1367 | * @start: starting user address | ||
1368 | * @len: number of pages from start to pin | ||
1369 | * @write: whether pages will be written to by the caller | ||
1370 | * @force: whether to force write access even if user mapping is | ||
1371 | * readonly. This will result in the page being COWed even | ||
1372 | * in MAP_SHARED mappings. You do not want this. | ||
1373 | * @pages: array that receives pointers to the pages pinned. | ||
1374 | * Should be at least nr_pages long. Or NULL, if caller | ||
1375 | * only intends to ensure the pages are faulted in. | ||
1376 | * @vmas: array of pointers to vmas corresponding to each page. | ||
1377 | * Or NULL if the caller does not require them. | ||
1378 | * | ||
1379 | * Returns number of pages pinned. This may be fewer than the number | ||
1380 | * requested. If len is 0 or negative, returns 0. If no pages | ||
1381 | * were pinned, returns -errno. Each page returned must be released | ||
1382 | * with a put_page() call when it is finished with. vmas will only | ||
1383 | * remain valid while mmap_sem is held. | ||
1384 | * | ||
1385 | * Must be called with mmap_sem held for read or write. | ||
1386 | * | ||
1387 | * get_user_pages walks a process's page tables and takes a reference to | ||
1388 | * each struct page that each user address corresponds to at a given | ||
1389 | * instant. That is, it takes the page that would be accessed if a user | ||
1390 | * thread accesses the given user virtual address at that instant. | ||
1391 | * | ||
1392 | * This does not guarantee that the page exists in the user mappings when | ||
1393 | * get_user_pages returns, and there may even be a completely different | ||
1394 | * page there in some cases (eg. if mmapped pagecache has been invalidated | ||
1395 | * and subsequently re faulted). However it does guarantee that the page | ||
1396 | * won't be freed completely. And mostly callers simply care that the page | ||
1397 | * contains data that was valid *at some point in time*. Typically, an IO | ||
1398 | * or similar operation cannot guarantee anything stronger anyway because | ||
1399 | * locks can't be held over the syscall boundary. | ||
1400 | * | ||
1401 | * If write=0, the page must not be written to. If the page is written to, | ||
1402 | * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called | ||
1403 | * after the page is finished with, and before put_page is called. | ||
1404 | * | ||
1405 | * get_user_pages is typically used for fewer-copy IO operations, to get a | ||
1406 | * handle on the memory by some means other than accesses via the user virtual | ||
1407 | * addresses. The pages may be submitted for DMA to devices or accessed via | ||
1408 | * their kernel linear mapping (via the kmap APIs). Care should be taken to | ||
1409 | * use the correct cache flushing APIs. | ||
1410 | * | ||
1411 | * See also get_user_pages_fast, for performance critical applications. | ||
1412 | */ | ||
1363 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1413 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1364 | unsigned long start, int len, int write, int force, | 1414 | unsigned long start, int len, int write, int force, |
1365 | struct page **pages, struct vm_area_struct **vmas) | 1415 | struct page **pages, struct vm_area_struct **vmas) |
@@ -233,13 +233,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
233 | * @pages: array that receives pointers to the pages pinned. | 233 | * @pages: array that receives pointers to the pages pinned. |
234 | * Should be at least nr_pages long. | 234 | * Should be at least nr_pages long. |
235 | * | 235 | * |
236 | * Attempt to pin user pages in memory without taking mm->mmap_sem. | ||
237 | * If not successful, it will fall back to taking the lock and | ||
238 | * calling get_user_pages(). | ||
239 | * | ||
240 | * Returns number of pages pinned. This may be fewer than the number | 236 | * Returns number of pages pinned. This may be fewer than the number |
241 | * requested. If nr_pages is 0 or negative, returns 0. If no pages | 237 | * requested. If nr_pages is 0 or negative, returns 0. If no pages |
242 | * were pinned, returns -errno. | 238 | * were pinned, returns -errno. |
239 | * | ||
240 | * get_user_pages_fast provides equivalent functionality to get_user_pages, | ||
241 | * operating on current and current->mm, with force=0 and vma=NULL. However | ||
242 | * unlike get_user_pages, it must be called without mmap_sem held. | ||
243 | * | ||
244 | * get_user_pages_fast may take mmap_sem and page table locks, so no | ||
245 | * assumptions can be made about lack of locking. get_user_pages_fast is to be | ||
246 | * implemented in a way that is advantageous (vs get_user_pages()) when the | ||
247 | * user memory area is already faulted in and present in ptes. However if the | ||
248 | * pages have to be faulted in, it may turn out to be slightly slower so | ||
249 | * callers need to carefully consider what to use. On many architectures, | ||
250 | * get_user_pages_fast simply falls back to get_user_pages. | ||
243 | */ | 251 | */ |
244 | int __attribute__((weak)) get_user_pages_fast(unsigned long start, | 252 | int __attribute__((weak)) get_user_pages_fast(unsigned long start, |
245 | int nr_pages, int write, struct page **pages) | 253 | int nr_pages, int write, struct page **pages) |