aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mm.h20
-rw-r--r--mm/memory.c50
-rw-r--r--mm/util.c16
3 files changed, 67 insertions, 19 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 33da7f538841..a880161a3854 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -824,8 +824,11 @@ static inline int handle_mm_fault(struct mm_struct *mm,
824extern int make_pages_present(unsigned long addr, unsigned long end); 824extern int make_pages_present(unsigned long addr, unsigned long end);
825extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); 825extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
826 826
827int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, 827int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
828 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 828 unsigned long start, int len, int write, int force,
829 struct page **pages, struct vm_area_struct **vmas);
830int get_user_pages_fast(unsigned long start, int nr_pages, int write,
831 struct page **pages);
829 832
830extern int try_to_release_page(struct page * page, gfp_t gfp_mask); 833extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
831extern void do_invalidatepage(struct page *page, unsigned long offset); 834extern void do_invalidatepage(struct page *page, unsigned long offset);
@@ -850,19 +853,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
850 unsigned long end, unsigned long newflags); 853 unsigned long end, unsigned long newflags);
851 854
852/* 855/*
853 * get_user_pages_fast provides equivalent functionality to get_user_pages,
854 * operating on current and current->mm (force=0 and doesn't return any vmas).
855 *
856 * get_user_pages_fast may take mmap_sem and page tables, so no assumptions
857 * can be made about locking. get_user_pages_fast is to be implemented in a
858 * way that is advantageous (vs get_user_pages()) when the user memory area is
859 * already faulted in and present in ptes. However if the pages have to be
860 * faulted in, it may turn out to be slightly slower).
861 */
862int get_user_pages_fast(unsigned long start, int nr_pages, int write,
863 struct page **pages);
864
865/*
866 * A callback you can register to apply pressure to ageable caches. 856 * A callback you can register to apply pressure to ageable caches.
867 * 857 *
868 * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should 858 * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should
diff --git a/mm/memory.c b/mm/memory.c
index 4126dd16778c..891bad0613f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1360 return i; 1360 return i;
1361} 1361}
1362 1362
1363/**
1364 * get_user_pages() - pin user pages in memory
1365 * @tsk: task_struct of target task
1366 * @mm: mm_struct of target mm
1367 * @start: starting user address
1368 * @len: number of pages from start to pin
1369 * @write: whether pages will be written to by the caller
1370 * @force: whether to force write access even if user mapping is
1371 * readonly. This will result in the page being COWed even
1372 * in MAP_SHARED mappings. You do not want this.
1373 * @pages: array that receives pointers to the pages pinned.
1374 * Should be at least len entries long. Or NULL, if caller
1375 * only intends to ensure the pages are faulted in.
1376 * @vmas: array of pointers to vmas corresponding to each page.
1377 * Or NULL if the caller does not require them.
1378 *
1379 * Returns number of pages pinned. This may be fewer than the number
1380 * requested. If len is 0 or negative, returns 0. If no pages
1381 * were pinned, returns -errno. Each page returned must be released
1382 * with a put_page() call when it is finished with. vmas will only
1383 * remain valid while mmap_sem is held.
1384 *
1385 * Must be called with mmap_sem held for read or write.
1386 *
1387 * get_user_pages walks a process's page tables and takes a reference to
1388 * each struct page that each user address corresponds to at a given
1389 * instant. That is, it takes the page that would be accessed if a user
1390 * thread accesses the given user virtual address at that instant.
1391 *
1392 * This does not guarantee that the page exists in the user mappings when
1393 * get_user_pages returns, and there may even be a completely different
1394 * page there in some cases (eg. if mmapped pagecache has been invalidated
1395 * and subsequently refaulted). However it does guarantee that the page
1396 * won't be freed completely. And mostly callers simply care that the page
1397 * contains data that was valid *at some point in time*. Typically, an IO
1398 * or similar operation cannot guarantee anything stronger anyway because
1399 * locks can't be held over the syscall boundary.
1400 *
1401 * If write=0, the page must not be written to. If the page is written to,
1402 * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
1403 * after the page is finished with, and before put_page is called.
1404 *
1405 * get_user_pages is typically used for fewer-copy IO operations, to get a
1406 * handle on the memory by some means other than accesses via the user virtual
1407 * addresses. The pages may be submitted for DMA to devices or accessed via
1408 * their kernel linear mapping (via the kmap APIs). Care should be taken to
1409 * use the correct cache flushing APIs.
1410 *
1411 * See also get_user_pages_fast, for performance critical applications.
1412 */
1363int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1413int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1364 unsigned long start, int len, int write, int force, 1414 unsigned long start, int len, int write, int force,
1365 struct page **pages, struct vm_area_struct **vmas) 1415 struct page **pages, struct vm_area_struct **vmas)
diff --git a/mm/util.c b/mm/util.c
index abc65aa7cdfc..d5d2213728c5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -233,13 +233,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
233 * @pages: array that receives pointers to the pages pinned. 233 * @pages: array that receives pointers to the pages pinned.
234 * Should be at least nr_pages long. 234 * Should be at least nr_pages long.
235 * 235 *
236 * Attempt to pin user pages in memory without taking mm->mmap_sem.
237 * If not successful, it will fall back to taking the lock and
238 * calling get_user_pages().
239 *
240 * Returns number of pages pinned. This may be fewer than the number 236 * Returns number of pages pinned. This may be fewer than the number
241 * requested. If nr_pages is 0 or negative, returns 0. If no pages 237 * requested. If nr_pages is 0 or negative, returns 0. If no pages
242 * were pinned, returns -errno. 238 * were pinned, returns -errno.
239 *
240 * get_user_pages_fast provides equivalent functionality to get_user_pages,
241 * operating on current and current->mm, with force=0 and vmas=NULL. However
242 * unlike get_user_pages, it must be called without mmap_sem held.
243 *
244 * get_user_pages_fast may take mmap_sem and page table locks, so no
245 * assumptions can be made about lack of locking. get_user_pages_fast is to be
246 * implemented in a way that is advantageous (vs get_user_pages()) when the
247 * user memory area is already faulted in and present in ptes. However if the
248 * pages have to be faulted in, it may turn out to be slightly slower so
249 * callers need to carefully consider what to use. On many architectures,
250 * get_user_pages_fast simply falls back to get_user_pages.
243 */ 251 */
244int __attribute__((weak)) get_user_pages_fast(unsigned long start, 252int __attribute__((weak)) get_user_pages_fast(unsigned long start,
245 int nr_pages, int write, struct page **pages) 253 int nr_pages, int write, struct page **pages)