author		Nick Piggin <nickpiggin@yahoo.com.au>	2009-06-16 18:31:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-16 22:47:30 -0400
commit		d2bf6be8ab63aa84e6149aac934649aadf3828b1 (patch)
tree		65e7f74f1ed6fd9516f1615b0f293d2aaeede07b
parent		7ffc59b4d0bdfa00e882339f85b8a969bb7021e2 (diff)
mm: clean up get_user_pages_fast() documentation
Move more documentation for get_user_pages_fast into the new kerneldoc comment.
Add some comments for get_user_pages as well.

Also, move get_user_pages_fast declaration up to get_user_pages. It wasn't
there initially because it was once a static inline function.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andy Grover <andy.grover@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/mm.h	20
-rw-r--r--	mm/memory.c		50
-rw-r--r--	mm/util.c		16
3 files changed, 67 insertions(+), 19 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 33da7f538841..a880161a3854 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -824,8 +824,11 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
-	int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+			unsigned long start, int len, int write, int force,
+			struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned long offset);
@@ -850,19 +853,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
 			  unsigned long end, unsigned long newflags);
 
 /*
- * get_user_pages_fast provides equivalent functionality to get_user_pages,
- * operating on current and current->mm (force=0 and doesn't return any vmas).
- *
- * get_user_pages_fast may take mmap_sem and page tables, so no assumptions
- * can be made about locking. get_user_pages_fast is to be implemented in a
- * way that is advantageous (vs get_user_pages()) when the user memory area is
- * already faulted in and present in ptes. However if the pages have to be
- * faulted in, it may turn out to be slightly slower).
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages);
-
-/*
  * A callback you can register to apply pressure to ageable caches.
  *
  * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should
diff --git a/mm/memory.c b/mm/memory.c
index 4126dd16778c..891bad0613f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	return i;
 }
 
+/**
+ * get_user_pages() - pin user pages in memory
+ * @tsk:	task_struct of target task
+ * @mm:		mm_struct of target mm
+ * @start:	starting user address
+ * @len:	number of pages from start to pin
+ * @write:	whether pages will be written to by the caller
+ * @force:	whether to force write access even if user mapping is
+ *		readonly. This will result in the page being COWed even
+ *		in MAP_SHARED mappings. You do not want this.
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long. Or NULL, if caller
+ *		only intends to ensure the pages are faulted in.
+ * @vmas:	array of pointers to vmas corresponding to each page.
+ *		Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If len is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * get_user_pages returns, and there may even be a completely different
+ * page there in some cases (eg. if mmapped pagecache has been invalidated
+ * and subsequently re faulted). However it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If write=0, the page must not be written to. If the page is written to,
+ * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
+ * after the page is finished with, and before put_page is called.
+ *
+ * get_user_pages is typically used for fewer-copy IO operations, to get a
+ * handle on the memory by some means other than accesses via the user virtual
+ * addresses. The pages may be submitted for DMA to devices or accessed via
+ * their kernel linear mapping (via the kmap APIs). Care should be taken to
+ * use the correct cache flushing APIs.
+ *
+ * See also get_user_pages_fast, for performance critical applications.
+ */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
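
For illustration only (not part of the patch): a minimal caller sketch that follows the rules in the new get_user_pages() kerneldoc above, using the 2009-era eight-argument signature. The function name pin_and_touch_page and the page-aligned user_addr parameter are hypothetical.

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/errno.h>

static int pin_and_touch_page(unsigned long user_addr)
{
	struct page *page;
	void *kaddr;
	int ret;

	/* get_user_pages() must be called with mmap_sem held (read or write). */
	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, user_addr,
			     1,		/* len: number of pages to pin */
			     1,		/* write: we intend to write to the page */
			     0,		/* force: do not override a read-only mapping */
			     &page, NULL);
	up_read(&current->mm->mmap_sem);
	if (ret < 1)
		return ret < 0 ? ret : -EFAULT;

	/* Access the pinned page via its kernel mapping, not the user address. */
	kaddr = kmap(page);
	memset(kaddr, 0, PAGE_SIZE);
	flush_dcache_page(page);	/* keep caches coherent after the write */
	kunmap(page);

	/*
	 * The page was written to, so mark it dirty before dropping the
	 * reference that get_user_pages() took.
	 */
	set_page_dirty_lock(page);
	put_page(page);
	return 0;
}

The same release discipline applies per page when more than one page is pinned: set_page_dirty_lock() only for pages actually written, then put_page() for every page returned.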
diff --git a/mm/util.c b/mm/util.c
index abc65aa7cdfc..d5d2213728c5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -233,13 +233,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
  * Returns number of pages pinned. This may be fewer than the number
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
+ *
+ * get_user_pages_fast provides equivalent functionality to get_user_pages,
+ * operating on current and current->mm, with force=0 and vma=NULL. However
+ * unlike get_user_pages, it must be called without mmap_sem held.
+ *
+ * get_user_pages_fast may take mmap_sem and page table locks, so no
+ * assumptions can be made about lack of locking. get_user_pages_fast is to be
+ * implemented in a way that is advantageous (vs get_user_pages()) when the
+ * user memory area is already faulted in and present in ptes. However if the
+ * pages have to be faulted in, it may turn out to be slightly slower so
+ * callers need to carefully consider what to use. On many architectures,
+ * get_user_pages_fast simply falls back to get_user_pages.
  */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 			int nr_pages, int write, struct page **pages)
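
For illustration only (not part of the patch): a sketch of the calling convention the comment above describes. Unlike get_user_pages(), get_user_pages_fast() operates on current->mm and is called without mmap_sem held; here write=0, so the pages must not be written to and are released with put_page() only. The helper names and parameters are hypothetical.

#include <linux/mm.h>
#include <linux/errno.h>

static int pin_user_buffer_fast(unsigned long buf, int nr, struct page **pages)
{
	int pinned;

	/* No mmap_sem taken by the caller; the core code handles any locking. */
	pinned = get_user_pages_fast(buf, nr, 0 /* write */, pages);
	if (pinned < 0)
		return pinned;		/* no pages pinned: -errno */

	/*
	 * May be fewer than nr. The caller either retries the remainder via
	 * get_user_pages() under mmap_sem or works with the partial pin.
	 */
	return pinned;
}

static void unpin_user_buffer(struct page **pages, int pinned)
{
	int i;

	/* Drop the reference held on each page that was pinned above. */
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);
}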