diff options
author | Mel Gorman <mgorman@suse.de> | 2012-07-31 19:44:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 21:42:47 -0400 |
commit | 18022c5d8627a7a9ba8097a0f238b513fae6f5b8 (patch) | |
tree | 57e0d0ec6a5369c56bd00957eb9c0e1220e62917 | |
parent | f981c5950fa85916ba49bea5d9a7a5078f47e569 (diff) |
mm: add get_kernel_page[s] for pinning of kernel addresses for I/O
This patch adds two new APIs get_kernel_pages() and get_kernel_page() that
may be used to pin a vector of kernel addresses for IO. The initial user
is expected to be NFS for allowing pages to be written to swap using
aops->direct_IO(). Strictly speaking, swap-over-NFS only needs to pin one
page for IO but it makes sense to express the API in terms of a vector and
add a helper for pinning single pages.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Cc: Mark Salter <msalter@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 4 | ||||
-rw-r--r-- | mm/swap.c | 53 |
4 files changed, 61 insertions, 0 deletions
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0edb65dd8edd..7b7ac9ccec7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -160,6 +160,7 @@ enum rq_flag_bits { | |||
160 | __REQ_FLUSH_SEQ, /* request for flush sequence */ | 160 | __REQ_FLUSH_SEQ, /* request for flush sequence */ |
161 | __REQ_IO_STAT, /* account I/O stat */ | 161 | __REQ_IO_STAT, /* account I/O stat */ |
162 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ | 162 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ |
163 | __REQ_KERNEL, /* direct IO to kernel pages */ | ||
163 | __REQ_NR_BITS, /* stops here */ | 164 | __REQ_NR_BITS, /* stops here */ |
164 | }; | 165 | }; |
165 | 166 | ||
@@ -201,5 +202,6 @@ enum rq_flag_bits { | |||
201 | #define REQ_IO_STAT (1 << __REQ_IO_STAT) | 202 | #define REQ_IO_STAT (1 << __REQ_IO_STAT) |
202 | #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) | 203 | #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) |
203 | #define REQ_SECURE (1 << __REQ_SECURE) | 204 | #define REQ_SECURE (1 << __REQ_SECURE) |
205 | #define REQ_KERNEL (1 << __REQ_KERNEL) | ||
204 | 206 | ||
205 | #endif /* __LINUX_BLK_TYPES_H */ | 207 | #endif /* __LINUX_BLK_TYPES_H */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 8fabb037a48d..9d77309da153 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -165,6 +165,8 @@ struct inodes_stat_t { | |||
165 | #define READ 0 | 165 | #define READ 0 |
166 | #define WRITE RW_MASK | 166 | #define WRITE RW_MASK |
167 | #define READA RWA_MASK | 167 | #define READA RWA_MASK |
168 | #define KERNEL_READ (READ|REQ_KERNEL) | ||
169 | #define KERNEL_WRITE (WRITE|REQ_KERNEL) | ||
168 | 170 | ||
169 | #define READ_SYNC (READ | REQ_SYNC) | 171 | #define READ_SYNC (READ | REQ_SYNC) |
170 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) | 172 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cdac1676b59..bd079a1b0fdc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1019,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1019 | struct page **pages, struct vm_area_struct **vmas); | 1019 | struct page **pages, struct vm_area_struct **vmas); |
1020 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | 1020 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, |
1021 | struct page **pages); | 1021 | struct page **pages); |
1022 | struct kvec; | ||
1023 | int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, | ||
1024 | struct page **pages); | ||
1025 | int get_kernel_page(unsigned long start, int write, struct page **pages); | ||
1022 | struct page *get_dump_page(unsigned long addr); | 1026 | struct page *get_dump_page(unsigned long addr); |
1023 | 1027 | ||
1024 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); | 1028 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -236,6 +236,59 @@ void put_pages_list(struct list_head *pages) | |||
236 | } | 236 | } |
237 | EXPORT_SYMBOL(put_pages_list); | 237 | EXPORT_SYMBOL(put_pages_list); |
238 | 238 | ||
239 | /* | ||
240 | * get_kernel_pages() - pin kernel pages in memory | ||
241 | * @kiov: An array of struct kvec structures | ||
242 | * @nr_segs: number of segments to pin | ||
243 | * @write: pinning for read/write, currently ignored | ||
244 | * @pages: array that receives pointers to the pages pinned. | ||
245 | * Should be at least nr_segs long. | ||
246 | * | ||
247 | * Returns number of pages pinned. This may be fewer than the number | ||
248 | * requested. If nr_segs is 0 or negative, returns 0. If no pages | ||
249 | * were pinned, returns -errno. Each page returned must be released | ||
250 | * with a put_page() call when it is finished with. | ||
251 | */ | ||
252 | int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write, | ||
253 | struct page **pages) | ||
254 | { | ||
255 | int seg; | ||
256 | |||
257 | for (seg = 0; seg < nr_segs; seg++) { | ||
258 | if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE)) | ||
259 | return seg; | ||
260 | |||
261 | /* virt_to_page sanity checks the PFN */ | ||
262 | pages[seg] = virt_to_page(kiov[seg].iov_base); | ||
263 | page_cache_get(pages[seg]); | ||
264 | } | ||
265 | |||
266 | return seg; | ||
267 | } | ||
268 | EXPORT_SYMBOL_GPL(get_kernel_pages); | ||
269 | |||
270 | /* | ||
271 | * get_kernel_page() - pin a kernel page in memory | ||
272 | * @start: starting kernel address | ||
273 | * @write: pinning for read/write, currently ignored | ||
274 | * @pages: array that receives pointer to the page pinned. | ||
275 | * Must have room for at least one entry. | ||
276 | * | ||
277 | * Returns 1 if page is pinned. If the page was not pinned, returns | ||
278 | * -errno. The page returned must be released with a put_page() call | ||
279 | * when it is finished with. | ||
280 | */ | ||
281 | int get_kernel_page(unsigned long start, int write, struct page **pages) | ||
282 | { | ||
283 | const struct kvec kiov = { | ||
284 | .iov_base = (void *)start, | ||
285 | .iov_len = PAGE_SIZE | ||
286 | }; | ||
287 | |||
288 | return get_kernel_pages(&kiov, 1, write, pages); | ||
289 | } | ||
290 | EXPORT_SYMBOL_GPL(get_kernel_page); | ||
291 | |||
239 | static void pagevec_lru_move_fn(struct pagevec *pvec, | 292 | static void pagevec_lru_move_fn(struct pagevec *pvec, |
240 | void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), | 293 | void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), |
241 | void *arg) | 294 | void *arg) |