diff options
-rw-r--r-- | fs/aio.c | 119 | ||||
-rw-r--r-- | include/linux/migrate.h | 3 | ||||
-rw-r--r-- | mm/migrate.c | 2 |
3 files changed, 112 insertions, 12 deletions
@@ -35,6 +35,9 @@ | |||
35 | #include <linux/eventfd.h> | 35 | #include <linux/eventfd.h> |
36 | #include <linux/blkdev.h> | 36 | #include <linux/blkdev.h> |
37 | #include <linux/compat.h> | 37 | #include <linux/compat.h> |
38 | #include <linux/anon_inodes.h> | ||
39 | #include <linux/migrate.h> | ||
40 | #include <linux/ramfs.h> | ||
38 | 41 | ||
39 | #include <asm/kmap_types.h> | 42 | #include <asm/kmap_types.h> |
40 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
@@ -110,6 +113,7 @@ struct kioctx { | |||
110 | } ____cacheline_aligned_in_smp; | 113 | } ____cacheline_aligned_in_smp; |
111 | 114 | ||
112 | struct page *internal_pages[AIO_RING_PAGES]; | 115 | struct page *internal_pages[AIO_RING_PAGES]; |
116 | struct file *aio_ring_file; | ||
113 | }; | 117 | }; |
114 | 118 | ||
115 | /*------ sysctl variables----*/ | 119 | /*------ sysctl variables----*/ |
@@ -138,15 +142,78 @@ __initcall(aio_setup); | |||
138 | 142 | ||
/*
 * aio_free_ring() - release the ring pages and the backing file of a kioctx.
 *
 * This block is a side-by-side diff: rows carrying two line numbers show the
 * old text followed by the new text; rows with one number are pure additions
 * or removals.
 *
 * In the patched version the function:
 *   - calls put_page() on each of the ctx->nr_pages entries of
 *     ctx->ring_pages (logging the page count via pr_debug() first),
 *   - kfree()s ctx->ring_pages when it is not the internal_pages array
 *     embedded in the kioctx,
 *   - if ctx->aio_ring_file is set, truncates its inode to size 0, drops the
 *     file reference with fput() and clears ctx->aio_ring_file.
 */
139 | static void aio_free_ring(struct kioctx *ctx) | 143 | static void aio_free_ring(struct kioctx *ctx) |
140 | { | 144 | { |
141 | long i; | 145 | int i; |
146 | struct file *aio_ring_file = ctx->aio_ring_file; | |
142 | 147 | |
143 | for (i = 0; i < ctx->nr_pages; i++) | 148 | for (i = 0; i < ctx->nr_pages; i++) { |
149 | pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, | |
150 | page_count(ctx->ring_pages[i])); | |
144 | put_page(ctx->ring_pages[i]); | 151 | put_page(ctx->ring_pages[i]); |
152 | } | |
145 | 153 | |
/* internal_pages lives inside the kioctx itself, so only a separately allocated array is freed. */
146 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) | 154 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) |
147 | kfree(ctx->ring_pages); | 155 | kfree(ctx->ring_pages); |
156 | |||
/*
 * Added by this patch: tear down the ring's backing file, if any.
 * NOTE(review): truncate_setsize(..., 0) presumably evicts the ring pages
 * from the file's page cache before the final fput() - confirm.
 */
157 | if (aio_ring_file) { | |
158 | truncate_setsize(aio_ring_file->f_inode, 0); | |
159 | pr_debug("pid(%d) i_nlink=%u d_count=%d d_unhashed=%d i_count=%d\n", | |
160 | current->pid, aio_ring_file->f_inode->i_nlink, | |
161 | aio_ring_file->f_path.dentry->d_count, | |
162 | d_unhashed(aio_ring_file->f_path.dentry), | |
163 | atomic_read(&aio_ring_file->f_inode->i_count)); | |
164 | fput(aio_ring_file); | |
/* Clear the pointer so later checks of ctx->aio_ring_file see no file. */
165 | ctx->aio_ring_file = NULL; | |
166 | } | |
148 | } | 167 | } |
149 | 168 | ||
/*
 * aio_ring_mmap() - .mmap handler of the ring's backing file (new in this patch).
 *
 * Installs generic_file_vm_ops as the VMA's operations and returns 0.
 * The @file argument is not used.
 */
169 | static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) | |
170 | { | |
171 | vma->vm_ops = &generic_file_vm_ops; | |
172 | return 0; | |
173 | } | |
174 | |||
/*
 * File operations for the ring's backing file (new in this patch).
 * Only .mmap is provided; aio_setup_ring() passes this table to
 * anon_inode_getfile_private() when it creates the "[aio]" file.
 */
175 | static const struct file_operations aio_ring_fops = { | |
176 | .mmap = aio_ring_mmap, | |
177 | }; | |
178 | |||
/*
 * aio_set_page_dirty() - .set_page_dirty hook for the ring mapping (new in
 * this patch).
 *
 * Does nothing with @page and always returns 0.
 * NOTE(review): presumably a deliberate no-op because ring pages have no
 * backing store to write back - confirm.
 */
179 | static int aio_set_page_dirty(struct page *page) | |
180 | { | |
181 | return 0; | |
182 | } | |
183 | |||
/*
 * aio_migratepage() - .migratepage hook for the ring mapping (new in this
 * patch).
 *
 * @mapping: address space of the ring file; its private_data holds the kioctx.
 * @new:     page that will replace @old.
 * @old:     ring page being migrated; its ->index selects the ring_pages slot.
 * @mode:    passed through to migrate_page_move_mapping().
 *
 * Returns the result of migrate_page_move_mapping(): MIGRATEPAGE_SUCCESS
 * once ctx->ring_pages[idx] points at @new, otherwise the failure code with
 * the reference on @old restored.
 *
 * NOTE(review): idx comes straight from old->index and is not range-checked
 * against the size of ctx->ring_pages - confirm it cannot be out of bounds.
 * NOTE(review): ctx is read from mapping->private_data without any visible
 * synchronisation against context teardown - confirm this is safe.
 */
184 | static int aio_migratepage(struct address_space *mapping, struct page *new, | |
185 | struct page *old, enum migrate_mode mode) | |
186 | { | |
187 | struct kioctx *ctx = mapping->private_data; | |
188 | unsigned long flags; | |
189 | unsigned idx = old->index; | |
190 | int rc; | |
191 | |||
192 | /* Writeback must be complete */ | |
193 | BUG_ON(PageWriteback(old)); | |
/*
 * Drop one reference on the old page before moving the mapping.
 * NOTE(review): presumably needed so the page's refcount matches what
 * migrate_page_move_mapping() expects - confirm.
 */
194 | put_page(old); | |
195 | |||
196 | rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); | |
197 | if (rc != MIGRATEPAGE_SUCCESS) { | |
/* Migration did not happen: re-take the reference dropped above. */
198 | get_page(old); | |
199 | return rc; | |
200 | } | |
201 | |||
/* Take a reference on the new page, mirroring the one dropped on the old page. */
202 | get_page(new); | |
203 | |||
/* Copy the page and swap the ring_pages slot while holding completion_lock with interrupts saved. */
204 | spin_lock_irqsave(&ctx->completion_lock, flags); | |
205 | migrate_page_copy(new, old); | |
206 | ctx->ring_pages[idx] = new; | |
207 | spin_unlock_irqrestore(&ctx->completion_lock, flags); | |
208 | |||
209 | return rc; | |
210 | } | |
211 | |||
/*
 * Address-space operations for the ring file's mapping (new in this patch).
 * aio_setup_ring() assigns this table to file->f_inode->i_mapping->a_ops.
 * It supplies the no-op dirty hook and the page-migration hook defined in
 * this file.
 */
212 | static const struct address_space_operations aio_ctx_aops = { | |
213 | .set_page_dirty = aio_set_page_dirty, | |
214 | .migratepage = aio_migratepage, | |
215 | }; | |
216 | |||
150 | static int aio_setup_ring(struct kioctx *ctx) | 217 | static int aio_setup_ring(struct kioctx *ctx) |
151 | { | 218 | { |
152 | struct aio_ring *ring; | 219 | struct aio_ring *ring; |
@@ -154,20 +221,45 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
154 | struct mm_struct *mm = current->mm; | 221 | struct mm_struct *mm = current->mm; |
155 | unsigned long size, populate; | 222 | unsigned long size, populate; |
156 | int nr_pages; | 223 | int nr_pages; |
224 | int i; | ||
225 | struct file *file; | ||
157 | 226 | ||
158 | /* Compensate for the ring buffer's head/tail overlap entry */ | 227 | /* Compensate for the ring buffer's head/tail overlap entry */ |
159 | nr_events += 2; /* 1 is required, 2 for good luck */ | 228 | nr_events += 2; /* 1 is required, 2 for good luck */ |
160 | 229 | ||
161 | size = sizeof(struct aio_ring); | 230 | size = sizeof(struct aio_ring); |
162 | size += sizeof(struct io_event) * nr_events; | 231 | size += sizeof(struct io_event) * nr_events; |
163 | nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT; | ||
164 | 232 | ||
233 | nr_pages = PFN_UP(size); | ||
165 | if (nr_pages < 0) | 234 | if (nr_pages < 0) |
166 | return -EINVAL; | 235 | return -EINVAL; |
167 | 236 | ||
168 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); | 237 | file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); |
238 | if (IS_ERR(file)) { | ||
239 | ctx->aio_ring_file = NULL; | ||
240 | return -EAGAIN; | ||
241 | } | ||
242 | |||
243 | file->f_inode->i_mapping->a_ops = &aio_ctx_aops; | ||
244 | file->f_inode->i_mapping->private_data = ctx; | ||
245 | file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; | ||
246 | |||
247 | for (i = 0; i < nr_pages; i++) { | ||
248 | struct page *page; | ||
249 | page = find_or_create_page(file->f_inode->i_mapping, | ||
250 | i, GFP_HIGHUSER | __GFP_ZERO); | ||
251 | if (!page) | ||
252 | break; | ||
253 | pr_debug("pid(%d) page[%d]->count=%d\n", | ||
254 | current->pid, i, page_count(page)); | ||
255 | SetPageUptodate(page); | ||
256 | SetPageDirty(page); | ||
257 | unlock_page(page); | ||
258 | } | ||
259 | ctx->aio_ring_file = file; | ||
260 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) | ||
261 | / sizeof(struct io_event); | ||
169 | 262 | ||
170 | ctx->nr_events = 0; | ||
171 | ctx->ring_pages = ctx->internal_pages; | 263 | ctx->ring_pages = ctx->internal_pages; |
172 | if (nr_pages > AIO_RING_PAGES) { | 264 | if (nr_pages > AIO_RING_PAGES) { |
173 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | 265 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), |
@@ -178,28 +270,31 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
178 | 270 | ||
179 | ctx->mmap_size = nr_pages * PAGE_SIZE; | 271 | ctx->mmap_size = nr_pages * PAGE_SIZE; |
180 | pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size); | 272 | pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size); |
273 | |||
181 | down_write(&mm->mmap_sem); | 274 | down_write(&mm->mmap_sem); |
182 | ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size, | 275 | ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, |
183 | PROT_READ|PROT_WRITE, | 276 | PROT_READ | PROT_WRITE, |
184 | MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate); | 277 | MAP_SHARED | MAP_POPULATE, 0, &populate); |
185 | if (IS_ERR((void *)ctx->mmap_base)) { | 278 | if (IS_ERR((void *)ctx->mmap_base)) { |
186 | up_write(&mm->mmap_sem); | 279 | up_write(&mm->mmap_sem); |
187 | ctx->mmap_size = 0; | 280 | ctx->mmap_size = 0; |
188 | aio_free_ring(ctx); | 281 | aio_free_ring(ctx); |
189 | return -EAGAIN; | 282 | return -EAGAIN; |
190 | } | 283 | } |
284 | up_write(&mm->mmap_sem); | ||
285 | |||
286 | mm_populate(ctx->mmap_base, populate); | ||
191 | 287 | ||
192 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); | 288 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); |
193 | ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, | 289 | ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, |
194 | 1, 0, ctx->ring_pages, NULL); | 290 | 1, 0, ctx->ring_pages, NULL); |
195 | up_write(&mm->mmap_sem); | 291 | for (i = 0; i < ctx->nr_pages; i++) |
292 | put_page(ctx->ring_pages[i]); | ||
196 | 293 | ||
197 | if (unlikely(ctx->nr_pages != nr_pages)) { | 294 | if (unlikely(ctx->nr_pages != nr_pages)) { |
198 | aio_free_ring(ctx); | 295 | aio_free_ring(ctx); |
199 | return -EAGAIN; | 296 | return -EAGAIN; |
200 | } | 297 | } |
201 | if (populate) | ||
202 | mm_populate(ctx->mmap_base, populate); | ||
203 | 298 | ||
204 | ctx->user_id = ctx->mmap_base; | 299 | ctx->user_id = ctx->mmap_base; |
205 | ctx->nr_events = nr_events; /* trusted copy */ | 300 | ctx->nr_events = nr_events; /* trusted copy */ |
@@ -399,6 +494,8 @@ out_cleanup: | |||
399 | err = -EAGAIN; | 494 | err = -EAGAIN; |
400 | aio_free_ring(ctx); | 495 | aio_free_ring(ctx); |
401 | out_freectx: | 496 | out_freectx: |
497 | if (ctx->aio_ring_file) | ||
498 | fput(ctx->aio_ring_file); | ||
402 | kmem_cache_free(kioctx_cachep, ctx); | 499 | kmem_cache_free(kioctx_cachep, ctx); |
403 | pr_debug("error allocating ioctx %d\n", err); | 500 | pr_debug("error allocating ioctx %d\n", err); |
404 | return ERR_PTR(err); | 501 | return ERR_PTR(err); |
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a405d3dc0f61..c407d88f5979 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -55,6 +55,9 @@ extern int migrate_vmas(struct mm_struct *mm, | |||
55 | extern void migrate_page_copy(struct page *newpage, struct page *page); | 55 | extern void migrate_page_copy(struct page *newpage, struct page *page); |
56 | extern int migrate_huge_page_move_mapping(struct address_space *mapping, | 56 | extern int migrate_huge_page_move_mapping(struct address_space *mapping, |
57 | struct page *newpage, struct page *page); | 57 | struct page *newpage, struct page *page); |
/*
 * Declaration added by this patch: the mm/migrate.c hunk of the same patch
 * drops "static" from migrate_page_move_mapping(), and aio_migratepage() in
 * fs/aio.c calls it.
 * NOTE(review): this declaration sits before the "#else" of a conditional
 * block, and no stub for the other branch is visible in this hunk - confirm
 * that callers still build when that branch is selected.
 */
58 | extern int migrate_page_move_mapping(struct address_space *mapping, | |
59 | struct page *newpage, struct page *page, | |
60 | struct buffer_head *head, enum migrate_mode mode); | |
58 | #else | 61 | #else |
59 | 62 | ||
60 | static inline void putback_lru_pages(struct list_head *l) {} | 63 | static inline void putback_lru_pages(struct list_head *l) {} |
diff --git a/mm/migrate.c b/mm/migrate.c index 6f0c24438bba..1da0092561a4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -307,7 +307,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, | |||
307 | * 2 for pages with a mapping | 307 | * 2 for pages with a mapping |
308 | * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. | 308 | * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. |
309 | */ | 309 | */ |
310 | static int migrate_page_move_mapping(struct address_space *mapping, | 310 | int migrate_page_move_mapping(struct address_space *mapping, |
311 | struct page *newpage, struct page *page, | 311 | struct page *newpage, struct page *page, |
312 | struct buffer_head *head, enum migrate_mode mode) | 312 | struct buffer_head *head, enum migrate_mode mode) |
313 | { | 313 | { |