Diffstat (limited to 'mm/shmem.c')
 -rw-r--r--  mm/shmem.c | 1813 ++++++++++++++++++++-----------------------------
 1 file changed, 667 insertions(+), 1146 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 01c19c62d685..2d3577295298 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -6,7 +6,8 @@
  * 2000-2001 Christoph Rohland
  * 2000-2001 SAP AG
  * 2002 Red Hat Inc.
- * Copyright (C) 2002-2005 Hugh Dickins.
+ * Copyright (C) 2002-2011 Hugh Dickins.
+ * Copyright (C) 2011 Google Inc.
  * Copyright (C) 2002-2005 VERITAS Software Corporation.
  * Copyright (C) 2004 Andi Kleen, SuSE Labs
  *
@@ -28,7 +29,6 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/percpu_counter.h>
 #include <linux/swap.h>
 
 static struct vfsmount *shm_mnt;
@@ -51,6 +51,9 @@ static struct vfsmount *shm_mnt;
 #include <linux/shmem_fs.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/pagevec.h>
+#include <linux/percpu_counter.h>
+#include <linux/splice.h>
 #include <linux/security.h>
 #include <linux/swapops.h>
 #include <linux/mempolicy.h>
@@ -62,43 +65,17 @@ static struct vfsmount *shm_mnt;
 #include <linux/magic.h>
 
 #include <asm/uaccess.h>
-#include <asm/div64.h>
 #include <asm/pgtable.h>
 
-/*
- * The maximum size of a shmem/tmpfs file is limited by the maximum size of
- * its triple-indirect swap vector - see illustration at shmem_swp_entry().
- *
- * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
- * but one eighth of that on a 64-bit kernel.  With 8kB page size, maximum
- * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
- * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
- *
- * We use / and * instead of shifts in the definitions below, so that the swap
- * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
- */
-#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-
-#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
-
-#define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
-#define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
-
 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
-/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
-#define SHMEM_PAGEIN	 VM_READ
-#define SHMEM_TRUNCATE	 VM_WRITE
-
-/* Definition to limit shmem_truncate's steps between cond_rescheds */
-#define LATENCY_LIMIT	 64
-
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
+#define SHORT_SYMLINK_LEN 128
+
 struct shmem_xattr {
 	struct list_head list;	/* anchored by shmem_inode_info->xattr_list */
 	char *name;		/* xattr name */
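
For reference, the arithmetic behind the deleted SHMSWP_MAX_BYTES limits can be
checked with a small standalone sketch (illustrative only, not from the patch;
4kB pages and SHMEM_NR_DIRECT == 16 assumed, matching the removed comment):

#include <stdio.h>

int main(void)
{
	unsigned long long page = 4096, nr_direct = 16;

	/* sizeof(unsigned long): 8 on 64-bit kernels, 4 on 32-bit */
	for (int ulsize = 8; ulsize >= 4; ulsize -= 4) {
		unsigned long long epp = page / ulsize;	/* ENTRIES_PER_PAGE */
		unsigned long long idx = nr_direct + (epp * epp / 2) * (epp + 1);

		printf("%d-byte long: max index %llu, ~%llu GiB\n",
		       ulsize, idx, (idx * page) >> 30);
	}
	return 0;	/* ~256 GiB on 64-bit, just over 2 TiB on 32-bit */
}

The radix-tree code that follows needs none of this: the only remaining bound
is MAX_LFS_FILESIZE, tested directly in shmem_getpage_gfp().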
@@ -106,7 +83,7 @@ struct shmem_xattr {
 	char value[0];
 };
 
-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
+/* Flag allocation requirements to shmem_getpage */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
@@ -126,57 +103,14 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			 struct page **pagep, enum sgp_type sgp, int *type);
-
-static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
-{
-	/*
-	 * The above definition of ENTRIES_PER_PAGE, and the use of
-	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
-	 * might be reconsidered if it ever diverges from PAGE_SIZE.
-	 *
-	 * Mobility flags are masked out as swap vectors cannot move
-	 */
-	return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
-				PAGE_CACHE_SHIFT-PAGE_SHIFT);
-}
-
-static inline void shmem_dir_free(struct page *page)
-{
-	__free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
-}
-
-static struct page **shmem_dir_map(struct page *page)
-{
-	return (struct page **)kmap_atomic(page, KM_USER0);
-}
-
-static inline void shmem_dir_unmap(struct page **dir)
-{
-	kunmap_atomic(dir, KM_USER0);
-}
-
-static swp_entry_t *shmem_swp_map(struct page *page)
-{
-	return (swp_entry_t *)kmap_atomic(page, KM_USER1);
-}
-
-static inline void shmem_swp_balance_unmap(void)
-{
-	/*
-	 * When passing a pointer to an i_direct entry, to code which
-	 * also handles indirect entries and so will shmem_swp_unmap,
-	 * we must arrange for the preempt count to remain in balance.
-	 * What kmap_atomic of a lowmem page does depends on config
-	 * and architecture, so pretend to kmap_atomic some lowmem page.
-	 */
-	(void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
-}
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
 
-static inline void shmem_swp_unmap(swp_entry_t *entry)
+static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, int *fault_type)
 {
-	kunmap_atomic(entry, KM_USER1);
+	return shmem_getpage_gfp(inode, index, pagep, sgp,
+			mapping_gfp_mask(inode->i_mapping), fault_type);
 }
 
 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
@@ -236,17 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
-static void shmem_free_blocks(struct inode *inode, long pages)
-{
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	if (sbinfo->max_blocks) {
-		percpu_counter_add(&sbinfo->used_blocks, -pages);
-		spin_lock(&inode->i_lock);
-		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
-		spin_unlock(&inode->i_lock);
-	}
-}
-
 static int shmem_reserve_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -273,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb)
 }
 
 /**
- * shmem_recalc_inode - recalculate the size of an inode
+ * shmem_recalc_inode - recalculate the block usage of an inode
  * @inode: inode to recalc
  *
  * We have to calculate the free blocks since the mm can drop
@@ -291,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode)
 
 	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 	if (freed > 0) {
+		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -freed);
 		info->alloced -= freed;
+		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
 		shmem_unacct_blocks(info->flags, freed);
-		shmem_free_blocks(inode, freed);
 	}
 }
 
-/**
- * shmem_swp_entry - find the swap vector position in the info structure
- * @info:  info structure for the inode
- * @index: index of the page to find
- * @page:  optional page to add to the structure. Has to be preset to
- *         all zeros
- *
- * If there is no space allocated yet it will return NULL when
- * page is NULL, else it will use the page for the needed block,
- * setting it to NULL on return to indicate that it has been used.
- *
- * The swap vector is organized the following way:
- *
- * There are SHMEM_NR_DIRECT entries directly stored in the
- * shmem_inode_info structure. So small files do not need an addional
- * allocation.
- *
- * For pages with index > SHMEM_NR_DIRECT there is the pointer
- * i_indirect which points to a page which holds in the first half
- * doubly indirect blocks, in the second half triple indirect blocks:
- *
- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
- * following layout (for SHMEM_NR_DIRECT == 16):
- *
- * i_indirect -> dir --> 16-19
- *            |      +-> 20-23
- *            |
- *            +-->dir2 --> 24-27
- *            |       +-> 28-31
- *            |       +-> 32-35
- *            |       +-> 36-39
- *            |
- *            +-->dir3 --> 40-43
- *                    +-> 44-47
- *                    +-> 48-51
- *                    +-> 52-55
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
  */
-static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
-{
-	unsigned long offset;
-	struct page **dir;
-	struct page *subdir;
+static int shmem_radix_tree_replace(struct address_space *mapping,
+			pgoff_t index, void *expected, void *replacement)
+{
+	void **pslot;
+	void *item = NULL;
+
+	VM_BUG_ON(!expected);
+	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+	if (pslot)
+		item = radix_tree_deref_slot_protected(pslot,
+							&mapping->tree_lock);
+	if (item != expected)
+		return -ENOENT;
+	if (replacement)
+		radix_tree_replace_slot(pslot, replacement);
+	else
+		radix_tree_delete(&mapping->page_tree, index);
+	return 0;
+}
 
-	if (index < SHMEM_NR_DIRECT) {
-		shmem_swp_balance_unmap();
-		return info->i_direct+index;
-	}
-	if (!info->i_indirect) {
-		if (page) {
-			info->i_indirect = *page;
-			*page = NULL;
-		}
-		return NULL;			/* need another page */
-	}
+/*
+ * Like add_to_page_cache_locked, but error if expected item has gone.
+ */
+static int shmem_add_to_page_cache(struct page *page,
+				   struct address_space *mapping,
+				   pgoff_t index, gfp_t gfp, void *expected)
+{
+	int error = 0;
 
-	index -= SHMEM_NR_DIRECT;
-	offset = index % ENTRIES_PER_PAGE;
-	index /= ENTRIES_PER_PAGE;
-	dir = shmem_dir_map(info->i_indirect);
-
-	if (index >= ENTRIES_PER_PAGE/2) {
-		index -= ENTRIES_PER_PAGE/2;
-		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
-		index %= ENTRIES_PER_PAGE;
-		subdir = *dir;
-		if (!subdir) {
-			if (page) {
-				*dir = *page;
-				*page = NULL;
-			}
-			shmem_dir_unmap(dir);
-			return NULL;		/* need another page */
-		}
-		shmem_dir_unmap(dir);
-		dir = shmem_dir_map(subdir);
-	}
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageSwapBacked(page));
 
-	dir += index;
-	subdir = *dir;
-	if (!subdir) {
-		if (!page || !(subdir = *page)) {
-			shmem_dir_unmap(dir);
-			return NULL;		/* need a page */
+	if (!expected)
+		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+	if (!error) {
+		page_cache_get(page);
+		page->mapping = mapping;
+		page->index = index;
+
+		spin_lock_irq(&mapping->tree_lock);
+		if (!expected)
+			error = radix_tree_insert(&mapping->page_tree,
+							index, page);
+		else
+			error = shmem_radix_tree_replace(mapping, index,
+							expected, page);
+		if (!error) {
+			mapping->nrpages++;
+			__inc_zone_page_state(page, NR_FILE_PAGES);
+			__inc_zone_page_state(page, NR_SHMEM);
+			spin_unlock_irq(&mapping->tree_lock);
+		} else {
+			page->mapping = NULL;
+			spin_unlock_irq(&mapping->tree_lock);
+			page_cache_release(page);
 		}
-		*dir = subdir;
-		*page = NULL;
+		if (!expected)
+			radix_tree_preload_end();
 	}
-	shmem_dir_unmap(dir);
-	return shmem_swp_map(subdir) + offset;
+	if (error)
+		mem_cgroup_uncharge_cache_page(page);
+	return error;
 }
 
-static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
+/*
+ * Like delete_from_page_cache, but substitutes swap for page.
+ */
+static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 {
-	long incdec = value? 1: -1;
+	struct address_space *mapping = page->mapping;
+	int error;
 
-	entry->val = value;
-	info->swapped += incdec;
-	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
-		struct page *page = kmap_atomic_to_page(entry);
-		set_page_private(page, page_private(page) + incdec);
-	}
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+	page->mapping = NULL;
+	mapping->nrpages--;
+	__dec_zone_page_state(page, NR_FILE_PAGES);
+	__dec_zone_page_state(page, NR_SHMEM);
+	spin_unlock_irq(&mapping->tree_lock);
+	page_cache_release(page);
+	BUG_ON(error);
 }
 
-/**
- * shmem_swp_alloc - get the position of the swap entry for the page.
- * @info:	info structure for the inode
- * @index:	index of the page to find
- * @sgp:	check and recheck i_size? skip allocation?
- *
- * If the entry does not exist, allocate it.
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
  */
-static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
-{
-	struct inode *inode = &info->vfs_inode;
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	struct page *page = NULL;
-	swp_entry_t *entry;
-
-	if (sgp != SGP_WRITE &&
-	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-		return ERR_PTR(-EINVAL);
-
-	while (!(entry = shmem_swp_entry(info, index, &page))) {
-		if (sgp == SGP_READ)
-			return shmem_swp_map(ZERO_PAGE(0));
-		/*
-		 * Test used_blocks against 1 less max_blocks, since we have 1 data
-		 * page (and perhaps indirect index pages) yet to allocate:
-		 * a waste to allocate index if we cannot allocate data.
-		 */
-		if (sbinfo->max_blocks) {
-			if (percpu_counter_compare(&sbinfo->used_blocks,
-						sbinfo->max_blocks - 1) >= 0)
-				return ERR_PTR(-ENOSPC);
-			percpu_counter_inc(&sbinfo->used_blocks);
-			spin_lock(&inode->i_lock);
-			inode->i_blocks += BLOCKS_PER_PAGE;
-			spin_unlock(&inode->i_lock);
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+					pgoff_t start, unsigned int nr_pages,
+					struct page **pages, pgoff_t *indices)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, indices, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto restart;
+			/*
+			 * Otherwise, we must be storing a swap entry
+			 * here as an exceptional entry: so return it
+			 * without attempting to raise page count.
+			 */
+			goto export;
 		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
 
-		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
-		spin_lock(&info->lock);
-
-		if (!page) {
-			shmem_free_blocks(inode, 1);
-			return ERR_PTR(-ENOMEM);
-		}
-		if (sgp != SGP_WRITE &&
-		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
-			entry = ERR_PTR(-EINVAL);
-			break;
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
 		}
-		if (info->next_index <= index)
-			info->next_index = index + 1;
-	}
-	if (page) {
-		/* another task gave its page, or truncated the file */
-		shmem_free_blocks(inode, 1);
-		shmem_dir_free(page);
-	}
-	if (info->next_index <= index && !IS_ERR(entry))
-		info->next_index = index + 1;
-	return entry;
+export:
+		indices[ret] = indices[i];
+		pages[ret] = page;
+		ret++;
+	}
+	if (unlikely(!ret && nr_found))
+		goto restart;
+	rcu_read_unlock();
+	return ret;
 }
 
-/**
- * shmem_free_swp - free some swap entries in a directory
- * @dir:	pointer to the directory
- * @edir:	pointer after last entry of the directory
- * @punch_lock: pointer to spinlock when needed for the holepunch case
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
-						spinlock_t *punch_lock)
+static int shmem_free_swap(struct address_space *mapping,
+			   pgoff_t index, void *radswap)
 {
-	spinlock_t *punch_unlock = NULL;
-	swp_entry_t *ptr;
-	int freed = 0;
-
-	for (ptr = dir; ptr < edir; ptr++) {
-		if (ptr->val) {
-			if (unlikely(punch_lock)) {
-				punch_unlock = punch_lock;
-				punch_lock = NULL;
-				spin_lock(punch_unlock);
-				if (!ptr->val)
-					continue;
-			}
-			free_swap_and_cache(*ptr);
-			*ptr = (swp_entry_t){0};
-			freed++;
-		}
-	}
-	if (punch_unlock)
-		spin_unlock(punch_unlock);
-	return freed;
-}
-
-static int shmem_map_and_free_swp(struct page *subdir, int offset,
-		int limit, struct page ***dir, spinlock_t *punch_lock)
-{
-	swp_entry_t *ptr;
-	int freed = 0;
-
-	ptr = shmem_swp_map(subdir);
-	for (; offset < limit; offset += LATENCY_LIMIT) {
-		int size = limit - offset;
-		if (size > LATENCY_LIMIT)
-			size = LATENCY_LIMIT;
-		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
-							punch_lock);
-		if (need_resched()) {
-			shmem_swp_unmap(ptr);
-			if (*dir) {
-				shmem_dir_unmap(*dir);
-				*dir = NULL;
-			}
-			cond_resched();
-			ptr = shmem_swp_map(subdir);
-		}
-	}
-	shmem_swp_unmap(ptr);
-	return freed;
+	int error;
+
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!error)
+		free_swap_and_cache(radix_to_swp_entry(radswap));
+	return error;
 }
 
-static void shmem_free_pages(struct list_head *next)
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
 {
-	struct page *page;
-	int freed = 0;
-
-	do {
-		page = container_of(next, struct page, lru);
-		next = next->next;
-		shmem_dir_free(page);
-		freed++;
-		if (freed >= LATENCY_LIMIT) {
-			cond_resched();
-			freed = 0;
-		}
-	} while (next);
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+	pagevec_release(pvec);
 }
 
-void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
+	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned long idx;
-	unsigned long size;
-	unsigned long limit;
-	unsigned long stage;
-	unsigned long diroff;
-	struct page **dir;
-	struct page *topdir;
-	struct page *middir;
-	struct page *subdir;
-	swp_entry_t *ptr;
-	LIST_HEAD(pages_to_free);
-	long nr_pages_to_free = 0;
+	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
 	long nr_swaps_freed = 0;
-	int offset;
-	int freed;
-	int punch_hole;
-	spinlock_t *needs_lock;
-	spinlock_t *punch_lock;
-	unsigned long upper_limit;
+	pgoff_t index;
+	int i;
 
-	truncate_inode_pages_range(inode->i_mapping, start, end);
+	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 
-	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (idx >= info->next_index)
-		return;
+	pagevec_init(&pvec, 0);
+	index = start;
+	while (index <= end) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		mem_cgroup_uncharge_start();
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
 
-	spin_lock(&info->lock);
-	info->flags |= SHMEM_TRUNCATE;
-	if (likely(end == (loff_t) -1)) {
-		limit = info->next_index;
-		upper_limit = SHMEM_MAX_INDEX;
-		info->next_index = idx;
-		needs_lock = NULL;
-		punch_hole = 0;
-	} else {
-		if (end + 1 >= inode->i_size) {	/* we may free a little more */
-			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
-							PAGE_CACHE_SHIFT;
-			upper_limit = SHMEM_MAX_INDEX;
-		} else {
-			limit = (end + 1) >> PAGE_CACHE_SHIFT;
-			upper_limit = limit;
-		}
-		needs_lock = &info->lock;
-		punch_hole = 1;
-	}
+			index = indices[i];
+			if (index > end)
+				break;
 
-	topdir = info->i_indirect;
-	if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
-		info->i_indirect = NULL;
-		nr_pages_to_free++;
-		list_add(&topdir->lru, &pages_to_free);
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
+			}
+
+			if (!trylock_page(page))
+				continue;
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
+			unlock_page(page);
+		}
+		shmem_pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
+		cond_resched();
+		index++;
 	}
-	spin_unlock(&info->lock);
 
-	if (info->swapped && idx < SHMEM_NR_DIRECT) {
-		ptr = info->i_direct;
-		size = limit;
-		if (size > SHMEM_NR_DIRECT)
-			size = SHMEM_NR_DIRECT;
-		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
+	if (partial) {
+		struct page *page = NULL;
+		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+		if (page) {
+			zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+			set_page_dirty(page);
+			unlock_page(page);
+			page_cache_release(page);
+		}
 	}
 
-	/*
-	 * If there are no indirect blocks or we are punching a hole
-	 * below indirect blocks, nothing to be done.
-	 */
-	if (!topdir || limit <= SHMEM_NR_DIRECT)
-		goto done2;
+	index = start;
+	for ( ; ; ) {
+		cond_resched();
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr) {
+			if (index == start)
+				break;
+			index = start;
+			continue;
+		}
+		if (index == start && indices[0] > end) {
+			shmem_pagevec_release(&pvec);
+			break;
+		}
+		mem_cgroup_uncharge_start();
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
 
-	/*
-	 * The truncation case has already dropped info->lock, and we're safe
-	 * because i_size and next_index have already been lowered, preventing
-	 * access beyond.  But in the punch_hole case, we still need to take
-	 * the lock when updating the swap directory, because there might be
-	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
-	 * shmem_writepage.  However, whenever we find we can remove a whole
-	 * directory page (not at the misaligned start or end of the range),
-	 * we first NULLify its pointer in the level above, and then have no
-	 * need to take the lock when updating its contents: needs_lock and
-	 * punch_lock (either pointing to info->lock or NULL) manage this.
-	 */
+			index = indices[i];
+			if (index > end)
+				break;
 
-	upper_limit -= SHMEM_NR_DIRECT;
-	limit -= SHMEM_NR_DIRECT;
-	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
-	offset = idx % ENTRIES_PER_PAGE;
-	idx -= offset;
-
-	dir = shmem_dir_map(topdir);
-	stage = ENTRIES_PER_PAGEPAGE/2;
-	if (idx < ENTRIES_PER_PAGEPAGE/2) {
-		middir = topdir;
-		diroff = idx/ENTRIES_PER_PAGE;
-	} else {
-		dir += ENTRIES_PER_PAGE/2;
-		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
-		while (stage <= idx)
-			stage += ENTRIES_PER_PAGEPAGE;
-		middir = *dir;
-		if (*dir) {
-			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
-				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-			if (!diroff && !offset && upper_limit >= stage) {
-				if (needs_lock) {
-					spin_lock(needs_lock);
-					*dir = NULL;
-					spin_unlock(needs_lock);
-					needs_lock = NULL;
-				} else
-					*dir = NULL;
-				nr_pages_to_free++;
-				list_add(&middir->lru, &pages_to_free);
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
 			}
-			shmem_dir_unmap(dir);
-			dir = shmem_dir_map(middir);
-		} else {
-			diroff = 0;
-			offset = 0;
-			idx = stage;
-		}
-	}
 
-	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
-		if (unlikely(idx == stage)) {
-			shmem_dir_unmap(dir);
-			dir = shmem_dir_map(topdir) +
-			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
-			while (!*dir) {
-				dir++;
-				idx += ENTRIES_PER_PAGEPAGE;
-				if (idx >= limit)
-					goto done1;
+			lock_page(page);
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
 			}
-			stage = idx + ENTRIES_PER_PAGEPAGE;
-			middir = *dir;
-			if (punch_hole)
-				needs_lock = &info->lock;
-			if (upper_limit >= stage) {
-				if (needs_lock) {
-					spin_lock(needs_lock);
-					*dir = NULL;
-					spin_unlock(needs_lock);
-					needs_lock = NULL;
-				} else
-					*dir = NULL;
-				nr_pages_to_free++;
-				list_add(&middir->lru, &pages_to_free);
-			}
-			shmem_dir_unmap(dir);
-			cond_resched();
-			dir = shmem_dir_map(middir);
-			diroff = 0;
-		}
-		punch_lock = needs_lock;
-		subdir = dir[diroff];
-		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
-			if (needs_lock) {
-				spin_lock(needs_lock);
-				dir[diroff] = NULL;
-				spin_unlock(needs_lock);
-				punch_lock = NULL;
-			} else
-				dir[diroff] = NULL;
-			nr_pages_to_free++;
-			list_add(&subdir->lru, &pages_to_free);
-		}
-		if (subdir && page_private(subdir) /* has swap entries */) {
-			size = limit - idx;
-			if (size > ENTRIES_PER_PAGE)
-				size = ENTRIES_PER_PAGE;
-			freed = shmem_map_and_free_swp(subdir,
-					offset, size, &dir, punch_lock);
-			if (!dir)
-				dir = shmem_dir_map(middir);
-			nr_swaps_freed += freed;
-			if (offset || punch_lock) {
-				spin_lock(&info->lock);
-				set_page_private(subdir,
-					page_private(subdir) - freed);
-				spin_unlock(&info->lock);
-			} else
-				BUG_ON(page_private(subdir) != freed);
+			unlock_page(page);
 		}
-		offset = 0;
-	}
-done1:
-	shmem_dir_unmap(dir);
-done2:
-	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
-		/*
-		 * Call truncate_inode_pages again: racing shmem_unuse_inode
-		 * may have swizzled a page in from swap since
-		 * truncate_pagecache or generic_delete_inode did it, before we
-		 * lowered next_index.  Also, though shmem_getpage checks
-		 * i_size before adding to cache, no recheck after: so fix the
-		 * narrow window there too.
-		 */
-		truncate_inode_pages_range(inode->i_mapping, start, end);
+		shmem_pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
+		index++;
 	}
 
 	spin_lock(&info->lock);
-	info->flags &= ~SHMEM_TRUNCATE;
 	info->swapped -= nr_swaps_freed;
-	if (nr_pages_to_free)
-		shmem_free_blocks(inode, nr_pages_to_free);
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 
-	/*
-	 * Empty swap vector directory pages to be freed?
-	 */
-	if (!list_empty(&pages_to_free)) {
-		pages_to_free.prev->next = NULL;
-		shmem_free_pages(pages_to_free.next);
-	}
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
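The hunks above hinge on one trick: a page-cache slot can now hold either a
struct page pointer or a swap entry encoded as a radix-tree "exceptional"
entry, which is what makes the per-inode swap vector redundant. A sketch of
the encoding (the real helpers are swp_to_radix_entry()/radix_to_swp_entry()
in <linux/swapops.h>, added by this series; the names below are illustrative):

static inline void *swp_to_radix(swp_entry_t entry)
{
	/* low bit 1 (RADIX_TREE_EXCEPTIONAL_ENTRY) marks a non-page value */
	return (void *)((entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT) |
			RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static inline swp_entry_t radix_to_swp(void *arg)
{
	swp_entry_t entry;

	entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
	return entry;
}

Because the marker lives in the low bits of the slot value,
radix_tree_exceptional_entry() can tell the two cases apart without
dereferencing anything.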
@@ -774,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
 		loff_t oldsize = inode->i_size;
 		loff_t newsize = attr->ia_size;
-		struct page *page = NULL;
 
-		if (newsize < oldsize) {
-			/*
-			 * If truncating down to a partial page, then
-			 * if that page is already allocated, hold it
-			 * in memory until the truncation is over, so
-			 * truncate_partial_page cannot miss it were
-			 * it assigned to swap.
-			 */
-			if (newsize & (PAGE_CACHE_SIZE-1)) {
-				(void) shmem_getpage(inode,
-					newsize >> PAGE_CACHE_SHIFT,
-						&page, SGP_READ, NULL);
-				if (page)
-					unlock_page(page);
-			}
-			/*
-			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
-			 * detect if any pages might have been added to cache
-			 * after truncate_inode_pages.  But we needn't bother
-			 * if it's being fully truncated to zero-length: the
-			 * nrpages check is efficient enough in that case.
-			 */
-			if (newsize) {
-				struct shmem_inode_info *info = SHMEM_I(inode);
-				spin_lock(&info->lock);
-				info->flags &= ~SHMEM_PAGEIN;
-				spin_unlock(&info->lock);
-			}
-		}
 		if (newsize != oldsize) {
 			i_size_write(inode, newsize);
 			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -816,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 			/* unmap again to remove racily COWed private pages */
 			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
 		}
-		if (page)
-			page_cache_release(page);
 	}
 
 	setattr_copy(inode, attr);
@@ -842,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode)
 			list_del_init(&info->swaplist);
 			mutex_unlock(&shmem_swaplist_mutex);
 		}
-	}
+	} else
+		kfree(info->symlink);
 
 	list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
 		kfree(xattr->name);
@@ -853,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode)
 	end_writeback(inode);
 }
 
-static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
-{
-	swp_entry_t *ptr;
-
-	for (ptr = dir; ptr < edir; ptr++) {
-		if (ptr->val == entry.val)
-			return ptr - dir;
-	}
-	return -1;
-}
-
-static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
+static int shmem_unuse_inode(struct shmem_inode_info *info,
+			     swp_entry_t swap, struct page *page)
 {
-	struct address_space *mapping;
-	unsigned long idx;
-	unsigned long size;
-	unsigned long limit;
-	unsigned long stage;
-	struct page **dir;
-	struct page *subdir;
-	swp_entry_t *ptr;
-	int offset;
+	struct address_space *mapping = info->vfs_inode.i_mapping;
+	void *radswap;
+	pgoff_t index;
 	int error;
 
-	idx = 0;
-	ptr = info->i_direct;
-	spin_lock(&info->lock);
-	if (!info->swapped) {
-		list_del_init(&info->swaplist);
-		goto lost2;
-	}
-	limit = info->next_index;
-	size = limit;
-	if (size > SHMEM_NR_DIRECT)
-		size = SHMEM_NR_DIRECT;
-	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0) {
-		shmem_swp_balance_unmap();
-		goto found;
-	}
-	if (!info->i_indirect)
-		goto lost2;
-
-	dir = shmem_dir_map(info->i_indirect);
-	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
-
-	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
-		if (unlikely(idx == stage)) {
-			shmem_dir_unmap(dir-1);
-			if (cond_resched_lock(&info->lock)) {
-				/* check it has not been truncated */
-				if (limit > info->next_index) {
-					limit = info->next_index;
-					if (idx >= limit)
-						goto lost2;
-				}
-			}
-			dir = shmem_dir_map(info->i_indirect) +
-			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
-			while (!*dir) {
-				dir++;
-				idx += ENTRIES_PER_PAGEPAGE;
-				if (idx >= limit)
-					goto lost1;
-			}
-			stage = idx + ENTRIES_PER_PAGEPAGE;
-			subdir = *dir;
-			shmem_dir_unmap(dir);
-			dir = shmem_dir_map(subdir);
-		}
-		subdir = *dir;
-		if (subdir && page_private(subdir)) {
-			ptr = shmem_swp_map(subdir);
-			size = limit - idx;
-			if (size > ENTRIES_PER_PAGE)
-				size = ENTRIES_PER_PAGE;
-			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
-			if (offset >= 0) {
-				shmem_dir_unmap(dir);
-				ptr = shmem_swp_map(subdir);
-				goto found;
-			}
-		}
-	}
-lost1:
-	shmem_dir_unmap(dir-1);
-lost2:
-	spin_unlock(&info->lock);
-	return 0;
-found:
-	idx += offset;
-	ptr += offset;
+	radswap = swp_to_radix_entry(swap);
+	index = radix_tree_locate_item(&mapping->page_tree, radswap);
+	if (index == -1)
+		return 0;
 
 	/*
 	 * Move _head_ to start search for next from here.
 	 * But be careful: shmem_evict_inode checks list_empty without taking
 	 * mutex, and there's an instant in list_move_tail when info->swaplist
-	 * would appear empty, if it were the only one on shmem_swaplist.  We
-	 * could avoid doing it if inode NULL; or use this minor optimization.
+	 * would appear empty, if it were the only one on shmem_swaplist.
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
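The whole removed walk collapses into two calls. A minimal sketch of the new
lookup path (kernel context assumed; unuse_one() is a hypothetical name):

static pgoff_t unuse_one(struct address_space *mapping, swp_entry_t swap)
{
	void *radswap = swp_to_radix_entry(swap);

	/* scan the tree for a slot holding exactly this swap entry */
	return radix_tree_locate_item(&mapping->page_tree, radswap);
}

radix_tree_locate_item() (also added by this series) returns -1 when the entry
is not present, which is why shmem_unuse_inode() can now bail out before
taking any locks.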
@@ -962,42 +598,34 @@ found:
 	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
 	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	mapping = info->vfs_inode.i_mapping;
-	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	error = shmem_add_to_page_cache(page, mapping, index,
+						GFP_NOWAIT, radswap);
 	/* which does mem_cgroup_uncharge_cache_page on error */
 
-	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(mapping, idx);
-		error = 1;
-		if (filepage) {
-			/*
-			 * There might be a more uptodate page coming down
-			 * from a stacked writepage: forget our swappage if so.
-			 */
-			if (PageUptodate(filepage))
-				error = 0;
-			page_cache_release(filepage);
-		}
-	}
-	if (!error) {
+	if (error != -ENOMEM) {
+		/*
+		 * Truncation and eviction use free_swap_and_cache(), which
+		 * only does trylock page: if we raced, best clean up here.
+		 */
 		delete_from_swap_cache(page);
 		set_page_dirty(page);
-		info->flags |= SHMEM_PAGEIN;
-		shmem_swp_set(info, ptr, 0);
-		swap_free(entry);
+		if (!error) {
+			spin_lock(&info->lock);
+			info->swapped--;
+			spin_unlock(&info->lock);
+			swap_free(swap);
+		}
 		error = 1;	/* not an error, but entry was found */
 	}
-	shmem_swp_unmap(ptr);
-	spin_unlock(&info->lock);
 	return error;
 }
 
 /*
- * shmem_unuse() search for an eventually swapped out shmem page.
+ * Search through swapped inodes to find and replace swap by page.
  */
-int shmem_unuse(swp_entry_t entry, struct page *page)
+int shmem_unuse(swp_entry_t swap, struct page *page)
 {
-	struct list_head *p, *next;
+	struct list_head *this, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
 	int error;
@@ -1006,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	 * Charge page using GFP_KERNEL while we can wait, before taking
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
 	 */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
-	/*
-	 * Try to preload while we can wait, to not make a habit of
-	 * draining atomic reserves; but don't latch on to this cpu,
-	 * it's okay if sometimes we get rescheduled after this.
-	 */
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error)
-		goto uncharge;
-	radix_tree_preload_end();
+	/* No radix_tree_preload: swap entry keeps a place for page in tree */
 
 	mutex_lock(&shmem_swaplist_mutex);
-	list_for_each_safe(p, next, &shmem_swaplist) {
-		info = list_entry(p, struct shmem_inode_info, swaplist);
-		found = shmem_unuse_inode(info, entry, page);
+	list_for_each_safe(this, next, &shmem_swaplist) {
+		info = list_entry(this, struct shmem_inode_info, swaplist);
+		if (info->swapped)
+			found = shmem_unuse_inode(info, swap, page);
+		else
+			list_del_init(&info->swaplist);
 		cond_resched();
 		if (found)
 			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-uncharge:
 	if (!found)
 		mem_cgroup_uncharge_cache_page(page);
 	if (found < 0)
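Note why the preload dance could be deleted: when shmem_add_to_page_cache() is
given an expected swap entry, insertion is a slot replacement, so no radix-tree
nodes need allocating. A condensed, hedged summary of the two paths in the
function added earlier in this diff (locking and accounting omitted):

if (expected) {
	/* swap entry already holds the slot: replace in place, no preload */
	error = shmem_radix_tree_replace(mapping, index, expected, page);
} else {
	/* a fresh insert may need new tree nodes, so preload first */
	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
	if (!error) {
		error = radix_tree_insert(&mapping->page_tree, index, page);
		radix_tree_preload_end();
	}
}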
@@ -1048,10 +669,10 @@ out:
 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct shmem_inode_info *info;
-	swp_entry_t *entry, swap;
 	struct address_space *mapping;
-	unsigned long index;
 	struct inode *inode;
+	swp_entry_t swap;
+	pgoff_t index;
 
 	BUG_ON(!PageLocked(page));
 	mapping = page->mapping;
@@ -1066,69 +687,46 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	/*
 	 * shmem_backing_dev_info's capabilities prevent regular writeback or
 	 * sync from ever calling shmem_writepage; but a stacking filesystem
-	 * may use the ->writepage of its underlying filesystem, in which case
+	 * might use ->writepage of its underlying filesystem, in which case
 	 * tmpfs should write out to swap only in response to memory pressure,
-	 * and not for the writeback threads or sync. However, in those cases,
-	 * we do still want to check if there's a redundant swappage to be
-	 * discarded.
+	 * and not for the writeback threads or sync.
 	 */
-	if (wbc->for_reclaim)
-		swap = get_swap_page();
-	else
-		swap.val = 0;
+	if (!wbc->for_reclaim) {
+		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
+		goto redirty;
+	}
+	swap = get_swap_page();
+	if (!swap.val)
+		goto redirty;
 
 	/*
 	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
-	 * if it's not already there. Do it now because we cannot take
-	 * mutex while holding spinlock, and must do so before the page
-	 * is moved to swap cache, when its pagelock no longer protects
+	 * if it's not already there.  Do it now before the page is
+	 * moved to swap cache, when its pagelock no longer protects
 	 * the inode from eviction.  But don't unlock the mutex until
-	 * we've taken the spinlock, because shmem_unuse_inode() will
-	 * prune a !swapped inode from the swaplist under both locks.
+	 * we've incremented swapped, because shmem_unuse_inode() will
+	 * prune a !swapped inode from the swaplist under this mutex.
 	 */
-	if (swap.val) {
-		mutex_lock(&shmem_swaplist_mutex);
-		if (list_empty(&info->swaplist))
-			list_add_tail(&info->swaplist, &shmem_swaplist);
-	}
-
-	spin_lock(&info->lock);
-	if (swap.val)
-		mutex_unlock(&shmem_swaplist_mutex);
-
-	if (index >= info->next_index) {
-		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
-		goto unlock;
-	}
-	entry = shmem_swp_entry(info, index, NULL);
-	if (entry->val) {
-		/*
-		 * The more uptodate page coming down from a stacked
-		 * writepage should replace our old swappage.
-		 */
-		free_swap_and_cache(*entry);
-		shmem_swp_set(info, entry, 0);
-	}
-	shmem_recalc_inode(inode);
+	mutex_lock(&shmem_swaplist_mutex);
+	if (list_empty(&info->swaplist))
+		list_add_tail(&info->swaplist, &shmem_swaplist);
 
-	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
-		delete_from_page_cache(page);
-		shmem_swp_set(info, entry, swap.val);
-		shmem_swp_unmap(entry);
+	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
 		swap_shmem_alloc(swap);
+		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
+		spin_lock(&info->lock);
+		info->swapped++;
+		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+
+		mutex_unlock(&shmem_swaplist_mutex);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
 		return 0;
 	}
 
-	shmem_swp_unmap(entry);
-unlock:
-	spin_unlock(&info->lock);
-	/*
-	 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
-	 * clear SWAP_HAS_CACHE flag.
-	 */
+	mutex_unlock(&shmem_swaplist_mutex);
 	swapcache_free(swap, NULL);
 redirty:
 	set_page_dirty(page);
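The rewritten shmem_writepage() boils down to one ordering rule: the inode
must be discoverable on shmem_swaplist, and info->swapped incremented, before
the swaplist mutex is dropped. A condensed sketch of the success path (error
handling omitted; not a drop-in):

mutex_lock(&shmem_swaplist_mutex);	/* pin inode for shmem_unuse() */
if (list_empty(&info->swaplist))
	list_add_tail(&info->swaplist, &shmem_swaplist);

if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
	swap_shmem_alloc(swap);
	/* slot flips from page to swap entry atomically under tree_lock */
	shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
	spin_lock(&info->lock);
	info->swapped++;		/* now shmem_unuse_inode() can find us */
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);
	mutex_unlock(&shmem_swaplist_mutex);
	swap_writepage(page, wbc);	/* unlocks the page */
	return 0;
}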
@@ -1165,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 }
 #endif /* CONFIG_TMPFS */
 
-static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
-			struct shmem_inode_info *info, unsigned long idx)
+static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct mempolicy mpol, *spol;
 	struct vm_area_struct pvma;
-	struct page *page;
 
 	spol = mpol_cond_copy(&mpol,
-			mpol_shared_policy_lookup(&info->policy, idx));
+			mpol_shared_policy_lookup(&info->policy, index));
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
-	pvma.vm_pgoff = idx;
+	pvma.vm_pgoff = index;
 	pvma.vm_ops = NULL;
 	pvma.vm_policy = spol;
-	page = swapin_readahead(entry, gfp, &pvma, 0);
-	return page;
+	return swapin_readahead(swap, gfp, &pvma, 0);
 }
 
 static struct page *shmem_alloc_page(gfp_t gfp,
-			struct shmem_inode_info *info, unsigned long idx)
+			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
-	pvma.vm_pgoff = idx;
+	pvma.vm_pgoff = index;
 	pvma.vm_ops = NULL;
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
 
 	/*
 	 * alloc_page_vma() will drop the shared policy reference
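Both helpers above rely on the same pseudo-vma idiom: the NUMA allocators want
a vm_area_struct only as a carrier for a mempolicy, so a throwaway one is
built on the stack with just the fields the allocator reads. A sketch under
that assumption (the function name is illustrative):

static struct page *alloc_with_policy(gfp_t gfp,
		struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;

	pvma.vm_start = 0;
	pvma.vm_pgoff = index;	/* feeds interleave computations */
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
	/* alloc_page_vma() drops the shared policy reference for us */
	return alloc_page_vma(gfp, &pvma, 0);
}

The swapin path differs only in wrapping the lookup in mpol_cond_copy(), so
the policy can live on the stack without refcount games before
swapin_readahead().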
@@ -1202,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
 }
 #endif /* CONFIG_TMPFS */
 
-static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
-			struct shmem_inode_info *info, unsigned long idx)
+static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+			struct shmem_inode_info *info, pgoff_t index)
 {
-	return swapin_readahead(entry, gfp, NULL, 0);
+	return swapin_readahead(swap, gfp, NULL, 0);
 }
 
 static inline struct page *shmem_alloc_page(gfp_t gfp,
-			struct shmem_inode_info *info, unsigned long idx)
+			struct shmem_inode_info *info, pgoff_t index)
 {
 	return alloc_page(gfp);
 }
@@ -1228,311 +824,195 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) | |||
1228 | #endif | 824 | #endif |
1229 | 825 | ||
1230 | /* | 826 | /* |
1231 | * shmem_getpage - either get the page from swap or allocate a new one | 827 | * shmem_getpage_gfp - find page in cache, or get from swap, or allocate |
1232 | * | 828 | * |
1233 | * If we allocate a new one we do not mark it dirty. That's up to the | 829 | * If we allocate a new one we do not mark it dirty. That's up to the |
1234 | * vm. If we swap it in we mark it dirty since we also free the swap | 830 | * vm. If we swap it in we mark it dirty since we also free the swap |
1235 | * entry since a page cannot live in both the swap and page cache | 831 | * entry since a page cannot live in both the swap and page cache |
1236 | */ | 832 | */ |
1237 | static int shmem_getpage(struct inode *inode, unsigned long idx, | 833 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
1238 | struct page **pagep, enum sgp_type sgp, int *type) | 834 | struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) |
1239 | { | 835 | { |
1240 | struct address_space *mapping = inode->i_mapping; | 836 | struct address_space *mapping = inode->i_mapping; |
1241 | struct shmem_inode_info *info = SHMEM_I(inode); | 837 | struct shmem_inode_info *info; |
1242 | struct shmem_sb_info *sbinfo; | 838 | struct shmem_sb_info *sbinfo; |
1243 | struct page *filepage = *pagep; | 839 | struct page *page; |
1244 | struct page *swappage; | ||
1245 | struct page *prealloc_page = NULL; | ||
1246 | swp_entry_t *entry; | ||
1247 | swp_entry_t swap; | 840 | swp_entry_t swap; |
1248 | gfp_t gfp; | ||
1249 | int error; | 841 | int error; |
842 | int once = 0; | ||
1250 | 843 | ||
1251 | if (idx >= SHMEM_MAX_INDEX) | 844 | if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) |
1252 | return -EFBIG; | 845 | return -EFBIG; |
846 | repeat: | ||
847 | swap.val = 0; | ||
848 | page = find_lock_page(mapping, index); | ||
849 | if (radix_tree_exceptional_entry(page)) { | ||
850 | swap = radix_to_swp_entry(page); | ||
851 | page = NULL; | ||
852 | } | ||
1253 | 853 | ||
1254 | if (type) | 854 | if (sgp != SGP_WRITE && |
1255 | *type = 0; | 855 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
856 | error = -EINVAL; | ||
857 | goto failed; | ||
858 | } | ||
1256 | 859 | ||
1257 | /* | 860 | if (page || (sgp == SGP_READ && !swap.val)) { |
1258 | * Normally, filepage is NULL on entry, and either found | ||
1259 | * uptodate immediately, or allocated and zeroed, or read | ||
1260 | * in under swappage, which is then assigned to filepage. | ||
1261 | * But shmem_readpage (required for splice) passes in a locked | ||
1262 | * filepage, which may be found not uptodate by other callers | ||
1263 | * too, and may need to be copied from the swappage read in. | ||
1264 | */ | ||
1265 | repeat: | ||
1266 | if (!filepage) | ||
1267 | filepage = find_lock_page(mapping, idx); | ||
1268 | if (filepage && PageUptodate(filepage)) | ||
1269 | goto done; | ||
1270 | gfp = mapping_gfp_mask(mapping); | ||
1271 | if (!filepage) { | ||
1272 | /* | 861 | /* |
1273 | * Try to preload while we can wait, to not make a habit of | 862 | * Once we can get the page lock, it must be uptodate: |
1274 | * draining atomic reserves; but don't latch on to this cpu. | 863 | * if there were an error in reading back from swap, |
864 | * the page would not be inserted into the filecache. | ||
1275 | */ | 865 | */ |
1276 | error = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 866 | BUG_ON(page && !PageUptodate(page)); |
1277 | if (error) | 867 | *pagep = page; |
1278 | goto failed; | 868 | return 0; |
1279 | radix_tree_preload_end(); | ||
1280 | if (sgp != SGP_READ && !prealloc_page) { | ||
1281 | /* We don't care if this fails */ | ||
1282 | prealloc_page = shmem_alloc_page(gfp, info, idx); | ||
1283 | if (prealloc_page) { | ||
1284 | if (mem_cgroup_cache_charge(prealloc_page, | ||
1285 | current->mm, GFP_KERNEL)) { | ||
1286 | page_cache_release(prealloc_page); | ||
1287 | prealloc_page = NULL; | ||
1288 | } | ||
1289 | } | ||
1290 | } | ||
1291 | } | 869 | } |
1292 | error = 0; | ||
1293 | 870 | ||
1294 | spin_lock(&info->lock); | 871 | /* |
1295 | shmem_recalc_inode(inode); | 872 | * Fast cache lookup did not find it: |
1296 | entry = shmem_swp_alloc(info, idx, sgp); | 873 | * bring it back from swap or allocate. |
1297 | if (IS_ERR(entry)) { | 874 | */ |
1298 | spin_unlock(&info->lock); | 875 | info = SHMEM_I(inode); |
1299 | error = PTR_ERR(entry); | 876 | sbinfo = SHMEM_SB(inode->i_sb); |
1300 | goto failed; | ||
1301 | } | ||
1302 | swap = *entry; | ||
1303 | 877 | ||
1304 | if (swap.val) { | 878 | if (swap.val) { |
1305 | /* Look it up and read it in. */ | 879 | /* Look it up and read it in. */ |
1306 | swappage = lookup_swap_cache(swap); | 880 | page = lookup_swap_cache(swap); |
1307 | if (!swappage) { | 881 | if (!page) { |
1308 | shmem_swp_unmap(entry); | ||
1309 | spin_unlock(&info->lock); | ||
1310 | /* here we actually do the I/O */ | 882 | /* here we actually do the I/O */ |
1311 | if (type) | 883 | if (fault_type) |
1312 | *type |= VM_FAULT_MAJOR; | 884 | *fault_type |= VM_FAULT_MAJOR; |
1313 | swappage = shmem_swapin(swap, gfp, info, idx); | 885 | page = shmem_swapin(swap, gfp, info, index); |
1314 | if (!swappage) { | 886 | if (!page) { |
1315 | spin_lock(&info->lock); | 887 | error = -ENOMEM; |
1316 | entry = shmem_swp_alloc(info, idx, sgp); | 888 | goto failed; |
1317 | if (IS_ERR(entry)) | ||
1318 | error = PTR_ERR(entry); | ||
1319 | else { | ||
1320 | if (entry->val == swap.val) | ||
1321 | error = -ENOMEM; | ||
1322 | shmem_swp_unmap(entry); | ||
1323 | } | ||
1324 | spin_unlock(&info->lock); | ||
1325 | if (error) | ||
1326 | goto failed; | ||
1327 | goto repeat; | ||
1328 | } | 889 | } |
1329 | wait_on_page_locked(swappage); | ||
1330 | page_cache_release(swappage); | ||
1331 | goto repeat; | ||
1332 | } | 890 | } |
1333 | 891 | ||
1334 | /* We have to do this with page locked to prevent races */ | 892 | /* We have to do this with page locked to prevent races */ |
1335 | if (!trylock_page(swappage)) { | 893 | lock_page(page); |
1336 | shmem_swp_unmap(entry); | 894 | if (!PageUptodate(page)) { |
1337 | spin_unlock(&info->lock); | ||
1338 | wait_on_page_locked(swappage); | ||
1339 | page_cache_release(swappage); | ||
1340 | goto repeat; | ||
1341 | } | ||
1342 | if (PageWriteback(swappage)) { | ||
1343 | shmem_swp_unmap(entry); | ||
1344 | spin_unlock(&info->lock); | ||
1345 | wait_on_page_writeback(swappage); | ||
1346 | unlock_page(swappage); | ||
1347 | page_cache_release(swappage); | ||
1348 | goto repeat; | ||
1349 | } | ||
1350 | if (!PageUptodate(swappage)) { | ||
1351 | shmem_swp_unmap(entry); | ||
1352 | spin_unlock(&info->lock); | ||
1353 | unlock_page(swappage); | ||
1354 | page_cache_release(swappage); | ||
1355 | error = -EIO; | 895 | error = -EIO; |
1356 | goto failed; | 896 | goto failed; |
1357 | } | 897 | } |
1358 | 898 | wait_on_page_writeback(page); | |
1359 | if (filepage) { | 899 | |
1360 | shmem_swp_set(info, entry, 0); | 900 | /* Someone may have already done it for us */ |
1361 | shmem_swp_unmap(entry); | 901 | if (page->mapping) { |
1362 | delete_from_swap_cache(swappage); | 902 | if (page->mapping == mapping && |
1363 | spin_unlock(&info->lock); | 903 | page->index == index) |
1364 | copy_highpage(filepage, swappage); | 904 | goto done; |
1365 | unlock_page(swappage); | 905 | error = -EEXIST; |
1366 | page_cache_release(swappage); | 906 | goto failed; |
1367 | flush_dcache_page(filepage); | ||
1368 | SetPageUptodate(filepage); | ||
1369 | set_page_dirty(filepage); | ||
1370 | swap_free(swap); | ||
1371 | } else if (!(error = add_to_page_cache_locked(swappage, mapping, | ||
1372 | idx, GFP_NOWAIT))) { | ||
1373 | info->flags |= SHMEM_PAGEIN; | ||
1374 | shmem_swp_set(info, entry, 0); | ||
1375 | shmem_swp_unmap(entry); | ||
1376 | delete_from_swap_cache(swappage); | ||
1377 | spin_unlock(&info->lock); | ||
1378 | filepage = swappage; | ||
1379 | set_page_dirty(filepage); | ||
1380 | swap_free(swap); | ||
1381 | } else { | ||
1382 | shmem_swp_unmap(entry); | ||
1383 | spin_unlock(&info->lock); | ||
1384 | if (error == -ENOMEM) { | ||
1385 | /* | ||
1386 | * reclaim from proper memory cgroup and | ||
1387 | * call memcg's OOM if needed. | ||
1388 | */ | ||
1389 | error = mem_cgroup_shmem_charge_fallback( | ||
1390 | swappage, | ||
1391 | current->mm, | ||
1392 | gfp); | ||
1393 | if (error) { | ||
1394 | unlock_page(swappage); | ||
1395 | page_cache_release(swappage); | ||
1396 | goto failed; | ||
1397 | } | ||
1398 | } | ||
1399 | unlock_page(swappage); | ||
1400 | page_cache_release(swappage); | ||
1401 | goto repeat; | ||
1402 | } | ||
1403 | } else if (sgp == SGP_READ && !filepage) { | ||
1404 | shmem_swp_unmap(entry); | ||
1405 | filepage = find_get_page(mapping, idx); | ||
1406 | if (filepage && | ||
1407 | (!PageUptodate(filepage) || !trylock_page(filepage))) { | ||
1408 | spin_unlock(&info->lock); | ||
1409 | wait_on_page_locked(filepage); | ||
1410 | page_cache_release(filepage); | ||
1411 | filepage = NULL; | ||
1412 | goto repeat; | ||
1413 | } | 907 | } |
908 | |||
909 | error = mem_cgroup_cache_charge(page, current->mm, | ||
910 | gfp & GFP_RECLAIM_MASK); | ||
911 | if (!error) | ||
912 | error = shmem_add_to_page_cache(page, mapping, index, | ||
913 | gfp, swp_to_radix_entry(swap)); | ||
914 | if (error) | ||
915 | goto failed; | ||
916 | |||
917 | spin_lock(&info->lock); | ||
918 | info->swapped--; | ||
919 | shmem_recalc_inode(inode); | ||
1414 | spin_unlock(&info->lock); | 920 | spin_unlock(&info->lock); |
921 | |||
922 | delete_from_swap_cache(page); | ||
923 | set_page_dirty(page); | ||
924 | swap_free(swap); | ||
925 | |||
1415 | } else { | 926 | } else { |
1416 | shmem_swp_unmap(entry); | 927 | if (shmem_acct_block(info->flags)) { |
1417 | sbinfo = SHMEM_SB(inode->i_sb); | 928 | error = -ENOSPC; |
929 | goto failed; | ||
930 | } | ||
1418 | if (sbinfo->max_blocks) { | 931 | if (sbinfo->max_blocks) { |
1419 | if (percpu_counter_compare(&sbinfo->used_blocks, | 932 | if (percpu_counter_compare(&sbinfo->used_blocks, |
1420 | sbinfo->max_blocks) >= 0 || | 933 | sbinfo->max_blocks) >= 0) { |
1421 | shmem_acct_block(info->flags)) | 934 | error = -ENOSPC; |
1422 | goto nospace; | 935 | goto unacct; |
1423 | percpu_counter_inc(&sbinfo->used_blocks); | ||
1424 | spin_lock(&inode->i_lock); | ||
1425 | inode->i_blocks += BLOCKS_PER_PAGE; | ||
1426 | spin_unlock(&inode->i_lock); | ||
1427 | } else if (shmem_acct_block(info->flags)) | ||
1428 | goto nospace; | ||
1429 | |||
1430 | if (!filepage) { | ||
1431 | int ret; | ||
1432 | |||
1433 | if (!prealloc_page) { | ||
1434 | spin_unlock(&info->lock); | ||
1435 | filepage = shmem_alloc_page(gfp, info, idx); | ||
1436 | if (!filepage) { | ||
1437 | shmem_unacct_blocks(info->flags, 1); | ||
1438 | shmem_free_blocks(inode, 1); | ||
1439 | error = -ENOMEM; | ||
1440 | goto failed; | ||
1441 | } | ||
1442 | SetPageSwapBacked(filepage); | ||
1443 | |||
1444 | /* | ||
1445 | * Precharge page while we can wait, compensate | ||
1446 | * after | ||
1447 | */ | ||
1448 | error = mem_cgroup_cache_charge(filepage, | ||
1449 | current->mm, GFP_KERNEL); | ||
1450 | if (error) { | ||
1451 | page_cache_release(filepage); | ||
1452 | shmem_unacct_blocks(info->flags, 1); | ||
1453 | shmem_free_blocks(inode, 1); | ||
1454 | filepage = NULL; | ||
1455 | goto failed; | ||
1456 | } | ||
1457 | |||
1458 | spin_lock(&info->lock); | ||
1459 | } else { | ||
1460 | filepage = prealloc_page; | ||
1461 | prealloc_page = NULL; | ||
1462 | SetPageSwapBacked(filepage); | ||
1463 | } | 936 | } |
937 | percpu_counter_inc(&sbinfo->used_blocks); | ||
938 | } | ||
1464 | 939 | ||
1465 | entry = shmem_swp_alloc(info, idx, sgp); | 940 | page = shmem_alloc_page(gfp, info, index); |
1466 | if (IS_ERR(entry)) | 941 | if (!page) { |
1467 | error = PTR_ERR(entry); | 942 | error = -ENOMEM; |
1468 | else { | 943 | goto decused; |
1469 | swap = *entry; | ||
1470 | shmem_swp_unmap(entry); | ||
1471 | } | ||
1472 | ret = error || swap.val; | ||
1473 | if (ret) | ||
1474 | mem_cgroup_uncharge_cache_page(filepage); | ||
1475 | else | ||
1476 | ret = add_to_page_cache_lru(filepage, mapping, | ||
1477 | idx, GFP_NOWAIT); | ||
1478 | /* | ||
1479 | * At add_to_page_cache_lru() failure, uncharge will | ||
1480 | * be done automatically. | ||
1481 | */ | ||
1482 | if (ret) { | ||
1483 | spin_unlock(&info->lock); | ||
1484 | page_cache_release(filepage); | ||
1485 | shmem_unacct_blocks(info->flags, 1); | ||
1486 | shmem_free_blocks(inode, 1); | ||
1487 | filepage = NULL; | ||
1488 | if (error) | ||
1489 | goto failed; | ||
1490 | goto repeat; | ||
1491 | } | ||
1492 | info->flags |= SHMEM_PAGEIN; | ||
1493 | } | 944 | } |
1494 | 945 | ||
946 | SetPageSwapBacked(page); | ||
947 | __set_page_locked(page); | ||
948 | error = mem_cgroup_cache_charge(page, current->mm, | ||
949 | gfp & GFP_RECLAIM_MASK); | ||
950 | if (!error) | ||
951 | error = shmem_add_to_page_cache(page, mapping, index, | ||
952 | gfp, NULL); | ||
953 | if (error) | ||
954 | goto decused; | ||
955 | lru_cache_add_anon(page); | ||
956 | |||
957 | spin_lock(&info->lock); | ||
1495 | info->alloced++; | 958 | info->alloced++; |
959 | inode->i_blocks += BLOCKS_PER_PAGE; | ||
960 | shmem_recalc_inode(inode); | ||
1496 | spin_unlock(&info->lock); | 961 | spin_unlock(&info->lock); |
1497 | clear_highpage(filepage); | 962 | |
1498 | flush_dcache_page(filepage); | 963 | clear_highpage(page); |
1499 | SetPageUptodate(filepage); | 964 | flush_dcache_page(page); |
965 | SetPageUptodate(page); | ||
1500 | if (sgp == SGP_DIRTY) | 966 | if (sgp == SGP_DIRTY) |
1501 | set_page_dirty(filepage); | 967 | set_page_dirty(page); |
1502 | } | 968 | } |
1503 | done: | 969 | done: |
1504 | *pagep = filepage; | 970 | /* Perhaps the file has been truncated since we checked */ |
1505 | error = 0; | 971 | if (sgp != SGP_WRITE && |
1506 | goto out; | 972 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
973 | error = -EINVAL; | ||
974 | goto trunc; | ||
975 | } | ||
976 | *pagep = page; | ||
977 | return 0; | ||
1507 | 978 | ||
1508 | nospace: | ||
1509 | /* | 979 | /* |
1510 | * Perhaps the page was brought in from swap between find_lock_page | 980 | * Error recovery. |
1511 | * and taking info->lock? We allow for that at add_to_page_cache_lru, | ||
1512 | * but must also avoid reporting a spurious ENOSPC while working on a | ||
1513 | * full tmpfs. (When filepage has been passed in to shmem_getpage, it | ||
1514 | * is already in page cache, which prevents this race from occurring.) | ||
1515 | */ | 981 | */ |
1516 | if (!filepage) { | 982 | trunc: |
1517 | struct page *page = find_get_page(mapping, idx); | 983 | ClearPageDirty(page); |
1518 | if (page) { | 984 | delete_from_page_cache(page); |
1519 | spin_unlock(&info->lock); | 985 | spin_lock(&info->lock); |
1520 | page_cache_release(page); | 986 | info->alloced--; |
1521 | goto repeat; | 987 | inode->i_blocks -= BLOCKS_PER_PAGE; |
1522 | } | ||
1523 | } | ||
1524 | spin_unlock(&info->lock); | 988 | spin_unlock(&info->lock); |
1525 | error = -ENOSPC; | 989 | decused: |
990 | if (sbinfo->max_blocks) | ||
991 | percpu_counter_add(&sbinfo->used_blocks, -1); | ||
992 | unacct: | ||
993 | shmem_unacct_blocks(info->flags, 1); | ||
1526 | failed: | 994 | failed: |
1527 | if (*pagep != filepage) { | 995 | if (swap.val && error != -EINVAL) { |
1528 | unlock_page(filepage); | 996 | struct page *test = find_get_page(mapping, index); |
1529 | page_cache_release(filepage); | 997 | if (test && !radix_tree_exceptional_entry(test)) |
998 | page_cache_release(test); | ||
999 | /* Have another try if the entry has changed */ | ||
1000 | if (test != swp_to_radix_entry(swap)) | ||
1001 | error = -EEXIST; | ||
1530 | } | 1002 | } |
1531 | out: | 1003 | if (page) { |
1532 | if (prealloc_page) { | 1004 | unlock_page(page); |
1533 | mem_cgroup_uncharge_cache_page(prealloc_page); | 1005 | page_cache_release(page); |
1534 | page_cache_release(prealloc_page); | 1006 | } |
1007 | if (error == -ENOSPC && !once++) { | ||
1008 | info = SHMEM_I(inode); | ||
1009 | spin_lock(&info->lock); | ||
1010 | shmem_recalc_inode(inode); | ||
1011 | spin_unlock(&info->lock); | ||
1012 | goto repeat; | ||
1535 | } | 1013 | } |
1014 | if (error == -EEXIST) | ||
1015 | goto repeat; | ||
1536 | return error; | 1016 | return error; |
1537 | } | 1017 | } |
1538 | 1018 | ||
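
The heart of the rewrite above is that a swapped-out page's swap entry now sits directly in the mapping's radix tree slot, so find_lock_page() can return either a real page or a tagged "exceptional" entry that radix_to_swp_entry() decodes; because struct page pointers are word-aligned, a genuine page pointer can never have the tag bit set. A self-contained model of the encoding, assuming the radix tree convention of this era (bit 1 marks an exceptional entry, the payload is stored above the two tag bits):

    #include <assert.h>
    #include <stdio.h>

    #define EXCEPTIONAL_ENTRY 2UL  /* bit 1 set: not a page pointer */
    #define EXCEPTIONAL_SHIFT 2    /* payload stored above the tag bits */

    typedef struct { unsigned long val; } swp_entry_t;

    static void *swp_to_radix_entry(swp_entry_t swap)
    {
        return (void *)((swap.val << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY);
    }

    static int radix_tree_exceptional_entry(void *slot)
    {
        return (unsigned long)slot & EXCEPTIONAL_ENTRY;
    }

    static swp_entry_t radix_to_swp_entry(void *slot)
    {
        return (swp_entry_t){ (unsigned long)slot >> EXCEPTIONAL_SHIFT };
    }

    int main(void)
    {
        swp_entry_t swap = { 0x1234 };
        void *slot = swp_to_radix_entry(swap);

        assert(radix_tree_exceptional_entry(slot));
        assert(radix_to_swp_entry(slot).val == swap.val);
        printf("swap entry %#lx round-trips through slot %p\n",
               radix_to_swp_entry(slot).val, slot);
        return 0;
    }
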
@@ -1540,36 +1020,34 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1540 | { | 1020 | { |
1541 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 1021 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1542 | int error; | 1022 | int error; |
1543 | int ret; | 1023 | int ret = VM_FAULT_LOCKED; |
1544 | |||
1545 | if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
1546 | return VM_FAULT_SIGBUS; | ||
1547 | 1024 | ||
1548 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1025 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
1549 | if (error) | 1026 | if (error) |
1550 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 1027 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
1028 | |||
1551 | if (ret & VM_FAULT_MAJOR) { | 1029 | if (ret & VM_FAULT_MAJOR) { |
1552 | count_vm_event(PGMAJFAULT); | 1030 | count_vm_event(PGMAJFAULT); |
1553 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); | 1031 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); |
1554 | } | 1032 | } |
1555 | return ret | VM_FAULT_LOCKED; | 1033 | return ret; |
1556 | } | 1034 | } |
1557 | 1035 | ||
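
shmem_fault() now starts from VM_FAULT_LOCKED and lets shmem_getpage() OR in VM_FAULT_MAJOR only on the path that really performs I/O, which is also where PGMAJFAULT is counted. The distinction is visible from userspace in the rusage fault counters; a small demonstration against a tmpfs file (the /dev/shm path is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <sys/resource.h>

    int main(void)
    {
        /* /dev/shm is a tmpfs mount on most Linux systems. */
        int fd = open("/dev/shm/fault-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd < 0 || ftruncate(fd, 1 << 20) < 0) { perror("setup"); return 1; }

        char *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        struct rusage before, after;
        getrusage(RUSAGE_SELF, &before);
        memset(p, 0xaa, 1 << 20);   /* touch every page: faults them in */
        getrusage(RUSAGE_SELF, &after);

        /* Fresh tmpfs pages are allocated and zeroed, so these faults are
         * minor; ru_majflt only rises when pages come back from swap. */
        printf("minor faults: %ld, major faults: %ld\n",
               after.ru_minflt - before.ru_minflt,
               after.ru_majflt - before.ru_majflt);
        munmap(p, 1 << 20);
        close(fd);
        unlink("/dev/shm/fault-demo");
        return 0;
    }
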
1558 | #ifdef CONFIG_NUMA | 1036 | #ifdef CONFIG_NUMA |
1559 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) | 1037 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) |
1560 | { | 1038 | { |
1561 | struct inode *i = vma->vm_file->f_path.dentry->d_inode; | 1039 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1562 | return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); | 1040 | return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); |
1563 | } | 1041 | } |
1564 | 1042 | ||
1565 | static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, | 1043 | static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, |
1566 | unsigned long addr) | 1044 | unsigned long addr) |
1567 | { | 1045 | { |
1568 | struct inode *i = vma->vm_file->f_path.dentry->d_inode; | 1046 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1569 | unsigned long idx; | 1047 | pgoff_t index; |
1570 | 1048 | ||
1571 | idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 1049 | index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
1572 | return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); | 1050 | return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); |
1573 | } | 1051 | } |
1574 | #endif | 1052 | #endif |
1575 | 1053 | ||
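
These two hooks are what back mbind(2) on a tmpfs mapping: the policy is recorded per file range in info->policy and later consulted by the pseudo-vma allocators above. A sketch of the userspace side, assuming a NUMA-capable kernel and the libnuma headers for the mbind() declaration:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <numaif.h>   /* mbind(); link with -lnuma */

    int main(void)
    {
        int fd = open("/dev/shm/policy-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd < 0 || ftruncate(fd, 1 << 20) < 0) { perror("setup"); return 1; }

        char *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        /* Interleave this file range across nodes 0 and 1; on tmpfs the
         * request reaches shmem_set_policy() via vma->vm_ops->set_policy. */
        unsigned long nodemask = 3;   /* nodes 0 and 1 */
        if (mbind(p, 1 << 20, MPOL_INTERLEAVE, &nodemask,
                  sizeof(nodemask) * 8, 0) < 0)
            perror("mbind");          /* e.g. fewer than two NUMA nodes */

        munmap(p, 1 << 20);
        close(fd);
        unlink("/dev/shm/policy-demo");
        return 0;
    }
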
@@ -1667,20 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode | |||
1667 | 1145 | ||
1668 | #ifdef CONFIG_TMPFS | 1146 | #ifdef CONFIG_TMPFS |
1669 | static const struct inode_operations shmem_symlink_inode_operations; | 1147 | static const struct inode_operations shmem_symlink_inode_operations; |
1670 | static const struct inode_operations shmem_symlink_inline_operations; | 1148 | static const struct inode_operations shmem_short_symlink_operations; |
1671 | |||
1672 | /* | ||
1673 | * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin; | ||
1674 | * but providing them allows a tmpfs file to be used for splice, sendfile, and | ||
1675 | * below the loop driver, in the generic fashion that many filesystems support. | ||
1676 | */ | ||
1677 | static int shmem_readpage(struct file *file, struct page *page) | ||
1678 | { | ||
1679 | struct inode *inode = page->mapping->host; | ||
1680 | int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL); | ||
1681 | unlock_page(page); | ||
1682 | return error; | ||
1683 | } | ||
1684 | 1149 | ||
1685 | static int | 1150 | static int |
1686 | shmem_write_begin(struct file *file, struct address_space *mapping, | 1151 | shmem_write_begin(struct file *file, struct address_space *mapping, |
@@ -1689,7 +1154,6 @@ shmem_write_begin(struct file *file, struct address_space *mapping, | |||
1689 | { | 1154 | { |
1690 | struct inode *inode = mapping->host; | 1155 | struct inode *inode = mapping->host; |
1691 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 1156 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
1692 | *pagep = NULL; | ||
1693 | return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); | 1157 | return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); |
1694 | } | 1158 | } |
1695 | 1159 | ||
@@ -1714,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ | |||
1714 | { | 1178 | { |
1715 | struct inode *inode = filp->f_path.dentry->d_inode; | 1179 | struct inode *inode = filp->f_path.dentry->d_inode; |
1716 | struct address_space *mapping = inode->i_mapping; | 1180 | struct address_space *mapping = inode->i_mapping; |
1717 | unsigned long index, offset; | 1181 | pgoff_t index; |
1182 | unsigned long offset; | ||
1718 | enum sgp_type sgp = SGP_READ; | 1183 | enum sgp_type sgp = SGP_READ; |
1719 | 1184 | ||
1720 | /* | 1185 | /* |
@@ -1730,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ | |||
1730 | 1195 | ||
1731 | for (;;) { | 1196 | for (;;) { |
1732 | struct page *page = NULL; | 1197 | struct page *page = NULL; |
1733 | unsigned long end_index, nr, ret; | 1198 | pgoff_t end_index; |
1199 | unsigned long nr, ret; | ||
1734 | loff_t i_size = i_size_read(inode); | 1200 | loff_t i_size = i_size_read(inode); |
1735 | 1201 | ||
1736 | end_index = i_size >> PAGE_CACHE_SHIFT; | 1202 | end_index = i_size >> PAGE_CACHE_SHIFT; |
@@ -1846,6 +1312,119 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, | |||
1846 | return retval; | 1312 | return retval; |
1847 | } | 1313 | } |
1848 | 1314 | ||
1315 | static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, | ||
1316 | struct pipe_inode_info *pipe, size_t len, | ||
1317 | unsigned int flags) | ||
1318 | { | ||
1319 | struct address_space *mapping = in->f_mapping; | ||
1320 | struct inode *inode = mapping->host; | ||
1321 | unsigned int loff, nr_pages, req_pages; | ||
1322 | struct page *pages[PIPE_DEF_BUFFERS]; | ||
1323 | struct partial_page partial[PIPE_DEF_BUFFERS]; | ||
1324 | struct page *page; | ||
1325 | pgoff_t index, end_index; | ||
1326 | loff_t isize, left; | ||
1327 | int error, page_nr; | ||
1328 | struct splice_pipe_desc spd = { | ||
1329 | .pages = pages, | ||
1330 | .partial = partial, | ||
1331 | .flags = flags, | ||
1332 | .ops = &page_cache_pipe_buf_ops, | ||
1333 | .spd_release = spd_release_page, | ||
1334 | }; | ||
1335 | |||
1336 | isize = i_size_read(inode); | ||
1337 | if (unlikely(*ppos >= isize)) | ||
1338 | return 0; | ||
1339 | |||
1340 | left = isize - *ppos; | ||
1341 | if (unlikely(left < len)) | ||
1342 | len = left; | ||
1343 | |||
1344 | if (splice_grow_spd(pipe, &spd)) | ||
1345 | return -ENOMEM; | ||
1346 | |||
1347 | index = *ppos >> PAGE_CACHE_SHIFT; | ||
1348 | loff = *ppos & ~PAGE_CACHE_MASK; | ||
1349 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1350 | nr_pages = min(req_pages, pipe->buffers); | ||
1351 | |||
1352 | spd.nr_pages = find_get_pages_contig(mapping, index, | ||
1353 | nr_pages, spd.pages); | ||
1354 | index += spd.nr_pages; | ||
1355 | error = 0; | ||
1356 | |||
1357 | while (spd.nr_pages < nr_pages) { | ||
1358 | error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL); | ||
1359 | if (error) | ||
1360 | break; | ||
1361 | unlock_page(page); | ||
1362 | spd.pages[spd.nr_pages++] = page; | ||
1363 | index++; | ||
1364 | } | ||
1365 | |||
1366 | index = *ppos >> PAGE_CACHE_SHIFT; | ||
1367 | nr_pages = spd.nr_pages; | ||
1368 | spd.nr_pages = 0; | ||
1369 | |||
1370 | for (page_nr = 0; page_nr < nr_pages; page_nr++) { | ||
1371 | unsigned int this_len; | ||
1372 | |||
1373 | if (!len) | ||
1374 | break; | ||
1375 | |||
1376 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); | ||
1377 | page = spd.pages[page_nr]; | ||
1378 | |||
1379 | if (!PageUptodate(page) || page->mapping != mapping) { | ||
1380 | error = shmem_getpage(inode, index, &page, | ||
1381 | SGP_CACHE, NULL); | ||
1382 | if (error) | ||
1383 | break; | ||
1384 | unlock_page(page); | ||
1385 | page_cache_release(spd.pages[page_nr]); | ||
1386 | spd.pages[page_nr] = page; | ||
1387 | } | ||
1388 | |||
1389 | isize = i_size_read(inode); | ||
1390 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
1391 | if (unlikely(!isize || index > end_index)) | ||
1392 | break; | ||
1393 | |||
1394 | if (end_index == index) { | ||
1395 | unsigned int plen; | ||
1396 | |||
1397 | plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; | ||
1398 | if (plen <= loff) | ||
1399 | break; | ||
1400 | |||
1401 | this_len = min(this_len, plen - loff); | ||
1402 | len = this_len; | ||
1403 | } | ||
1404 | |||
1405 | spd.partial[page_nr].offset = loff; | ||
1406 | spd.partial[page_nr].len = this_len; | ||
1407 | len -= this_len; | ||
1408 | loff = 0; | ||
1409 | spd.nr_pages++; | ||
1410 | index++; | ||
1411 | } | ||
1412 | |||
1413 | while (page_nr < nr_pages) | ||
1414 | page_cache_release(spd.pages[page_nr++]); | ||
1415 | |||
1416 | if (spd.nr_pages) | ||
1417 | error = splice_to_pipe(pipe, &spd); | ||
1418 | |||
1419 | splice_shrink_spd(pipe, &spd); | ||
1420 | |||
1421 | if (error > 0) { | ||
1422 | *ppos += error; | ||
1423 | file_accessed(in); | ||
1424 | } | ||
1425 | return error; | ||
1426 | } | ||
1427 | |||
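
shmem_file_splice_read() is in essence generic_file_splice_read() with the readpage step replaced: any page missing from the cache is produced by shmem_getpage() instead, which is what allows shmem_readpage() to be deleted. From userspace it simply serves splice(2) from a tmpfs file into a pipe, as in this minimal illustration:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/types.h>

    int main(void)
    {
        const char msg[] = "hello from tmpfs\n";
        int pipefd[2];
        int fd = open("/dev/shm/splice-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);

        if (fd < 0 || pipe(pipefd) < 0) { perror("setup"); return 1; }
        if (write(fd, msg, sizeof(msg) - 1) < 0) { perror("write"); return 1; }

        /* Hand the file's page-cache pages to the pipe without copying
         * through a user buffer; on tmpfs this path ends in
         * shmem_file_splice_read(). */
        loff_t off = 0;
        ssize_t n = splice(fd, &off, pipefd[1], NULL, sizeof(msg) - 1, 0);
        if (n < 0) { perror("splice"); return 1; }

        char buf[64];
        n = read(pipefd[0], buf, sizeof(buf));
        printf("spliced %zd bytes: %.*s", n, (int)n, buf);

        close(fd);
        unlink("/dev/shm/splice-demo");
        return 0;
    }
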
1849 | static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) | 1428 | static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) |
1850 | { | 1429 | { |
1851 | struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); | 1430 | struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); |
@@ -1855,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1855 | buf->f_namelen = NAME_MAX; | 1434 | buf->f_namelen = NAME_MAX; |
1856 | if (sbinfo->max_blocks) { | 1435 | if (sbinfo->max_blocks) { |
1857 | buf->f_blocks = sbinfo->max_blocks; | 1436 | buf->f_blocks = sbinfo->max_blocks; |
1858 | buf->f_bavail = buf->f_bfree = | 1437 | buf->f_bavail = |
1859 | sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); | 1438 | buf->f_bfree = sbinfo->max_blocks - |
1439 | percpu_counter_sum(&sbinfo->used_blocks); | ||
1860 | } | 1440 | } |
1861 | if (sbinfo->max_inodes) { | 1441 | if (sbinfo->max_inodes) { |
1862 | buf->f_files = sbinfo->max_inodes; | 1442 | buf->f_files = sbinfo->max_inodes; |
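
shmem_statfs() computes f_bfree/f_bavail as max_blocks minus percpu_counter_sum(), the exact slow-path total of the per-cpu used_blocks counter, which is affordable because statfs is rare. The numbers surface through statfs(2) and df; for example:

    #include <stdio.h>
    #include <sys/vfs.h>   /* statfs(2) */

    int main(void)
    {
        struct statfs buf;

        /* /dev/shm is typically a tmpfs mount limited to half of RAM. */
        if (statfs("/dev/shm", &buf) < 0) { perror("statfs"); return 1; }

        printf("block size %ld, blocks %llu, free %llu, files %llu\n",
               (long)buf.f_bsize,
               (unsigned long long)buf.f_blocks,
               (unsigned long long)buf.f_bfree,
               (unsigned long long)buf.f_files);
        return 0;
    }
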
@@ -2006,7 +1586,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2006 | int error; | 1586 | int error; |
2007 | int len; | 1587 | int len; |
2008 | struct inode *inode; | 1588 | struct inode *inode; |
2009 | struct page *page = NULL; | 1589 | struct page *page; |
2010 | char *kaddr; | 1590 | char *kaddr; |
2011 | struct shmem_inode_info *info; | 1591 | struct shmem_inode_info *info; |
2012 | 1592 | ||
@@ -2030,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2030 | 1610 | ||
2031 | info = SHMEM_I(inode); | 1611 | info = SHMEM_I(inode); |
2032 | inode->i_size = len-1; | 1612 | inode->i_size = len-1; |
2033 | if (len <= SHMEM_SYMLINK_INLINE_LEN) { | 1613 | if (len <= SHORT_SYMLINK_LEN) { |
2034 | /* do it inline */ | 1614 | info->symlink = kmemdup(symname, len, GFP_KERNEL); |
2035 | memcpy(info->inline_symlink, symname, len); | 1615 | if (!info->symlink) { |
2036 | inode->i_op = &shmem_symlink_inline_operations; | 1616 | iput(inode); |
1617 | return -ENOMEM; | ||
1618 | } | ||
1619 | inode->i_op = &shmem_short_symlink_operations; | ||
2037 | } else { | 1620 | } else { |
2038 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); | 1621 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); |
2039 | if (error) { | 1622 | if (error) { |
@@ -2056,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2056 | return 0; | 1639 | return 0; |
2057 | } | 1640 | } |
2058 | 1641 | ||
2059 | static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) | 1642 | static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) |
2060 | { | 1643 | { |
2061 | nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); | 1644 | nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); |
2062 | return NULL; | 1645 | return NULL; |
2063 | } | 1646 | } |
2064 | 1647 | ||
2065 | static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) | 1648 | static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) |
2066 | { | 1649 | { |
2067 | struct page *page = NULL; | 1650 | struct page *page = NULL; |
2068 | int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); | 1651 | int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); |
2069 | nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); | 1652 | nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); |
2070 | if (page) | 1653 | if (page) |
2071 | unlock_page(page); | 1654 | unlock_page(page); |
2072 | return page; | 1655 | return page; |
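
With this split, a short target (up to SHORT_SYMLINK_LEN, 128 bytes in this patch) is kmemdup()'d into info->symlink and served without touching the page cache, while longer targets still occupy page 0 of the inode and are kmap()'d on lookup. Userspace cannot tell the difference, as a quick check shows:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        char lng[300];
        char buf[512];
        ssize_t n;

        memset(lng, 'x', sizeof(lng) - 1);
        lng[sizeof(lng) - 1] = '\0';

        unlink("/dev/shm/link-short");
        unlink("/dev/shm/link-long");

        /* One target below the inline cutoff, one well above it. */
        if (symlink("short-target", "/dev/shm/link-short") < 0 ||
            symlink(lng, "/dev/shm/link-long") < 0) {
            perror("symlink");
            return 1;
        }

        n = readlink("/dev/shm/link-short", buf, sizeof(buf));
        printf("short symlink target: %zd bytes\n", n);
        n = readlink("/dev/shm/link-long", buf, sizeof(buf));
        printf("long symlink target: %zd bytes\n", n);
        return 0;
    }
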
@@ -2177,7 +1760,6 @@ out: | |||
2177 | return err; | 1760 | return err; |
2178 | } | 1761 | } |
2179 | 1762 | ||
2180 | |||
2181 | static const struct xattr_handler *shmem_xattr_handlers[] = { | 1763 | static const struct xattr_handler *shmem_xattr_handlers[] = { |
2182 | #ifdef CONFIG_TMPFS_POSIX_ACL | 1764 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2183 | &generic_acl_access_handler, | 1765 | &generic_acl_access_handler, |
@@ -2307,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
2307 | } | 1889 | } |
2308 | #endif /* CONFIG_TMPFS_XATTR */ | 1890 | #endif /* CONFIG_TMPFS_XATTR */ |
2309 | 1891 | ||
2310 | static const struct inode_operations shmem_symlink_inline_operations = { | 1892 | static const struct inode_operations shmem_short_symlink_operations = { |
2311 | .readlink = generic_readlink, | 1893 | .readlink = generic_readlink, |
2312 | .follow_link = shmem_follow_link_inline, | 1894 | .follow_link = shmem_follow_short_symlink, |
2313 | #ifdef CONFIG_TMPFS_XATTR | 1895 | #ifdef CONFIG_TMPFS_XATTR |
2314 | .setxattr = shmem_setxattr, | 1896 | .setxattr = shmem_setxattr, |
2315 | .getxattr = shmem_getxattr, | 1897 | .getxattr = shmem_getxattr, |
@@ -2509,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) | |||
2509 | if (config.max_inodes < inodes) | 2091 | if (config.max_inodes < inodes) |
2510 | goto out; | 2092 | goto out; |
2511 | /* | 2093 | /* |
2512 | * Those tests also disallow limited->unlimited while any are in | 2094 | * Those tests disallow limited->unlimited while any are in use; |
2513 | * use, so i_blocks will always be zero when max_blocks is zero; | ||
2514 | * but we must separately disallow unlimited->limited, because | 2095 | * but we must separately disallow unlimited->limited, because |
2515 | * in that case we have no record of how much is already in use. | 2096 | * in that case we have no record of how much is already in use. |
2516 | */ | 2097 | */ |
@@ -2602,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) | |||
2602 | goto failed; | 2183 | goto failed; |
2603 | sbinfo->free_inodes = sbinfo->max_inodes; | 2184 | sbinfo->free_inodes = sbinfo->max_inodes; |
2604 | 2185 | ||
2605 | sb->s_maxbytes = SHMEM_MAX_BYTES; | 2186 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
2606 | sb->s_blocksize = PAGE_CACHE_SIZE; | 2187 | sb->s_blocksize = PAGE_CACHE_SIZE; |
2607 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 2188 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
2608 | sb->s_magic = TMPFS_MAGIC; | 2189 | sb->s_magic = TMPFS_MAGIC; |
@@ -2637,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep; | |||
2637 | 2218 | ||
2638 | static struct inode *shmem_alloc_inode(struct super_block *sb) | 2219 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
2639 | { | 2220 | { |
2640 | struct shmem_inode_info *p; | 2221 | struct shmem_inode_info *info; |
2641 | p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); | 2222 | info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); |
2642 | if (!p) | 2223 | if (!info) |
2643 | return NULL; | 2224 | return NULL; |
2644 | return &p->vfs_inode; | 2225 | return &info->vfs_inode; |
2645 | } | 2226 | } |
2646 | 2227 | ||
2647 | static void shmem_i_callback(struct rcu_head *head) | 2228 | static void shmem_destroy_callback(struct rcu_head *head) |
2648 | { | 2229 | { |
2649 | struct inode *inode = container_of(head, struct inode, i_rcu); | 2230 | struct inode *inode = container_of(head, struct inode, i_rcu); |
2650 | INIT_LIST_HEAD(&inode->i_dentry); | 2231 | INIT_LIST_HEAD(&inode->i_dentry); |
@@ -2653,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head) | |||
2653 | 2234 | ||
2654 | static void shmem_destroy_inode(struct inode *inode) | 2235 | static void shmem_destroy_inode(struct inode *inode) |
2655 | { | 2236 | { |
2656 | if ((inode->i_mode & S_IFMT) == S_IFREG) { | 2237 | if ((inode->i_mode & S_IFMT) == S_IFREG) |
2657 | /* only struct inode is valid if it's an inline symlink */ | ||
2658 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); | 2238 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
2659 | } | 2239 | call_rcu(&inode->i_rcu, shmem_destroy_callback); |
2660 | call_rcu(&inode->i_rcu, shmem_i_callback); | ||
2661 | } | 2240 | } |
2662 | 2241 | ||
2663 | static void init_once(void *foo) | 2242 | static void shmem_init_inode(void *foo) |
2664 | { | 2243 | { |
2665 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; | 2244 | struct shmem_inode_info *info = foo; |
2666 | 2245 | inode_init_once(&info->vfs_inode); | |
2667 | inode_init_once(&p->vfs_inode); | ||
2668 | } | 2246 | } |
2669 | 2247 | ||
2670 | static int init_inodecache(void) | 2248 | static int shmem_init_inodecache(void) |
2671 | { | 2249 | { |
2672 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", | 2250 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", |
2673 | sizeof(struct shmem_inode_info), | 2251 | sizeof(struct shmem_inode_info), |
2674 | 0, SLAB_PANIC, init_once); | 2252 | 0, SLAB_PANIC, shmem_init_inode); |
2675 | return 0; | 2253 | return 0; |
2676 | } | 2254 | } |
2677 | 2255 | ||
2678 | static void destroy_inodecache(void) | 2256 | static void shmem_destroy_inodecache(void) |
2679 | { | 2257 | { |
2680 | kmem_cache_destroy(shmem_inode_cachep); | 2258 | kmem_cache_destroy(shmem_inode_cachep); |
2681 | } | 2259 | } |
@@ -2684,7 +2262,6 @@ static const struct address_space_operations shmem_aops = { | |||
2684 | .writepage = shmem_writepage, | 2262 | .writepage = shmem_writepage, |
2685 | .set_page_dirty = __set_page_dirty_no_writeback, | 2263 | .set_page_dirty = __set_page_dirty_no_writeback, |
2686 | #ifdef CONFIG_TMPFS | 2264 | #ifdef CONFIG_TMPFS |
2687 | .readpage = shmem_readpage, | ||
2688 | .write_begin = shmem_write_begin, | 2265 | .write_begin = shmem_write_begin, |
2689 | .write_end = shmem_write_end, | 2266 | .write_end = shmem_write_end, |
2690 | #endif | 2267 | #endif |
@@ -2701,7 +2278,7 @@ static const struct file_operations shmem_file_operations = { | |||
2701 | .aio_read = shmem_file_aio_read, | 2278 | .aio_read = shmem_file_aio_read, |
2702 | .aio_write = generic_file_aio_write, | 2279 | .aio_write = generic_file_aio_write, |
2703 | .fsync = noop_fsync, | 2280 | .fsync = noop_fsync, |
2704 | .splice_read = generic_file_splice_read, | 2281 | .splice_read = shmem_file_splice_read, |
2705 | .splice_write = generic_file_splice_write, | 2282 | .splice_write = generic_file_splice_write, |
2706 | #endif | 2283 | #endif |
2707 | }; | 2284 | }; |
@@ -2715,10 +2292,6 @@ static const struct inode_operations shmem_inode_operations = { | |||
2715 | .listxattr = shmem_listxattr, | 2292 | .listxattr = shmem_listxattr, |
2716 | .removexattr = shmem_removexattr, | 2293 | .removexattr = shmem_removexattr, |
2717 | #endif | 2294 | #endif |
2718 | #ifdef CONFIG_TMPFS_POSIX_ACL | ||
2719 | .check_acl = generic_check_acl, | ||
2720 | #endif | ||
2721 | |||
2722 | }; | 2295 | }; |
2723 | 2296 | ||
2724 | static const struct inode_operations shmem_dir_inode_operations = { | 2297 | static const struct inode_operations shmem_dir_inode_operations = { |
@@ -2741,7 +2314,6 @@ static const struct inode_operations shmem_dir_inode_operations = { | |||
2741 | #endif | 2314 | #endif |
2742 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2315 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2743 | .setattr = shmem_setattr, | 2316 | .setattr = shmem_setattr, |
2744 | .check_acl = generic_check_acl, | ||
2745 | #endif | 2317 | #endif |
2746 | }; | 2318 | }; |
2747 | 2319 | ||
@@ -2754,7 +2326,6 @@ static const struct inode_operations shmem_special_inode_operations = { | |||
2754 | #endif | 2326 | #endif |
2755 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2327 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2756 | .setattr = shmem_setattr, | 2328 | .setattr = shmem_setattr, |
2757 | .check_acl = generic_check_acl, | ||
2758 | #endif | 2329 | #endif |
2759 | }; | 2330 | }; |
2760 | 2331 | ||
@@ -2779,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = { | |||
2779 | #endif | 2350 | #endif |
2780 | }; | 2351 | }; |
2781 | 2352 | ||
2782 | |||
2783 | static struct dentry *shmem_mount(struct file_system_type *fs_type, | 2353 | static struct dentry *shmem_mount(struct file_system_type *fs_type, |
2784 | int flags, const char *dev_name, void *data) | 2354 | int flags, const char *dev_name, void *data) |
2785 | { | 2355 | { |
2786 | return mount_nodev(fs_type, flags, data, shmem_fill_super); | 2356 | return mount_nodev(fs_type, flags, data, shmem_fill_super); |
2787 | } | 2357 | } |
2788 | 2358 | ||
2789 | static struct file_system_type tmpfs_fs_type = { | 2359 | static struct file_system_type shmem_fs_type = { |
2790 | .owner = THIS_MODULE, | 2360 | .owner = THIS_MODULE, |
2791 | .name = "tmpfs", | 2361 | .name = "tmpfs", |
2792 | .mount = shmem_mount, | 2362 | .mount = shmem_mount, |
2793 | .kill_sb = kill_litter_super, | 2363 | .kill_sb = kill_litter_super, |
2794 | }; | 2364 | }; |
2795 | 2365 | ||
2796 | int __init init_tmpfs(void) | 2366 | int __init shmem_init(void) |
2797 | { | 2367 | { |
2798 | int error; | 2368 | int error; |
2799 | 2369 | ||
@@ -2801,18 +2371,18 @@ int __init init_tmpfs(void) | |||
2801 | if (error) | 2371 | if (error) |
2802 | goto out4; | 2372 | goto out4; |
2803 | 2373 | ||
2804 | error = init_inodecache(); | 2374 | error = shmem_init_inodecache(); |
2805 | if (error) | 2375 | if (error) |
2806 | goto out3; | 2376 | goto out3; |
2807 | 2377 | ||
2808 | error = register_filesystem(&tmpfs_fs_type); | 2378 | error = register_filesystem(&shmem_fs_type); |
2809 | if (error) { | 2379 | if (error) { |
2810 | printk(KERN_ERR "Could not register tmpfs\n"); | 2380 | printk(KERN_ERR "Could not register tmpfs\n"); |
2811 | goto out2; | 2381 | goto out2; |
2812 | } | 2382 | } |
2813 | 2383 | ||
2814 | shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, | 2384 | shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, |
2815 | tmpfs_fs_type.name, NULL); | 2385 | shmem_fs_type.name, NULL); |
2816 | if (IS_ERR(shm_mnt)) { | 2386 | if (IS_ERR(shm_mnt)) { |
2817 | error = PTR_ERR(shm_mnt); | 2387 | error = PTR_ERR(shm_mnt); |
2818 | printk(KERN_ERR "Could not kern_mount tmpfs\n"); | 2388 | printk(KERN_ERR "Could not kern_mount tmpfs\n"); |
@@ -2821,9 +2391,9 @@ int __init init_tmpfs(void) | |||
2821 | return 0; | 2391 | return 0; |
2822 | 2392 | ||
2823 | out1: | 2393 | out1: |
2824 | unregister_filesystem(&tmpfs_fs_type); | 2394 | unregister_filesystem(&shmem_fs_type); |
2825 | out2: | 2395 | out2: |
2826 | destroy_inodecache(); | 2396 | shmem_destroy_inodecache(); |
2827 | out3: | 2397 | out3: |
2828 | bdi_destroy(&shmem_backing_dev_info); | 2398 | bdi_destroy(&shmem_backing_dev_info); |
2829 | out4: | 2399 | out4: |
@@ -2831,45 +2401,6 @@ out4: | |||
2831 | return error; | 2401 | return error; |
2832 | } | 2402 | } |
2833 | 2403 | ||
2834 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
2835 | /** | ||
2836 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file | ||
2837 | * @inode: the inode to be searched | ||
2838 | * @pgoff: the offset to be searched | ||
2839 | * @pagep: the pointer for the found page to be stored | ||
2840 | * @ent: the pointer for the found swap entry to be stored | ||
2841 | * | ||
2842 | * If a page is found, refcount of it is incremented. Callers should handle | ||
2843 | * these refcount. | ||
2844 | */ | ||
2845 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | ||
2846 | struct page **pagep, swp_entry_t *ent) | ||
2847 | { | ||
2848 | swp_entry_t entry = { .val = 0 }, *ptr; | ||
2849 | struct page *page = NULL; | ||
2850 | struct shmem_inode_info *info = SHMEM_I(inode); | ||
2851 | |||
2852 | if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
2853 | goto out; | ||
2854 | |||
2855 | spin_lock(&info->lock); | ||
2856 | ptr = shmem_swp_entry(info, pgoff, NULL); | ||
2857 | #ifdef CONFIG_SWAP | ||
2858 | if (ptr && ptr->val) { | ||
2859 | entry.val = ptr->val; | ||
2860 | page = find_get_page(&swapper_space, entry.val); | ||
2861 | } else | ||
2862 | #endif | ||
2863 | page = find_get_page(inode->i_mapping, pgoff); | ||
2864 | if (ptr) | ||
2865 | shmem_swp_unmap(ptr); | ||
2866 | spin_unlock(&info->lock); | ||
2867 | out: | ||
2868 | *pagep = page; | ||
2869 | *ent = entry; | ||
2870 | } | ||
2871 | #endif | ||
2872 | |||
2873 | #else /* !CONFIG_SHMEM */ | 2404 | #else /* !CONFIG_SHMEM */ |
2874 | 2405 | ||
2875 | /* | 2406 | /* |
@@ -2883,23 +2414,23 @@ out: | |||
2883 | 2414 | ||
2884 | #include <linux/ramfs.h> | 2415 | #include <linux/ramfs.h> |
2885 | 2416 | ||
2886 | static struct file_system_type tmpfs_fs_type = { | 2417 | static struct file_system_type shmem_fs_type = { |
2887 | .name = "tmpfs", | 2418 | .name = "tmpfs", |
2888 | .mount = ramfs_mount, | 2419 | .mount = ramfs_mount, |
2889 | .kill_sb = kill_litter_super, | 2420 | .kill_sb = kill_litter_super, |
2890 | }; | 2421 | }; |
2891 | 2422 | ||
2892 | int __init init_tmpfs(void) | 2423 | int __init shmem_init(void) |
2893 | { | 2424 | { |
2894 | BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); | 2425 | BUG_ON(register_filesystem(&shmem_fs_type) != 0); |
2895 | 2426 | ||
2896 | shm_mnt = kern_mount(&tmpfs_fs_type); | 2427 | shm_mnt = kern_mount(&shmem_fs_type); |
2897 | BUG_ON(IS_ERR(shm_mnt)); | 2428 | BUG_ON(IS_ERR(shm_mnt)); |
2898 | 2429 | ||
2899 | return 0; | 2430 | return 0; |
2900 | } | 2431 | } |
2901 | 2432 | ||
2902 | int shmem_unuse(swp_entry_t entry, struct page *page) | 2433 | int shmem_unuse(swp_entry_t swap, struct page *page) |
2903 | { | 2434 | { |
2904 | return 0; | 2435 | return 0; |
2905 | } | 2436 | } |
@@ -2909,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) | |||
2909 | return 0; | 2440 | return 0; |
2910 | } | 2441 | } |
2911 | 2442 | ||
2912 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | 2443 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) |
2913 | { | 2444 | { |
2914 | truncate_inode_pages_range(inode->i_mapping, start, end); | 2445 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); |
2915 | } | 2446 | } |
2916 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | 2447 | EXPORT_SYMBOL_GPL(shmem_truncate_range); |
2917 | 2448 | ||
2918 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
2919 | /** | ||
2920 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file | ||
2921 | * @inode: the inode to be searched | ||
2922 | * @pgoff: the offset to be searched | ||
2923 | * @pagep: the pointer for the found page to be stored | ||
2924 | * @ent: the pointer for the found swap entry to be stored | ||
2925 | * | ||
2926 | * If a page is found, refcount of it is incremented. Callers should handle | ||
2927 | * these refcount. | ||
2928 | */ | ||
2929 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | ||
2930 | struct page **pagep, swp_entry_t *ent) | ||
2931 | { | ||
2932 | struct page *page = NULL; | ||
2933 | |||
2934 | if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
2935 | goto out; | ||
2936 | page = find_get_page(inode->i_mapping, pgoff); | ||
2937 | out: | ||
2938 | *pagep = page; | ||
2939 | *ent = (swp_entry_t){ .val = 0 }; | ||
2940 | } | ||
2941 | #endif | ||
2942 | |||
2943 | #define shmem_vm_ops generic_file_vm_ops | 2449 | #define shmem_vm_ops generic_file_vm_ops |
2944 | #define shmem_file_operations ramfs_file_operations | 2450 | #define shmem_file_operations ramfs_file_operations |
2945 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) | 2451 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) |
2946 | #define shmem_acct_size(flags, size) 0 | 2452 | #define shmem_acct_size(flags, size) 0 |
2947 | #define shmem_unacct_size(flags, size) do {} while (0) | 2453 | #define shmem_unacct_size(flags, size) do {} while (0) |
2948 | #define SHMEM_MAX_BYTES MAX_LFS_FILESIZE | ||
2949 | 2454 | ||
2950 | #endif /* CONFIG_SHMEM */ | 2455 | #endif /* CONFIG_SHMEM */ |
2951 | 2456 | ||
@@ -2969,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags | |||
2969 | if (IS_ERR(shm_mnt)) | 2474 | if (IS_ERR(shm_mnt)) |
2970 | return (void *)shm_mnt; | 2475 | return (void *)shm_mnt; |
2971 | 2476 | ||
2972 | if (size < 0 || size > SHMEM_MAX_BYTES) | 2477 | if (size < 0 || size > MAX_LFS_FILESIZE) |
2973 | return ERR_PTR(-EINVAL); | 2478 | return ERR_PTR(-EINVAL); |
2974 | 2479 | ||
2975 | if (shmem_acct_size(flags, size)) | 2480 | if (shmem_acct_size(flags, size)) |
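
shmem_file_setup() creates an unlinked file on the kernel-internal shm_mnt tmpfs mount, and the size check above now uses MAX_LFS_FILESIZE rather than the old SHMEM_MAX_BYTES. SysV shared memory segments are built on this path, so one userspace way to exercise it is:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        /* The segment is an unlinked tmpfs file created by
         * shmem_file_setup() on the internal shm_mnt mount. */
        int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
        if (id < 0) { perror("shmget"); return 1; }

        char *p = shmat(id, NULL, 0);
        if (p == (void *)-1) { perror("shmat"); return 1; }

        strcpy(p, "backed by shm_mnt");
        printf("%s\n", p);

        shmdt(p);
        shmctl(id, IPC_RMID, NULL);
        return 0;
    }
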
@@ -3048,13 +2553,29 @@ int shmem_zero_setup(struct vm_area_struct *vma) | |||
3048 | * suit tmpfs, since it may have pages in swapcache, and needs to find those | 2553 | * suit tmpfs, since it may have pages in swapcache, and needs to find those |
3049 | * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. | 2554 | * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. |
3050 | * | 2555 | * |
3051 | * Provide a stub for those callers to start using now, then later | 2556 | * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in |
3052 | * flesh it out to call shmem_getpage() with additional gfp mask, when | 2557 | * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. |
3053 | * shmem_file_splice_read() is added and shmem_readpage() is removed. | ||
3054 | */ | 2558 | */ |
3055 | struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, | 2559 | struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, |
3056 | pgoff_t index, gfp_t gfp) | 2560 | pgoff_t index, gfp_t gfp) |
3057 | { | 2561 | { |
2562 | #ifdef CONFIG_SHMEM | ||
2563 | struct inode *inode = mapping->host; | ||
2564 | struct page *page; | ||
2565 | int error; | ||
2566 | |||
2567 | BUG_ON(mapping->a_ops != &shmem_aops); | ||
2568 | error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL); | ||
2569 | if (error) | ||
2570 | page = ERR_PTR(error); | ||
2571 | else | ||
2572 | unlock_page(page); | ||
2573 | return page; | ||
2574 | #else | ||
2575 | /* | ||
2576 | * The tiny !SHMEM case uses ramfs without swap | ||
2577 | */ | ||
3058 | return read_cache_page_gfp(mapping, index, gfp); | 2578 | return read_cache_page_gfp(mapping, index, gfp); |
2579 | #endif | ||
3059 | } | 2580 | } |
3060 | EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); | 2581 | EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); |
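
shmem_read_mapping_page_gfp() reports failure in the usual kernel style of folding a negative errno into the returned pointer, which callers test with IS_ERR(). A freestanding model of that convention, assuming the standard trick that the top 4095 addresses are never valid pointers:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error)      { return (void *)error; }
    static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int   IS_ERR(const void *ptr)
    {
        /* Errnos -1..-4095 map to the top 4095 addresses, a range no
         * valid kernel pointer can occupy. */
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
        void *page = ERR_PTR(-12 /* -ENOMEM */);

        assert(IS_ERR(page));
        printf("call failed: errno %ld\n", -PTR_ERR(page));
        return 0;
    }
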