-rw-r--r--   include/linux/shmem_fs.h |   2
-rw-r--r--   mm/shmem.c               | 782
2 files changed, 84 insertions, 700 deletions
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa08fa8fd79b..80b695213fdb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -17,9 +17,7 @@ struct shmem_inode_info {
 	unsigned long flags;
 	unsigned long alloced;		/* data pages alloced to file */
 	unsigned long swapped;		/* subtotal assigned to swap */
-	unsigned long next_index;	/* highest alloced index + 1 */
 	struct shared_policy policy;	/* NUMA memory alloc policy */
-	struct page *i_indirect;	/* top indirect blocks page */
 	union {
 		swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 		char inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd3..5574b00ca771 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,37 +66,9 @@ static struct vfsmount *shm_mnt;
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
-/*
- * The maximum size of a shmem/tmpfs file is limited by the maximum size of
- * its triple-indirect swap vector - see illustration at shmem_swp_entry().
- *
- * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
- * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
- * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
- * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
- *
- * We use / and * instead of shifts in the definitions below, so that the swap
- * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
- */
-#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-
-#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
-
-#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
-#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
-
 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
-/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
-#define SHMEM_PAGEIN	VM_READ
-#define SHMEM_TRUNCATE	VM_WRITE
-
-/* Definition to limit shmem_truncate's steps between cond_rescheds */
-#define LATENCY_LIMIT	64
-
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
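The comment block deleted above is where the old size limit was explained. As a quick check of the figures it quotes, the standalone program below (an illustration only, not kernel code) evaluates the swap-vector part of the removed macro formulas with a 4kB page and SHMEM_NR_DIRECT taken as 16, the value used in the removed layout illustration, and lands on the numbers from the comment: just over 2TB when swap entries are 4 bytes wide, roughly one eighth of that when they are 8 bytes.

/* Reproduces SHMSWP_MAX_BYTES from the removed macros; not kernel code. */
#include <stdio.h>

static unsigned long long shmswp_max_bytes(unsigned long long page_size,
					   unsigned long long entry_size)
{
	unsigned long long epp = page_size / entry_size;	/* ENTRIES_PER_PAGE */
	unsigned long long eppp = epp * epp;			/* ENTRIES_PER_PAGEPAGE */
	unsigned long long max_index = 16 + (eppp / 2) * (epp + 1);	/* SHMEM_NR_DIRECT == 16 */

	return max_index * page_size;
}

int main(void)
{
	/* 4kB pages; swap entries are sizeof(unsigned long): 4 or 8 bytes */
	printf("32-bit: %llu bytes\n", shmswp_max_bytes(4096, 4));	/* just over 2TB */
	printf("64-bit: %llu bytes\n", shmswp_max_bytes(4096, 8));	/* about one eighth of that */
	return 0;
}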
@@ -107,7 +79,7 @@ struct shmem_xattr {
 	char value[0];
 };
 
-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
+/* Flag allocation requirements to shmem_getpage */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
@@ -137,56 +109,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, | |||
137 | mapping_gfp_mask(inode->i_mapping), fault_type); | 109 | mapping_gfp_mask(inode->i_mapping), fault_type); |
138 | } | 110 | } |
139 | 111 | ||
140 | static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) | ||
141 | { | ||
142 | /* | ||
143 | * The above definition of ENTRIES_PER_PAGE, and the use of | ||
144 | * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: | ||
145 | * might be reconsidered if it ever diverges from PAGE_SIZE. | ||
146 | * | ||
147 | * Mobility flags are masked out as swap vectors cannot move | ||
148 | */ | ||
149 | return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, | ||
150 | PAGE_CACHE_SHIFT-PAGE_SHIFT); | ||
151 | } | ||
152 | |||
153 | static inline void shmem_dir_free(struct page *page) | ||
154 | { | ||
155 | __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); | ||
156 | } | ||
157 | |||
158 | static struct page **shmem_dir_map(struct page *page) | ||
159 | { | ||
160 | return (struct page **)kmap_atomic(page, KM_USER0); | ||
161 | } | ||
162 | |||
163 | static inline void shmem_dir_unmap(struct page **dir) | ||
164 | { | ||
165 | kunmap_atomic(dir, KM_USER0); | ||
166 | } | ||
167 | |||
168 | static swp_entry_t *shmem_swp_map(struct page *page) | ||
169 | { | ||
170 | return (swp_entry_t *)kmap_atomic(page, KM_USER1); | ||
171 | } | ||
172 | |||
173 | static inline void shmem_swp_balance_unmap(void) | ||
174 | { | ||
175 | /* | ||
176 | * When passing a pointer to an i_direct entry, to code which | ||
177 | * also handles indirect entries and so will shmem_swp_unmap, | ||
178 | * we must arrange for the preempt count to remain in balance. | ||
179 | * What kmap_atomic of a lowmem page does depends on config | ||
180 | * and architecture, so pretend to kmap_atomic some lowmem page. | ||
181 | */ | ||
182 | (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); | ||
183 | } | ||
184 | |||
185 | static inline void shmem_swp_unmap(swp_entry_t *entry) | ||
186 | { | ||
187 | kunmap_atomic(entry, KM_USER1); | ||
188 | } | ||
189 | |||
190 | static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) | 112 | static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) |
191 | { | 113 | { |
192 | return sb->s_fs_info; | 114 | return sb->s_fs_info; |
@@ -303,468 +225,56 @@ static void shmem_recalc_inode(struct inode *inode) | |||
303 | } | 225 | } |
304 | } | 226 | } |
305 | 227 | ||
306 | /** | 228 | static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index, |
307 | * shmem_swp_entry - find the swap vector position in the info structure | 229 | swp_entry_t swap) |
308 | * @info: info structure for the inode | ||
309 | * @index: index of the page to find | ||
310 | * @page: optional page to add to the structure. Has to be preset to | ||
311 | * all zeros | ||
312 | * | ||
313 | * If there is no space allocated yet it will return NULL when | ||
314 | * page is NULL, else it will use the page for the needed block, | ||
315 | * setting it to NULL on return to indicate that it has been used. | ||
316 | * | ||
317 | * The swap vector is organized the following way: | ||
318 | * | ||
319 | * There are SHMEM_NR_DIRECT entries directly stored in the | ||
320 | * shmem_inode_info structure. So small files do not need an addional | ||
321 | * allocation. | ||
322 | * | ||
323 | * For pages with index > SHMEM_NR_DIRECT there is the pointer | ||
324 | * i_indirect which points to a page which holds in the first half | ||
325 | * doubly indirect blocks, in the second half triple indirect blocks: | ||
326 | * | ||
327 | * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the | ||
328 | * following layout (for SHMEM_NR_DIRECT == 16): | ||
329 | * | ||
330 | * i_indirect -> dir --> 16-19 | ||
331 | * | +-> 20-23 | ||
332 | * | | ||
333 | * +-->dir2 --> 24-27 | ||
334 | * | +-> 28-31 | ||
335 | * | +-> 32-35 | ||
336 | * | +-> 36-39 | ||
337 | * | | ||
338 | * +-->dir3 --> 40-43 | ||
339 | * +-> 44-47 | ||
340 | * +-> 48-51 | ||
341 | * +-> 52-55 | ||
342 | */ | ||
343 | static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) | ||
344 | { | ||
345 | unsigned long offset; | ||
346 | struct page **dir; | ||
347 | struct page *subdir; | ||
348 | |||
349 | if (index < SHMEM_NR_DIRECT) { | ||
350 | shmem_swp_balance_unmap(); | ||
351 | return info->i_direct+index; | ||
352 | } | ||
353 | if (!info->i_indirect) { | ||
354 | if (page) { | ||
355 | info->i_indirect = *page; | ||
356 | *page = NULL; | ||
357 | } | ||
358 | return NULL; /* need another page */ | ||
359 | } | ||
360 | |||
361 | index -= SHMEM_NR_DIRECT; | ||
362 | offset = index % ENTRIES_PER_PAGE; | ||
363 | index /= ENTRIES_PER_PAGE; | ||
364 | dir = shmem_dir_map(info->i_indirect); | ||
365 | |||
366 | if (index >= ENTRIES_PER_PAGE/2) { | ||
367 | index -= ENTRIES_PER_PAGE/2; | ||
368 | dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; | ||
369 | index %= ENTRIES_PER_PAGE; | ||
370 | subdir = *dir; | ||
371 | if (!subdir) { | ||
372 | if (page) { | ||
373 | *dir = *page; | ||
374 | *page = NULL; | ||
375 | } | ||
376 | shmem_dir_unmap(dir); | ||
377 | return NULL; /* need another page */ | ||
378 | } | ||
379 | shmem_dir_unmap(dir); | ||
380 | dir = shmem_dir_map(subdir); | ||
381 | } | ||
382 | |||
383 | dir += index; | ||
384 | subdir = *dir; | ||
385 | if (!subdir) { | ||
386 | if (!page || !(subdir = *page)) { | ||
387 | shmem_dir_unmap(dir); | ||
388 | return NULL; /* need a page */ | ||
389 | } | ||
390 | *dir = subdir; | ||
391 | *page = NULL; | ||
392 | } | ||
393 | shmem_dir_unmap(dir); | ||
394 | return shmem_swp_map(subdir) + offset; | ||
395 | } | ||
396 | |||
397 | static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) | ||
398 | { | 230 | { |
399 | long incdec = value? 1: -1; | 231 | if (index < SHMEM_NR_DIRECT) |
400 | 232 | info->i_direct[index] = swap; | |
401 | entry->val = value; | ||
402 | info->swapped += incdec; | ||
403 | if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { | ||
404 | struct page *page = kmap_atomic_to_page(entry); | ||
405 | set_page_private(page, page_private(page) + incdec); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | /** | ||
410 | * shmem_swp_alloc - get the position of the swap entry for the page. | ||
411 | * @info: info structure for the inode | ||
412 | * @index: index of the page to find | ||
413 | * @sgp: check and recheck i_size? skip allocation? | ||
414 | * @gfp: gfp mask to use for any page allocation | ||
415 | * | ||
416 | * If the entry does not exist, allocate it. | ||
417 | */ | ||
418 | static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, | ||
419 | unsigned long index, enum sgp_type sgp, gfp_t gfp) | ||
420 | { | ||
421 | struct inode *inode = &info->vfs_inode; | ||
422 | struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); | ||
423 | struct page *page = NULL; | ||
424 | swp_entry_t *entry; | ||
425 | |||
426 | if (sgp != SGP_WRITE && | ||
427 | ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
428 | return ERR_PTR(-EINVAL); | ||
429 | |||
430 | while (!(entry = shmem_swp_entry(info, index, &page))) { | ||
431 | if (sgp == SGP_READ) | ||
432 | return shmem_swp_map(ZERO_PAGE(0)); | ||
433 | /* | ||
434 | * Test used_blocks against 1 less max_blocks, since we have 1 data | ||
435 | * page (and perhaps indirect index pages) yet to allocate: | ||
436 | * a waste to allocate index if we cannot allocate data. | ||
437 | */ | ||
438 | if (sbinfo->max_blocks) { | ||
439 | if (percpu_counter_compare(&sbinfo->used_blocks, | ||
440 | sbinfo->max_blocks - 1) >= 0) | ||
441 | return ERR_PTR(-ENOSPC); | ||
442 | percpu_counter_inc(&sbinfo->used_blocks); | ||
443 | inode->i_blocks += BLOCKS_PER_PAGE; | ||
444 | } | ||
445 | |||
446 | spin_unlock(&info->lock); | ||
447 | page = shmem_dir_alloc(gfp); | ||
448 | spin_lock(&info->lock); | ||
449 | |||
450 | if (!page) { | ||
451 | shmem_free_blocks(inode, 1); | ||
452 | return ERR_PTR(-ENOMEM); | ||
453 | } | ||
454 | if (sgp != SGP_WRITE && | ||
455 | ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | ||
456 | entry = ERR_PTR(-EINVAL); | ||
457 | break; | ||
458 | } | ||
459 | if (info->next_index <= index) | ||
460 | info->next_index = index + 1; | ||
461 | } | ||
462 | if (page) { | ||
463 | /* another task gave its page, or truncated the file */ | ||
464 | shmem_free_blocks(inode, 1); | ||
465 | shmem_dir_free(page); | ||
466 | } | ||
467 | if (info->next_index <= index && !IS_ERR(entry)) | ||
468 | info->next_index = index + 1; | ||
469 | return entry; | ||
470 | } | ||
471 | |||
472 | /** | ||
473 | * shmem_free_swp - free some swap entries in a directory | ||
474 | * @dir: pointer to the directory | ||
475 | * @edir: pointer after last entry of the directory | ||
476 | * @punch_lock: pointer to spinlock when needed for the holepunch case | ||
477 | */ | ||
478 | static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, | ||
479 | spinlock_t *punch_lock) | ||
480 | { | ||
481 | spinlock_t *punch_unlock = NULL; | ||
482 | swp_entry_t *ptr; | ||
483 | int freed = 0; | ||
484 | |||
485 | for (ptr = dir; ptr < edir; ptr++) { | ||
486 | if (ptr->val) { | ||
487 | if (unlikely(punch_lock)) { | ||
488 | punch_unlock = punch_lock; | ||
489 | punch_lock = NULL; | ||
490 | spin_lock(punch_unlock); | ||
491 | if (!ptr->val) | ||
492 | continue; | ||
493 | } | ||
494 | free_swap_and_cache(*ptr); | ||
495 | *ptr = (swp_entry_t){0}; | ||
496 | freed++; | ||
497 | } | ||
498 | } | ||
499 | if (punch_unlock) | ||
500 | spin_unlock(punch_unlock); | ||
501 | return freed; | ||
502 | } | ||
503 | |||
504 | static int shmem_map_and_free_swp(struct page *subdir, int offset, | ||
505 | int limit, struct page ***dir, spinlock_t *punch_lock) | ||
506 | { | ||
507 | swp_entry_t *ptr; | ||
508 | int freed = 0; | ||
509 | |||
510 | ptr = shmem_swp_map(subdir); | ||
511 | for (; offset < limit; offset += LATENCY_LIMIT) { | ||
512 | int size = limit - offset; | ||
513 | if (size > LATENCY_LIMIT) | ||
514 | size = LATENCY_LIMIT; | ||
515 | freed += shmem_free_swp(ptr+offset, ptr+offset+size, | ||
516 | punch_lock); | ||
517 | if (need_resched()) { | ||
518 | shmem_swp_unmap(ptr); | ||
519 | if (*dir) { | ||
520 | shmem_dir_unmap(*dir); | ||
521 | *dir = NULL; | ||
522 | } | ||
523 | cond_resched(); | ||
524 | ptr = shmem_swp_map(subdir); | ||
525 | } | ||
526 | } | ||
527 | shmem_swp_unmap(ptr); | ||
528 | return freed; | ||
529 | } | 233 | } |
530 | 234 | ||
531 | static void shmem_free_pages(struct list_head *next) | 235 | static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index) |
532 | { | 236 | { |
533 | struct page *page; | 237 | return (index < SHMEM_NR_DIRECT) ? |
534 | int freed = 0; | 238 | info->i_direct[index] : (swp_entry_t){0}; |
535 | |||
536 | do { | ||
537 | page = container_of(next, struct page, lru); | ||
538 | next = next->next; | ||
539 | shmem_dir_free(page); | ||
540 | freed++; | ||
541 | if (freed >= LATENCY_LIMIT) { | ||
542 | cond_resched(); | ||
543 | freed = 0; | ||
544 | } | ||
545 | } while (next); | ||
546 | } | 239 | } |
547 | 240 | ||
548 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | 241 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) |
549 | { | 242 | { |
243 | struct address_space *mapping = inode->i_mapping; | ||
550 | struct shmem_inode_info *info = SHMEM_I(inode); | 244 | struct shmem_inode_info *info = SHMEM_I(inode); |
551 | unsigned long idx; | 245 | pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
552 | unsigned long size; | 246 | pgoff_t end = (lend >> PAGE_CACHE_SHIFT); |
553 | unsigned long limit; | 247 | pgoff_t index; |
554 | unsigned long stage; | 248 | swp_entry_t swap; |
555 | unsigned long diroff; | ||
556 | struct page **dir; | ||
557 | struct page *topdir; | ||
558 | struct page *middir; | ||
559 | struct page *subdir; | ||
560 | swp_entry_t *ptr; | ||
561 | LIST_HEAD(pages_to_free); | ||
562 | long nr_pages_to_free = 0; | ||
563 | long nr_swaps_freed = 0; | ||
564 | int offset; | ||
565 | int freed; | ||
566 | int punch_hole; | ||
567 | spinlock_t *needs_lock; | ||
568 | spinlock_t *punch_lock; | ||
569 | unsigned long upper_limit; | ||
570 | 249 | ||
571 | truncate_inode_pages_range(inode->i_mapping, start, end); | 250 | truncate_inode_pages_range(mapping, lstart, lend); |
572 | 251 | ||
573 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 252 | if (end > SHMEM_NR_DIRECT) |
574 | idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 253 | end = SHMEM_NR_DIRECT; |
575 | if (idx >= info->next_index) | ||
576 | return; | ||
577 | 254 | ||
578 | spin_lock(&info->lock); | 255 | spin_lock(&info->lock); |
579 | info->flags |= SHMEM_TRUNCATE; | 256 | for (index = start; index < end; index++) { |
580 | if (likely(end == (loff_t) -1)) { | 257 | swap = shmem_get_swap(info, index); |
581 | limit = info->next_index; | 258 | if (swap.val) { |
582 | upper_limit = SHMEM_MAX_INDEX; | 259 | free_swap_and_cache(swap); |
583 | info->next_index = idx; | 260 | shmem_put_swap(info, index, (swp_entry_t){0}); |
584 | needs_lock = NULL; | 261 | info->swapped--; |
585 | punch_hole = 0; | ||
586 | } else { | ||
587 | if (end + 1 >= inode->i_size) { /* we may free a little more */ | ||
588 | limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> | ||
589 | PAGE_CACHE_SHIFT; | ||
590 | upper_limit = SHMEM_MAX_INDEX; | ||
591 | } else { | ||
592 | limit = (end + 1) >> PAGE_CACHE_SHIFT; | ||
593 | upper_limit = limit; | ||
594 | } | ||
595 | needs_lock = &info->lock; | ||
596 | punch_hole = 1; | ||
597 | } | ||
598 | |||
599 | topdir = info->i_indirect; | ||
600 | if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { | ||
601 | info->i_indirect = NULL; | ||
602 | nr_pages_to_free++; | ||
603 | list_add(&topdir->lru, &pages_to_free); | ||
604 | } | ||
605 | spin_unlock(&info->lock); | ||
606 | |||
607 | if (info->swapped && idx < SHMEM_NR_DIRECT) { | ||
608 | ptr = info->i_direct; | ||
609 | size = limit; | ||
610 | if (size > SHMEM_NR_DIRECT) | ||
611 | size = SHMEM_NR_DIRECT; | ||
612 | nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * If there are no indirect blocks or we are punching a hole | ||
617 | * below indirect blocks, nothing to be done. | ||
618 | */ | ||
619 | if (!topdir || limit <= SHMEM_NR_DIRECT) | ||
620 | goto done2; | ||
621 | |||
622 | /* | ||
623 | * The truncation case has already dropped info->lock, and we're safe | ||
624 | * because i_size and next_index have already been lowered, preventing | ||
625 | * access beyond. But in the punch_hole case, we still need to take | ||
626 | * the lock when updating the swap directory, because there might be | ||
627 | * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or | ||
628 | * shmem_writepage. However, whenever we find we can remove a whole | ||
629 | * directory page (not at the misaligned start or end of the range), | ||
630 | * we first NULLify its pointer in the level above, and then have no | ||
631 | * need to take the lock when updating its contents: needs_lock and | ||
632 | * punch_lock (either pointing to info->lock or NULL) manage this. | ||
633 | */ | ||
634 | |||
635 | upper_limit -= SHMEM_NR_DIRECT; | ||
636 | limit -= SHMEM_NR_DIRECT; | ||
637 | idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; | ||
638 | offset = idx % ENTRIES_PER_PAGE; | ||
639 | idx -= offset; | ||
640 | |||
641 | dir = shmem_dir_map(topdir); | ||
642 | stage = ENTRIES_PER_PAGEPAGE/2; | ||
643 | if (idx < ENTRIES_PER_PAGEPAGE/2) { | ||
644 | middir = topdir; | ||
645 | diroff = idx/ENTRIES_PER_PAGE; | ||
646 | } else { | ||
647 | dir += ENTRIES_PER_PAGE/2; | ||
648 | dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; | ||
649 | while (stage <= idx) | ||
650 | stage += ENTRIES_PER_PAGEPAGE; | ||
651 | middir = *dir; | ||
652 | if (*dir) { | ||
653 | diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % | ||
654 | ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; | ||
655 | if (!diroff && !offset && upper_limit >= stage) { | ||
656 | if (needs_lock) { | ||
657 | spin_lock(needs_lock); | ||
658 | *dir = NULL; | ||
659 | spin_unlock(needs_lock); | ||
660 | needs_lock = NULL; | ||
661 | } else | ||
662 | *dir = NULL; | ||
663 | nr_pages_to_free++; | ||
664 | list_add(&middir->lru, &pages_to_free); | ||
665 | } | ||
666 | shmem_dir_unmap(dir); | ||
667 | dir = shmem_dir_map(middir); | ||
668 | } else { | ||
669 | diroff = 0; | ||
670 | offset = 0; | ||
671 | idx = stage; | ||
672 | } | 262 | } |
673 | } | 263 | } |
674 | 264 | ||
675 | for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { | 265 | if (mapping->nrpages) { |
676 | if (unlikely(idx == stage)) { | 266 | spin_unlock(&info->lock); |
677 | shmem_dir_unmap(dir); | ||
678 | dir = shmem_dir_map(topdir) + | ||
679 | ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; | ||
680 | while (!*dir) { | ||
681 | dir++; | ||
682 | idx += ENTRIES_PER_PAGEPAGE; | ||
683 | if (idx >= limit) | ||
684 | goto done1; | ||
685 | } | ||
686 | stage = idx + ENTRIES_PER_PAGEPAGE; | ||
687 | middir = *dir; | ||
688 | if (punch_hole) | ||
689 | needs_lock = &info->lock; | ||
690 | if (upper_limit >= stage) { | ||
691 | if (needs_lock) { | ||
692 | spin_lock(needs_lock); | ||
693 | *dir = NULL; | ||
694 | spin_unlock(needs_lock); | ||
695 | needs_lock = NULL; | ||
696 | } else | ||
697 | *dir = NULL; | ||
698 | nr_pages_to_free++; | ||
699 | list_add(&middir->lru, &pages_to_free); | ||
700 | } | ||
701 | shmem_dir_unmap(dir); | ||
702 | cond_resched(); | ||
703 | dir = shmem_dir_map(middir); | ||
704 | diroff = 0; | ||
705 | } | ||
706 | punch_lock = needs_lock; | ||
707 | subdir = dir[diroff]; | ||
708 | if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { | ||
709 | if (needs_lock) { | ||
710 | spin_lock(needs_lock); | ||
711 | dir[diroff] = NULL; | ||
712 | spin_unlock(needs_lock); | ||
713 | punch_lock = NULL; | ||
714 | } else | ||
715 | dir[diroff] = NULL; | ||
716 | nr_pages_to_free++; | ||
717 | list_add(&subdir->lru, &pages_to_free); | ||
718 | } | ||
719 | if (subdir && page_private(subdir) /* has swap entries */) { | ||
720 | size = limit - idx; | ||
721 | if (size > ENTRIES_PER_PAGE) | ||
722 | size = ENTRIES_PER_PAGE; | ||
723 | freed = shmem_map_and_free_swp(subdir, | ||
724 | offset, size, &dir, punch_lock); | ||
725 | if (!dir) | ||
726 | dir = shmem_dir_map(middir); | ||
727 | nr_swaps_freed += freed; | ||
728 | if (offset || punch_lock) { | ||
729 | spin_lock(&info->lock); | ||
730 | set_page_private(subdir, | ||
731 | page_private(subdir) - freed); | ||
732 | spin_unlock(&info->lock); | ||
733 | } else | ||
734 | BUG_ON(page_private(subdir) != freed); | ||
735 | } | ||
736 | offset = 0; | ||
737 | } | ||
738 | done1: | ||
739 | shmem_dir_unmap(dir); | ||
740 | done2: | ||
741 | if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { | ||
742 | /* | 267 | /* |
743 | * Call truncate_inode_pages again: racing shmem_unuse_inode | 268 | * A page may have meanwhile sneaked in from swap. |
744 | * may have swizzled a page in from swap since | ||
745 | * truncate_pagecache or generic_delete_inode did it, before we | ||
746 | * lowered next_index. Also, though shmem_getpage checks | ||
747 | * i_size before adding to cache, no recheck after: so fix the | ||
748 | * narrow window there too. | ||
749 | */ | 269 | */ |
750 | truncate_inode_pages_range(inode->i_mapping, start, end); | 270 | truncate_inode_pages_range(mapping, lstart, lend); |
271 | spin_lock(&info->lock); | ||
751 | } | 272 | } |
752 | 273 | ||
753 | spin_lock(&info->lock); | ||
754 | info->flags &= ~SHMEM_TRUNCATE; | ||
755 | info->swapped -= nr_swaps_freed; | ||
756 | if (nr_pages_to_free) | ||
757 | shmem_free_blocks(inode, nr_pages_to_free); | ||
758 | shmem_recalc_inode(inode); | 274 | shmem_recalc_inode(inode); |
759 | spin_unlock(&info->lock); | 275 | spin_unlock(&info->lock); |
760 | 276 | ||
761 | /* | 277 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
762 | * Empty swap vector directory pages to be freed? | ||
763 | */ | ||
764 | if (!list_empty(&pages_to_free)) { | ||
765 | pages_to_free.prev->next = NULL; | ||
766 | shmem_free_pages(pages_to_free.next); | ||
767 | } | ||
768 | } | 278 | } |
769 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | 279 | EXPORT_SYMBOL_GPL(shmem_truncate_range); |
770 | 280 | ||
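The rewritten shmem_truncate_range() above leans on the two new helpers, shmem_get_swap() and shmem_put_swap(), which treat i_direct[] as a plain fixed-size table. The userspace sketch below mirrors that structure so the bookkeeping can be read in isolation; the toy_* names, the value 16 for SHMEM_NR_DIRECT and the stubbed-out free_swap_and_cache() step are illustrative, not the kernel's.

/* Minimal userspace model of the new direct-only swap bookkeeping. */
#include <stdio.h>

#define SHMEM_NR_DIRECT 16		/* illustrative value, as in the old layout comment */

struct swp_entry { unsigned long val; };

struct toy_info {
	struct swp_entry i_direct[SHMEM_NR_DIRECT];	/* models info->i_direct[] */
	unsigned long swapped;				/* models info->swapped */
};

static void toy_put_swap(struct toy_info *info, unsigned long index,
			 struct swp_entry swap)
{
	if (index < SHMEM_NR_DIRECT)
		info->i_direct[index] = swap;
}

static struct swp_entry toy_get_swap(struct toy_info *info, unsigned long index)
{
	struct swp_entry none = { 0 };

	return index < SHMEM_NR_DIRECT ? info->i_direct[index] : none;
}

/* Mirrors the loop in the new shmem_truncate_range(): free entries in [start, end). */
static void toy_truncate_range(struct toy_info *info, unsigned long start,
			       unsigned long end)
{
	struct swp_entry none = { 0 };
	unsigned long index;

	if (end > SHMEM_NR_DIRECT)
		end = SHMEM_NR_DIRECT;
	for (index = start; index < end; index++) {
		if (toy_get_swap(info, index).val) {
			/* the kernel calls free_swap_and_cache() here */
			toy_put_swap(info, index, none);
			info->swapped--;
		}
	}
}

int main(void)
{
	struct toy_info info = { .swapped = 0 };
	struct swp_entry entry = { .val = 42 };

	toy_put_swap(&info, 3, entry);
	info.swapped++;
	toy_truncate_range(&info, 0, SHMEM_NR_DIRECT);
	printf("swapped after truncate: %lu\n", info.swapped);	/* prints 0 */
	return 0;
}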
@@ -797,19 +307,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) | |||
797 | if (page) | 307 | if (page) |
798 | unlock_page(page); | 308 | unlock_page(page); |
799 | } | 309 | } |
800 | /* | ||
801 | * Reset SHMEM_PAGEIN flag so that shmem_truncate can | ||
802 | * detect if any pages might have been added to cache | ||
803 | * after truncate_inode_pages. But we needn't bother | ||
804 | * if it's being fully truncated to zero-length: the | ||
805 | * nrpages check is efficient enough in that case. | ||
806 | */ | ||
807 | if (newsize) { | ||
808 | struct shmem_inode_info *info = SHMEM_I(inode); | ||
809 | spin_lock(&info->lock); | ||
810 | info->flags &= ~SHMEM_PAGEIN; | ||
811 | spin_unlock(&info->lock); | ||
812 | } | ||
813 | } | 310 | } |
814 | if (newsize != oldsize) { | 311 | if (newsize != oldsize) { |
815 | i_size_write(inode, newsize); | 312 | i_size_write(inode, newsize); |
@@ -859,106 +356,28 @@ static void shmem_evict_inode(struct inode *inode) | |||
859 | end_writeback(inode); | 356 | end_writeback(inode); |
860 | } | 357 | } |
861 | 358 | ||
862 | static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) | ||
863 | { | ||
864 | swp_entry_t *ptr; | ||
865 | |||
866 | for (ptr = dir; ptr < edir; ptr++) { | ||
867 | if (ptr->val == entry.val) | ||
868 | return ptr - dir; | ||
869 | } | ||
870 | return -1; | ||
871 | } | ||
872 | |||
873 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) | 359 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) |
874 | { | 360 | { |
875 | struct address_space *mapping; | 361 | struct address_space *mapping = info->vfs_inode.i_mapping; |
876 | unsigned long idx; | 362 | unsigned long idx; |
877 | unsigned long size; | ||
878 | unsigned long limit; | ||
879 | unsigned long stage; | ||
880 | struct page **dir; | ||
881 | struct page *subdir; | ||
882 | swp_entry_t *ptr; | ||
883 | int offset; | ||
884 | int error; | 363 | int error; |
885 | 364 | ||
886 | idx = 0; | 365 | for (idx = 0; idx < SHMEM_NR_DIRECT; idx++) |
887 | ptr = info->i_direct; | 366 | if (shmem_get_swap(info, idx).val == entry.val) |
888 | spin_lock(&info->lock); | 367 | goto found; |
889 | if (!info->swapped) { | ||
890 | list_del_init(&info->swaplist); | ||
891 | goto lost2; | ||
892 | } | ||
893 | limit = info->next_index; | ||
894 | size = limit; | ||
895 | if (size > SHMEM_NR_DIRECT) | ||
896 | size = SHMEM_NR_DIRECT; | ||
897 | offset = shmem_find_swp(entry, ptr, ptr+size); | ||
898 | if (offset >= 0) { | ||
899 | shmem_swp_balance_unmap(); | ||
900 | goto found; | ||
901 | } | ||
902 | if (!info->i_indirect) | ||
903 | goto lost2; | ||
904 | |||
905 | dir = shmem_dir_map(info->i_indirect); | ||
906 | stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; | ||
907 | |||
908 | for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { | ||
909 | if (unlikely(idx == stage)) { | ||
910 | shmem_dir_unmap(dir-1); | ||
911 | if (cond_resched_lock(&info->lock)) { | ||
912 | /* check it has not been truncated */ | ||
913 | if (limit > info->next_index) { | ||
914 | limit = info->next_index; | ||
915 | if (idx >= limit) | ||
916 | goto lost2; | ||
917 | } | ||
918 | } | ||
919 | dir = shmem_dir_map(info->i_indirect) + | ||
920 | ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; | ||
921 | while (!*dir) { | ||
922 | dir++; | ||
923 | idx += ENTRIES_PER_PAGEPAGE; | ||
924 | if (idx >= limit) | ||
925 | goto lost1; | ||
926 | } | ||
927 | stage = idx + ENTRIES_PER_PAGEPAGE; | ||
928 | subdir = *dir; | ||
929 | shmem_dir_unmap(dir); | ||
930 | dir = shmem_dir_map(subdir); | ||
931 | } | ||
932 | subdir = *dir; | ||
933 | if (subdir && page_private(subdir)) { | ||
934 | ptr = shmem_swp_map(subdir); | ||
935 | size = limit - idx; | ||
936 | if (size > ENTRIES_PER_PAGE) | ||
937 | size = ENTRIES_PER_PAGE; | ||
938 | offset = shmem_find_swp(entry, ptr, ptr+size); | ||
939 | shmem_swp_unmap(ptr); | ||
940 | if (offset >= 0) { | ||
941 | shmem_dir_unmap(dir); | ||
942 | ptr = shmem_swp_map(subdir); | ||
943 | goto found; | ||
944 | } | ||
945 | } | ||
946 | } | ||
947 | lost1: | ||
948 | shmem_dir_unmap(dir-1); | ||
949 | lost2: | ||
950 | spin_unlock(&info->lock); | ||
951 | return 0; | 368 | return 0; |
952 | found: | 369 | found: |
953 | idx += offset; | 370 | spin_lock(&info->lock); |
954 | ptr += offset; | 371 | if (shmem_get_swap(info, idx).val != entry.val) { |
372 | spin_unlock(&info->lock); | ||
373 | return 0; | ||
374 | } | ||
955 | 375 | ||
956 | /* | 376 | /* |
957 | * Move _head_ to start search for next from here. | 377 | * Move _head_ to start search for next from here. |
958 | * But be careful: shmem_evict_inode checks list_empty without taking | 378 | * But be careful: shmem_evict_inode checks list_empty without taking |
959 | * mutex, and there's an instant in list_move_tail when info->swaplist | 379 | * mutex, and there's an instant in list_move_tail when info->swaplist |
960 | * would appear empty, if it were the only one on shmem_swaplist. We | 380 | * would appear empty, if it were the only one on shmem_swaplist. |
961 | * could avoid doing it if inode NULL; or use this minor optimization. | ||
962 | */ | 381 | */ |
963 | if (shmem_swaplist.next != &info->swaplist) | 382 | if (shmem_swaplist.next != &info->swaplist) |
964 | list_move_tail(&shmem_swaplist, &info->swaplist); | 383 | list_move_tail(&shmem_swaplist, &info->swaplist); |
@@ -968,19 +387,17 @@ found: | |||
968 | * but also to hold up shmem_evict_inode(): so inode cannot be freed | 387 | * but also to hold up shmem_evict_inode(): so inode cannot be freed |
969 | * beneath us (pagelock doesn't help until the page is in pagecache). | 388 | * beneath us (pagelock doesn't help until the page is in pagecache). |
970 | */ | 389 | */ |
971 | mapping = info->vfs_inode.i_mapping; | ||
972 | error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); | 390 | error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); |
973 | /* which does mem_cgroup_uncharge_cache_page on error */ | 391 | /* which does mem_cgroup_uncharge_cache_page on error */ |
974 | 392 | ||
975 | if (error != -ENOMEM) { | 393 | if (error != -ENOMEM) { |
976 | delete_from_swap_cache(page); | 394 | delete_from_swap_cache(page); |
977 | set_page_dirty(page); | 395 | set_page_dirty(page); |
978 | info->flags |= SHMEM_PAGEIN; | 396 | shmem_put_swap(info, idx, (swp_entry_t){0}); |
979 | shmem_swp_set(info, ptr, 0); | 397 | info->swapped--; |
980 | swap_free(entry); | 398 | swap_free(entry); |
981 | error = 1; /* not an error, but entry was found */ | 399 | error = 1; /* not an error, but entry was found */ |
982 | } | 400 | } |
983 | shmem_swp_unmap(ptr); | ||
984 | spin_unlock(&info->lock); | 401 | spin_unlock(&info->lock); |
985 | return error; | 402 | return error; |
986 | } | 403 | } |
@@ -1017,7 +434,14 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
1017 | mutex_lock(&shmem_swaplist_mutex); | 434 | mutex_lock(&shmem_swaplist_mutex); |
1018 | list_for_each_safe(p, next, &shmem_swaplist) { | 435 | list_for_each_safe(p, next, &shmem_swaplist) { |
1019 | info = list_entry(p, struct shmem_inode_info, swaplist); | 436 | info = list_entry(p, struct shmem_inode_info, swaplist); |
1020 | found = shmem_unuse_inode(info, entry, page); | 437 | if (!info->swapped) { |
438 | spin_lock(&info->lock); | ||
439 | if (!info->swapped) | ||
440 | list_del_init(&info->swaplist); | ||
441 | spin_unlock(&info->lock); | ||
442 | } | ||
443 | if (info->swapped) | ||
444 | found = shmem_unuse_inode(info, entry, page); | ||
1021 | cond_resched(); | 445 | cond_resched(); |
1022 | if (found) | 446 | if (found) |
1023 | break; | 447 | break; |
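The loop above now prunes inodes with no swap left from shmem_swaplist before trying to unuse them, using an unlocked peek followed by a recheck under info->lock. A compact userspace sketch of that check/lock/recheck pattern follows; the toy_* names are invented and a pthread mutex stands in for the kernel spinlock.

/* Check/lock/recheck pruning, modelled with a pthread mutex; names invented. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_node {
	pthread_mutex_t lock;		/* stands in for info->lock */
	unsigned long swapped;		/* swap entries still recorded */
	bool on_list;			/* still linked on the (notional) swaplist */
};

static void toy_maybe_prune(struct toy_node *node)
{
	if (node->swapped)		/* cheap unlocked peek */
		return;
	pthread_mutex_lock(&node->lock);
	if (!node->swapped)		/* recheck now that we hold the lock */
		node->on_list = false;	/* kernel: list_del_init(&info->swaplist) */
	pthread_mutex_unlock(&node->lock);
}

int main(void)
{
	struct toy_node node = { PTHREAD_MUTEX_INITIALIZER, 0, true };

	toy_maybe_prune(&node);
	printf("still on list: %s\n", node.on_list ? "yes" : "no");	/* no */
	return 0;
}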
@@ -1041,7 +465,7 @@ out: | |||
1041 | static int shmem_writepage(struct page *page, struct writeback_control *wbc) | 465 | static int shmem_writepage(struct page *page, struct writeback_control *wbc) |
1042 | { | 466 | { |
1043 | struct shmem_inode_info *info; | 467 | struct shmem_inode_info *info; |
1044 | swp_entry_t *entry, swap; | 468 | swp_entry_t swap, oswap; |
1045 | struct address_space *mapping; | 469 | struct address_space *mapping; |
1046 | unsigned long index; | 470 | unsigned long index; |
1047 | struct inode *inode; | 471 | struct inode *inode; |
@@ -1067,6 +491,15 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1067 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ | 491 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ |
1068 | goto redirty; | 492 | goto redirty; |
1069 | } | 493 | } |
494 | |||
495 | /* | ||
496 | * Just for this patch, we have a toy implementation, | ||
497 | * which can swap out only the first SHMEM_NR_DIRECT pages: | ||
498 | * for simple demonstration of where we need to think about swap. | ||
499 | */ | ||
500 | if (index >= SHMEM_NR_DIRECT) | ||
501 | goto redirty; | ||
502 | |||
1070 | swap = get_swap_page(); | 503 | swap = get_swap_page(); |
1071 | if (!swap.val) | 504 | if (!swap.val) |
1072 | goto redirty; | 505 | goto redirty; |
@@ -1087,22 +520,19 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1087 | spin_lock(&info->lock); | 520 | spin_lock(&info->lock); |
1088 | mutex_unlock(&shmem_swaplist_mutex); | 521 | mutex_unlock(&shmem_swaplist_mutex); |
1089 | 522 | ||
1090 | if (index >= info->next_index) { | 523 | oswap = shmem_get_swap(info, index); |
1091 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); | 524 | if (oswap.val) { |
1092 | goto unlock; | ||
1093 | } | ||
1094 | entry = shmem_swp_entry(info, index, NULL); | ||
1095 | if (entry->val) { | ||
1096 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ | 525 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ |
1097 | free_swap_and_cache(*entry); | 526 | free_swap_and_cache(oswap); |
1098 | shmem_swp_set(info, entry, 0); | 527 | shmem_put_swap(info, index, (swp_entry_t){0}); |
528 | info->swapped--; | ||
1099 | } | 529 | } |
1100 | shmem_recalc_inode(inode); | 530 | shmem_recalc_inode(inode); |
1101 | 531 | ||
1102 | if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { | 532 | if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { |
1103 | delete_from_page_cache(page); | 533 | delete_from_page_cache(page); |
1104 | shmem_swp_set(info, entry, swap.val); | 534 | shmem_put_swap(info, index, swap); |
1105 | shmem_swp_unmap(entry); | 535 | info->swapped++; |
1106 | swap_shmem_alloc(swap); | 536 | swap_shmem_alloc(swap); |
1107 | spin_unlock(&info->lock); | 537 | spin_unlock(&info->lock); |
1108 | BUG_ON(page_mapped(page)); | 538 | BUG_ON(page_mapped(page)); |
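Taken together, the writepage hunks above reduce swap-out to: refuse anything past the first SHMEM_NR_DIRECT pages, drop a stale recorded entry if there is one, and store the new entry only once the page is safely in swap cache. The model below follows that ordering; the toy_* helpers, the always-succeeding swap-cache stub and the constant 16 are assumptions for illustration only.

/* Toy model of the simplified swap-out path; every name here is invented. */
#include <stdbool.h>
#include <stdio.h>

#define SHMEM_NR_DIRECT 16			/* illustrative value */

struct swp_entry { unsigned long val; };

struct toy_info {
	struct swp_entry i_direct[SHMEM_NR_DIRECT];
	unsigned long swapped;
};

static bool toy_add_to_swap_cache(unsigned long index)
{
	(void)index;
	return true;				/* pretend the move always succeeds */
}

/* Returns 0 when the page went to swap, -1 when the caller must redirty it. */
static int toy_writepage(struct toy_info *info, unsigned long index,
			 struct swp_entry swap)
{
	struct swp_entry none = { 0 };

	if (index >= SHMEM_NR_DIRECT)		/* interim limit from the patch */
		return -1;

	if (info->i_direct[index].val) {	/* stale entry: drop it first */
		info->i_direct[index] = none;
		info->swapped--;
	}
	if (toy_add_to_swap_cache(index)) {
		info->i_direct[index] = swap;	/* record only after success */
		info->swapped++;
		return 0;
	}
	return -1;				/* caller frees the swap slot and redirties */
}

int main(void)
{
	struct toy_info info = { .swapped = 0 };
	struct swp_entry swap = { .val = 7 };
	int ok = toy_writepage(&info, 3, swap);
	int beyond = toy_writepage(&info, 64, swap);

	printf("page 3: %d, page 64: %d, swapped: %lu\n", ok, beyond, info.swapped);
	return 0;
}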
@@ -1110,13 +540,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1110 | return 0; | 540 | return 0; |
1111 | } | 541 | } |
1112 | 542 | ||
1113 | shmem_swp_unmap(entry); | ||
1114 | unlock: | ||
1115 | spin_unlock(&info->lock); | 543 | spin_unlock(&info->lock); |
1116 | /* | ||
1117 | * add_to_swap_cache() doesn't return -EEXIST, so we can safely | ||
1118 | * clear SWAP_HAS_CACHE flag. | ||
1119 | */ | ||
1120 | swapcache_free(swap, NULL); | 544 | swapcache_free(swap, NULL); |
1121 | redirty: | 545 | redirty: |
1122 | set_page_dirty(page); | 546 | set_page_dirty(page); |
@@ -1230,12 +654,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, | |||
1230 | struct shmem_sb_info *sbinfo; | 654 | struct shmem_sb_info *sbinfo; |
1231 | struct page *page; | 655 | struct page *page; |
1232 | struct page *prealloc_page = NULL; | 656 | struct page *prealloc_page = NULL; |
1233 | swp_entry_t *entry; | ||
1234 | swp_entry_t swap; | 657 | swp_entry_t swap; |
1235 | int error; | 658 | int error; |
1236 | int ret; | ||
1237 | 659 | ||
1238 | if (idx >= SHMEM_MAX_INDEX) | 660 | if (idx > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) |
1239 | return -EFBIG; | 661 | return -EFBIG; |
1240 | repeat: | 662 | repeat: |
1241 | page = find_lock_page(mapping, idx); | 663 | page = find_lock_page(mapping, idx); |
@@ -1272,37 +694,22 @@ repeat: | |||
1272 | 694 | ||
1273 | spin_lock(&info->lock); | 695 | spin_lock(&info->lock); |
1274 | shmem_recalc_inode(inode); | 696 | shmem_recalc_inode(inode); |
1275 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | 697 | swap = shmem_get_swap(info, idx); |
1276 | if (IS_ERR(entry)) { | ||
1277 | spin_unlock(&info->lock); | ||
1278 | error = PTR_ERR(entry); | ||
1279 | goto out; | ||
1280 | } | ||
1281 | swap = *entry; | ||
1282 | |||
1283 | if (swap.val) { | 698 | if (swap.val) { |
1284 | /* Look it up and read it in.. */ | 699 | /* Look it up and read it in.. */ |
1285 | page = lookup_swap_cache(swap); | 700 | page = lookup_swap_cache(swap); |
1286 | if (!page) { | 701 | if (!page) { |
1287 | shmem_swp_unmap(entry); | ||
1288 | spin_unlock(&info->lock); | 702 | spin_unlock(&info->lock); |
1289 | /* here we actually do the io */ | 703 | /* here we actually do the io */ |
1290 | if (fault_type) | 704 | if (fault_type) |
1291 | *fault_type |= VM_FAULT_MAJOR; | 705 | *fault_type |= VM_FAULT_MAJOR; |
1292 | page = shmem_swapin(swap, gfp, info, idx); | 706 | page = shmem_swapin(swap, gfp, info, idx); |
1293 | if (!page) { | 707 | if (!page) { |
1294 | spin_lock(&info->lock); | 708 | swp_entry_t nswap = shmem_get_swap(info, idx); |
1295 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | 709 | if (nswap.val == swap.val) { |
1296 | if (IS_ERR(entry)) | 710 | error = -ENOMEM; |
1297 | error = PTR_ERR(entry); | ||
1298 | else { | ||
1299 | if (entry->val == swap.val) | ||
1300 | error = -ENOMEM; | ||
1301 | shmem_swp_unmap(entry); | ||
1302 | } | ||
1303 | spin_unlock(&info->lock); | ||
1304 | if (error) | ||
1305 | goto out; | 711 | goto out; |
712 | } | ||
1306 | goto repeat; | 713 | goto repeat; |
1307 | } | 714 | } |
1308 | wait_on_page_locked(page); | 715 | wait_on_page_locked(page); |
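The failure path above no longer consults shmem_swp_alloc(); it simply re-reads the recorded swap entry and reports -ENOMEM only if nothing changed while the lock was dropped, otherwise it repeats the whole lookup. The sketch below captures just that decision; -EAGAIN stands in for the kernel's goto repeat, and the names and values are invented.

/* Decision taken when swap-in fails; -EAGAIN stands in for "goto repeat". */
#include <errno.h>
#include <stdio.h>

struct swp_entry { unsigned long val; };

static struct swp_entry recorded = { .val = 42 };	/* stands in for i_direct[idx] */

static int toy_swapin_failed(struct swp_entry swap)
{
	if (recorded.val == swap.val)
		return -ENOMEM;		/* nothing changed: genuine allocation failure */
	return -EAGAIN;			/* entry changed meanwhile: repeat the lookup */
}

int main(void)
{
	struct swp_entry swap = { .val = 42 };

	printf("%d\n", toy_swapin_failed(swap));	/* -ENOMEM */
	recorded.val = 0;				/* simulate a concurrent truncate */
	printf("%d\n", toy_swapin_failed(swap));	/* -EAGAIN */
	return 0;
}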
@@ -1312,14 +719,12 @@ repeat: | |||
1312 | 719 | ||
1313 | /* We have to do this with page locked to prevent races */ | 720 | /* We have to do this with page locked to prevent races */ |
1314 | if (!trylock_page(page)) { | 721 | if (!trylock_page(page)) { |
1315 | shmem_swp_unmap(entry); | ||
1316 | spin_unlock(&info->lock); | 722 | spin_unlock(&info->lock); |
1317 | wait_on_page_locked(page); | 723 | wait_on_page_locked(page); |
1318 | page_cache_release(page); | 724 | page_cache_release(page); |
1319 | goto repeat; | 725 | goto repeat; |
1320 | } | 726 | } |
1321 | if (PageWriteback(page)) { | 727 | if (PageWriteback(page)) { |
1322 | shmem_swp_unmap(entry); | ||
1323 | spin_unlock(&info->lock); | 728 | spin_unlock(&info->lock); |
1324 | wait_on_page_writeback(page); | 729 | wait_on_page_writeback(page); |
1325 | unlock_page(page); | 730 | unlock_page(page); |
@@ -1327,7 +732,6 @@ repeat: | |||
1327 | goto repeat; | 732 | goto repeat; |
1328 | } | 733 | } |
1329 | if (!PageUptodate(page)) { | 734 | if (!PageUptodate(page)) { |
1330 | shmem_swp_unmap(entry); | ||
1331 | spin_unlock(&info->lock); | 735 | spin_unlock(&info->lock); |
1332 | unlock_page(page); | 736 | unlock_page(page); |
1333 | page_cache_release(page); | 737 | page_cache_release(page); |
@@ -1338,7 +742,6 @@ repeat: | |||
1338 | error = add_to_page_cache_locked(page, mapping, | 742 | error = add_to_page_cache_locked(page, mapping, |
1339 | idx, GFP_NOWAIT); | 743 | idx, GFP_NOWAIT); |
1340 | if (error) { | 744 | if (error) { |
1341 | shmem_swp_unmap(entry); | ||
1342 | spin_unlock(&info->lock); | 745 | spin_unlock(&info->lock); |
1343 | if (error == -ENOMEM) { | 746 | if (error == -ENOMEM) { |
1344 | /* | 747 | /* |
@@ -1358,16 +761,14 @@ repeat: | |||
1358 | goto repeat; | 761 | goto repeat; |
1359 | } | 762 | } |
1360 | 763 | ||
1361 | info->flags |= SHMEM_PAGEIN; | ||
1362 | shmem_swp_set(info, entry, 0); | ||
1363 | shmem_swp_unmap(entry); | ||
1364 | delete_from_swap_cache(page); | 764 | delete_from_swap_cache(page); |
765 | shmem_put_swap(info, idx, (swp_entry_t){0}); | ||
766 | info->swapped--; | ||
1365 | spin_unlock(&info->lock); | 767 | spin_unlock(&info->lock); |
1366 | set_page_dirty(page); | 768 | set_page_dirty(page); |
1367 | swap_free(swap); | 769 | swap_free(swap); |
1368 | 770 | ||
1369 | } else if (sgp == SGP_READ) { | 771 | } else if (sgp == SGP_READ) { |
1370 | shmem_swp_unmap(entry); | ||
1371 | page = find_get_page(mapping, idx); | 772 | page = find_get_page(mapping, idx); |
1372 | if (page && !trylock_page(page)) { | 773 | if (page && !trylock_page(page)) { |
1373 | spin_unlock(&info->lock); | 774 | spin_unlock(&info->lock); |
@@ -1378,7 +779,6 @@ repeat: | |||
1378 | spin_unlock(&info->lock); | 779 | spin_unlock(&info->lock); |
1379 | 780 | ||
1380 | } else if (prealloc_page) { | 781 | } else if (prealloc_page) { |
1381 | shmem_swp_unmap(entry); | ||
1382 | sbinfo = SHMEM_SB(inode->i_sb); | 782 | sbinfo = SHMEM_SB(inode->i_sb); |
1383 | if (sbinfo->max_blocks) { | 783 | if (sbinfo->max_blocks) { |
1384 | if (percpu_counter_compare(&sbinfo->used_blocks, | 784 | if (percpu_counter_compare(&sbinfo->used_blocks, |
@@ -1393,34 +793,24 @@ repeat: | |||
1393 | page = prealloc_page; | 793 | page = prealloc_page; |
1394 | prealloc_page = NULL; | 794 | prealloc_page = NULL; |
1395 | 795 | ||
1396 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | 796 | swap = shmem_get_swap(info, idx); |
1397 | if (IS_ERR(entry)) | 797 | if (swap.val) |
1398 | error = PTR_ERR(entry); | ||
1399 | else { | ||
1400 | swap = *entry; | ||
1401 | shmem_swp_unmap(entry); | ||
1402 | } | ||
1403 | ret = error || swap.val; | ||
1404 | if (ret) | ||
1405 | mem_cgroup_uncharge_cache_page(page); | 798 | mem_cgroup_uncharge_cache_page(page); |
1406 | else | 799 | else |
1407 | ret = add_to_page_cache_lru(page, mapping, | 800 | error = add_to_page_cache_lru(page, mapping, |
1408 | idx, GFP_NOWAIT); | 801 | idx, GFP_NOWAIT); |
1409 | /* | 802 | /* |
1410 | * At add_to_page_cache_lru() failure, | 803 | * At add_to_page_cache_lru() failure, |
1411 | * uncharge will be done automatically. | 804 | * uncharge will be done automatically. |
1412 | */ | 805 | */ |
1413 | if (ret) { | 806 | if (swap.val || error) { |
1414 | shmem_unacct_blocks(info->flags, 1); | 807 | shmem_unacct_blocks(info->flags, 1); |
1415 | shmem_free_blocks(inode, 1); | 808 | shmem_free_blocks(inode, 1); |
1416 | spin_unlock(&info->lock); | 809 | spin_unlock(&info->lock); |
1417 | page_cache_release(page); | 810 | page_cache_release(page); |
1418 | if (error) | ||
1419 | goto out; | ||
1420 | goto repeat; | 811 | goto repeat; |
1421 | } | 812 | } |
1422 | 813 | ||
1423 | info->flags |= SHMEM_PAGEIN; | ||
1424 | info->alloced++; | 814 | info->alloced++; |
1425 | spin_unlock(&info->lock); | 815 | spin_unlock(&info->lock); |
1426 | clear_highpage(page); | 816 | clear_highpage(page); |
@@ -2627,7 +2017,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) | |||
2627 | goto failed; | 2017 | goto failed; |
2628 | sbinfo->free_inodes = sbinfo->max_inodes; | 2018 | sbinfo->free_inodes = sbinfo->max_inodes; |
2629 | 2019 | ||
2630 | sb->s_maxbytes = SHMEM_MAX_BYTES; | 2020 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
2631 | sb->s_blocksize = PAGE_CACHE_SIZE; | 2021 | sb->s_blocksize = PAGE_CACHE_SIZE; |
2632 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 2022 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
2633 | sb->s_magic = TMPFS_MAGIC; | 2023 | sb->s_magic = TMPFS_MAGIC; |
@@ -2863,7 +2253,7 @@ out4: | |||
2863 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | 2253 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, |
2864 | struct page **pagep, swp_entry_t *ent) | 2254 | struct page **pagep, swp_entry_t *ent) |
2865 | { | 2255 | { |
2866 | swp_entry_t entry = { .val = 0 }, *ptr; | 2256 | swp_entry_t entry = { .val = 0 }; |
2867 | struct page *page = NULL; | 2257 | struct page *page = NULL; |
2868 | struct shmem_inode_info *info = SHMEM_I(inode); | 2258 | struct shmem_inode_info *info = SHMEM_I(inode); |
2869 | 2259 | ||
@@ -2871,16 +2261,13 @@ void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | |||
2871 | goto out; | 2261 | goto out; |
2872 | 2262 | ||
2873 | spin_lock(&info->lock); | 2263 | spin_lock(&info->lock); |
2874 | ptr = shmem_swp_entry(info, pgoff, NULL); | ||
2875 | #ifdef CONFIG_SWAP | 2264 | #ifdef CONFIG_SWAP |
2876 | if (ptr && ptr->val) { | 2265 | entry = shmem_get_swap(info, pgoff); |
2877 | entry.val = ptr->val; | 2266 | if (entry.val) |
2878 | page = find_get_page(&swapper_space, entry.val); | 2267 | page = find_get_page(&swapper_space, entry.val); |
2879 | } else | 2268 | else |
2880 | #endif | 2269 | #endif |
2881 | page = find_get_page(inode->i_mapping, pgoff); | 2270 | page = find_get_page(inode->i_mapping, pgoff); |
2882 | if (ptr) | ||
2883 | shmem_swp_unmap(ptr); | ||
2884 | spin_unlock(&info->lock); | 2271 | spin_unlock(&info->lock); |
2885 | out: | 2272 | out: |
2886 | *pagep = page; | 2273 | *pagep = page; |
@@ -2963,7 +2350,6 @@ out: | |||
2963 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) | 2350 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) |
2964 | #define shmem_acct_size(flags, size) 0 | 2351 | #define shmem_acct_size(flags, size) 0 |
2965 | #define shmem_unacct_size(flags, size) do {} while (0) | 2352 | #define shmem_unacct_size(flags, size) do {} while (0) |
2966 | #define SHMEM_MAX_BYTES MAX_LFS_FILESIZE | ||
2967 | 2353 | ||
2968 | #endif /* CONFIG_SHMEM */ | 2354 | #endif /* CONFIG_SHMEM */ |
2969 | 2355 | ||
@@ -2987,7 +2373,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags | |||
2987 | if (IS_ERR(shm_mnt)) | 2373 | if (IS_ERR(shm_mnt)) |
2988 | return (void *)shm_mnt; | 2374 | return (void *)shm_mnt; |
2989 | 2375 | ||
2990 | if (size < 0 || size > SHMEM_MAX_BYTES) | 2376 | if (size < 0 || size > MAX_LFS_FILESIZE) |
2991 | return ERR_PTR(-EINVAL); | 2377 | return ERR_PTR(-EINVAL); |
2992 | 2378 | ||
2993 | if (shmem_acct_size(flags, size)) | 2379 | if (shmem_acct_size(flags, size)) |