aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hughd@google.com> 2011-08-03 19:21:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-08-03 20:25:23 -0400
commit 285b2c4fdd69ea73b4762785d8c6be83b6c074a6 (patch)
tree b350a54d8bca3e87ca47d2ad14bb71a53c9bb593
parent a2c16d6cb0e478812829ca84aeabd02e36af35eb (diff)
tmpfs: demolish old swap vector support
The maximum size of a shmem/tmpfs file has been limited by the maximum size of its triple-indirect swap vector. With 4kB page size, maximum filesize was just over 2TB on a 32-bit kernel, but sadly one eighth of that on a 64-bit kernel. (With 8kB page size, maximum filesize was just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, MAX_LFS_FILESIZE being then more restrictive than swap vector layout.) It's a shame that tmpfs should be more restrictive than ramfs, and this limitation has now been noticed. Add another level to the swap vector? No, it became obscure and hard to maintain, once I complicated it to make use of highmem pages nine years ago: better choose another way. Surely, if 2.4 had had the radix tree pagecache introduced in 2.5, then tmpfs would never have invented its own peculiar radix tree: we would have fitted swap entries into the common radix tree instead, in much the same way as we fit swap entries into page tables. And why should each file have a separate radix tree for its pages and for its swap entries? The swap entries are required precisely where and when the pages are not. We want to put them together in a single radix tree: which can then avoid much of the locking which was needed to prevent them from being exchanged underneath us. This also avoids the waste of memory devoted to swap vectors, first in the shmem_inode itself, then at least two more pages once a file grew beyond 16 data pages (pages accounted by df and du, but not by memcg). Allocated upfront, to avoid allocation when under swapping pressure, but pure waste when CONFIG_SWAP is not set - I have never spattered around the ifdefs to prevent that, preferring this move to sharing the common radix tree instead. There are three downsides to sharing the radix tree. One, that it binds tmpfs more tightly to the rest of mm, either requiring knowledge of swap entries in radix tree there, or duplication of its code here in shmem.c. 
I believe that the simplifications and memory savings (and probable higher performance, not yet measured) justify that. Two, that on HIGHMEM systems with SWAP enabled, it's the lowmem radix nodes that cannot be freed under memory pressure - whereas before it was the less precious highmem swap vector pages that could not be freed. I'm hoping that 64-bit has now been accessible for long enough, that the highmem argument has grown much less persuasive. Three, that swapoff is slower than it used to be on tmpfs files, since it's using a simple generic mechanism not tailored to it: I find this noticeable, and shall want to improve, but maybe nobody else will notice. So... now remove most of the old swap vector code from shmem.c. But, for the moment, keep the simple i_direct vector of 16 pages, with simple accessors shmem_put_swap() and shmem_get_swap(), as a toy implementation to help mark where swap needs to be handled in subsequent patches. Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/shmem_fs.h 2
-rw-r--r-- mm/shmem.c 782
2 files changed, 84 insertions, 700 deletions
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa08fa8fd79b..80b695213fdb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -17,9 +17,7 @@ struct shmem_inode_info {
17 unsigned long flags; 17 unsigned long flags;
18 unsigned long alloced; /* data pages alloced to file */ 18 unsigned long alloced; /* data pages alloced to file */
19 unsigned long swapped; /* subtotal assigned to swap */ 19 unsigned long swapped; /* subtotal assigned to swap */
20 unsigned long next_index; /* highest alloced index + 1 */
21 struct shared_policy policy; /* NUMA memory alloc policy */ 20 struct shared_policy policy; /* NUMA memory alloc policy */
22 struct page *i_indirect; /* top indirect blocks page */
23 union { 21 union {
24 swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ 22 swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
25 char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; 23 char inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd3..5574b00ca771 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,37 +66,9 @@ static struct vfsmount *shm_mnt;
66#include <asm/div64.h> 66#include <asm/div64.h>
67#include <asm/pgtable.h> 67#include <asm/pgtable.h>
68 68
69/*
70 * The maximum size of a shmem/tmpfs file is limited by the maximum size of
71 * its triple-indirect swap vector - see illustration at shmem_swp_entry().
72 *
73 * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
74 * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
75 * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
76 * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
77 *
78 * We use / and * instead of shifts in the definitions below, so that the swap
79 * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
80 */
81#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
82#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
83
84#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
85#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
86
87#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
88#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
89
90#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) 69#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
91#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) 70#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
92 71
93/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
94#define SHMEM_PAGEIN VM_READ
95#define SHMEM_TRUNCATE VM_WRITE
96
97/* Definition to limit shmem_truncate's steps between cond_rescheds */
98#define LATENCY_LIMIT 64
99
100/* Pretend that each entry is of this size in directory's i_size */ 72/* Pretend that each entry is of this size in directory's i_size */
101#define BOGO_DIRENT_SIZE 20 73#define BOGO_DIRENT_SIZE 20
102 74
@@ -107,7 +79,7 @@ struct shmem_xattr {
107 char value[0]; 79 char value[0];
108}; 80};
109 81
110/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ 82/* Flag allocation requirements to shmem_getpage */
111enum sgp_type { 83enum sgp_type {
112 SGP_READ, /* don't exceed i_size, don't allocate page */ 84 SGP_READ, /* don't exceed i_size, don't allocate page */
113 SGP_CACHE, /* don't exceed i_size, may allocate page */ 85 SGP_CACHE, /* don't exceed i_size, may allocate page */
@@ -137,56 +109,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index,
137 mapping_gfp_mask(inode->i_mapping), fault_type); 109 mapping_gfp_mask(inode->i_mapping), fault_type);
138} 110}
139 111
140static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
141{
142 /*
143 * The above definition of ENTRIES_PER_PAGE, and the use of
144 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
145 * might be reconsidered if it ever diverges from PAGE_SIZE.
146 *
147 * Mobility flags are masked out as swap vectors cannot move
148 */
149 return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
150 PAGE_CACHE_SHIFT-PAGE_SHIFT);
151}
152
153static inline void shmem_dir_free(struct page *page)
154{
155 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
156}
157
158static struct page **shmem_dir_map(struct page *page)
159{
160 return (struct page **)kmap_atomic(page, KM_USER0);
161}
162
163static inline void shmem_dir_unmap(struct page **dir)
164{
165 kunmap_atomic(dir, KM_USER0);
166}
167
168static swp_entry_t *shmem_swp_map(struct page *page)
169{
170 return (swp_entry_t *)kmap_atomic(page, KM_USER1);
171}
172
173static inline void shmem_swp_balance_unmap(void)
174{
175 /*
176 * When passing a pointer to an i_direct entry, to code which
177 * also handles indirect entries and so will shmem_swp_unmap,
178 * we must arrange for the preempt count to remain in balance.
179 * What kmap_atomic of a lowmem page does depends on config
180 * and architecture, so pretend to kmap_atomic some lowmem page.
181 */
182 (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
183}
184
185static inline void shmem_swp_unmap(swp_entry_t *entry)
186{
187 kunmap_atomic(entry, KM_USER1);
188}
189
190static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) 112static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
191{ 113{
192 return sb->s_fs_info; 114 return sb->s_fs_info;
@@ -303,468 +225,56 @@ static void shmem_recalc_inode(struct inode *inode)
303 } 225 }
304} 226}
305 227
306/** 228static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index,
307 * shmem_swp_entry - find the swap vector position in the info structure 229 swp_entry_t swap)
308 * @info: info structure for the inode
309 * @index: index of the page to find
310 * @page: optional page to add to the structure. Has to be preset to
311 * all zeros
312 *
313 * If there is no space allocated yet it will return NULL when
314 * page is NULL, else it will use the page for the needed block,
315 * setting it to NULL on return to indicate that it has been used.
316 *
317 * The swap vector is organized the following way:
318 *
319 * There are SHMEM_NR_DIRECT entries directly stored in the
320 * shmem_inode_info structure. So small files do not need an addional
321 * allocation.
322 *
323 * For pages with index > SHMEM_NR_DIRECT there is the pointer
324 * i_indirect which points to a page which holds in the first half
325 * doubly indirect blocks, in the second half triple indirect blocks:
326 *
327 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
328 * following layout (for SHMEM_NR_DIRECT == 16):
329 *
330 * i_indirect -> dir --> 16-19
331 * | +-> 20-23
332 * |
333 * +-->dir2 --> 24-27
334 * | +-> 28-31
335 * | +-> 32-35
336 * | +-> 36-39
337 * |
338 * +-->dir3 --> 40-43
339 * +-> 44-47
340 * +-> 48-51
341 * +-> 52-55
342 */
343static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
344{
345 unsigned long offset;
346 struct page **dir;
347 struct page *subdir;
348
349 if (index < SHMEM_NR_DIRECT) {
350 shmem_swp_balance_unmap();
351 return info->i_direct+index;
352 }
353 if (!info->i_indirect) {
354 if (page) {
355 info->i_indirect = *page;
356 *page = NULL;
357 }
358 return NULL; /* need another page */
359 }
360
361 index -= SHMEM_NR_DIRECT;
362 offset = index % ENTRIES_PER_PAGE;
363 index /= ENTRIES_PER_PAGE;
364 dir = shmem_dir_map(info->i_indirect);
365
366 if (index >= ENTRIES_PER_PAGE/2) {
367 index -= ENTRIES_PER_PAGE/2;
368 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
369 index %= ENTRIES_PER_PAGE;
370 subdir = *dir;
371 if (!subdir) {
372 if (page) {
373 *dir = *page;
374 *page = NULL;
375 }
376 shmem_dir_unmap(dir);
377 return NULL; /* need another page */
378 }
379 shmem_dir_unmap(dir);
380 dir = shmem_dir_map(subdir);
381 }
382
383 dir += index;
384 subdir = *dir;
385 if (!subdir) {
386 if (!page || !(subdir = *page)) {
387 shmem_dir_unmap(dir);
388 return NULL; /* need a page */
389 }
390 *dir = subdir;
391 *page = NULL;
392 }
393 shmem_dir_unmap(dir);
394 return shmem_swp_map(subdir) + offset;
395}
396
397static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
398{ 230{
399 long incdec = value? 1: -1; 231 if (index < SHMEM_NR_DIRECT)
400 232 info->i_direct[index] = swap;
401 entry->val = value;
402 info->swapped += incdec;
403 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
404 struct page *page = kmap_atomic_to_page(entry);
405 set_page_private(page, page_private(page) + incdec);
406 }
407}
408
409/**
410 * shmem_swp_alloc - get the position of the swap entry for the page.
411 * @info: info structure for the inode
412 * @index: index of the page to find
413 * @sgp: check and recheck i_size? skip allocation?
414 * @gfp: gfp mask to use for any page allocation
415 *
416 * If the entry does not exist, allocate it.
417 */
418static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info,
419 unsigned long index, enum sgp_type sgp, gfp_t gfp)
420{
421 struct inode *inode = &info->vfs_inode;
422 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
423 struct page *page = NULL;
424 swp_entry_t *entry;
425
426 if (sgp != SGP_WRITE &&
427 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
428 return ERR_PTR(-EINVAL);
429
430 while (!(entry = shmem_swp_entry(info, index, &page))) {
431 if (sgp == SGP_READ)
432 return shmem_swp_map(ZERO_PAGE(0));
433 /*
434 * Test used_blocks against 1 less max_blocks, since we have 1 data
435 * page (and perhaps indirect index pages) yet to allocate:
436 * a waste to allocate index if we cannot allocate data.
437 */
438 if (sbinfo->max_blocks) {
439 if (percpu_counter_compare(&sbinfo->used_blocks,
440 sbinfo->max_blocks - 1) >= 0)
441 return ERR_PTR(-ENOSPC);
442 percpu_counter_inc(&sbinfo->used_blocks);
443 inode->i_blocks += BLOCKS_PER_PAGE;
444 }
445
446 spin_unlock(&info->lock);
447 page = shmem_dir_alloc(gfp);
448 spin_lock(&info->lock);
449
450 if (!page) {
451 shmem_free_blocks(inode, 1);
452 return ERR_PTR(-ENOMEM);
453 }
454 if (sgp != SGP_WRITE &&
455 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
456 entry = ERR_PTR(-EINVAL);
457 break;
458 }
459 if (info->next_index <= index)
460 info->next_index = index + 1;
461 }
462 if (page) {
463 /* another task gave its page, or truncated the file */
464 shmem_free_blocks(inode, 1);
465 shmem_dir_free(page);
466 }
467 if (info->next_index <= index && !IS_ERR(entry))
468 info->next_index = index + 1;
469 return entry;
470}
471
472/**
473 * shmem_free_swp - free some swap entries in a directory
474 * @dir: pointer to the directory
475 * @edir: pointer after last entry of the directory
476 * @punch_lock: pointer to spinlock when needed for the holepunch case
477 */
478static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
479 spinlock_t *punch_lock)
480{
481 spinlock_t *punch_unlock = NULL;
482 swp_entry_t *ptr;
483 int freed = 0;
484
485 for (ptr = dir; ptr < edir; ptr++) {
486 if (ptr->val) {
487 if (unlikely(punch_lock)) {
488 punch_unlock = punch_lock;
489 punch_lock = NULL;
490 spin_lock(punch_unlock);
491 if (!ptr->val)
492 continue;
493 }
494 free_swap_and_cache(*ptr);
495 *ptr = (swp_entry_t){0};
496 freed++;
497 }
498 }
499 if (punch_unlock)
500 spin_unlock(punch_unlock);
501 return freed;
502}
503
504static int shmem_map_and_free_swp(struct page *subdir, int offset,
505 int limit, struct page ***dir, spinlock_t *punch_lock)
506{
507 swp_entry_t *ptr;
508 int freed = 0;
509
510 ptr = shmem_swp_map(subdir);
511 for (; offset < limit; offset += LATENCY_LIMIT) {
512 int size = limit - offset;
513 if (size > LATENCY_LIMIT)
514 size = LATENCY_LIMIT;
515 freed += shmem_free_swp(ptr+offset, ptr+offset+size,
516 punch_lock);
517 if (need_resched()) {
518 shmem_swp_unmap(ptr);
519 if (*dir) {
520 shmem_dir_unmap(*dir);
521 *dir = NULL;
522 }
523 cond_resched();
524 ptr = shmem_swp_map(subdir);
525 }
526 }
527 shmem_swp_unmap(ptr);
528 return freed;
529} 233}
530 234
531static void shmem_free_pages(struct list_head *next) 235static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
532{ 236{
533 struct page *page; 237 return (index < SHMEM_NR_DIRECT) ?
534 int freed = 0; 238 info->i_direct[index] : (swp_entry_t){0};
535
536 do {
537 page = container_of(next, struct page, lru);
538 next = next->next;
539 shmem_dir_free(page);
540 freed++;
541 if (freed >= LATENCY_LIMIT) {
542 cond_resched();
543 freed = 0;
544 }
545 } while (next);
546} 239}
547 240
548void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) 241void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
549{ 242{
243 struct address_space *mapping = inode->i_mapping;
550 struct shmem_inode_info *info = SHMEM_I(inode); 244 struct shmem_inode_info *info = SHMEM_I(inode);
551 unsigned long idx; 245 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
552 unsigned long size; 246 pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
553 unsigned long limit; 247 pgoff_t index;
554 unsigned long stage; 248 swp_entry_t swap;
555 unsigned long diroff;
556 struct page **dir;
557 struct page *topdir;
558 struct page *middir;
559 struct page *subdir;
560 swp_entry_t *ptr;
561 LIST_HEAD(pages_to_free);
562 long nr_pages_to_free = 0;
563 long nr_swaps_freed = 0;
564 int offset;
565 int freed;
566 int punch_hole;
567 spinlock_t *needs_lock;
568 spinlock_t *punch_lock;
569 unsigned long upper_limit;
570 249
571 truncate_inode_pages_range(inode->i_mapping, start, end); 250 truncate_inode_pages_range(mapping, lstart, lend);
572 251
573 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 252 if (end > SHMEM_NR_DIRECT)
574 idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 253 end = SHMEM_NR_DIRECT;
575 if (idx >= info->next_index)
576 return;
577 254
578 spin_lock(&info->lock); 255 spin_lock(&info->lock);
579 info->flags |= SHMEM_TRUNCATE; 256 for (index = start; index < end; index++) {
580 if (likely(end == (loff_t) -1)) { 257 swap = shmem_get_swap(info, index);
581 limit = info->next_index; 258 if (swap.val) {
582 upper_limit = SHMEM_MAX_INDEX; 259 free_swap_and_cache(swap);
583 info->next_index = idx; 260 shmem_put_swap(info, index, (swp_entry_t){0});
584 needs_lock = NULL; 261 info->swapped--;
585 punch_hole = 0;
586 } else {
587 if (end + 1 >= inode->i_size) { /* we may free a little more */
588 limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
589 PAGE_CACHE_SHIFT;
590 upper_limit = SHMEM_MAX_INDEX;
591 } else {
592 limit = (end + 1) >> PAGE_CACHE_SHIFT;
593 upper_limit = limit;
594 }
595 needs_lock = &info->lock;
596 punch_hole = 1;
597 }
598
599 topdir = info->i_indirect;
600 if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
601 info->i_indirect = NULL;
602 nr_pages_to_free++;
603 list_add(&topdir->lru, &pages_to_free);
604 }
605 spin_unlock(&info->lock);
606
607 if (info->swapped && idx < SHMEM_NR_DIRECT) {
608 ptr = info->i_direct;
609 size = limit;
610 if (size > SHMEM_NR_DIRECT)
611 size = SHMEM_NR_DIRECT;
612 nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
613 }
614
615 /*
616 * If there are no indirect blocks or we are punching a hole
617 * below indirect blocks, nothing to be done.
618 */
619 if (!topdir || limit <= SHMEM_NR_DIRECT)
620 goto done2;
621
622 /*
623 * The truncation case has already dropped info->lock, and we're safe
624 * because i_size and next_index have already been lowered, preventing
625 * access beyond. But in the punch_hole case, we still need to take
626 * the lock when updating the swap directory, because there might be
627 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
628 * shmem_writepage. However, whenever we find we can remove a whole
629 * directory page (not at the misaligned start or end of the range),
630 * we first NULLify its pointer in the level above, and then have no
631 * need to take the lock when updating its contents: needs_lock and
632 * punch_lock (either pointing to info->lock or NULL) manage this.
633 */
634
635 upper_limit -= SHMEM_NR_DIRECT;
636 limit -= SHMEM_NR_DIRECT;
637 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
638 offset = idx % ENTRIES_PER_PAGE;
639 idx -= offset;
640
641 dir = shmem_dir_map(topdir);
642 stage = ENTRIES_PER_PAGEPAGE/2;
643 if (idx < ENTRIES_PER_PAGEPAGE/2) {
644 middir = topdir;
645 diroff = idx/ENTRIES_PER_PAGE;
646 } else {
647 dir += ENTRIES_PER_PAGE/2;
648 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
649 while (stage <= idx)
650 stage += ENTRIES_PER_PAGEPAGE;
651 middir = *dir;
652 if (*dir) {
653 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
654 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
655 if (!diroff && !offset && upper_limit >= stage) {
656 if (needs_lock) {
657 spin_lock(needs_lock);
658 *dir = NULL;
659 spin_unlock(needs_lock);
660 needs_lock = NULL;
661 } else
662 *dir = NULL;
663 nr_pages_to_free++;
664 list_add(&middir->lru, &pages_to_free);
665 }
666 shmem_dir_unmap(dir);
667 dir = shmem_dir_map(middir);
668 } else {
669 diroff = 0;
670 offset = 0;
671 idx = stage;
672 } 262 }
673 } 263 }
674 264
675 for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { 265 if (mapping->nrpages) {
676 if (unlikely(idx == stage)) { 266 spin_unlock(&info->lock);
677 shmem_dir_unmap(dir);
678 dir = shmem_dir_map(topdir) +
679 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
680 while (!*dir) {
681 dir++;
682 idx += ENTRIES_PER_PAGEPAGE;
683 if (idx >= limit)
684 goto done1;
685 }
686 stage = idx + ENTRIES_PER_PAGEPAGE;
687 middir = *dir;
688 if (punch_hole)
689 needs_lock = &info->lock;
690 if (upper_limit >= stage) {
691 if (needs_lock) {
692 spin_lock(needs_lock);
693 *dir = NULL;
694 spin_unlock(needs_lock);
695 needs_lock = NULL;
696 } else
697 *dir = NULL;
698 nr_pages_to_free++;
699 list_add(&middir->lru, &pages_to_free);
700 }
701 shmem_dir_unmap(dir);
702 cond_resched();
703 dir = shmem_dir_map(middir);
704 diroff = 0;
705 }
706 punch_lock = needs_lock;
707 subdir = dir[diroff];
708 if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
709 if (needs_lock) {
710 spin_lock(needs_lock);
711 dir[diroff] = NULL;
712 spin_unlock(needs_lock);
713 punch_lock = NULL;
714 } else
715 dir[diroff] = NULL;
716 nr_pages_to_free++;
717 list_add(&subdir->lru, &pages_to_free);
718 }
719 if (subdir && page_private(subdir) /* has swap entries */) {
720 size = limit - idx;
721 if (size > ENTRIES_PER_PAGE)
722 size = ENTRIES_PER_PAGE;
723 freed = shmem_map_and_free_swp(subdir,
724 offset, size, &dir, punch_lock);
725 if (!dir)
726 dir = shmem_dir_map(middir);
727 nr_swaps_freed += freed;
728 if (offset || punch_lock) {
729 spin_lock(&info->lock);
730 set_page_private(subdir,
731 page_private(subdir) - freed);
732 spin_unlock(&info->lock);
733 } else
734 BUG_ON(page_private(subdir) != freed);
735 }
736 offset = 0;
737 }
738done1:
739 shmem_dir_unmap(dir);
740done2:
741 if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
742 /* 267 /*
743 * Call truncate_inode_pages again: racing shmem_unuse_inode 268 * A page may have meanwhile sneaked in from swap.
744 * may have swizzled a page in from swap since
745 * truncate_pagecache or generic_delete_inode did it, before we
746 * lowered next_index. Also, though shmem_getpage checks
747 * i_size before adding to cache, no recheck after: so fix the
748 * narrow window there too.
749 */ 269 */
750 truncate_inode_pages_range(inode->i_mapping, start, end); 270 truncate_inode_pages_range(mapping, lstart, lend);
271 spin_lock(&info->lock);
751 } 272 }
752 273
753 spin_lock(&info->lock);
754 info->flags &= ~SHMEM_TRUNCATE;
755 info->swapped -= nr_swaps_freed;
756 if (nr_pages_to_free)
757 shmem_free_blocks(inode, nr_pages_to_free);
758 shmem_recalc_inode(inode); 274 shmem_recalc_inode(inode);
759 spin_unlock(&info->lock); 275 spin_unlock(&info->lock);
760 276
761 /* 277 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
762 * Empty swap vector directory pages to be freed?
763 */
764 if (!list_empty(&pages_to_free)) {
765 pages_to_free.prev->next = NULL;
766 shmem_free_pages(pages_to_free.next);
767 }
768} 278}
769EXPORT_SYMBOL_GPL(shmem_truncate_range); 279EXPORT_SYMBOL_GPL(shmem_truncate_range);
770 280
@@ -797,19 +307,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
797 if (page) 307 if (page)
798 unlock_page(page); 308 unlock_page(page);
799 } 309 }
800 /*
801 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
802 * detect if any pages might have been added to cache
803 * after truncate_inode_pages. But we needn't bother
804 * if it's being fully truncated to zero-length: the
805 * nrpages check is efficient enough in that case.
806 */
807 if (newsize) {
808 struct shmem_inode_info *info = SHMEM_I(inode);
809 spin_lock(&info->lock);
810 info->flags &= ~SHMEM_PAGEIN;
811 spin_unlock(&info->lock);
812 }
813 } 310 }
814 if (newsize != oldsize) { 311 if (newsize != oldsize) {
815 i_size_write(inode, newsize); 312 i_size_write(inode, newsize);
@@ -859,106 +356,28 @@ static void shmem_evict_inode(struct inode *inode)
859 end_writeback(inode); 356 end_writeback(inode);
860} 357}
861 358
862static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
863{
864 swp_entry_t *ptr;
865
866 for (ptr = dir; ptr < edir; ptr++) {
867 if (ptr->val == entry.val)
868 return ptr - dir;
869 }
870 return -1;
871}
872
873static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) 359static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
874{ 360{
875 struct address_space *mapping; 361 struct address_space *mapping = info->vfs_inode.i_mapping;
876 unsigned long idx; 362 unsigned long idx;
877 unsigned long size;
878 unsigned long limit;
879 unsigned long stage;
880 struct page **dir;
881 struct page *subdir;
882 swp_entry_t *ptr;
883 int offset;
884 int error; 363 int error;
885 364
886 idx = 0; 365 for (idx = 0; idx < SHMEM_NR_DIRECT; idx++)
887 ptr = info->i_direct; 366 if (shmem_get_swap(info, idx).val == entry.val)
888 spin_lock(&info->lock); 367 goto found;
889 if (!info->swapped) {
890 list_del_init(&info->swaplist);
891 goto lost2;
892 }
893 limit = info->next_index;
894 size = limit;
895 if (size > SHMEM_NR_DIRECT)
896 size = SHMEM_NR_DIRECT;
897 offset = shmem_find_swp(entry, ptr, ptr+size);
898 if (offset >= 0) {
899 shmem_swp_balance_unmap();
900 goto found;
901 }
902 if (!info->i_indirect)
903 goto lost2;
904
905 dir = shmem_dir_map(info->i_indirect);
906 stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
907
908 for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
909 if (unlikely(idx == stage)) {
910 shmem_dir_unmap(dir-1);
911 if (cond_resched_lock(&info->lock)) {
912 /* check it has not been truncated */
913 if (limit > info->next_index) {
914 limit = info->next_index;
915 if (idx >= limit)
916 goto lost2;
917 }
918 }
919 dir = shmem_dir_map(info->i_indirect) +
920 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
921 while (!*dir) {
922 dir++;
923 idx += ENTRIES_PER_PAGEPAGE;
924 if (idx >= limit)
925 goto lost1;
926 }
927 stage = idx + ENTRIES_PER_PAGEPAGE;
928 subdir = *dir;
929 shmem_dir_unmap(dir);
930 dir = shmem_dir_map(subdir);
931 }
932 subdir = *dir;
933 if (subdir && page_private(subdir)) {
934 ptr = shmem_swp_map(subdir);
935 size = limit - idx;
936 if (size > ENTRIES_PER_PAGE)
937 size = ENTRIES_PER_PAGE;
938 offset = shmem_find_swp(entry, ptr, ptr+size);
939 shmem_swp_unmap(ptr);
940 if (offset >= 0) {
941 shmem_dir_unmap(dir);
942 ptr = shmem_swp_map(subdir);
943 goto found;
944 }
945 }
946 }
947lost1:
948 shmem_dir_unmap(dir-1);
949lost2:
950 spin_unlock(&info->lock);
951 return 0; 368 return 0;
952found: 369found:
953 idx += offset; 370 spin_lock(&info->lock);
954 ptr += offset; 371 if (shmem_get_swap(info, idx).val != entry.val) {
372 spin_unlock(&info->lock);
373 return 0;
374 }
955 375
956 /* 376 /*
957 * Move _head_ to start search for next from here. 377 * Move _head_ to start search for next from here.
958 * But be careful: shmem_evict_inode checks list_empty without taking 378 * But be careful: shmem_evict_inode checks list_empty without taking
959 * mutex, and there's an instant in list_move_tail when info->swaplist 379 * mutex, and there's an instant in list_move_tail when info->swaplist
960 * would appear empty, if it were the only one on shmem_swaplist. We 380 * would appear empty, if it were the only one on shmem_swaplist.
961 * could avoid doing it if inode NULL; or use this minor optimization.
962 */ 381 */
963 if (shmem_swaplist.next != &info->swaplist) 382 if (shmem_swaplist.next != &info->swaplist)
964 list_move_tail(&shmem_swaplist, &info->swaplist); 383 list_move_tail(&shmem_swaplist, &info->swaplist);
@@ -968,19 +387,17 @@ found:
968 * but also to hold up shmem_evict_inode(): so inode cannot be freed 387 * but also to hold up shmem_evict_inode(): so inode cannot be freed
969 * beneath us (pagelock doesn't help until the page is in pagecache). 388 * beneath us (pagelock doesn't help until the page is in pagecache).
970 */ 389 */
971 mapping = info->vfs_inode.i_mapping;
972 error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); 390 error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
973 /* which does mem_cgroup_uncharge_cache_page on error */ 391 /* which does mem_cgroup_uncharge_cache_page on error */
974 392
975 if (error != -ENOMEM) { 393 if (error != -ENOMEM) {
976 delete_from_swap_cache(page); 394 delete_from_swap_cache(page);
977 set_page_dirty(page); 395 set_page_dirty(page);
978 info->flags |= SHMEM_PAGEIN; 396 shmem_put_swap(info, idx, (swp_entry_t){0});
979 shmem_swp_set(info, ptr, 0); 397 info->swapped--;
980 swap_free(entry); 398 swap_free(entry);
981 error = 1; /* not an error, but entry was found */ 399 error = 1; /* not an error, but entry was found */
982 } 400 }
983 shmem_swp_unmap(ptr);
984 spin_unlock(&info->lock); 401 spin_unlock(&info->lock);
985 return error; 402 return error;
986} 403}
@@ -1017,7 +434,14 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
1017 mutex_lock(&shmem_swaplist_mutex); 434 mutex_lock(&shmem_swaplist_mutex);
1018 list_for_each_safe(p, next, &shmem_swaplist) { 435 list_for_each_safe(p, next, &shmem_swaplist) {
1019 info = list_entry(p, struct shmem_inode_info, swaplist); 436 info = list_entry(p, struct shmem_inode_info, swaplist);
1020 found = shmem_unuse_inode(info, entry, page); 437 if (!info->swapped) {
438 spin_lock(&info->lock);
439 if (!info->swapped)
440 list_del_init(&info->swaplist);
441 spin_unlock(&info->lock);
442 }
443 if (info->swapped)
444 found = shmem_unuse_inode(info, entry, page);
1021 cond_resched(); 445 cond_resched();
1022 if (found) 446 if (found)
1023 break; 447 break;
@@ -1041,7 +465,7 @@ out:
1041static int shmem_writepage(struct page *page, struct writeback_control *wbc) 465static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1042{ 466{
1043 struct shmem_inode_info *info; 467 struct shmem_inode_info *info;
1044 swp_entry_t *entry, swap; 468 swp_entry_t swap, oswap;
1045 struct address_space *mapping; 469 struct address_space *mapping;
1046 unsigned long index; 470 unsigned long index;
1047 struct inode *inode; 471 struct inode *inode;
@@ -1067,6 +491,15 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1067 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ 491 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
1068 goto redirty; 492 goto redirty;
1069 } 493 }
494
495 /*
496 * Just for this patch, we have a toy implementation,
497 * which can swap out only the first SHMEM_NR_DIRECT pages:
498 * for simple demonstration of where we need to think about swap.
499 */
500 if (index >= SHMEM_NR_DIRECT)
501 goto redirty;
502
1070 swap = get_swap_page(); 503 swap = get_swap_page();
1071 if (!swap.val) 504 if (!swap.val)
1072 goto redirty; 505 goto redirty;
@@ -1087,22 +520,19 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1087 spin_lock(&info->lock); 520 spin_lock(&info->lock);
1088 mutex_unlock(&shmem_swaplist_mutex); 521 mutex_unlock(&shmem_swaplist_mutex);
1089 522
1090 if (index >= info->next_index) { 523 oswap = shmem_get_swap(info, index);
1091 BUG_ON(!(info->flags & SHMEM_TRUNCATE)); 524 if (oswap.val) {
1092 goto unlock;
1093 }
1094 entry = shmem_swp_entry(info, index, NULL);
1095 if (entry->val) {
1096 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ 525 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
1097 free_swap_and_cache(*entry); 526 free_swap_and_cache(oswap);
1098 shmem_swp_set(info, entry, 0); 527 shmem_put_swap(info, index, (swp_entry_t){0});
528 info->swapped--;
1099 } 529 }
1100 shmem_recalc_inode(inode); 530 shmem_recalc_inode(inode);
1101 531
1102 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { 532 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
1103 delete_from_page_cache(page); 533 delete_from_page_cache(page);
1104 shmem_swp_set(info, entry, swap.val); 534 shmem_put_swap(info, index, swap);
1105 shmem_swp_unmap(entry); 535 info->swapped++;
1106 swap_shmem_alloc(swap); 536 swap_shmem_alloc(swap);
1107 spin_unlock(&info->lock); 537 spin_unlock(&info->lock);
1108 BUG_ON(page_mapped(page)); 538 BUG_ON(page_mapped(page));
@@ -1110,13 +540,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1110 return 0; 540 return 0;
1111 } 541 }
1112 542
1113 shmem_swp_unmap(entry);
1114unlock:
1115 spin_unlock(&info->lock); 543 spin_unlock(&info->lock);
1116 /*
1117 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
1118 * clear SWAP_HAS_CACHE flag.
1119 */
1120 swapcache_free(swap, NULL); 544 swapcache_free(swap, NULL);
1121redirty: 545redirty:
1122 set_page_dirty(page); 546 set_page_dirty(page);
@@ -1230,12 +654,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx,
1230 struct shmem_sb_info *sbinfo; 654 struct shmem_sb_info *sbinfo;
1231 struct page *page; 655 struct page *page;
1232 struct page *prealloc_page = NULL; 656 struct page *prealloc_page = NULL;
1233 swp_entry_t *entry;
1234 swp_entry_t swap; 657 swp_entry_t swap;
1235 int error; 658 int error;
1236 int ret;
1237 659
1238 if (idx >= SHMEM_MAX_INDEX) 660 if (idx > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1239 return -EFBIG; 661 return -EFBIG;
1240repeat: 662repeat:
1241 page = find_lock_page(mapping, idx); 663 page = find_lock_page(mapping, idx);
@@ -1272,37 +694,22 @@ repeat:
1272 694
1273 spin_lock(&info->lock); 695 spin_lock(&info->lock);
1274 shmem_recalc_inode(inode); 696 shmem_recalc_inode(inode);
1275 entry = shmem_swp_alloc(info, idx, sgp, gfp); 697 swap = shmem_get_swap(info, idx);
1276 if (IS_ERR(entry)) {
1277 spin_unlock(&info->lock);
1278 error = PTR_ERR(entry);
1279 goto out;
1280 }
1281 swap = *entry;
1282
1283 if (swap.val) { 698 if (swap.val) {
1284 /* Look it up and read it in.. */ 699 /* Look it up and read it in.. */
1285 page = lookup_swap_cache(swap); 700 page = lookup_swap_cache(swap);
1286 if (!page) { 701 if (!page) {
1287 shmem_swp_unmap(entry);
1288 spin_unlock(&info->lock); 702 spin_unlock(&info->lock);
1289 /* here we actually do the io */ 703 /* here we actually do the io */
1290 if (fault_type) 704 if (fault_type)
1291 *fault_type |= VM_FAULT_MAJOR; 705 *fault_type |= VM_FAULT_MAJOR;
1292 page = shmem_swapin(swap, gfp, info, idx); 706 page = shmem_swapin(swap, gfp, info, idx);
1293 if (!page) { 707 if (!page) {
1294 spin_lock(&info->lock); 708 swp_entry_t nswap = shmem_get_swap(info, idx);
1295 entry = shmem_swp_alloc(info, idx, sgp, gfp); 709 if (nswap.val == swap.val) {
1296 if (IS_ERR(entry)) 710 error = -ENOMEM;
1297 error = PTR_ERR(entry);
1298 else {
1299 if (entry->val == swap.val)
1300 error = -ENOMEM;
1301 shmem_swp_unmap(entry);
1302 }
1303 spin_unlock(&info->lock);
1304 if (error)
1305 goto out; 711 goto out;
712 }
1306 goto repeat; 713 goto repeat;
1307 } 714 }
1308 wait_on_page_locked(page); 715 wait_on_page_locked(page);
@@ -1312,14 +719,12 @@ repeat:
1312 719
1313 /* We have to do this with page locked to prevent races */ 720 /* We have to do this with page locked to prevent races */
1314 if (!trylock_page(page)) { 721 if (!trylock_page(page)) {
1315 shmem_swp_unmap(entry);
1316 spin_unlock(&info->lock); 722 spin_unlock(&info->lock);
1317 wait_on_page_locked(page); 723 wait_on_page_locked(page);
1318 page_cache_release(page); 724 page_cache_release(page);
1319 goto repeat; 725 goto repeat;
1320 } 726 }
1321 if (PageWriteback(page)) { 727 if (PageWriteback(page)) {
1322 shmem_swp_unmap(entry);
1323 spin_unlock(&info->lock); 728 spin_unlock(&info->lock);
1324 wait_on_page_writeback(page); 729 wait_on_page_writeback(page);
1325 unlock_page(page); 730 unlock_page(page);
@@ -1327,7 +732,6 @@ repeat:
1327 goto repeat; 732 goto repeat;
1328 } 733 }
1329 if (!PageUptodate(page)) { 734 if (!PageUptodate(page)) {
1330 shmem_swp_unmap(entry);
1331 spin_unlock(&info->lock); 735 spin_unlock(&info->lock);
1332 unlock_page(page); 736 unlock_page(page);
1333 page_cache_release(page); 737 page_cache_release(page);
@@ -1338,7 +742,6 @@ repeat:
1338 error = add_to_page_cache_locked(page, mapping, 742 error = add_to_page_cache_locked(page, mapping,
1339 idx, GFP_NOWAIT); 743 idx, GFP_NOWAIT);
1340 if (error) { 744 if (error) {
1341 shmem_swp_unmap(entry);
1342 spin_unlock(&info->lock); 745 spin_unlock(&info->lock);
1343 if (error == -ENOMEM) { 746 if (error == -ENOMEM) {
1344 /* 747 /*
@@ -1358,16 +761,14 @@ repeat:
1358 goto repeat; 761 goto repeat;
1359 } 762 }
1360 763
1361 info->flags |= SHMEM_PAGEIN;
1362 shmem_swp_set(info, entry, 0);
1363 shmem_swp_unmap(entry);
1364 delete_from_swap_cache(page); 764 delete_from_swap_cache(page);
765 shmem_put_swap(info, idx, (swp_entry_t){0});
766 info->swapped--;
1365 spin_unlock(&info->lock); 767 spin_unlock(&info->lock);
1366 set_page_dirty(page); 768 set_page_dirty(page);
1367 swap_free(swap); 769 swap_free(swap);
1368 770
1369 } else if (sgp == SGP_READ) { 771 } else if (sgp == SGP_READ) {
1370 shmem_swp_unmap(entry);
1371 page = find_get_page(mapping, idx); 772 page = find_get_page(mapping, idx);
1372 if (page && !trylock_page(page)) { 773 if (page && !trylock_page(page)) {
1373 spin_unlock(&info->lock); 774 spin_unlock(&info->lock);
@@ -1378,7 +779,6 @@ repeat:
1378 spin_unlock(&info->lock); 779 spin_unlock(&info->lock);
1379 780
1380 } else if (prealloc_page) { 781 } else if (prealloc_page) {
1381 shmem_swp_unmap(entry);
1382 sbinfo = SHMEM_SB(inode->i_sb); 782 sbinfo = SHMEM_SB(inode->i_sb);
1383 if (sbinfo->max_blocks) { 783 if (sbinfo->max_blocks) {
1384 if (percpu_counter_compare(&sbinfo->used_blocks, 784 if (percpu_counter_compare(&sbinfo->used_blocks,
@@ -1393,34 +793,24 @@ repeat:
1393 page = prealloc_page; 793 page = prealloc_page;
1394 prealloc_page = NULL; 794 prealloc_page = NULL;
1395 795
1396 entry = shmem_swp_alloc(info, idx, sgp, gfp); 796 swap = shmem_get_swap(info, idx);
1397 if (IS_ERR(entry)) 797 if (swap.val)
1398 error = PTR_ERR(entry);
1399 else {
1400 swap = *entry;
1401 shmem_swp_unmap(entry);
1402 }
1403 ret = error || swap.val;
1404 if (ret)
1405 mem_cgroup_uncharge_cache_page(page); 798 mem_cgroup_uncharge_cache_page(page);
1406 else 799 else
1407 ret = add_to_page_cache_lru(page, mapping, 800 error = add_to_page_cache_lru(page, mapping,
1408 idx, GFP_NOWAIT); 801 idx, GFP_NOWAIT);
1409 /* 802 /*
1410 * At add_to_page_cache_lru() failure, 803 * At add_to_page_cache_lru() failure,
1411 * uncharge will be done automatically. 804 * uncharge will be done automatically.
1412 */ 805 */
1413 if (ret) { 806 if (swap.val || error) {
1414 shmem_unacct_blocks(info->flags, 1); 807 shmem_unacct_blocks(info->flags, 1);
1415 shmem_free_blocks(inode, 1); 808 shmem_free_blocks(inode, 1);
1416 spin_unlock(&info->lock); 809 spin_unlock(&info->lock);
1417 page_cache_release(page); 810 page_cache_release(page);
1418 if (error)
1419 goto out;
1420 goto repeat; 811 goto repeat;
1421 } 812 }
1422 813
1423 info->flags |= SHMEM_PAGEIN;
1424 info->alloced++; 814 info->alloced++;
1425 spin_unlock(&info->lock); 815 spin_unlock(&info->lock);
1426 clear_highpage(page); 816 clear_highpage(page);
@@ -2627,7 +2017,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2627 goto failed; 2017 goto failed;
2628 sbinfo->free_inodes = sbinfo->max_inodes; 2018 sbinfo->free_inodes = sbinfo->max_inodes;
2629 2019
2630 sb->s_maxbytes = SHMEM_MAX_BYTES; 2020 sb->s_maxbytes = MAX_LFS_FILESIZE;
2631 sb->s_blocksize = PAGE_CACHE_SIZE; 2021 sb->s_blocksize = PAGE_CACHE_SIZE;
2632 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 2022 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2633 sb->s_magic = TMPFS_MAGIC; 2023 sb->s_magic = TMPFS_MAGIC;
@@ -2863,7 +2253,7 @@ out4:
2863void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, 2253void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2864 struct page **pagep, swp_entry_t *ent) 2254 struct page **pagep, swp_entry_t *ent)
2865{ 2255{
2866 swp_entry_t entry = { .val = 0 }, *ptr; 2256 swp_entry_t entry = { .val = 0 };
2867 struct page *page = NULL; 2257 struct page *page = NULL;
2868 struct shmem_inode_info *info = SHMEM_I(inode); 2258 struct shmem_inode_info *info = SHMEM_I(inode);
2869 2259
@@ -2871,16 +2261,13 @@ void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2871 goto out; 2261 goto out;
2872 2262
2873 spin_lock(&info->lock); 2263 spin_lock(&info->lock);
2874 ptr = shmem_swp_entry(info, pgoff, NULL);
2875#ifdef CONFIG_SWAP 2264#ifdef CONFIG_SWAP
2876 if (ptr && ptr->val) { 2265 entry = shmem_get_swap(info, pgoff);
2877 entry.val = ptr->val; 2266 if (entry.val)
2878 page = find_get_page(&swapper_space, entry.val); 2267 page = find_get_page(&swapper_space, entry.val);
2879 } else 2268 else
2880#endif 2269#endif
2881 page = find_get_page(inode->i_mapping, pgoff); 2270 page = find_get_page(inode->i_mapping, pgoff);
2882 if (ptr)
2883 shmem_swp_unmap(ptr);
2884 spin_unlock(&info->lock); 2271 spin_unlock(&info->lock);
2885out: 2272out:
2886 *pagep = page; 2273 *pagep = page;
@@ -2963,7 +2350,6 @@ out:
2963#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) 2350#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
2964#define shmem_acct_size(flags, size) 0 2351#define shmem_acct_size(flags, size) 0
2965#define shmem_unacct_size(flags, size) do {} while (0) 2352#define shmem_unacct_size(flags, size) do {} while (0)
2966#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE
2967 2353
2968#endif /* CONFIG_SHMEM */ 2354#endif /* CONFIG_SHMEM */
2969 2355
@@ -2987,7 +2373,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2987 if (IS_ERR(shm_mnt)) 2373 if (IS_ERR(shm_mnt))
2988 return (void *)shm_mnt; 2374 return (void *)shm_mnt;
2989 2375
2990 if (size < 0 || size > SHMEM_MAX_BYTES) 2376 if (size < 0 || size > MAX_LFS_FILESIZE)
2991 return ERR_PTR(-EINVAL); 2377 return ERR_PTR(-EINVAL);
2992 2378
2993 if (shmem_acct_size(flags, size)) 2379 if (shmem_acct_size(flags, size))