author     Pekka Enberg <penberg@kernel.org>    2011-09-19 10:46:07 -0400
committer  Pekka Enberg <penberg@kernel.org>    2011-09-19 10:46:07 -0400
commit     d20bbfab01802e195a50435940f7e4aa747c217c (patch)
tree       82b0007e33c083050a4e60a49dbb2f5477b4c99d /mm
parent     a37933c37c14b64e81c7c9cc44a5d3f5e0c91412 (diff)
parent     136333d104bd3a62d783b0ac3d0f32ac0108c5d0 (diff)
Merge branch 'slab/urgent' into slab/next
Diffstat (limited to 'mm')
-rw-r--r--  mm/failslab.c        |   14
-rw-r--r--  mm/filemap.c         |  106
-rw-r--r--  mm/memcontrol.c      |   66
-rw-r--r--  mm/memory-failure.c  |   92
-rw-r--r--  mm/mincore.c         |   11
-rw-r--r--  mm/oom_kill.c        |    4
-rw-r--r--  mm/page_alloc.c      |   13
-rw-r--r--  mm/shmem.c           | 1493
-rw-r--r--  mm/slab.c            |   99
-rw-r--r--  mm/slub.c            |   22
-rw-r--r--  mm/swapfile.c        |   20
-rw-r--r--  mm/truncate.c        |    8
12 files changed, 783 insertions(+), 1165 deletions(-)
diff --git a/mm/failslab.c b/mm/failslab.c
index 1ce58c201dca..0dd7b8fec71c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
34 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS | 34 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS |
35 | static int __init failslab_debugfs_init(void) | 35 | static int __init failslab_debugfs_init(void) |
36 | { | 36 | { |
37 | struct dentry *dir; | ||
37 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | 38 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; |
38 | int err; | ||
39 | 39 | ||
40 | err = init_fault_attr_dentries(&failslab.attr, "failslab"); | 40 | dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); |
41 | if (err) | 41 | if (IS_ERR(dir)) |
42 | return err; | 42 | return PTR_ERR(dir); |
43 | 43 | ||
44 | if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, | 44 | if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, |
45 | &failslab.ignore_gfp_wait)) | 45 | &failslab.ignore_gfp_wait)) |
46 | goto fail; | 46 | goto fail; |
47 | if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, | 47 | if (!debugfs_create_bool("cache-filter", mode, dir, |
48 | &failslab.cache_filter)) | 48 | &failslab.cache_filter)) |
49 | goto fail; | 49 | goto fail; |
50 | 50 | ||
51 | return 0; | 51 | return 0; |
52 | fail: | 52 | fail: |
53 | cleanup_fault_attr_dentries(&failslab.attr); | 53 | debugfs_remove_recursive(dir); |
54 | 54 | ||
55 | return -ENOMEM; | 55 | return -ENOMEM; |
56 | } | 56 | } |
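The failslab hunk above switches from the old init_fault_attr_dentries()/cleanup_fault_attr_dentries() pair to fault_create_debugfs_attr(), which hands back the created debugfs directory (or an ERR_PTR) so the caller owns its lifetime. Below is a minimal sketch of how a fault-injection site is wired up with the new helper; the fail_example attribute and its extra ignore-gfp-wait knob are hypothetical illustrations, not part of this commit.

```c
/*
 * Sketch only (not from this commit): a hypothetical fault-injection site
 * wired up with the new fault_create_debugfs_attr() helper.
 */
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/fault-inject.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/types.h>

static struct {
	struct fault_attr attr;
	u32 ignore_gfp_wait;		/* illustrative site-specific knob */
} fail_example = {
	.attr = FAULT_ATTR_INITIALIZER,
};

static int __init fail_example_debugfs_init(void)
{
	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	/* One call now creates the directory plus the generic fault knobs. */
	dir = fault_create_debugfs_attr("fail_example", NULL, &fail_example.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	/* Site-specific booleans hang off the returned directory... */
	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
				 &fail_example.ignore_gfp_wait)) {
		/* ...and the whole tree is removed in one shot on failure. */
		debugfs_remove_recursive(dir);
		return -ENOMEM;
	}
	return 0;
}
late_initcall(fail_example_debugfs_init);
```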
diff --git a/mm/filemap.c b/mm/filemap.c
index 867d40222ec7..645a080ba4df 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
33 | #include <linux/cpuset.h> | 33 | #include <linux/cpuset.h> |
34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ | 34 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ |
35 | #include <linux/memcontrol.h> | 35 | #include <linux/memcontrol.h> |
36 | #include <linux/mm_inline.h> /* for page_is_file_cache() */ | ||
37 | #include <linux/cleancache.h> | 36 | #include <linux/cleancache.h> |
38 | #include "internal.h" | 37 | #include "internal.h" |
39 | 38 | ||
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
462 | int error; | 461 | int error; |
463 | 462 | ||
464 | VM_BUG_ON(!PageLocked(page)); | 463 | VM_BUG_ON(!PageLocked(page)); |
464 | VM_BUG_ON(PageSwapBacked(page)); | ||
465 | 465 | ||
466 | error = mem_cgroup_cache_charge(page, current->mm, | 466 | error = mem_cgroup_cache_charge(page, current->mm, |
467 | gfp_mask & GFP_RECLAIM_MASK); | 467 | gfp_mask & GFP_RECLAIM_MASK); |
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
479 | if (likely(!error)) { | 479 | if (likely(!error)) { |
480 | mapping->nrpages++; | 480 | mapping->nrpages++; |
481 | __inc_zone_page_state(page, NR_FILE_PAGES); | 481 | __inc_zone_page_state(page, NR_FILE_PAGES); |
482 | if (PageSwapBacked(page)) | ||
483 | __inc_zone_page_state(page, NR_SHMEM); | ||
484 | spin_unlock_irq(&mapping->tree_lock); | 482 | spin_unlock_irq(&mapping->tree_lock); |
485 | } else { | 483 | } else { |
486 | page->mapping = NULL; | 484 | page->mapping = NULL; |
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
502 | { | 500 | { |
503 | int ret; | 501 | int ret; |
504 | 502 | ||
505 | /* | ||
506 | * Splice_read and readahead add shmem/tmpfs pages into the page cache | ||
507 | * before shmem_readpage has a chance to mark them as SwapBacked: they | ||
508 | * need to go on the anon lru below, and mem_cgroup_cache_charge | ||
509 | * (called in add_to_page_cache) needs to know where they're going too. | ||
510 | */ | ||
511 | if (mapping_cap_swap_backed(mapping)) | ||
512 | SetPageSwapBacked(page); | ||
513 | |||
514 | ret = add_to_page_cache(page, mapping, offset, gfp_mask); | 503 | ret = add_to_page_cache(page, mapping, offset, gfp_mask); |
515 | if (ret == 0) { | 504 | if (ret == 0) |
516 | if (page_is_file_cache(page)) | 505 | lru_cache_add_file(page); |
517 | lru_cache_add_file(page); | ||
518 | else | ||
519 | lru_cache_add_anon(page); | ||
520 | } | ||
521 | return ret; | 506 | return ret; |
522 | } | 507 | } |
523 | EXPORT_SYMBOL_GPL(add_to_page_cache_lru); | 508 | EXPORT_SYMBOL_GPL(add_to_page_cache_lru); |
@@ -714,9 +699,16 @@ repeat:
714 | page = radix_tree_deref_slot(pagep); | 699 | page = radix_tree_deref_slot(pagep); |
715 | if (unlikely(!page)) | 700 | if (unlikely(!page)) |
716 | goto out; | 701 | goto out; |
717 | if (radix_tree_deref_retry(page)) | 702 | if (radix_tree_exception(page)) { |
718 | goto repeat; | 703 | if (radix_tree_deref_retry(page)) |
719 | 704 | goto repeat; | |
705 | /* | ||
706 | * Otherwise, shmem/tmpfs must be storing a swap entry | ||
707 | * here as an exceptional entry: so return it without | ||
708 | * attempting to raise page count. | ||
709 | */ | ||
710 | goto out; | ||
711 | } | ||
720 | if (!page_cache_get_speculative(page)) | 712 | if (!page_cache_get_speculative(page)) |
721 | goto repeat; | 713 | goto repeat; |
722 | 714 | ||
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
753 | 745 | ||
754 | repeat: | 746 | repeat: |
755 | page = find_get_page(mapping, offset); | 747 | page = find_get_page(mapping, offset); |
756 | if (page) { | 748 | if (page && !radix_tree_exception(page)) { |
757 | lock_page(page); | 749 | lock_page(page); |
758 | /* Has the page been truncated? */ | 750 | /* Has the page been truncated? */ |
759 | if (unlikely(page->mapping != mapping)) { | 751 | if (unlikely(page->mapping != mapping)) { |
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
840 | rcu_read_lock(); | 832 | rcu_read_lock(); |
841 | restart: | 833 | restart: |
842 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | 834 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, |
843 | (void ***)pages, start, nr_pages); | 835 | (void ***)pages, NULL, start, nr_pages); |
844 | ret = 0; | 836 | ret = 0; |
845 | for (i = 0; i < nr_found; i++) { | 837 | for (i = 0; i < nr_found; i++) { |
846 | struct page *page; | 838 | struct page *page; |
@@ -849,13 +841,22 @@ repeat:
849 | if (unlikely(!page)) | 841 | if (unlikely(!page)) |
850 | continue; | 842 | continue; |
851 | 843 | ||
852 | /* | 844 | if (radix_tree_exception(page)) { |
853 | * This can only trigger when the entry at index 0 moves out | 845 | if (radix_tree_deref_retry(page)) { |
854 | * of or back to the root: none yet gotten, safe to restart. | 846 | /* |
855 | */ | 847 | * Transient condition which can only trigger |
856 | if (radix_tree_deref_retry(page)) { | 848 | * when entry at index 0 moves out of or back |
857 | WARN_ON(start | i); | 849 | * to root: none yet gotten, safe to restart. |
858 | goto restart; | 850 | */ |
851 | WARN_ON(start | i); | ||
852 | goto restart; | ||
853 | } | ||
854 | /* | ||
855 | * Otherwise, shmem/tmpfs must be storing a swap entry | ||
856 | * here as an exceptional entry: so skip over it - | ||
857 | * we only reach this from invalidate_mapping_pages(). | ||
858 | */ | ||
859 | continue; | ||
859 | } | 860 | } |
860 | 861 | ||
861 | if (!page_cache_get_speculative(page)) | 862 | if (!page_cache_get_speculative(page)) |
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
903 | rcu_read_lock(); | 904 | rcu_read_lock(); |
904 | restart: | 905 | restart: |
905 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | 906 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, |
906 | (void ***)pages, index, nr_pages); | 907 | (void ***)pages, NULL, index, nr_pages); |
907 | ret = 0; | 908 | ret = 0; |
908 | for (i = 0; i < nr_found; i++) { | 909 | for (i = 0; i < nr_found; i++) { |
909 | struct page *page; | 910 | struct page *page; |
@@ -912,12 +913,22 @@ repeat:
912 | if (unlikely(!page)) | 913 | if (unlikely(!page)) |
913 | continue; | 914 | continue; |
914 | 915 | ||
915 | /* | 916 | if (radix_tree_exception(page)) { |
916 | * This can only trigger when the entry at index 0 moves out | 917 | if (radix_tree_deref_retry(page)) { |
917 | * of or back to the root: none yet gotten, safe to restart. | 918 | /* |
918 | */ | 919 | * Transient condition which can only trigger |
919 | if (radix_tree_deref_retry(page)) | 920 | * when entry at index 0 moves out of or back |
920 | goto restart; | 921 | * to root: none yet gotten, safe to restart. |
922 | */ | ||
923 | goto restart; | ||
924 | } | ||
925 | /* | ||
926 | * Otherwise, shmem/tmpfs must be storing a swap entry | ||
927 | * here as an exceptional entry: so stop looking for | ||
928 | * contiguous pages. | ||
929 | */ | ||
930 | break; | ||
931 | } | ||
921 | 932 | ||
922 | if (!page_cache_get_speculative(page)) | 933 | if (!page_cache_get_speculative(page)) |
923 | goto repeat; | 934 | goto repeat; |
@@ -977,12 +988,21 @@ repeat:
977 | if (unlikely(!page)) | 988 | if (unlikely(!page)) |
978 | continue; | 989 | continue; |
979 | 990 | ||
980 | /* | 991 | if (radix_tree_exception(page)) { |
981 | * This can only trigger when the entry at index 0 moves out | 992 | if (radix_tree_deref_retry(page)) { |
982 | * of or back to the root: none yet gotten, safe to restart. | 993 | /* |
983 | */ | 994 | * Transient condition which can only trigger |
984 | if (radix_tree_deref_retry(page)) | 995 | * when entry at index 0 moves out of or back |
985 | goto restart; | 996 | * to root: none yet gotten, safe to restart. |
997 | */ | ||
998 | goto restart; | ||
999 | } | ||
1000 | /* | ||
1001 | * This function is never used on a shmem/tmpfs | ||
1002 | * mapping, so a swap entry won't be found here. | ||
1003 | */ | ||
1004 | BUG(); | ||
1005 | } | ||
986 | 1006 | ||
987 | if (!page_cache_get_speculative(page)) | 1007 | if (!page_cache_get_speculative(page)) |
988 | goto repeat; | 1008 | goto repeat; |
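After this change, a page-cache lookup on a shmem/tmpfs mapping can return an exceptional radix-tree entry that encodes a swap slot instead of a struct page, so callers test radix_tree_exception()/radix_tree_exceptional_entry() before touching the result. The following is a rough sketch of the pattern the memcontrol.c and mincore.c hunks below follow; the example_lookup() wrapper is hypothetical, and in-kernel callers guard the swap half with CONFIG_SWAP.

```c
/*
 * Sketch only: a hypothetical caller coping with the swap entries that
 * shmem/tmpfs may now leave in the page-cache radix tree.
 */
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/swap.h>
#include <linux/swapops.h>

static struct page *example_lookup(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_page(mapping, index);

	if (radix_tree_exceptional_entry(page)) {
		/* Not a page: an exceptional entry encoding a swap slot. */
		swp_entry_t swap = radix_to_swp_entry(page);

		/* If the data is resident at all, it sits in the swap cache. */
		page = find_get_page(&swapper_space, swap.val);
	}
	return page;	/* NULL, or a page with its reference count raised */
}
```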
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5f84d2351ddb..f4ec4e7ca4cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,7 +35,6 @@
35 | #include <linux/limits.h> | 35 | #include <linux/limits.h> |
36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
37 | #include <linux/rbtree.h> | 37 | #include <linux/rbtree.h> |
38 | #include <linux/shmem_fs.h> | ||
39 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
40 | #include <linux/swap.h> | 39 | #include <linux/swap.h> |
41 | #include <linux/swapops.h> | 40 | #include <linux/swapops.h> |
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2873 | return 0; | 2872 | return 0; |
2874 | if (PageCompound(page)) | 2873 | if (PageCompound(page)) |
2875 | return 0; | 2874 | return 0; |
2876 | /* | ||
2877 | * Corner case handling. This is called from add_to_page_cache() | ||
2878 | * in usual. But some FS (shmem) precharges this page before calling it | ||
2879 | * and call add_to_page_cache() with GFP_NOWAIT. | ||
2880 | * | ||
2881 | * For GFP_NOWAIT case, the page may be pre-charged before calling | ||
2882 | * add_to_page_cache(). (See shmem.c) check it here and avoid to call | ||
2883 | * charge twice. (It works but has to pay a bit larger cost.) | ||
2884 | * And when the page is SwapCache, it should take swap information | ||
2885 | * into account. This is under lock_page() now. | ||
2886 | */ | ||
2887 | if (!(gfp_mask & __GFP_WAIT)) { | ||
2888 | struct page_cgroup *pc; | ||
2889 | |||
2890 | pc = lookup_page_cgroup(page); | ||
2891 | if (!pc) | ||
2892 | return 0; | ||
2893 | lock_page_cgroup(pc); | ||
2894 | if (PageCgroupUsed(pc)) { | ||
2895 | unlock_page_cgroup(pc); | ||
2896 | return 0; | ||
2897 | } | ||
2898 | unlock_page_cgroup(pc); | ||
2899 | } | ||
2900 | 2875 | ||
2901 | if (unlikely(!mm)) | 2876 | if (unlikely(!mm)) |
2902 | mm = &init_mm; | 2877 | mm = &init_mm; |
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
3486 | cgroup_release_and_wakeup_rmdir(&mem->css); | 3461 | cgroup_release_and_wakeup_rmdir(&mem->css); |
3487 | } | 3462 | } |
3488 | 3463 | ||
3489 | /* | ||
3490 | * A call to try to shrink memory usage on charge failure at shmem's swapin. | ||
3491 | * Calling hierarchical_reclaim is not enough because we should update | ||
3492 | * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. | ||
3493 | * Moreover considering hierarchy, we should reclaim from the mem_over_limit, | ||
3494 | * not from the memcg which this page would be charged to. | ||
3495 | * try_charge_swapin does all of these works properly. | ||
3496 | */ | ||
3497 | int mem_cgroup_shmem_charge_fallback(struct page *page, | ||
3498 | struct mm_struct *mm, | ||
3499 | gfp_t gfp_mask) | ||
3500 | { | ||
3501 | struct mem_cgroup *mem; | ||
3502 | int ret; | ||
3503 | |||
3504 | if (mem_cgroup_disabled()) | ||
3505 | return 0; | ||
3506 | |||
3507 | ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); | ||
3508 | if (!ret) | ||
3509 | mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ | ||
3510 | |||
3511 | return ret; | ||
3512 | } | ||
3513 | |||
3514 | #ifdef CONFIG_DEBUG_VM | 3464 | #ifdef CONFIG_DEBUG_VM |
3515 | static struct page_cgroup *lookup_page_cgroup_used(struct page *page) | 3465 | static struct page_cgroup *lookup_page_cgroup_used(struct page *page) |
3516 | { | 3466 | { |
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5330 | pgoff = pte_to_pgoff(ptent); | 5280 | pgoff = pte_to_pgoff(ptent); |
5331 | 5281 | ||
5332 | /* page is moved even if it's not RSS of this task(page-faulted). */ | 5282 | /* page is moved even if it's not RSS of this task(page-faulted). */ |
5333 | if (!mapping_cap_swap_backed(mapping)) { /* normal file */ | 5283 | page = find_get_page(mapping, pgoff); |
5334 | page = find_get_page(mapping, pgoff); | 5284 | |
5335 | } else { /* shmem/tmpfs file. we should take account of swap too. */ | 5285 | #ifdef CONFIG_SWAP |
5336 | swp_entry_t ent; | 5286 | /* shmem/tmpfs may report page out on swap: account for that too. */ |
5337 | mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); | 5287 | if (radix_tree_exceptional_entry(page)) { |
5288 | swp_entry_t swap = radix_to_swp_entry(page); | ||
5338 | if (do_swap_account) | 5289 | if (do_swap_account) |
5339 | entry->val = ent.val; | 5290 | *entry = swap; |
5291 | page = find_get_page(&swapper_space, swap.val); | ||
5340 | } | 5292 | } |
5341 | 5293 | #endif | |
5342 | return page; | 5294 | return page; |
5343 | } | 5295 | } |
5344 | 5296 | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 740c4f52059c..2b43ba051ac9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -53,6 +53,7 @@
53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
54 | #include <linux/memory_hotplug.h> | 54 | #include <linux/memory_hotplug.h> |
55 | #include <linux/mm_inline.h> | 55 | #include <linux/mm_inline.h> |
56 | #include <linux/kfifo.h> | ||
56 | #include "internal.h" | 57 | #include "internal.h" |
57 | 58 | ||
58 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 59 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
1178 | __memory_failure(pfn, trapno, 0); | 1179 | __memory_failure(pfn, trapno, 0); |
1179 | } | 1180 | } |
1180 | 1181 | ||
1182 | #define MEMORY_FAILURE_FIFO_ORDER 4 | ||
1183 | #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) | ||
1184 | |||
1185 | struct memory_failure_entry { | ||
1186 | unsigned long pfn; | ||
1187 | int trapno; | ||
1188 | int flags; | ||
1189 | }; | ||
1190 | |||
1191 | struct memory_failure_cpu { | ||
1192 | DECLARE_KFIFO(fifo, struct memory_failure_entry, | ||
1193 | MEMORY_FAILURE_FIFO_SIZE); | ||
1194 | spinlock_t lock; | ||
1195 | struct work_struct work; | ||
1196 | }; | ||
1197 | |||
1198 | static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); | ||
1199 | |||
1200 | /** | ||
1201 | * memory_failure_queue - Schedule handling memory failure of a page. | ||
1202 | * @pfn: Page Number of the corrupted page | ||
1203 | * @trapno: Trap number reported in the signal to user space. | ||
1204 | * @flags: Flags for memory failure handling | ||
1205 | * | ||
1206 | * This function is called by the low level hardware error handler | ||
1207 | * when it detects hardware memory corruption of a page. It schedules | ||
1208 | * the recovering of error page, including dropping pages, killing | ||
1209 | * processes etc. | ||
1210 | * | ||
1211 | * The function is primarily of use for corruptions that | ||
1212 | * happen outside the current execution context (e.g. when | ||
1213 | * detected by a background scrubber) | ||
1214 | * | ||
1215 | * Can run in IRQ context. | ||
1216 | */ | ||
1217 | void memory_failure_queue(unsigned long pfn, int trapno, int flags) | ||
1218 | { | ||
1219 | struct memory_failure_cpu *mf_cpu; | ||
1220 | unsigned long proc_flags; | ||
1221 | struct memory_failure_entry entry = { | ||
1222 | .pfn = pfn, | ||
1223 | .trapno = trapno, | ||
1224 | .flags = flags, | ||
1225 | }; | ||
1226 | |||
1227 | mf_cpu = &get_cpu_var(memory_failure_cpu); | ||
1228 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
1229 | if (kfifo_put(&mf_cpu->fifo, &entry)) | ||
1230 | schedule_work_on(smp_processor_id(), &mf_cpu->work); | ||
1231 | else | ||
1232 | pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n", | ||
1233 | pfn); | ||
1234 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
1235 | put_cpu_var(memory_failure_cpu); | ||
1236 | } | ||
1237 | EXPORT_SYMBOL_GPL(memory_failure_queue); | ||
1238 | |||
1239 | static void memory_failure_work_func(struct work_struct *work) | ||
1240 | { | ||
1241 | struct memory_failure_cpu *mf_cpu; | ||
1242 | struct memory_failure_entry entry = { 0, }; | ||
1243 | unsigned long proc_flags; | ||
1244 | int gotten; | ||
1245 | |||
1246 | mf_cpu = &__get_cpu_var(memory_failure_cpu); | ||
1247 | for (;;) { | ||
1248 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
1249 | gotten = kfifo_get(&mf_cpu->fifo, &entry); | ||
1250 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
1251 | if (!gotten) | ||
1252 | break; | ||
1253 | __memory_failure(entry.pfn, entry.trapno, entry.flags); | ||
1254 | } | ||
1255 | } | ||
1256 | |||
1257 | static int __init memory_failure_init(void) | ||
1258 | { | ||
1259 | struct memory_failure_cpu *mf_cpu; | ||
1260 | int cpu; | ||
1261 | |||
1262 | for_each_possible_cpu(cpu) { | ||
1263 | mf_cpu = &per_cpu(memory_failure_cpu, cpu); | ||
1264 | spin_lock_init(&mf_cpu->lock); | ||
1265 | INIT_KFIFO(mf_cpu->fifo); | ||
1266 | INIT_WORK(&mf_cpu->work, memory_failure_work_func); | ||
1267 | } | ||
1268 | |||
1269 | return 0; | ||
1270 | } | ||
1271 | core_initcall(memory_failure_init); | ||
1272 | |||
1181 | /** | 1273 | /** |
1182 | * unpoison_memory - Unpoison a previously poisoned page | 1274 | * unpoison_memory - Unpoison a previously poisoned page |
1183 | * @pfn: Page number of the to be unpoisoned page | 1275 | * @pfn: Page number of the to be unpoisoned page |
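The new memory_failure_queue() introduced above gives low-level error handlers a way to kick off recovery from IRQ context: the entry goes onto a per-CPU kfifo and __memory_failure() runs later from a work item. Below is a hedged sketch of a caller, assuming the prototype is exported next to memory_failure() in <linux/mm.h>; the scrubber hook itself is hypothetical.

```c
/*
 * Sketch only: a hypothetical background-scrubber hook deferring recovery
 * of a corrupted page via memory_failure_queue().
 */
#include <linux/kernel.h>
#include <linux/mm.h>

void example_scrubber_report(u64 paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;

	/*
	 * Safe from IRQ context: the pfn/trapno/flags tuple is queued on a
	 * per-CPU kfifo and handled later by memory_failure_work_func().
	 */
	memory_failure_queue(pfn, 0 /* trapno */, 0 /* flags */);
}
```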
diff --git a/mm/mincore.c b/mm/mincore.c
index a4e6b9d75c76..636a86876ff2 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
69 | * file will not get a swp_entry_t in its pte, but rather it is like | 69 | * file will not get a swp_entry_t in its pte, but rather it is like |
70 | * any other file mapping (ie. marked !present and faulted in with | 70 | * any other file mapping (ie. marked !present and faulted in with |
71 | * tmpfs's .fault). So swapped out tmpfs mappings are tested here. | 71 | * tmpfs's .fault). So swapped out tmpfs mappings are tested here. |
72 | * | ||
73 | * However when tmpfs moves the page from pagecache and into swapcache, | ||
74 | * it is still in core, but the find_get_page below won't find it. | ||
75 | * No big deal, but make a note of it. | ||
76 | */ | 72 | */ |
77 | page = find_get_page(mapping, pgoff); | 73 | page = find_get_page(mapping, pgoff); |
74 | #ifdef CONFIG_SWAP | ||
75 | /* shmem/tmpfs may return swap: account for swapcache page too. */ | ||
76 | if (radix_tree_exceptional_entry(page)) { | ||
77 | swp_entry_t swap = radix_to_swp_entry(page); | ||
78 | page = find_get_page(&swapper_space, swap.val); | ||
79 | } | ||
80 | #endif | ||
78 | if (page) { | 81 | if (page) { |
79 | present = PageUptodate(page); | 82 | present = PageUptodate(page); |
80 | page_cache_release(page); | 83 | page_cache_release(page); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index eafff89b3dd6..626303b52f3c 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
303 | do_each_thread(g, p) { | 303 | do_each_thread(g, p) { |
304 | unsigned int points; | 304 | unsigned int points; |
305 | 305 | ||
306 | if (!p->mm) | 306 | if (p->exit_state) |
307 | continue; | 307 | continue; |
308 | if (oom_unkillable_task(p, mem, nodemask)) | 308 | if (oom_unkillable_task(p, mem, nodemask)) |
309 | continue; | 309 | continue; |
@@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
319 | */ | 319 | */ |
320 | if (test_tsk_thread_flag(p, TIF_MEMDIE)) | 320 | if (test_tsk_thread_flag(p, TIF_MEMDIE)) |
321 | return ERR_PTR(-1UL); | 321 | return ERR_PTR(-1UL); |
322 | if (!p->mm) | ||
323 | continue; | ||
322 | 324 | ||
323 | if (p->flags & PF_EXITING) { | 325 | if (p->flags & PF_EXITING) { |
324 | /* | 326 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1dbcf8888f14..6e8ecb6e021c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
1409 | { | 1409 | { |
1410 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | 1410 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; |
1411 | struct dentry *dir; | 1411 | struct dentry *dir; |
1412 | int err; | ||
1413 | 1412 | ||
1414 | err = init_fault_attr_dentries(&fail_page_alloc.attr, | 1413 | dir = fault_create_debugfs_attr("fail_page_alloc", NULL, |
1415 | "fail_page_alloc"); | 1414 | &fail_page_alloc.attr); |
1416 | if (err) | 1415 | if (IS_ERR(dir)) |
1417 | return err; | 1416 | return PTR_ERR(dir); |
1418 | |||
1419 | dir = fail_page_alloc.attr.dir; | ||
1420 | 1417 | ||
1421 | if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, | 1418 | if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, |
1422 | &fail_page_alloc.ignore_gfp_wait)) | 1419 | &fail_page_alloc.ignore_gfp_wait)) |
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
1430 | 1427 | ||
1431 | return 0; | 1428 | return 0; |
1432 | fail: | 1429 | fail: |
1433 | cleanup_fault_attr_dentries(&fail_page_alloc.attr); | 1430 | debugfs_remove_recursive(dir); |
1434 | 1431 | ||
1435 | return -ENOMEM; | 1432 | return -ENOMEM; |
1436 | } | 1433 | } |
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd3..32f6763f16fb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -6,7 +6,8 @@
6 | * 2000-2001 Christoph Rohland | 6 | * 2000-2001 Christoph Rohland |
7 | * 2000-2001 SAP AG | 7 | * 2000-2001 SAP AG |
8 | * 2002 Red Hat Inc. | 8 | * 2002 Red Hat Inc. |
9 | * Copyright (C) 2002-2005 Hugh Dickins. | 9 | * Copyright (C) 2002-2011 Hugh Dickins. |
10 | * Copyright (C) 2011 Google Inc. | ||
10 | * Copyright (C) 2002-2005 VERITAS Software Corporation. | 11 | * Copyright (C) 2002-2005 VERITAS Software Corporation. |
11 | * Copyright (C) 2004 Andi Kleen, SuSE Labs | 12 | * Copyright (C) 2004 Andi Kleen, SuSE Labs |
12 | * | 13 | * |
@@ -28,7 +29,6 @@
28 | #include <linux/file.h> | 29 | #include <linux/file.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
30 | #include <linux/module.h> | 31 | #include <linux/module.h> |
31 | #include <linux/percpu_counter.h> | ||
32 | #include <linux/swap.h> | 32 | #include <linux/swap.h> |
33 | 33 | ||
34 | static struct vfsmount *shm_mnt; | 34 | static struct vfsmount *shm_mnt; |
@@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt;
51 | #include <linux/shmem_fs.h> | 51 | #include <linux/shmem_fs.h> |
52 | #include <linux/writeback.h> | 52 | #include <linux/writeback.h> |
53 | #include <linux/blkdev.h> | 53 | #include <linux/blkdev.h> |
54 | #include <linux/pagevec.h> | ||
55 | #include <linux/percpu_counter.h> | ||
54 | #include <linux/splice.h> | 56 | #include <linux/splice.h> |
55 | #include <linux/security.h> | 57 | #include <linux/security.h> |
56 | #include <linux/swapops.h> | 58 | #include <linux/swapops.h> |
@@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt;
63 | #include <linux/magic.h> | 65 | #include <linux/magic.h> |
64 | 66 | ||
65 | #include <asm/uaccess.h> | 67 | #include <asm/uaccess.h> |
66 | #include <asm/div64.h> | ||
67 | #include <asm/pgtable.h> | 68 | #include <asm/pgtable.h> |
68 | 69 | ||
69 | /* | ||
70 | * The maximum size of a shmem/tmpfs file is limited by the maximum size of | ||
71 | * its triple-indirect swap vector - see illustration at shmem_swp_entry(). | ||
72 | * | ||
73 | * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, | ||
74 | * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum | ||
75 | * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, | ||
76 | * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. | ||
77 | * | ||
78 | * We use / and * instead of shifts in the definitions below, so that the swap | ||
79 | * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. | ||
80 | */ | ||
81 | #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) | ||
82 | #define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) | ||
83 | |||
84 | #define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) | ||
85 | #define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) | ||
86 | |||
87 | #define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) | ||
88 | #define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) | ||
89 | |||
90 | #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) | 70 | #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) |
91 | #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) | 71 | #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) |
92 | 72 | ||
93 | /* info->flags needs VM_flags to handle pagein/truncate races efficiently */ | ||
94 | #define SHMEM_PAGEIN VM_READ | ||
95 | #define SHMEM_TRUNCATE VM_WRITE | ||
96 | |||
97 | /* Definition to limit shmem_truncate's steps between cond_rescheds */ | ||
98 | #define LATENCY_LIMIT 64 | ||
99 | |||
100 | /* Pretend that each entry is of this size in directory's i_size */ | 73 | /* Pretend that each entry is of this size in directory's i_size */ |
101 | #define BOGO_DIRENT_SIZE 20 | 74 | #define BOGO_DIRENT_SIZE 20 |
102 | 75 | ||
76 | /* Symlink up to this size is kmalloc'ed instead of using a swappable page */ | ||
77 | #define SHORT_SYMLINK_LEN 128 | ||
78 | |||
103 | struct shmem_xattr { | 79 | struct shmem_xattr { |
104 | struct list_head list; /* anchored by shmem_inode_info->xattr_list */ | 80 | struct list_head list; /* anchored by shmem_inode_info->xattr_list */ |
105 | char *name; /* xattr name */ | 81 | char *name; /* xattr name */ |
@@ -107,7 +83,7 @@ struct shmem_xattr {
107 | char value[0]; | 83 | char value[0]; |
108 | }; | 84 | }; |
109 | 85 | ||
110 | /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ | 86 | /* Flag allocation requirements to shmem_getpage */ |
111 | enum sgp_type { | 87 | enum sgp_type { |
112 | SGP_READ, /* don't exceed i_size, don't allocate page */ | 88 | SGP_READ, /* don't exceed i_size, don't allocate page */ |
113 | SGP_CACHE, /* don't exceed i_size, may allocate page */ | 89 | SGP_CACHE, /* don't exceed i_size, may allocate page */ |
@@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index,
137 | mapping_gfp_mask(inode->i_mapping), fault_type); | 113 | mapping_gfp_mask(inode->i_mapping), fault_type); |
138 | } | 114 | } |
139 | 115 | ||
140 | static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) | ||
141 | { | ||
142 | /* | ||
143 | * The above definition of ENTRIES_PER_PAGE, and the use of | ||
144 | * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: | ||
145 | * might be reconsidered if it ever diverges from PAGE_SIZE. | ||
146 | * | ||
147 | * Mobility flags are masked out as swap vectors cannot move | ||
148 | */ | ||
149 | return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, | ||
150 | PAGE_CACHE_SHIFT-PAGE_SHIFT); | ||
151 | } | ||
152 | |||
153 | static inline void shmem_dir_free(struct page *page) | ||
154 | { | ||
155 | __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); | ||
156 | } | ||
157 | |||
158 | static struct page **shmem_dir_map(struct page *page) | ||
159 | { | ||
160 | return (struct page **)kmap_atomic(page, KM_USER0); | ||
161 | } | ||
162 | |||
163 | static inline void shmem_dir_unmap(struct page **dir) | ||
164 | { | ||
165 | kunmap_atomic(dir, KM_USER0); | ||
166 | } | ||
167 | |||
168 | static swp_entry_t *shmem_swp_map(struct page *page) | ||
169 | { | ||
170 | return (swp_entry_t *)kmap_atomic(page, KM_USER1); | ||
171 | } | ||
172 | |||
173 | static inline void shmem_swp_balance_unmap(void) | ||
174 | { | ||
175 | /* | ||
176 | * When passing a pointer to an i_direct entry, to code which | ||
177 | * also handles indirect entries and so will shmem_swp_unmap, | ||
178 | * we must arrange for the preempt count to remain in balance. | ||
179 | * What kmap_atomic of a lowmem page does depends on config | ||
180 | * and architecture, so pretend to kmap_atomic some lowmem page. | ||
181 | */ | ||
182 | (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); | ||
183 | } | ||
184 | |||
185 | static inline void shmem_swp_unmap(swp_entry_t *entry) | ||
186 | { | ||
187 | kunmap_atomic(entry, KM_USER1); | ||
188 | } | ||
189 | |||
190 | static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) | 116 | static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) |
191 | { | 117 | { |
192 | return sb->s_fs_info; | 118 | return sb->s_fs_info; |
@@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
244 | static LIST_HEAD(shmem_swaplist); | 170 | static LIST_HEAD(shmem_swaplist); |
245 | static DEFINE_MUTEX(shmem_swaplist_mutex); | 171 | static DEFINE_MUTEX(shmem_swaplist_mutex); |
246 | 172 | ||
247 | static void shmem_free_blocks(struct inode *inode, long pages) | ||
248 | { | ||
249 | struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); | ||
250 | if (sbinfo->max_blocks) { | ||
251 | percpu_counter_add(&sbinfo->used_blocks, -pages); | ||
252 | inode->i_blocks -= pages*BLOCKS_PER_PAGE; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | static int shmem_reserve_inode(struct super_block *sb) | 173 | static int shmem_reserve_inode(struct super_block *sb) |
257 | { | 174 | { |
258 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); | 175 | struct shmem_sb_info *sbinfo = SHMEM_SB(sb); |
@@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb)
279 | } | 196 | } |
280 | 197 | ||
281 | /** | 198 | /** |
282 | * shmem_recalc_inode - recalculate the size of an inode | 199 | * shmem_recalc_inode - recalculate the block usage of an inode |
283 | * @inode: inode to recalc | 200 | * @inode: inode to recalc |
284 | * | 201 | * |
285 | * We have to calculate the free blocks since the mm can drop | 202 | * We have to calculate the free blocks since the mm can drop |
@@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode)
297 | 214 | ||
298 | freed = info->alloced - info->swapped - inode->i_mapping->nrpages; | 215 | freed = info->alloced - info->swapped - inode->i_mapping->nrpages; |
299 | if (freed > 0) { | 216 | if (freed > 0) { |
217 | struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); | ||
218 | if (sbinfo->max_blocks) | ||
219 | percpu_counter_add(&sbinfo->used_blocks, -freed); | ||
300 | info->alloced -= freed; | 220 | info->alloced -= freed; |
221 | inode->i_blocks -= freed * BLOCKS_PER_PAGE; | ||
301 | shmem_unacct_blocks(info->flags, freed); | 222 | shmem_unacct_blocks(info->flags, freed); |
302 | shmem_free_blocks(inode, freed); | ||
303 | } | 223 | } |
304 | } | 224 | } |
305 | 225 | ||
306 | /** | 226 | /* |
307 | * shmem_swp_entry - find the swap vector position in the info structure | 227 | * Replace item expected in radix tree by a new item, while holding tree lock. |
308 | * @info: info structure for the inode | ||
309 | * @index: index of the page to find | ||
310 | * @page: optional page to add to the structure. Has to be preset to | ||
311 | * all zeros | ||
312 | * | ||
313 | * If there is no space allocated yet it will return NULL when | ||
314 | * page is NULL, else it will use the page for the needed block, | ||
315 | * setting it to NULL on return to indicate that it has been used. | ||
316 | * | ||
317 | * The swap vector is organized the following way: | ||
318 | * | ||
319 | * There are SHMEM_NR_DIRECT entries directly stored in the | ||
320 | * shmem_inode_info structure. So small files do not need an addional | ||
321 | * allocation. | ||
322 | * | ||
323 | * For pages with index > SHMEM_NR_DIRECT there is the pointer | ||
324 | * i_indirect which points to a page which holds in the first half | ||
325 | * doubly indirect blocks, in the second half triple indirect blocks: | ||
326 | * | ||
327 | * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the | ||
328 | * following layout (for SHMEM_NR_DIRECT == 16): | ||
329 | * | ||
330 | * i_indirect -> dir --> 16-19 | ||
331 | * | +-> 20-23 | ||
332 | * | | ||
333 | * +-->dir2 --> 24-27 | ||
334 | * | +-> 28-31 | ||
335 | * | +-> 32-35 | ||
336 | * | +-> 36-39 | ||
337 | * | | ||
338 | * +-->dir3 --> 40-43 | ||
339 | * +-> 44-47 | ||
340 | * +-> 48-51 | ||
341 | * +-> 52-55 | ||
342 | */ | 228 | */ |
343 | static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) | 229 | static int shmem_radix_tree_replace(struct address_space *mapping, |
344 | { | 230 | pgoff_t index, void *expected, void *replacement) |
345 | unsigned long offset; | 231 | { |
346 | struct page **dir; | 232 | void **pslot; |
347 | struct page *subdir; | 233 | void *item = NULL; |
348 | 234 | ||
349 | if (index < SHMEM_NR_DIRECT) { | 235 | VM_BUG_ON(!expected); |
350 | shmem_swp_balance_unmap(); | 236 | pslot = radix_tree_lookup_slot(&mapping->page_tree, index); |
351 | return info->i_direct+index; | 237 | if (pslot) |
352 | } | 238 | item = radix_tree_deref_slot_protected(pslot, |
353 | if (!info->i_indirect) { | 239 | &mapping->tree_lock); |
354 | if (page) { | 240 | if (item != expected) |
355 | info->i_indirect = *page; | 241 | return -ENOENT; |
356 | *page = NULL; | 242 | if (replacement) |
357 | } | 243 | radix_tree_replace_slot(pslot, replacement); |
358 | return NULL; /* need another page */ | 244 | else |
359 | } | 245 | radix_tree_delete(&mapping->page_tree, index); |
360 | 246 | return 0; | |
361 | index -= SHMEM_NR_DIRECT; | 247 | } |
362 | offset = index % ENTRIES_PER_PAGE; | ||
363 | index /= ENTRIES_PER_PAGE; | ||
364 | dir = shmem_dir_map(info->i_indirect); | ||
365 | |||
366 | if (index >= ENTRIES_PER_PAGE/2) { | ||
367 | index -= ENTRIES_PER_PAGE/2; | ||
368 | dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; | ||
369 | index %= ENTRIES_PER_PAGE; | ||
370 | subdir = *dir; | ||
371 | if (!subdir) { | ||
372 | if (page) { | ||
373 | *dir = *page; | ||
374 | *page = NULL; | ||
375 | } | ||
376 | shmem_dir_unmap(dir); | ||
377 | return NULL; /* need another page */ | ||
378 | } | ||
379 | shmem_dir_unmap(dir); | ||
380 | dir = shmem_dir_map(subdir); | ||
381 | } | ||
382 | 248 | ||
383 | dir += index; | 249 | /* |
384 | subdir = *dir; | 250 | * Like add_to_page_cache_locked, but error if expected item has gone. |
385 | if (!subdir) { | 251 | */ |
386 | if (!page || !(subdir = *page)) { | 252 | static int shmem_add_to_page_cache(struct page *page, |
387 | shmem_dir_unmap(dir); | 253 | struct address_space *mapping, |
388 | return NULL; /* need a page */ | 254 | pgoff_t index, gfp_t gfp, void *expected) |
255 | { | ||
256 | int error = 0; | ||
257 | |||
258 | VM_BUG_ON(!PageLocked(page)); | ||
259 | VM_BUG_ON(!PageSwapBacked(page)); | ||
260 | |||
261 | if (!expected) | ||
262 | error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); | ||
263 | if (!error) { | ||
264 | page_cache_get(page); | ||
265 | page->mapping = mapping; | ||
266 | page->index = index; | ||
267 | |||
268 | spin_lock_irq(&mapping->tree_lock); | ||
269 | if (!expected) | ||
270 | error = radix_tree_insert(&mapping->page_tree, | ||
271 | index, page); | ||
272 | else | ||
273 | error = shmem_radix_tree_replace(mapping, index, | ||
274 | expected, page); | ||
275 | if (!error) { | ||
276 | mapping->nrpages++; | ||
277 | __inc_zone_page_state(page, NR_FILE_PAGES); | ||
278 | __inc_zone_page_state(page, NR_SHMEM); | ||
279 | spin_unlock_irq(&mapping->tree_lock); | ||
280 | } else { | ||
281 | page->mapping = NULL; | ||
282 | spin_unlock_irq(&mapping->tree_lock); | ||
283 | page_cache_release(page); | ||
389 | } | 284 | } |
390 | *dir = subdir; | 285 | if (!expected) |
391 | *page = NULL; | 286 | radix_tree_preload_end(); |
392 | } | 287 | } |
393 | shmem_dir_unmap(dir); | 288 | if (error) |
394 | return shmem_swp_map(subdir) + offset; | 289 | mem_cgroup_uncharge_cache_page(page); |
290 | return error; | ||
395 | } | 291 | } |
396 | 292 | ||
397 | static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) | 293 | /* |
294 | * Like delete_from_page_cache, but substitutes swap for page. | ||
295 | */ | ||
296 | static void shmem_delete_from_page_cache(struct page *page, void *radswap) | ||
398 | { | 297 | { |
399 | long incdec = value? 1: -1; | 298 | struct address_space *mapping = page->mapping; |
299 | int error; | ||
400 | 300 | ||
401 | entry->val = value; | 301 | spin_lock_irq(&mapping->tree_lock); |
402 | info->swapped += incdec; | 302 | error = shmem_radix_tree_replace(mapping, page->index, page, radswap); |
403 | if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { | 303 | page->mapping = NULL; |
404 | struct page *page = kmap_atomic_to_page(entry); | 304 | mapping->nrpages--; |
405 | set_page_private(page, page_private(page) + incdec); | 305 | __dec_zone_page_state(page, NR_FILE_PAGES); |
406 | } | 306 | __dec_zone_page_state(page, NR_SHMEM); |
307 | spin_unlock_irq(&mapping->tree_lock); | ||
308 | page_cache_release(page); | ||
309 | BUG_ON(error); | ||
407 | } | 310 | } |
408 | 311 | ||
409 | /** | 312 | /* |
410 | * shmem_swp_alloc - get the position of the swap entry for the page. | 313 | * Like find_get_pages, but collecting swap entries as well as pages. |
411 | * @info: info structure for the inode | ||
412 | * @index: index of the page to find | ||
413 | * @sgp: check and recheck i_size? skip allocation? | ||
414 | * @gfp: gfp mask to use for any page allocation | ||
415 | * | ||
416 | * If the entry does not exist, allocate it. | ||
417 | */ | 314 | */ |
418 | static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, | 315 | static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, |
419 | unsigned long index, enum sgp_type sgp, gfp_t gfp) | 316 | pgoff_t start, unsigned int nr_pages, |
420 | { | 317 | struct page **pages, pgoff_t *indices) |
421 | struct inode *inode = &info->vfs_inode; | 318 | { |
422 | struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); | 319 | unsigned int i; |
423 | struct page *page = NULL; | 320 | unsigned int ret; |
424 | swp_entry_t *entry; | 321 | unsigned int nr_found; |
425 | 322 | ||
426 | if (sgp != SGP_WRITE && | 323 | rcu_read_lock(); |
427 | ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | 324 | restart: |
428 | return ERR_PTR(-EINVAL); | 325 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, |
429 | 326 | (void ***)pages, indices, start, nr_pages); | |
430 | while (!(entry = shmem_swp_entry(info, index, &page))) { | 327 | ret = 0; |
431 | if (sgp == SGP_READ) | 328 | for (i = 0; i < nr_found; i++) { |
432 | return shmem_swp_map(ZERO_PAGE(0)); | 329 | struct page *page; |
433 | /* | 330 | repeat: |
434 | * Test used_blocks against 1 less max_blocks, since we have 1 data | 331 | page = radix_tree_deref_slot((void **)pages[i]); |
435 | * page (and perhaps indirect index pages) yet to allocate: | 332 | if (unlikely(!page)) |
436 | * a waste to allocate index if we cannot allocate data. | 333 | continue; |
437 | */ | 334 | if (radix_tree_exception(page)) { |
438 | if (sbinfo->max_blocks) { | 335 | if (radix_tree_deref_retry(page)) |
439 | if (percpu_counter_compare(&sbinfo->used_blocks, | 336 | goto restart; |
440 | sbinfo->max_blocks - 1) >= 0) | 337 | /* |
441 | return ERR_PTR(-ENOSPC); | 338 | * Otherwise, we must be storing a swap entry |
442 | percpu_counter_inc(&sbinfo->used_blocks); | 339 | * here as an exceptional entry: so return it |
443 | inode->i_blocks += BLOCKS_PER_PAGE; | 340 | * without attempting to raise page count. |
341 | */ | ||
342 | goto export; | ||
444 | } | 343 | } |
344 | if (!page_cache_get_speculative(page)) | ||
345 | goto repeat; | ||
445 | 346 | ||
446 | spin_unlock(&info->lock); | 347 | /* Has the page moved? */ |
447 | page = shmem_dir_alloc(gfp); | 348 | if (unlikely(page != *((void **)pages[i]))) { |
448 | spin_lock(&info->lock); | 349 | page_cache_release(page); |
449 | 350 | goto repeat; | |
450 | if (!page) { | ||
451 | shmem_free_blocks(inode, 1); | ||
452 | return ERR_PTR(-ENOMEM); | ||
453 | } | ||
454 | if (sgp != SGP_WRITE && | ||
455 | ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | ||
456 | entry = ERR_PTR(-EINVAL); | ||
457 | break; | ||
458 | } | 351 | } |
459 | if (info->next_index <= index) | 352 | export: |
460 | info->next_index = index + 1; | 353 | indices[ret] = indices[i]; |
461 | } | 354 | pages[ret] = page; |
462 | if (page) { | 355 | ret++; |
463 | /* another task gave its page, or truncated the file */ | 356 | } |
464 | shmem_free_blocks(inode, 1); | 357 | if (unlikely(!ret && nr_found)) |
465 | shmem_dir_free(page); | 358 | goto restart; |
466 | } | 359 | rcu_read_unlock(); |
467 | if (info->next_index <= index && !IS_ERR(entry)) | 360 | return ret; |
468 | info->next_index = index + 1; | ||
469 | return entry; | ||
470 | } | 361 | } |
471 | 362 | ||
472 | /** | 363 | /* |
473 | * shmem_free_swp - free some swap entries in a directory | 364 | * Remove swap entry from radix tree, free the swap and its page cache. |
474 | * @dir: pointer to the directory | ||
475 | * @edir: pointer after last entry of the directory | ||
476 | * @punch_lock: pointer to spinlock when needed for the holepunch case | ||
477 | */ | 365 | */ |
478 | static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, | 366 | static int shmem_free_swap(struct address_space *mapping, |
479 | spinlock_t *punch_lock) | 367 | pgoff_t index, void *radswap) |
480 | { | 368 | { |
481 | spinlock_t *punch_unlock = NULL; | 369 | int error; |
482 | swp_entry_t *ptr; | 370 | |
483 | int freed = 0; | 371 | spin_lock_irq(&mapping->tree_lock); |
484 | 372 | error = shmem_radix_tree_replace(mapping, index, radswap, NULL); | |
485 | for (ptr = dir; ptr < edir; ptr++) { | 373 | spin_unlock_irq(&mapping->tree_lock); |
486 | if (ptr->val) { | 374 | if (!error) |
487 | if (unlikely(punch_lock)) { | 375 | free_swap_and_cache(radix_to_swp_entry(radswap)); |
488 | punch_unlock = punch_lock; | 376 | return error; |
489 | punch_lock = NULL; | ||
490 | spin_lock(punch_unlock); | ||
491 | if (!ptr->val) | ||
492 | continue; | ||
493 | } | ||
494 | free_swap_and_cache(*ptr); | ||
495 | *ptr = (swp_entry_t){0}; | ||
496 | freed++; | ||
497 | } | ||
498 | } | ||
499 | if (punch_unlock) | ||
500 | spin_unlock(punch_unlock); | ||
501 | return freed; | ||
502 | } | ||
503 | |||
504 | static int shmem_map_and_free_swp(struct page *subdir, int offset, | ||
505 | int limit, struct page ***dir, spinlock_t *punch_lock) | ||
506 | { | ||
507 | swp_entry_t *ptr; | ||
508 | int freed = 0; | ||
509 | |||
510 | ptr = shmem_swp_map(subdir); | ||
511 | for (; offset < limit; offset += LATENCY_LIMIT) { | ||
512 | int size = limit - offset; | ||
513 | if (size > LATENCY_LIMIT) | ||
514 | size = LATENCY_LIMIT; | ||
515 | freed += shmem_free_swp(ptr+offset, ptr+offset+size, | ||
516 | punch_lock); | ||
517 | if (need_resched()) { | ||
518 | shmem_swp_unmap(ptr); | ||
519 | if (*dir) { | ||
520 | shmem_dir_unmap(*dir); | ||
521 | *dir = NULL; | ||
522 | } | ||
523 | cond_resched(); | ||
524 | ptr = shmem_swp_map(subdir); | ||
525 | } | ||
526 | } | ||
527 | shmem_swp_unmap(ptr); | ||
528 | return freed; | ||
529 | } | 377 | } |
530 | 378 | ||
531 | static void shmem_free_pages(struct list_head *next) | 379 | /* |
380 | * Pagevec may contain swap entries, so shuffle up pages before releasing. | ||
381 | */ | ||
382 | static void shmem_pagevec_release(struct pagevec *pvec) | ||
532 | { | 383 | { |
533 | struct page *page; | 384 | int i, j; |
534 | int freed = 0; | 385 | |
535 | 386 | for (i = 0, j = 0; i < pagevec_count(pvec); i++) { | |
536 | do { | 387 | struct page *page = pvec->pages[i]; |
537 | page = container_of(next, struct page, lru); | 388 | if (!radix_tree_exceptional_entry(page)) |
538 | next = next->next; | 389 | pvec->pages[j++] = page; |
539 | shmem_dir_free(page); | 390 | } |
540 | freed++; | 391 | pvec->nr = j; |
541 | if (freed >= LATENCY_LIMIT) { | 392 | pagevec_release(pvec); |
542 | cond_resched(); | ||
543 | freed = 0; | ||
544 | } | ||
545 | } while (next); | ||
546 | } | 393 | } |
547 | 394 | ||
548 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | 395 | /* |
396 | * Remove range of pages and swap entries from radix tree, and free them. | ||
397 | */ | ||
398 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | ||
549 | { | 399 | { |
400 | struct address_space *mapping = inode->i_mapping; | ||
550 | struct shmem_inode_info *info = SHMEM_I(inode); | 401 | struct shmem_inode_info *info = SHMEM_I(inode); |
551 | unsigned long idx; | 402 | pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
552 | unsigned long size; | 403 | unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); |
553 | unsigned long limit; | 404 | pgoff_t end = (lend >> PAGE_CACHE_SHIFT); |
554 | unsigned long stage; | 405 | struct pagevec pvec; |
555 | unsigned long diroff; | 406 | pgoff_t indices[PAGEVEC_SIZE]; |
556 | struct page **dir; | ||
557 | struct page *topdir; | ||
558 | struct page *middir; | ||
559 | struct page *subdir; | ||
560 | swp_entry_t *ptr; | ||
561 | LIST_HEAD(pages_to_free); | ||
562 | long nr_pages_to_free = 0; | ||
563 | long nr_swaps_freed = 0; | 407 | long nr_swaps_freed = 0; |
564 | int offset; | 408 | pgoff_t index; |
565 | int freed; | 409 | int i; |
566 | int punch_hole; | ||
567 | spinlock_t *needs_lock; | ||
568 | spinlock_t *punch_lock; | ||
569 | unsigned long upper_limit; | ||
570 | 410 | ||
571 | truncate_inode_pages_range(inode->i_mapping, start, end); | 411 | BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); |
572 | 412 | ||
573 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 413 | pagevec_init(&pvec, 0); |
574 | idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 414 | index = start; |
575 | if (idx >= info->next_index) | 415 | while (index <= end) { |
576 | return; | 416 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, |
417 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | ||
418 | pvec.pages, indices); | ||
419 | if (!pvec.nr) | ||
420 | break; | ||
421 | mem_cgroup_uncharge_start(); | ||
422 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
423 | struct page *page = pvec.pages[i]; | ||
577 | 424 | ||
578 | spin_lock(&info->lock); | 425 | index = indices[i]; |
579 | info->flags |= SHMEM_TRUNCATE; | 426 | if (index > end) |
580 | if (likely(end == (loff_t) -1)) { | 427 | break; |
581 | limit = info->next_index; | 428 | |
582 | upper_limit = SHMEM_MAX_INDEX; | 429 | if (radix_tree_exceptional_entry(page)) { |
583 | info->next_index = idx; | 430 | nr_swaps_freed += !shmem_free_swap(mapping, |
584 | needs_lock = NULL; | 431 | index, page); |
585 | punch_hole = 0; | 432 | continue; |
586 | } else { | 433 | } |
587 | if (end + 1 >= inode->i_size) { /* we may free a little more */ | ||
588 | limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> | ||
589 | PAGE_CACHE_SHIFT; | ||
590 | upper_limit = SHMEM_MAX_INDEX; | ||
591 | } else { | ||
592 | limit = (end + 1) >> PAGE_CACHE_SHIFT; | ||
593 | upper_limit = limit; | ||
594 | } | ||
595 | needs_lock = &info->lock; | ||
596 | punch_hole = 1; | ||
597 | } | ||
598 | 434 | ||
599 | topdir = info->i_indirect; | 435 | if (!trylock_page(page)) |
600 | if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { | 436 | continue; |
601 | info->i_indirect = NULL; | 437 | if (page->mapping == mapping) { |
602 | nr_pages_to_free++; | 438 | VM_BUG_ON(PageWriteback(page)); |
603 | list_add(&topdir->lru, &pages_to_free); | 439 | truncate_inode_page(mapping, page); |
440 | } | ||
441 | unlock_page(page); | ||
442 | } | ||
443 | shmem_pagevec_release(&pvec); | ||
444 | mem_cgroup_uncharge_end(); | ||
445 | cond_resched(); | ||
446 | index++; | ||
604 | } | 447 | } |
605 | spin_unlock(&info->lock); | ||
606 | 448 | ||
607 | if (info->swapped && idx < SHMEM_NR_DIRECT) { | 449 | if (partial) { |
608 | ptr = info->i_direct; | 450 | struct page *page = NULL; |
609 | size = limit; | 451 | shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); |
610 | if (size > SHMEM_NR_DIRECT) | 452 | if (page) { |
611 | size = SHMEM_NR_DIRECT; | 453 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); |
612 | nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); | 454 | set_page_dirty(page); |
455 | unlock_page(page); | ||
456 | page_cache_release(page); | ||
457 | } | ||
613 | } | 458 | } |
614 | 459 | ||
615 | /* | 460 | index = start; |
616 | * If there are no indirect blocks or we are punching a hole | 461 | for ( ; ; ) { |
617 | * below indirect blocks, nothing to be done. | 462 | cond_resched(); |
618 | */ | 463 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, |
619 | if (!topdir || limit <= SHMEM_NR_DIRECT) | 464 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, |
620 | goto done2; | 465 | pvec.pages, indices); |
466 | if (!pvec.nr) { | ||
467 | if (index == start) | ||
468 | break; | ||
469 | index = start; | ||
470 | continue; | ||
471 | } | ||
472 | if (index == start && indices[0] > end) { | ||
473 | shmem_pagevec_release(&pvec); | ||
474 | break; | ||
475 | } | ||
476 | mem_cgroup_uncharge_start(); | ||
477 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
478 | struct page *page = pvec.pages[i]; | ||
621 | 479 | ||
622 | /* | 480 | index = indices[i]; |
623 | * The truncation case has already dropped info->lock, and we're safe | 481 | if (index > end) |
624 | * because i_size and next_index have already been lowered, preventing | 482 | break; |
625 | * access beyond. But in the punch_hole case, we still need to take | ||
626 | * the lock when updating the swap directory, because there might be | ||
627 | * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or | ||
628 | * shmem_writepage. However, whenever we find we can remove a whole | ||
629 | * directory page (not at the misaligned start or end of the range), | ||
630 | * we first NULLify its pointer in the level above, and then have no | ||
631 | * need to take the lock when updating its contents: needs_lock and | ||
632 | * punch_lock (either pointing to info->lock or NULL) manage this. | ||
633 | */ | ||
634 | 483 | ||
635 | upper_limit -= SHMEM_NR_DIRECT; | 484 | if (radix_tree_exceptional_entry(page)) { |
636 | limit -= SHMEM_NR_DIRECT; | 485 | nr_swaps_freed += !shmem_free_swap(mapping, |
637 | idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; | 486 | index, page); |
638 | offset = idx % ENTRIES_PER_PAGE; | 487 | continue; |
639 | idx -= offset; | ||
640 | |||
641 | dir = shmem_dir_map(topdir); | ||
642 | stage = ENTRIES_PER_PAGEPAGE/2; | ||
643 | if (idx < ENTRIES_PER_PAGEPAGE/2) { | ||
644 | middir = topdir; | ||
645 | diroff = idx/ENTRIES_PER_PAGE; | ||
646 | } else { | ||
647 | dir += ENTRIES_PER_PAGE/2; | ||
648 | dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; | ||
649 | while (stage <= idx) | ||
650 | stage += ENTRIES_PER_PAGEPAGE; | ||
651 | middir = *dir; | ||
652 | if (*dir) { | ||
653 | diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % | ||
654 | ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; | ||
655 | if (!diroff && !offset && upper_limit >= stage) { | ||
656 | if (needs_lock) { | ||
657 | spin_lock(needs_lock); | ||
658 | *dir = NULL; | ||
659 | spin_unlock(needs_lock); | ||
660 | needs_lock = NULL; | ||
661 | } else | ||
662 | *dir = NULL; | ||
663 | nr_pages_to_free++; | ||
664 | list_add(&middir->lru, &pages_to_free); | ||
665 | } | 488 | } |
666 | shmem_dir_unmap(dir); | ||
667 | dir = shmem_dir_map(middir); | ||
668 | } else { | ||
669 | diroff = 0; | ||
670 | offset = 0; | ||
671 | idx = stage; | ||
672 | } | ||
673 | } | ||
674 | 489 | ||
675 | for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { | 490 | lock_page(page); |
676 | if (unlikely(idx == stage)) { | 491 | if (page->mapping == mapping) { |
677 | shmem_dir_unmap(dir); | 492 | VM_BUG_ON(PageWriteback(page)); |
678 | dir = shmem_dir_map(topdir) + | 493 | truncate_inode_page(mapping, page); |
679 | ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; | ||
680 | while (!*dir) { | ||
681 | dir++; | ||
682 | idx += ENTRIES_PER_PAGEPAGE; | ||
683 | if (idx >= limit) | ||
684 | goto done1; | ||
685 | } | ||
686 | stage = idx + ENTRIES_PER_PAGEPAGE; | ||
687 | middir = *dir; | ||
688 | if (punch_hole) | ||
689 | needs_lock = &info->lock; | ||
690 | if (upper_limit >= stage) { | ||
691 | if (needs_lock) { | ||
692 | spin_lock(needs_lock); | ||
693 | *dir = NULL; | ||
694 | spin_unlock(needs_lock); | ||
695 | needs_lock = NULL; | ||
696 | } else | ||
697 | *dir = NULL; | ||
698 | nr_pages_to_free++; | ||
699 | list_add(&middir->lru, &pages_to_free); | ||
700 | } | 494 | } |
701 | shmem_dir_unmap(dir); | 495 | unlock_page(page); |
702 | cond_resched(); | ||
703 | dir = shmem_dir_map(middir); | ||
704 | diroff = 0; | ||
705 | } | ||
706 | punch_lock = needs_lock; | ||
707 | subdir = dir[diroff]; | ||
708 | if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { | ||
709 | if (needs_lock) { | ||
710 | spin_lock(needs_lock); | ||
711 | dir[diroff] = NULL; | ||
712 | spin_unlock(needs_lock); | ||
713 | punch_lock = NULL; | ||
714 | } else | ||
715 | dir[diroff] = NULL; | ||
716 | nr_pages_to_free++; | ||
717 | list_add(&subdir->lru, &pages_to_free); | ||
718 | } | ||
719 | if (subdir && page_private(subdir) /* has swap entries */) { | ||
720 | size = limit - idx; | ||
721 | if (size > ENTRIES_PER_PAGE) | ||
722 | size = ENTRIES_PER_PAGE; | ||
723 | freed = shmem_map_and_free_swp(subdir, | ||
724 | offset, size, &dir, punch_lock); | ||
725 | if (!dir) | ||
726 | dir = shmem_dir_map(middir); | ||
727 | nr_swaps_freed += freed; | ||
728 | if (offset || punch_lock) { | ||
729 | spin_lock(&info->lock); | ||
730 | set_page_private(subdir, | ||
731 | page_private(subdir) - freed); | ||
732 | spin_unlock(&info->lock); | ||
733 | } else | ||
734 | BUG_ON(page_private(subdir) != freed); | ||
735 | } | 496 | } |
736 | offset = 0; | 497 | shmem_pagevec_release(&pvec); |
737 | } | 498 | mem_cgroup_uncharge_end(); |
738 | done1: | 499 | index++; |
739 | shmem_dir_unmap(dir); | ||
740 | done2: | ||
741 | if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { | ||
742 | /* | ||
743 | * Call truncate_inode_pages again: racing shmem_unuse_inode | ||
744 | * may have swizzled a page in from swap since | ||
745 | * truncate_pagecache or generic_delete_inode did it, before we | ||
746 | * lowered next_index. Also, though shmem_getpage checks | ||
747 | * i_size before adding to cache, no recheck after: so fix the | ||
748 | * narrow window there too. | ||
749 | */ | ||
750 | truncate_inode_pages_range(inode->i_mapping, start, end); | ||
751 | } | 500 | } |
752 | 501 | ||
753 | spin_lock(&info->lock); | 502 | spin_lock(&info->lock); |
754 | info->flags &= ~SHMEM_TRUNCATE; | ||
755 | info->swapped -= nr_swaps_freed; | 503 | info->swapped -= nr_swaps_freed; |
756 | if (nr_pages_to_free) | ||
757 | shmem_free_blocks(inode, nr_pages_to_free); | ||
758 | shmem_recalc_inode(inode); | 504 | shmem_recalc_inode(inode); |
759 | spin_unlock(&info->lock); | 505 | spin_unlock(&info->lock); |
760 | 506 | ||
761 | /* | 507 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
762 | * Empty swap vector directory pages to be freed? | ||
763 | */ | ||
764 | if (!list_empty(&pages_to_free)) { | ||
765 | pages_to_free.prev->next = NULL; | ||
766 | shmem_free_pages(pages_to_free.next); | ||
767 | } | ||
768 | } | 508 | } |
769 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | 509 | EXPORT_SYMBOL_GPL(shmem_truncate_range); |
770 | 510 | ||
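The hunk above removes tmpfs's private multi-level "swap vector" walk from shmem_truncate_range(): the old code chased topdir/middir/subdir pages to find and free swap entries, while the replacement visible in the new column simply locks and truncates pages found in the mapping, because swap entries now sit directly in the page-cache radix tree as exceptional entries (see swp_to_radix_entry() used later in this diff). The following is a minimal, self-contained sketch of that encoding idea; the tag bit and shift chosen here are assumptions for illustration only, not the kernel's actual swp_to_radix_entry()/radix_to_swp_entry() layout.

/*
 * Sketch: pack a swap value into a slot-sized word by tagging its low
 * bits, so one tree slot can hold either a page pointer or a swap entry.
 * The bit layout (shift 2, tag 0x2) is assumed for this demo only.
 */
#include <assert.h>
#include <stdio.h>

#define DEMO_EXCEPTIONAL_TAG   0x2UL
#define DEMO_EXCEPTIONAL_SHIFT 2

static void *swap_to_slot(unsigned long swap_val)
{
        return (void *)((swap_val << DEMO_EXCEPTIONAL_SHIFT) | DEMO_EXCEPTIONAL_TAG);
}

static int slot_is_swap(const void *slot)
{
        return ((unsigned long)slot & DEMO_EXCEPTIONAL_TAG) != 0;
}

static unsigned long slot_to_swap(const void *slot)
{
        return (unsigned long)slot >> DEMO_EXCEPTIONAL_SHIFT;
}

int main(void)
{
        void *slot = swap_to_slot(0x1234);

        assert(slot_is_swap(slot));
        assert(slot_to_swap(slot) == 0x1234);
        printf("slot %p decodes to swap value %#lx\n", slot, slot_to_swap(slot));
        return 0;
}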
@@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) | |||
780 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 520 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
781 | loff_t oldsize = inode->i_size; | 521 | loff_t oldsize = inode->i_size; |
782 | loff_t newsize = attr->ia_size; | 522 | loff_t newsize = attr->ia_size; |
783 | struct page *page = NULL; | ||
784 | 523 | ||
785 | if (newsize < oldsize) { | ||
786 | /* | ||
787 | * If truncating down to a partial page, then | ||
788 | * if that page is already allocated, hold it | ||
789 | * in memory until the truncation is over, so | ||
790 | * truncate_partial_page cannot miss it were | ||
791 | * it assigned to swap. | ||
792 | */ | ||
793 | if (newsize & (PAGE_CACHE_SIZE-1)) { | ||
794 | (void) shmem_getpage(inode, | ||
795 | newsize >> PAGE_CACHE_SHIFT, | ||
796 | &page, SGP_READ, NULL); | ||
797 | if (page) | ||
798 | unlock_page(page); | ||
799 | } | ||
800 | /* | ||
801 | * Reset SHMEM_PAGEIN flag so that shmem_truncate can | ||
802 | * detect if any pages might have been added to cache | ||
803 | * after truncate_inode_pages. But we needn't bother | ||
804 | * if it's being fully truncated to zero-length: the | ||
805 | * nrpages check is efficient enough in that case. | ||
806 | */ | ||
807 | if (newsize) { | ||
808 | struct shmem_inode_info *info = SHMEM_I(inode); | ||
809 | spin_lock(&info->lock); | ||
810 | info->flags &= ~SHMEM_PAGEIN; | ||
811 | spin_unlock(&info->lock); | ||
812 | } | ||
813 | } | ||
814 | if (newsize != oldsize) { | 524 | if (newsize != oldsize) { |
815 | i_size_write(inode, newsize); | 525 | i_size_write(inode, newsize); |
816 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 526 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
@@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) | |||
822 | /* unmap again to remove racily COWed private pages */ | 532 | /* unmap again to remove racily COWed private pages */ |
823 | unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); | 533 | unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); |
824 | } | 534 | } |
825 | if (page) | ||
826 | page_cache_release(page); | ||
827 | } | 535 | } |
828 | 536 | ||
829 | setattr_copy(inode, attr); | 537 | setattr_copy(inode, attr); |
@@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode) | |||
848 | list_del_init(&info->swaplist); | 556 | list_del_init(&info->swaplist); |
849 | mutex_unlock(&shmem_swaplist_mutex); | 557 | mutex_unlock(&shmem_swaplist_mutex); |
850 | } | 558 | } |
851 | } | 559 | } else |
560 | kfree(info->symlink); | ||
852 | 561 | ||
853 | list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { | 562 | list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { |
854 | kfree(xattr->name); | 563 | kfree(xattr->name); |
@@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode) | |||
859 | end_writeback(inode); | 568 | end_writeback(inode); |
860 | } | 569 | } |
861 | 570 | ||
862 | static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) | 571 | /* |
863 | { | 572 | * If swap found in inode, free it and move page from swapcache to filecache. |
864 | swp_entry_t *ptr; | 573 | */ |
865 | 574 | static int shmem_unuse_inode(struct shmem_inode_info *info, | |
866 | for (ptr = dir; ptr < edir; ptr++) { | 575 | swp_entry_t swap, struct page *page) |
867 | if (ptr->val == entry.val) | ||
868 | return ptr - dir; | ||
869 | } | ||
870 | return -1; | ||
871 | } | ||
872 | |||
873 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) | ||
874 | { | 576 | { |
875 | struct address_space *mapping; | 577 | struct address_space *mapping = info->vfs_inode.i_mapping; |
876 | unsigned long idx; | 578 | void *radswap; |
877 | unsigned long size; | 579 | pgoff_t index; |
878 | unsigned long limit; | ||
879 | unsigned long stage; | ||
880 | struct page **dir; | ||
881 | struct page *subdir; | ||
882 | swp_entry_t *ptr; | ||
883 | int offset; | ||
884 | int error; | 580 | int error; |
885 | 581 | ||
886 | idx = 0; | 582 | radswap = swp_to_radix_entry(swap); |
887 | ptr = info->i_direct; | 583 | index = radix_tree_locate_item(&mapping->page_tree, radswap); |
888 | spin_lock(&info->lock); | 584 | if (index == -1) |
889 | if (!info->swapped) { | 585 | return 0; |
890 | list_del_init(&info->swaplist); | ||
891 | goto lost2; | ||
892 | } | ||
893 | limit = info->next_index; | ||
894 | size = limit; | ||
895 | if (size > SHMEM_NR_DIRECT) | ||
896 | size = SHMEM_NR_DIRECT; | ||
897 | offset = shmem_find_swp(entry, ptr, ptr+size); | ||
898 | if (offset >= 0) { | ||
899 | shmem_swp_balance_unmap(); | ||
900 | goto found; | ||
901 | } | ||
902 | if (!info->i_indirect) | ||
903 | goto lost2; | ||
904 | |||
905 | dir = shmem_dir_map(info->i_indirect); | ||
906 | stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; | ||
907 | |||
908 | for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { | ||
909 | if (unlikely(idx == stage)) { | ||
910 | shmem_dir_unmap(dir-1); | ||
911 | if (cond_resched_lock(&info->lock)) { | ||
912 | /* check it has not been truncated */ | ||
913 | if (limit > info->next_index) { | ||
914 | limit = info->next_index; | ||
915 | if (idx >= limit) | ||
916 | goto lost2; | ||
917 | } | ||
918 | } | ||
919 | dir = shmem_dir_map(info->i_indirect) + | ||
920 | ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; | ||
921 | while (!*dir) { | ||
922 | dir++; | ||
923 | idx += ENTRIES_PER_PAGEPAGE; | ||
924 | if (idx >= limit) | ||
925 | goto lost1; | ||
926 | } | ||
927 | stage = idx + ENTRIES_PER_PAGEPAGE; | ||
928 | subdir = *dir; | ||
929 | shmem_dir_unmap(dir); | ||
930 | dir = shmem_dir_map(subdir); | ||
931 | } | ||
932 | subdir = *dir; | ||
933 | if (subdir && page_private(subdir)) { | ||
934 | ptr = shmem_swp_map(subdir); | ||
935 | size = limit - idx; | ||
936 | if (size > ENTRIES_PER_PAGE) | ||
937 | size = ENTRIES_PER_PAGE; | ||
938 | offset = shmem_find_swp(entry, ptr, ptr+size); | ||
939 | shmem_swp_unmap(ptr); | ||
940 | if (offset >= 0) { | ||
941 | shmem_dir_unmap(dir); | ||
942 | ptr = shmem_swp_map(subdir); | ||
943 | goto found; | ||
944 | } | ||
945 | } | ||
946 | } | ||
947 | lost1: | ||
948 | shmem_dir_unmap(dir-1); | ||
949 | lost2: | ||
950 | spin_unlock(&info->lock); | ||
951 | return 0; | ||
952 | found: | ||
953 | idx += offset; | ||
954 | ptr += offset; | ||
955 | 586 | ||
956 | /* | 587 | /* |
957 | * Move _head_ to start search for next from here. | 588 | * Move _head_ to start search for next from here. |
958 | * But be careful: shmem_evict_inode checks list_empty without taking | 589 | * But be careful: shmem_evict_inode checks list_empty without taking |
959 | * mutex, and there's an instant in list_move_tail when info->swaplist | 590 | * mutex, and there's an instant in list_move_tail when info->swaplist |
960 | * would appear empty, if it were the only one on shmem_swaplist. We | 591 | * would appear empty, if it were the only one on shmem_swaplist. |
961 | * could avoid doing it if inode NULL; or use this minor optimization. | ||
962 | */ | 592 | */ |
963 | if (shmem_swaplist.next != &info->swaplist) | 593 | if (shmem_swaplist.next != &info->swaplist) |
964 | list_move_tail(&shmem_swaplist, &info->swaplist); | 594 | list_move_tail(&shmem_swaplist, &info->swaplist); |
@@ -968,29 +598,34 @@ found: | |||
968 | * but also to hold up shmem_evict_inode(): so inode cannot be freed | 598 | * but also to hold up shmem_evict_inode(): so inode cannot be freed |
969 | * beneath us (pagelock doesn't help until the page is in pagecache). | 599 | * beneath us (pagelock doesn't help until the page is in pagecache). |
970 | */ | 600 | */ |
971 | mapping = info->vfs_inode.i_mapping; | 601 | error = shmem_add_to_page_cache(page, mapping, index, |
972 | error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); | 602 | GFP_NOWAIT, radswap); |
973 | /* which does mem_cgroup_uncharge_cache_page on error */ | 603 | /* which does mem_cgroup_uncharge_cache_page on error */ |
974 | 604 | ||
975 | if (error != -ENOMEM) { | 605 | if (error != -ENOMEM) { |
606 | /* | ||
607 | * Truncation and eviction use free_swap_and_cache(), which | ||
608 | * only does trylock page: if we raced, best clean up here. | ||
609 | */ | ||
976 | delete_from_swap_cache(page); | 610 | delete_from_swap_cache(page); |
977 | set_page_dirty(page); | 611 | set_page_dirty(page); |
978 | info->flags |= SHMEM_PAGEIN; | 612 | if (!error) { |
979 | shmem_swp_set(info, ptr, 0); | 613 | spin_lock(&info->lock); |
980 | swap_free(entry); | 614 | info->swapped--; |
615 | spin_unlock(&info->lock); | ||
616 | swap_free(swap); | ||
617 | } | ||
981 | error = 1; /* not an error, but entry was found */ | 618 | error = 1; /* not an error, but entry was found */ |
982 | } | 619 | } |
983 | shmem_swp_unmap(ptr); | ||
984 | spin_unlock(&info->lock); | ||
985 | return error; | 620 | return error; |
986 | } | 621 | } |
987 | 622 | ||
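In the rewritten shmem_unuse_inode() above, the old shmem_find_swp() scan over swap-vector pages gives way to radix_tree_locate_item() plus shmem_add_to_page_cache(..., radswap): the insertion is expected to succeed only while the slot at that index still holds the same swap entry, which is what lets the GFP_NOWAIT path cope with concurrent truncation ("Have another try if the entry has changed" in the error path below). The sketch that follows shows only that replace-if-expected semantic, with a plain array standing in for the tree; the helper name and return convention are invented for the example.

/*
 * Sketch of replace-only-if-expected: a plain array stands in for the
 * page-cache radix tree, and -1 stands in for the EEXIST/retry case.
 */
#include <stdio.h>

#define NSLOTS 8

static void *slots[NSLOTS];

/* Install @newval at @index only if the slot still holds @expected. */
static int replace_if_expected(unsigned long index, void *expected, void *newval)
{
        if (index >= NSLOTS || slots[index] != expected)
                return -1;      /* raced with truncation or another swapin */
        slots[index] = newval;
        return 0;
}

int main(void)
{
        char swap_entry, page;

        slots[3] = &swap_entry;

        /* Succeeds: the slot still holds the entry looked up earlier. */
        printf("first replace: %d\n", replace_if_expected(3, &swap_entry, &page));
        /* Fails: the slot has changed, so the caller must retry or give up. */
        printf("second replace: %d\n", replace_if_expected(3, &swap_entry, &page));
        return 0;
}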
988 | /* | 623 | /* |
989 | * shmem_unuse() search for an eventually swapped out shmem page. | 624 | * Search through swapped inodes to find and replace swap by page. |
990 | */ | 625 | */ |
991 | int shmem_unuse(swp_entry_t entry, struct page *page) | 626 | int shmem_unuse(swp_entry_t swap, struct page *page) |
992 | { | 627 | { |
993 | struct list_head *p, *next; | 628 | struct list_head *this, *next; |
994 | struct shmem_inode_info *info; | 629 | struct shmem_inode_info *info; |
995 | int found = 0; | 630 | int found = 0; |
996 | int error; | 631 | int error; |
@@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
999 | * Charge page using GFP_KERNEL while we can wait, before taking | 634 | * Charge page using GFP_KERNEL while we can wait, before taking |
1000 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). | 635 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). |
1001 | * Charged back to the user (not to caller) when swap account is used. | 636 | * Charged back to the user (not to caller) when swap account is used. |
1002 | * add_to_page_cache() will be called with GFP_NOWAIT. | ||
1003 | */ | 637 | */ |
1004 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); | 638 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); |
1005 | if (error) | 639 | if (error) |
1006 | goto out; | 640 | goto out; |
1007 | /* | 641 | /* No radix_tree_preload: swap entry keeps a place for page in tree */ |
1008 | * Try to preload while we can wait, to not make a habit of | ||
1009 | * draining atomic reserves; but don't latch on to this cpu, | ||
1010 | * it's okay if sometimes we get rescheduled after this. | ||
1011 | */ | ||
1012 | error = radix_tree_preload(GFP_KERNEL); | ||
1013 | if (error) | ||
1014 | goto uncharge; | ||
1015 | radix_tree_preload_end(); | ||
1016 | 642 | ||
1017 | mutex_lock(&shmem_swaplist_mutex); | 643 | mutex_lock(&shmem_swaplist_mutex); |
1018 | list_for_each_safe(p, next, &shmem_swaplist) { | 644 | list_for_each_safe(this, next, &shmem_swaplist) { |
1019 | info = list_entry(p, struct shmem_inode_info, swaplist); | 645 | info = list_entry(this, struct shmem_inode_info, swaplist); |
1020 | found = shmem_unuse_inode(info, entry, page); | 646 | if (info->swapped) |
647 | found = shmem_unuse_inode(info, swap, page); | ||
648 | else | ||
649 | list_del_init(&info->swaplist); | ||
1021 | cond_resched(); | 650 | cond_resched(); |
1022 | if (found) | 651 | if (found) |
1023 | break; | 652 | break; |
1024 | } | 653 | } |
1025 | mutex_unlock(&shmem_swaplist_mutex); | 654 | mutex_unlock(&shmem_swaplist_mutex); |
1026 | 655 | ||
1027 | uncharge: | ||
1028 | if (!found) | 656 | if (!found) |
1029 | mem_cgroup_uncharge_cache_page(page); | 657 | mem_cgroup_uncharge_cache_page(page); |
1030 | if (found < 0) | 658 | if (found < 0) |
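shmem_unuse() now prunes inodes whose swapped count has dropped to zero while it walks shmem_swaplist, instead of descending into every inode, and the walk uses a safe iterator so the current entry can be unlinked in place. Below is a simplified stand-in using a singly linked list rather than the kernel's list_for_each_safe()/list_del_init(), and treating "first inode that still has swap" as a successful shmem_unuse_inode() for brevity; the structure and field names are made up for the demo.

/*
 * Sketch of the pruning walk: unlink entries with swapped == 0 as we
 * go, stop at the first candidate that still has swap outstanding.
 */
#include <stdio.h>

struct demo_info {
        int swapped;
        struct demo_info *next;
};

static struct demo_info *swaplist;

static struct demo_info *demo_unuse(void)
{
        struct demo_info **link = &swaplist;
        struct demo_info *info;

        while ((info = *link) != NULL) {
                if (!info->swapped) {
                        *link = info->next;     /* list_del_init() stand-in */
                        continue;
                }
                return info;                    /* candidate inode found */
        }
        return NULL;
}

int main(void)
{
        struct demo_info c = { 3, NULL };
        struct demo_info b = { 0, &c };
        struct demo_info a = { 0, &b };
        struct demo_info *found;

        swaplist = &a;
        found = demo_unuse();
        printf("candidate swapped=%d, pruned list starts at %p\n",
               found ? found->swapped : -1, (void *)swaplist);
        return 0;
}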
@@ -1041,10 +669,10 @@ out: | |||
1041 | static int shmem_writepage(struct page *page, struct writeback_control *wbc) | 669 | static int shmem_writepage(struct page *page, struct writeback_control *wbc) |
1042 | { | 670 | { |
1043 | struct shmem_inode_info *info; | 671 | struct shmem_inode_info *info; |
1044 | swp_entry_t *entry, swap; | ||
1045 | struct address_space *mapping; | 672 | struct address_space *mapping; |
1046 | unsigned long index; | ||
1047 | struct inode *inode; | 673 | struct inode *inode; |
674 | swp_entry_t swap; | ||
675 | pgoff_t index; | ||
1048 | 676 | ||
1049 | BUG_ON(!PageLocked(page)); | 677 | BUG_ON(!PageLocked(page)); |
1050 | mapping = page->mapping; | 678 | mapping = page->mapping; |
@@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1073 | 701 | ||
1074 | /* | 702 | /* |
1075 | * Add inode to shmem_unuse()'s list of swapped-out inodes, | 703 | * Add inode to shmem_unuse()'s list of swapped-out inodes, |
1076 | * if it's not already there. Do it now because we cannot take | 704 | * if it's not already there. Do it now before the page is |
1077 | * mutex while holding spinlock, and must do so before the page | 705 | * moved to swap cache, when its pagelock no longer protects |
1078 | * is moved to swap cache, when its pagelock no longer protects | ||
1079 | * the inode from eviction. But don't unlock the mutex until | 706 | * the inode from eviction. But don't unlock the mutex until |
1080 | * we've taken the spinlock, because shmem_unuse_inode() will | 707 | * we've incremented swapped, because shmem_unuse_inode() will |
1081 | * prune a !swapped inode from the swaplist under both locks. | 708 | * prune a !swapped inode from the swaplist under this mutex. |
1082 | */ | 709 | */ |
1083 | mutex_lock(&shmem_swaplist_mutex); | 710 | mutex_lock(&shmem_swaplist_mutex); |
1084 | if (list_empty(&info->swaplist)) | 711 | if (list_empty(&info->swaplist)) |
1085 | list_add_tail(&info->swaplist, &shmem_swaplist); | 712 | list_add_tail(&info->swaplist, &shmem_swaplist); |
1086 | 713 | ||
1087 | spin_lock(&info->lock); | ||
1088 | mutex_unlock(&shmem_swaplist_mutex); | ||
1089 | |||
1090 | if (index >= info->next_index) { | ||
1091 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); | ||
1092 | goto unlock; | ||
1093 | } | ||
1094 | entry = shmem_swp_entry(info, index, NULL); | ||
1095 | if (entry->val) { | ||
1096 | WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ | ||
1097 | free_swap_and_cache(*entry); | ||
1098 | shmem_swp_set(info, entry, 0); | ||
1099 | } | ||
1100 | shmem_recalc_inode(inode); | ||
1101 | |||
1102 | if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { | 714 | if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { |
1103 | delete_from_page_cache(page); | ||
1104 | shmem_swp_set(info, entry, swap.val); | ||
1105 | shmem_swp_unmap(entry); | ||
1106 | swap_shmem_alloc(swap); | 715 | swap_shmem_alloc(swap); |
716 | shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); | ||
717 | |||
718 | spin_lock(&info->lock); | ||
719 | info->swapped++; | ||
720 | shmem_recalc_inode(inode); | ||
1107 | spin_unlock(&info->lock); | 721 | spin_unlock(&info->lock); |
722 | |||
723 | mutex_unlock(&shmem_swaplist_mutex); | ||
1108 | BUG_ON(page_mapped(page)); | 724 | BUG_ON(page_mapped(page)); |
1109 | swap_writepage(page, wbc); | 725 | swap_writepage(page, wbc); |
1110 | return 0; | 726 | return 0; |
1111 | } | 727 | } |
1112 | 728 | ||
1113 | shmem_swp_unmap(entry); | 729 | mutex_unlock(&shmem_swaplist_mutex); |
1114 | unlock: | ||
1115 | spin_unlock(&info->lock); | ||
1116 | /* | ||
1117 | * add_to_swap_cache() doesn't return -EEXIST, so we can safely | ||
1118 | * clear SWAP_HAS_CACHE flag. | ||
1119 | */ | ||
1120 | swapcache_free(swap, NULL); | 730 | swapcache_free(swap, NULL); |
1121 | redirty: | 731 | redirty: |
1122 | set_page_dirty(page); | 732 | set_page_dirty(page); |
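The shmem_writepage() hunk changes the locking shape: shmem_swaplist_mutex is now held across the move of the page into the swap cache and is only dropped after info->swapped has been incremented under info->lock, so the swaplist pruning described above cannot remove a !swapped inode that is just about to become swapped. The toy below shows only that ordering, with pthread mutexes standing in for both shmem_swaplist_mutex and the info->lock spinlock; everything else is assumed. Compile with -pthread.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t swaplist_mutex = PTHREAD_MUTEX_INITIALIZER; /* shmem_swaplist_mutex stand-in */
static pthread_mutex_t info_lock = PTHREAD_MUTEX_INITIALIZER;      /* info->lock stand-in */
static int on_swaplist;
static int swapped;

/*
 * Ordering only: list add and the swap-cache move happen under the
 * mutex, the swapped count is bumped under the lock stand-in, and the
 * mutex is dropped last, so a pruner holding the mutex never sees this
 * inode listed but still counted as !swapped.
 */
static void writepage_order(void)
{
        pthread_mutex_lock(&swaplist_mutex);
        if (!on_swaplist)
                on_swaplist = 1;        /* list_add_tail(&info->swaplist, ...) */

        /* add_to_swap_cache() + shmem_delete_from_page_cache() go here */

        pthread_mutex_lock(&info_lock);
        swapped++;                      /* info->swapped++; shmem_recalc_inode() */
        pthread_mutex_unlock(&info_lock);

        pthread_mutex_unlock(&swaplist_mutex);
}

int main(void)
{
        writepage_order();
        printf("on_swaplist=%d swapped=%d\n", on_swaplist, swapped);
        return 0;
}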
@@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) | |||
1153 | } | 763 | } |
1154 | #endif /* CONFIG_TMPFS */ | 764 | #endif /* CONFIG_TMPFS */ |
1155 | 765 | ||
1156 | static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, | 766 | static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, |
1157 | struct shmem_inode_info *info, unsigned long idx) | 767 | struct shmem_inode_info *info, pgoff_t index) |
1158 | { | 768 | { |
1159 | struct mempolicy mpol, *spol; | 769 | struct mempolicy mpol, *spol; |
1160 | struct vm_area_struct pvma; | 770 | struct vm_area_struct pvma; |
1161 | struct page *page; | ||
1162 | 771 | ||
1163 | spol = mpol_cond_copy(&mpol, | 772 | spol = mpol_cond_copy(&mpol, |
1164 | mpol_shared_policy_lookup(&info->policy, idx)); | 773 | mpol_shared_policy_lookup(&info->policy, index)); |
1165 | 774 | ||
1166 | /* Create a pseudo vma that just contains the policy */ | 775 | /* Create a pseudo vma that just contains the policy */ |
1167 | pvma.vm_start = 0; | 776 | pvma.vm_start = 0; |
1168 | pvma.vm_pgoff = idx; | 777 | pvma.vm_pgoff = index; |
1169 | pvma.vm_ops = NULL; | 778 | pvma.vm_ops = NULL; |
1170 | pvma.vm_policy = spol; | 779 | pvma.vm_policy = spol; |
1171 | page = swapin_readahead(entry, gfp, &pvma, 0); | 780 | return swapin_readahead(swap, gfp, &pvma, 0); |
1172 | return page; | ||
1173 | } | 781 | } |
1174 | 782 | ||
1175 | static struct page *shmem_alloc_page(gfp_t gfp, | 783 | static struct page *shmem_alloc_page(gfp_t gfp, |
1176 | struct shmem_inode_info *info, unsigned long idx) | 784 | struct shmem_inode_info *info, pgoff_t index) |
1177 | { | 785 | { |
1178 | struct vm_area_struct pvma; | 786 | struct vm_area_struct pvma; |
1179 | 787 | ||
1180 | /* Create a pseudo vma that just contains the policy */ | 788 | /* Create a pseudo vma that just contains the policy */ |
1181 | pvma.vm_start = 0; | 789 | pvma.vm_start = 0; |
1182 | pvma.vm_pgoff = idx; | 790 | pvma.vm_pgoff = index; |
1183 | pvma.vm_ops = NULL; | 791 | pvma.vm_ops = NULL; |
1184 | pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); | 792 | pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); |
1185 | 793 | ||
1186 | /* | 794 | /* |
1187 | * alloc_page_vma() will drop the shared policy reference | 795 | * alloc_page_vma() will drop the shared policy reference |
@@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, | |||
1190 | } | 798 | } |
1191 | #else /* !CONFIG_NUMA */ | 799 | #else /* !CONFIG_NUMA */ |
1192 | #ifdef CONFIG_TMPFS | 800 | #ifdef CONFIG_TMPFS |
1193 | static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) | 801 | static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) |
1194 | { | 802 | { |
1195 | } | 803 | } |
1196 | #endif /* CONFIG_TMPFS */ | 804 | #endif /* CONFIG_TMPFS */ |
1197 | 805 | ||
1198 | static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, | 806 | static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, |
1199 | struct shmem_inode_info *info, unsigned long idx) | 807 | struct shmem_inode_info *info, pgoff_t index) |
1200 | { | 808 | { |
1201 | return swapin_readahead(entry, gfp, NULL, 0); | 809 | return swapin_readahead(swap, gfp, NULL, 0); |
1202 | } | 810 | } |
1203 | 811 | ||
1204 | static inline struct page *shmem_alloc_page(gfp_t gfp, | 812 | static inline struct page *shmem_alloc_page(gfp_t gfp, |
1205 | struct shmem_inode_info *info, unsigned long idx) | 813 | struct shmem_inode_info *info, pgoff_t index) |
1206 | { | 814 | { |
1207 | return alloc_page(gfp); | 815 | return alloc_page(gfp); |
1208 | } | 816 | } |
@@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) | |||
1222 | * vm. If we swap it in we mark it dirty since we also free the swap | 830 | * vm. If we swap it in we mark it dirty since we also free the swap |
1223 | * entry since a page cannot live in both the swap and page cache | 831 | * entry since a page cannot live in both the swap and page cache |
1224 | */ | 832 | */ |
1225 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, | 833 | static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, |
1226 | struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) | 834 | struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) |
1227 | { | 835 | { |
1228 | struct address_space *mapping = inode->i_mapping; | 836 | struct address_space *mapping = inode->i_mapping; |
1229 | struct shmem_inode_info *info = SHMEM_I(inode); | 837 | struct shmem_inode_info *info; |
1230 | struct shmem_sb_info *sbinfo; | 838 | struct shmem_sb_info *sbinfo; |
1231 | struct page *page; | 839 | struct page *page; |
1232 | struct page *prealloc_page = NULL; | ||
1233 | swp_entry_t *entry; | ||
1234 | swp_entry_t swap; | 840 | swp_entry_t swap; |
1235 | int error; | 841 | int error; |
1236 | int ret; | 842 | int once = 0; |
1237 | 843 | ||
1238 | if (idx >= SHMEM_MAX_INDEX) | 844 | if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) |
1239 | return -EFBIG; | 845 | return -EFBIG; |
1240 | repeat: | 846 | repeat: |
1241 | page = find_lock_page(mapping, idx); | 847 | swap.val = 0; |
1242 | if (page) { | 848 | page = find_lock_page(mapping, index); |
849 | if (radix_tree_exceptional_entry(page)) { | ||
850 | swap = radix_to_swp_entry(page); | ||
851 | page = NULL; | ||
852 | } | ||
853 | |||
854 | if (sgp != SGP_WRITE && | ||
855 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | ||
856 | error = -EINVAL; | ||
857 | goto failed; | ||
858 | } | ||
859 | |||
860 | if (page || (sgp == SGP_READ && !swap.val)) { | ||
1243 | /* | 861 | /* |
1244 | * Once we can get the page lock, it must be uptodate: | 862 | * Once we can get the page lock, it must be uptodate: |
1245 | * if there were an error in reading back from swap, | 863 | * if there were an error in reading back from swap, |
1246 | * the page would not be inserted into the filecache. | 864 | * the page would not be inserted into the filecache. |
1247 | */ | 865 | */ |
1248 | BUG_ON(!PageUptodate(page)); | 866 | BUG_ON(page && !PageUptodate(page)); |
1249 | goto done; | 867 | *pagep = page; |
868 | return 0; | ||
1250 | } | 869 | } |
1251 | 870 | ||
1252 | /* | 871 | /* |
1253 | * Try to preload while we can wait, to not make a habit of | 872 | * Fast cache lookup did not find it: |
1254 | * draining atomic reserves; but don't latch on to this cpu. | 873 | * bring it back from swap or allocate. |
1255 | */ | 874 | */ |
1256 | error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); | 875 | info = SHMEM_I(inode); |
1257 | if (error) | 876 | sbinfo = SHMEM_SB(inode->i_sb); |
1258 | goto out; | ||
1259 | radix_tree_preload_end(); | ||
1260 | |||
1261 | if (sgp != SGP_READ && !prealloc_page) { | ||
1262 | prealloc_page = shmem_alloc_page(gfp, info, idx); | ||
1263 | if (prealloc_page) { | ||
1264 | SetPageSwapBacked(prealloc_page); | ||
1265 | if (mem_cgroup_cache_charge(prealloc_page, | ||
1266 | current->mm, GFP_KERNEL)) { | ||
1267 | page_cache_release(prealloc_page); | ||
1268 | prealloc_page = NULL; | ||
1269 | } | ||
1270 | } | ||
1271 | } | ||
1272 | |||
1273 | spin_lock(&info->lock); | ||
1274 | shmem_recalc_inode(inode); | ||
1275 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | ||
1276 | if (IS_ERR(entry)) { | ||
1277 | spin_unlock(&info->lock); | ||
1278 | error = PTR_ERR(entry); | ||
1279 | goto out; | ||
1280 | } | ||
1281 | swap = *entry; | ||
1282 | 877 | ||
1283 | if (swap.val) { | 878 | if (swap.val) { |
1284 | /* Look it up and read it in.. */ | 879 | /* Look it up and read it in.. */ |
1285 | page = lookup_swap_cache(swap); | 880 | page = lookup_swap_cache(swap); |
1286 | if (!page) { | 881 | if (!page) { |
1287 | shmem_swp_unmap(entry); | ||
1288 | spin_unlock(&info->lock); | ||
1289 | /* here we actually do the io */ | 882 | /* here we actually do the io */ |
1290 | if (fault_type) | 883 | if (fault_type) |
1291 | *fault_type |= VM_FAULT_MAJOR; | 884 | *fault_type |= VM_FAULT_MAJOR; |
1292 | page = shmem_swapin(swap, gfp, info, idx); | 885 | page = shmem_swapin(swap, gfp, info, index); |
1293 | if (!page) { | 886 | if (!page) { |
1294 | spin_lock(&info->lock); | 887 | error = -ENOMEM; |
1295 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | 888 | goto failed; |
1296 | if (IS_ERR(entry)) | ||
1297 | error = PTR_ERR(entry); | ||
1298 | else { | ||
1299 | if (entry->val == swap.val) | ||
1300 | error = -ENOMEM; | ||
1301 | shmem_swp_unmap(entry); | ||
1302 | } | ||
1303 | spin_unlock(&info->lock); | ||
1304 | if (error) | ||
1305 | goto out; | ||
1306 | goto repeat; | ||
1307 | } | 889 | } |
1308 | wait_on_page_locked(page); | ||
1309 | page_cache_release(page); | ||
1310 | goto repeat; | ||
1311 | } | 890 | } |
1312 | 891 | ||
1313 | /* We have to do this with page locked to prevent races */ | 892 | /* We have to do this with page locked to prevent races */ |
1314 | if (!trylock_page(page)) { | 893 | lock_page(page); |
1315 | shmem_swp_unmap(entry); | ||
1316 | spin_unlock(&info->lock); | ||
1317 | wait_on_page_locked(page); | ||
1318 | page_cache_release(page); | ||
1319 | goto repeat; | ||
1320 | } | ||
1321 | if (PageWriteback(page)) { | ||
1322 | shmem_swp_unmap(entry); | ||
1323 | spin_unlock(&info->lock); | ||
1324 | wait_on_page_writeback(page); | ||
1325 | unlock_page(page); | ||
1326 | page_cache_release(page); | ||
1327 | goto repeat; | ||
1328 | } | ||
1329 | if (!PageUptodate(page)) { | 894 | if (!PageUptodate(page)) { |
1330 | shmem_swp_unmap(entry); | ||
1331 | spin_unlock(&info->lock); | ||
1332 | unlock_page(page); | ||
1333 | page_cache_release(page); | ||
1334 | error = -EIO; | 895 | error = -EIO; |
1335 | goto out; | 896 | goto failed; |
1336 | } | 897 | } |
1337 | 898 | wait_on_page_writeback(page); | |
1338 | error = add_to_page_cache_locked(page, mapping, | 899 | |
1339 | idx, GFP_NOWAIT); | 900 | /* Someone may have already done it for us */ |
1340 | if (error) { | 901 | if (page->mapping) { |
1341 | shmem_swp_unmap(entry); | 902 | if (page->mapping == mapping && |
1342 | spin_unlock(&info->lock); | 903 | page->index == index) |
1343 | if (error == -ENOMEM) { | 904 | goto done; |
1344 | /* | 905 | error = -EEXIST; |
1345 | * reclaim from proper memory cgroup and | 906 | goto failed; |
1346 | * call memcg's OOM if needed. | ||
1347 | */ | ||
1348 | error = mem_cgroup_shmem_charge_fallback( | ||
1349 | page, current->mm, gfp); | ||
1350 | if (error) { | ||
1351 | unlock_page(page); | ||
1352 | page_cache_release(page); | ||
1353 | goto out; | ||
1354 | } | ||
1355 | } | ||
1356 | unlock_page(page); | ||
1357 | page_cache_release(page); | ||
1358 | goto repeat; | ||
1359 | } | 907 | } |
1360 | 908 | ||
1361 | info->flags |= SHMEM_PAGEIN; | 909 | error = mem_cgroup_cache_charge(page, current->mm, |
1362 | shmem_swp_set(info, entry, 0); | 910 | gfp & GFP_RECLAIM_MASK); |
1363 | shmem_swp_unmap(entry); | 911 | if (!error) |
1364 | delete_from_swap_cache(page); | 912 | error = shmem_add_to_page_cache(page, mapping, index, |
913 | gfp, swp_to_radix_entry(swap)); | ||
914 | if (error) | ||
915 | goto failed; | ||
916 | |||
917 | spin_lock(&info->lock); | ||
918 | info->swapped--; | ||
919 | shmem_recalc_inode(inode); | ||
1365 | spin_unlock(&info->lock); | 920 | spin_unlock(&info->lock); |
921 | |||
922 | delete_from_swap_cache(page); | ||
1366 | set_page_dirty(page); | 923 | set_page_dirty(page); |
1367 | swap_free(swap); | 924 | swap_free(swap); |
1368 | 925 | ||
1369 | } else if (sgp == SGP_READ) { | 926 | } else { |
1370 | shmem_swp_unmap(entry); | 927 | if (shmem_acct_block(info->flags)) { |
1371 | page = find_get_page(mapping, idx); | 928 | error = -ENOSPC; |
1372 | if (page && !trylock_page(page)) { | 929 | goto failed; |
1373 | spin_unlock(&info->lock); | ||
1374 | wait_on_page_locked(page); | ||
1375 | page_cache_release(page); | ||
1376 | goto repeat; | ||
1377 | } | 930 | } |
1378 | spin_unlock(&info->lock); | ||
1379 | |||
1380 | } else if (prealloc_page) { | ||
1381 | shmem_swp_unmap(entry); | ||
1382 | sbinfo = SHMEM_SB(inode->i_sb); | ||
1383 | if (sbinfo->max_blocks) { | 931 | if (sbinfo->max_blocks) { |
1384 | if (percpu_counter_compare(&sbinfo->used_blocks, | 932 | if (percpu_counter_compare(&sbinfo->used_blocks, |
1385 | sbinfo->max_blocks) >= 0 || | 933 | sbinfo->max_blocks) >= 0) { |
1386 | shmem_acct_block(info->flags)) | 934 | error = -ENOSPC; |
1387 | goto nospace; | 935 | goto unacct; |
936 | } | ||
1388 | percpu_counter_inc(&sbinfo->used_blocks); | 937 | percpu_counter_inc(&sbinfo->used_blocks); |
1389 | inode->i_blocks += BLOCKS_PER_PAGE; | ||
1390 | } else if (shmem_acct_block(info->flags)) | ||
1391 | goto nospace; | ||
1392 | |||
1393 | page = prealloc_page; | ||
1394 | prealloc_page = NULL; | ||
1395 | |||
1396 | entry = shmem_swp_alloc(info, idx, sgp, gfp); | ||
1397 | if (IS_ERR(entry)) | ||
1398 | error = PTR_ERR(entry); | ||
1399 | else { | ||
1400 | swap = *entry; | ||
1401 | shmem_swp_unmap(entry); | ||
1402 | } | 938 | } |
1403 | ret = error || swap.val; | 939 | |
1404 | if (ret) | 940 | page = shmem_alloc_page(gfp, info, index); |
1405 | mem_cgroup_uncharge_cache_page(page); | 941 | if (!page) { |
1406 | else | 942 | error = -ENOMEM; |
1407 | ret = add_to_page_cache_lru(page, mapping, | 943 | goto decused; |
1408 | idx, GFP_NOWAIT); | ||
1409 | /* | ||
1410 | * At add_to_page_cache_lru() failure, | ||
1411 | * uncharge will be done automatically. | ||
1412 | */ | ||
1413 | if (ret) { | ||
1414 | shmem_unacct_blocks(info->flags, 1); | ||
1415 | shmem_free_blocks(inode, 1); | ||
1416 | spin_unlock(&info->lock); | ||
1417 | page_cache_release(page); | ||
1418 | if (error) | ||
1419 | goto out; | ||
1420 | goto repeat; | ||
1421 | } | 944 | } |
1422 | 945 | ||
1423 | info->flags |= SHMEM_PAGEIN; | 946 | SetPageSwapBacked(page); |
947 | __set_page_locked(page); | ||
948 | error = mem_cgroup_cache_charge(page, current->mm, | ||
949 | gfp & GFP_RECLAIM_MASK); | ||
950 | if (!error) | ||
951 | error = shmem_add_to_page_cache(page, mapping, index, | ||
952 | gfp, NULL); | ||
953 | if (error) | ||
954 | goto decused; | ||
955 | lru_cache_add_anon(page); | ||
956 | |||
957 | spin_lock(&info->lock); | ||
1424 | info->alloced++; | 958 | info->alloced++; |
959 | inode->i_blocks += BLOCKS_PER_PAGE; | ||
960 | shmem_recalc_inode(inode); | ||
1425 | spin_unlock(&info->lock); | 961 | spin_unlock(&info->lock); |
962 | |||
1426 | clear_highpage(page); | 963 | clear_highpage(page); |
1427 | flush_dcache_page(page); | 964 | flush_dcache_page(page); |
1428 | SetPageUptodate(page); | 965 | SetPageUptodate(page); |
1429 | if (sgp == SGP_DIRTY) | 966 | if (sgp == SGP_DIRTY) |
1430 | set_page_dirty(page); | 967 | set_page_dirty(page); |
1431 | |||
1432 | } else { | ||
1433 | spin_unlock(&info->lock); | ||
1434 | error = -ENOMEM; | ||
1435 | goto out; | ||
1436 | } | 968 | } |
1437 | done: | 969 | done: |
1438 | *pagep = page; | 970 | /* Perhaps the file has been truncated since we checked */ |
1439 | error = 0; | 971 | if (sgp != SGP_WRITE && |
1440 | out: | 972 | ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
1441 | if (prealloc_page) { | 973 | error = -EINVAL; |
1442 | mem_cgroup_uncharge_cache_page(prealloc_page); | 974 | goto trunc; |
1443 | page_cache_release(prealloc_page); | ||
1444 | } | 975 | } |
1445 | return error; | 976 | *pagep = page; |
977 | return 0; | ||
1446 | 978 | ||
1447 | nospace: | ||
1448 | /* | 979 | /* |
1449 | * Perhaps the page was brought in from swap between find_lock_page | 980 | * Error recovery. |
1450 | * and taking info->lock? We allow for that at add_to_page_cache_lru, | ||
1451 | * but must also avoid reporting a spurious ENOSPC while working on a | ||
1452 | * full tmpfs. | ||
1453 | */ | 981 | */ |
1454 | page = find_get_page(mapping, idx); | 982 | trunc: |
983 | ClearPageDirty(page); | ||
984 | delete_from_page_cache(page); | ||
985 | spin_lock(&info->lock); | ||
986 | info->alloced--; | ||
987 | inode->i_blocks -= BLOCKS_PER_PAGE; | ||
1455 | spin_unlock(&info->lock); | 988 | spin_unlock(&info->lock); |
989 | decused: | ||
990 | if (sbinfo->max_blocks) | ||
991 | percpu_counter_add(&sbinfo->used_blocks, -1); | ||
992 | unacct: | ||
993 | shmem_unacct_blocks(info->flags, 1); | ||
994 | failed: | ||
995 | if (swap.val && error != -EINVAL) { | ||
996 | struct page *test = find_get_page(mapping, index); | ||
997 | if (test && !radix_tree_exceptional_entry(test)) | ||
998 | page_cache_release(test); | ||
999 | /* Have another try if the entry has changed */ | ||
1000 | if (test != swp_to_radix_entry(swap)) | ||
1001 | error = -EEXIST; | ||
1002 | } | ||
1456 | if (page) { | 1003 | if (page) { |
1004 | unlock_page(page); | ||
1457 | page_cache_release(page); | 1005 | page_cache_release(page); |
1006 | } | ||
1007 | if (error == -ENOSPC && !once++) { | ||
1008 | info = SHMEM_I(inode); | ||
1009 | spin_lock(&info->lock); | ||
1010 | shmem_recalc_inode(inode); | ||
1011 | spin_unlock(&info->lock); | ||
1458 | goto repeat; | 1012 | goto repeat; |
1459 | } | 1013 | } |
1460 | error = -ENOSPC; | 1014 | if (error == -EEXIST) |
1461 | goto out; | 1015 | goto repeat; |
1016 | return error; | ||
1462 | } | 1017 | } |
1463 | 1018 | ||
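The new shmem_getpage_gfp() charges in a fixed order, shmem_acct_block() first, then the sbinfo->used_blocks percpu counter, then page allocation and insertion, and unwinds in reverse through its decused:, unacct: and failed: labels when any step fails. The toy below mirrors just the counter-check-then-undo shape with a plain integer and malloc(); the limit of two blocks and the helper name are invented for the example, and the real function also uncharges the memcg and retries once on -ENOSPC after shmem_recalc_inode().

/*
 * Sketch: charge the block counter before allocating, undo it if the
 * allocation fails, refuse with -ENOSPC once the assumed limit is hit.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static long used_blocks;
static const long max_blocks = 2;       /* assumed tiny limit for the demo */

static int getpage_alloc(void **pagep)
{
        void *page;

        if (used_blocks >= max_blocks)
                return -ENOSPC;         /* maps to the "unacct"/"decused" exits */
        used_blocks++;                  /* percpu_counter_inc(&sbinfo->used_blocks) */

        page = malloc(4096);            /* shmem_alloc_page() stand-in */
        if (!page) {
                used_blocks--;          /* "decused:" undoes the counter */
                return -ENOMEM;
        }
        *pagep = page;
        return 0;
}

int main(void)
{
        void *pages[3] = { NULL, NULL, NULL };
        int i;

        for (i = 0; i < 3; i++)
                printf("alloc %d -> %d\n", i, getpage_alloc(&pages[i]));
        for (i = 0; i < 3; i++)
                free(pages[i]);
        return 0;
}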
1464 | static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1019 | static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
@@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1467 | int error; | 1022 | int error; |
1468 | int ret = VM_FAULT_LOCKED; | 1023 | int ret = VM_FAULT_LOCKED; |
1469 | 1024 | ||
1470 | if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
1471 | return VM_FAULT_SIGBUS; | ||
1472 | |||
1473 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1025 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
1474 | if (error) | 1026 | if (error) |
1475 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 1027 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
@@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1482 | } | 1034 | } |
1483 | 1035 | ||
1484 | #ifdef CONFIG_NUMA | 1036 | #ifdef CONFIG_NUMA |
1485 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) | 1037 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) |
1486 | { | 1038 | { |
1487 | struct inode *i = vma->vm_file->f_path.dentry->d_inode; | 1039 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1488 | return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); | 1040 | return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); |
1489 | } | 1041 | } |
1490 | 1042 | ||
1491 | static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, | 1043 | static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, |
1492 | unsigned long addr) | 1044 | unsigned long addr) |
1493 | { | 1045 | { |
1494 | struct inode *i = vma->vm_file->f_path.dentry->d_inode; | 1046 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1495 | unsigned long idx; | 1047 | pgoff_t index; |
1496 | 1048 | ||
1497 | idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 1049 | index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
1498 | return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); | 1050 | return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); |
1499 | } | 1051 | } |
1500 | #endif | 1052 | #endif |
1501 | 1053 | ||
@@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode | |||
1593 | 1145 | ||
1594 | #ifdef CONFIG_TMPFS | 1146 | #ifdef CONFIG_TMPFS |
1595 | static const struct inode_operations shmem_symlink_inode_operations; | 1147 | static const struct inode_operations shmem_symlink_inode_operations; |
1596 | static const struct inode_operations shmem_symlink_inline_operations; | 1148 | static const struct inode_operations shmem_short_symlink_operations; |
1597 | 1149 | ||
1598 | static int | 1150 | static int |
1599 | shmem_write_begin(struct file *file, struct address_space *mapping, | 1151 | shmem_write_begin(struct file *file, struct address_space *mapping, |
@@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ | |||
1626 | { | 1178 | { |
1627 | struct inode *inode = filp->f_path.dentry->d_inode; | 1179 | struct inode *inode = filp->f_path.dentry->d_inode; |
1628 | struct address_space *mapping = inode->i_mapping; | 1180 | struct address_space *mapping = inode->i_mapping; |
1629 | unsigned long index, offset; | 1181 | pgoff_t index; |
1182 | unsigned long offset; | ||
1630 | enum sgp_type sgp = SGP_READ; | 1183 | enum sgp_type sgp = SGP_READ; |
1631 | 1184 | ||
1632 | /* | 1185 | /* |
@@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ | |||
1642 | 1195 | ||
1643 | for (;;) { | 1196 | for (;;) { |
1644 | struct page *page = NULL; | 1197 | struct page *page = NULL; |
1645 | unsigned long end_index, nr, ret; | 1198 | pgoff_t end_index; |
1199 | unsigned long nr, ret; | ||
1646 | loff_t i_size = i_size_read(inode); | 1200 | loff_t i_size = i_size_read(inode); |
1647 | 1201 | ||
1648 | end_index = i_size >> PAGE_CACHE_SHIFT; | 1202 | end_index = i_size >> PAGE_CACHE_SHIFT; |
@@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1880 | buf->f_namelen = NAME_MAX; | 1434 | buf->f_namelen = NAME_MAX; |
1881 | if (sbinfo->max_blocks) { | 1435 | if (sbinfo->max_blocks) { |
1882 | buf->f_blocks = sbinfo->max_blocks; | 1436 | buf->f_blocks = sbinfo->max_blocks; |
1883 | buf->f_bavail = buf->f_bfree = | 1437 | buf->f_bavail = |
1884 | sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); | 1438 | buf->f_bfree = sbinfo->max_blocks - |
1439 | percpu_counter_sum(&sbinfo->used_blocks); | ||
1885 | } | 1440 | } |
1886 | if (sbinfo->max_inodes) { | 1441 | if (sbinfo->max_inodes) { |
1887 | buf->f_files = sbinfo->max_inodes; | 1442 | buf->f_files = sbinfo->max_inodes; |
@@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2055 | 1610 | ||
2056 | info = SHMEM_I(inode); | 1611 | info = SHMEM_I(inode); |
2057 | inode->i_size = len-1; | 1612 | inode->i_size = len-1; |
2058 | if (len <= SHMEM_SYMLINK_INLINE_LEN) { | 1613 | if (len <= SHORT_SYMLINK_LEN) { |
2059 | /* do it inline */ | 1614 | info->symlink = kmemdup(symname, len, GFP_KERNEL); |
2060 | memcpy(info->inline_symlink, symname, len); | 1615 | if (!info->symlink) { |
2061 | inode->i_op = &shmem_symlink_inline_operations; | 1616 | iput(inode); |
1617 | return -ENOMEM; | ||
1618 | } | ||
1619 | inode->i_op = &shmem_short_symlink_operations; | ||
2062 | } else { | 1620 | } else { |
2063 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); | 1621 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); |
2064 | if (error) { | 1622 | if (error) { |
@@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2081 | return 0; | 1639 | return 0; |
2082 | } | 1640 | } |
2083 | 1641 | ||
2084 | static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) | 1642 | static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) |
2085 | { | 1643 | { |
2086 | nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); | 1644 | nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); |
2087 | return NULL; | 1645 | return NULL; |
2088 | } | 1646 | } |
2089 | 1647 | ||
2090 | static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) | 1648 | static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) |
2091 | { | 1649 | { |
2092 | struct page *page = NULL; | 1650 | struct page *page = NULL; |
2093 | int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); | 1651 | int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); |
2094 | nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); | 1652 | nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); |
2095 | if (page) | 1653 | if (page) |
2096 | unlock_page(page); | 1654 | unlock_page(page); |
2097 | return page; | 1655 | return page; |
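The symlink hunks switch short targets from the old inline_symlink area to a kmemdup()'d buffer hung off the inode (info->symlink), which the earlier shmem_evict_inode() hunk frees with kfree(); longer targets still go through a pagecache page via shmem_getpage(). A userspace sketch of that split follows; SHORT_SYMLINK_LEN's actual value is not visible in this hunk, so the 128 below is an assumption, as are the structure and helper names.

/*
 * Sketch: short symlink targets are duplicated into a heap buffer owned
 * by the inode and released at eviction; long targets are out of scope.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_SHORT_SYMLINK_LEN 128      /* assumed threshold */

struct demo_inode {
        char *symlink;                  /* stands in for info->symlink */
};

static int demo_symlink(struct demo_inode *inode, const char *symname)
{
        size_t len = strlen(symname) + 1;

        if (len > DEMO_SHORT_SYMLINK_LEN)
                return -1;              /* real code falls back to a pagecache page */

        inode->symlink = malloc(len);   /* kmemdup(symname, len, GFP_KERNEL) */
        if (!inode->symlink)
                return -1;
        memcpy(inode->symlink, symname, len);
        return 0;
}

static void demo_evict(struct demo_inode *inode)
{
        free(inode->symlink);           /* kfree(info->symlink) at eviction */
        inode->symlink = NULL;
}

int main(void)
{
        struct demo_inode inode = { NULL };

        if (demo_symlink(&inode, "/tmp/target") == 0)
                printf("stored short symlink: %s\n", inode.symlink);
        demo_evict(&inode);
        return 0;
}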
@@ -2202,7 +1760,6 @@ out: | |||
2202 | return err; | 1760 | return err; |
2203 | } | 1761 | } |
2204 | 1762 | ||
2205 | |||
2206 | static const struct xattr_handler *shmem_xattr_handlers[] = { | 1763 | static const struct xattr_handler *shmem_xattr_handlers[] = { |
2207 | #ifdef CONFIG_TMPFS_POSIX_ACL | 1764 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2208 | &generic_acl_access_handler, | 1765 | &generic_acl_access_handler, |
@@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
2332 | } | 1889 | } |
2333 | #endif /* CONFIG_TMPFS_XATTR */ | 1890 | #endif /* CONFIG_TMPFS_XATTR */ |
2334 | 1891 | ||
2335 | static const struct inode_operations shmem_symlink_inline_operations = { | 1892 | static const struct inode_operations shmem_short_symlink_operations = { |
2336 | .readlink = generic_readlink, | 1893 | .readlink = generic_readlink, |
2337 | .follow_link = shmem_follow_link_inline, | 1894 | .follow_link = shmem_follow_short_symlink, |
2338 | #ifdef CONFIG_TMPFS_XATTR | 1895 | #ifdef CONFIG_TMPFS_XATTR |
2339 | .setxattr = shmem_setxattr, | 1896 | .setxattr = shmem_setxattr, |
2340 | .getxattr = shmem_getxattr, | 1897 | .getxattr = shmem_getxattr, |
@@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) | |||
2534 | if (config.max_inodes < inodes) | 2091 | if (config.max_inodes < inodes) |
2535 | goto out; | 2092 | goto out; |
2536 | /* | 2093 | /* |
2537 | * Those tests also disallow limited->unlimited while any are in | 2094 | * Those tests disallow limited->unlimited while any are in use; |
2538 | * use, so i_blocks will always be zero when max_blocks is zero; | ||
2539 | * but we must separately disallow unlimited->limited, because | 2095 | * but we must separately disallow unlimited->limited, because |
2540 | * in that case we have no record of how much is already in use. | 2096 | * in that case we have no record of how much is already in use. |
2541 | */ | 2097 | */ |
@@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) | |||
2627 | goto failed; | 2183 | goto failed; |
2628 | sbinfo->free_inodes = sbinfo->max_inodes; | 2184 | sbinfo->free_inodes = sbinfo->max_inodes; |
2629 | 2185 | ||
2630 | sb->s_maxbytes = SHMEM_MAX_BYTES; | 2186 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
2631 | sb->s_blocksize = PAGE_CACHE_SIZE; | 2187 | sb->s_blocksize = PAGE_CACHE_SIZE; |
2632 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 2188 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
2633 | sb->s_magic = TMPFS_MAGIC; | 2189 | sb->s_magic = TMPFS_MAGIC; |
@@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep; | |||
2662 | 2218 | ||
2663 | static struct inode *shmem_alloc_inode(struct super_block *sb) | 2219 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
2664 | { | 2220 | { |
2665 | struct shmem_inode_info *p; | 2221 | struct shmem_inode_info *info; |
2666 | p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); | 2222 | info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); |
2667 | if (!p) | 2223 | if (!info) |
2668 | return NULL; | 2224 | return NULL; |
2669 | return &p->vfs_inode; | 2225 | return &info->vfs_inode; |
2670 | } | 2226 | } |
2671 | 2227 | ||
2672 | static void shmem_i_callback(struct rcu_head *head) | 2228 | static void shmem_destroy_callback(struct rcu_head *head) |
2673 | { | 2229 | { |
2674 | struct inode *inode = container_of(head, struct inode, i_rcu); | 2230 | struct inode *inode = container_of(head, struct inode, i_rcu); |
2675 | INIT_LIST_HEAD(&inode->i_dentry); | 2231 | INIT_LIST_HEAD(&inode->i_dentry); |
@@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head) | |||
2678 | 2234 | ||
2679 | static void shmem_destroy_inode(struct inode *inode) | 2235 | static void shmem_destroy_inode(struct inode *inode) |
2680 | { | 2236 | { |
2681 | if ((inode->i_mode & S_IFMT) == S_IFREG) { | 2237 | if ((inode->i_mode & S_IFMT) == S_IFREG) |
2682 | /* only struct inode is valid if it's an inline symlink */ | ||
2683 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); | 2238 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
2684 | } | 2239 | call_rcu(&inode->i_rcu, shmem_destroy_callback); |
2685 | call_rcu(&inode->i_rcu, shmem_i_callback); | ||
2686 | } | 2240 | } |
2687 | 2241 | ||
2688 | static void init_once(void *foo) | 2242 | static void shmem_init_inode(void *foo) |
2689 | { | 2243 | { |
2690 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; | 2244 | struct shmem_inode_info *info = foo; |
2691 | 2245 | inode_init_once(&info->vfs_inode); | |
2692 | inode_init_once(&p->vfs_inode); | ||
2693 | } | 2246 | } |
2694 | 2247 | ||
2695 | static int init_inodecache(void) | 2248 | static int shmem_init_inodecache(void) |
2696 | { | 2249 | { |
2697 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", | 2250 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", |
2698 | sizeof(struct shmem_inode_info), | 2251 | sizeof(struct shmem_inode_info), |
2699 | 0, SLAB_PANIC, init_once); | 2252 | 0, SLAB_PANIC, shmem_init_inode); |
2700 | return 0; | 2253 | return 0; |
2701 | } | 2254 | } |
2702 | 2255 | ||
2703 | static void destroy_inodecache(void) | 2256 | static void shmem_destroy_inodecache(void) |
2704 | { | 2257 | { |
2705 | kmem_cache_destroy(shmem_inode_cachep); | 2258 | kmem_cache_destroy(shmem_inode_cachep); |
2706 | } | 2259 | } |
@@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = { | |||
2797 | #endif | 2350 | #endif |
2798 | }; | 2351 | }; |
2799 | 2352 | ||
2800 | |||
2801 | static struct dentry *shmem_mount(struct file_system_type *fs_type, | 2353 | static struct dentry *shmem_mount(struct file_system_type *fs_type, |
2802 | int flags, const char *dev_name, void *data) | 2354 | int flags, const char *dev_name, void *data) |
2803 | { | 2355 | { |
2804 | return mount_nodev(fs_type, flags, data, shmem_fill_super); | 2356 | return mount_nodev(fs_type, flags, data, shmem_fill_super); |
2805 | } | 2357 | } |
2806 | 2358 | ||
2807 | static struct file_system_type tmpfs_fs_type = { | 2359 | static struct file_system_type shmem_fs_type = { |
2808 | .owner = THIS_MODULE, | 2360 | .owner = THIS_MODULE, |
2809 | .name = "tmpfs", | 2361 | .name = "tmpfs", |
2810 | .mount = shmem_mount, | 2362 | .mount = shmem_mount, |
2811 | .kill_sb = kill_litter_super, | 2363 | .kill_sb = kill_litter_super, |
2812 | }; | 2364 | }; |
2813 | 2365 | ||
2814 | int __init init_tmpfs(void) | 2366 | int __init shmem_init(void) |
2815 | { | 2367 | { |
2816 | int error; | 2368 | int error; |
2817 | 2369 | ||
@@ -2819,18 +2371,18 @@ int __init init_tmpfs(void) | |||
2819 | if (error) | 2371 | if (error) |
2820 | goto out4; | 2372 | goto out4; |
2821 | 2373 | ||
2822 | error = init_inodecache(); | 2374 | error = shmem_init_inodecache(); |
2823 | if (error) | 2375 | if (error) |
2824 | goto out3; | 2376 | goto out3; |
2825 | 2377 | ||
2826 | error = register_filesystem(&tmpfs_fs_type); | 2378 | error = register_filesystem(&shmem_fs_type); |
2827 | if (error) { | 2379 | if (error) { |
2828 | printk(KERN_ERR "Could not register tmpfs\n"); | 2380 | printk(KERN_ERR "Could not register tmpfs\n"); |
2829 | goto out2; | 2381 | goto out2; |
2830 | } | 2382 | } |
2831 | 2383 | ||
2832 | shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, | 2384 | shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, |
2833 | tmpfs_fs_type.name, NULL); | 2385 | shmem_fs_type.name, NULL); |
2834 | if (IS_ERR(shm_mnt)) { | 2386 | if (IS_ERR(shm_mnt)) { |
2835 | error = PTR_ERR(shm_mnt); | 2387 | error = PTR_ERR(shm_mnt); |
2836 | printk(KERN_ERR "Could not kern_mount tmpfs\n"); | 2388 | printk(KERN_ERR "Could not kern_mount tmpfs\n"); |
@@ -2839,9 +2391,9 @@ int __init init_tmpfs(void) | |||
2839 | return 0; | 2391 | return 0; |
2840 | 2392 | ||
2841 | out1: | 2393 | out1: |
2842 | unregister_filesystem(&tmpfs_fs_type); | 2394 | unregister_filesystem(&shmem_fs_type); |
2843 | out2: | 2395 | out2: |
2844 | destroy_inodecache(); | 2396 | shmem_destroy_inodecache(); |
2845 | out3: | 2397 | out3: |
2846 | bdi_destroy(&shmem_backing_dev_info); | 2398 | bdi_destroy(&shmem_backing_dev_info); |
2847 | out4: | 2399 | out4: |
@@ -2849,45 +2401,6 @@ out4: | |||
2849 | return error; | 2401 | return error; |
2850 | } | 2402 | } |
2851 | 2403 | ||
2852 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
2853 | /** | ||
2854 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file | ||
2855 | * @inode: the inode to be searched | ||
2856 | * @pgoff: the offset to be searched | ||
2857 | * @pagep: the pointer for the found page to be stored | ||
2858 | * @ent: the pointer for the found swap entry to be stored | ||
2859 | * | ||
2860 | * If a page is found, refcount of it is incremented. Callers should handle | ||
2861 | * these refcount. | ||
2862 | */ | ||
2863 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | ||
2864 | struct page **pagep, swp_entry_t *ent) | ||
2865 | { | ||
2866 | swp_entry_t entry = { .val = 0 }, *ptr; | ||
2867 | struct page *page = NULL; | ||
2868 | struct shmem_inode_info *info = SHMEM_I(inode); | ||
2869 | |||
2870 | if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
2871 | goto out; | ||
2872 | |||
2873 | spin_lock(&info->lock); | ||
2874 | ptr = shmem_swp_entry(info, pgoff, NULL); | ||
2875 | #ifdef CONFIG_SWAP | ||
2876 | if (ptr && ptr->val) { | ||
2877 | entry.val = ptr->val; | ||
2878 | page = find_get_page(&swapper_space, entry.val); | ||
2879 | } else | ||
2880 | #endif | ||
2881 | page = find_get_page(inode->i_mapping, pgoff); | ||
2882 | if (ptr) | ||
2883 | shmem_swp_unmap(ptr); | ||
2884 | spin_unlock(&info->lock); | ||
2885 | out: | ||
2886 | *pagep = page; | ||
2887 | *ent = entry; | ||
2888 | } | ||
2889 | #endif | ||
2890 | |||
2891 | #else /* !CONFIG_SHMEM */ | 2404 | #else /* !CONFIG_SHMEM */ |
2892 | 2405 | ||
2893 | /* | 2406 | /* |
@@ -2901,23 +2414,23 @@ out: | |||
2901 | 2414 | ||
2902 | #include <linux/ramfs.h> | 2415 | #include <linux/ramfs.h> |
2903 | 2416 | ||
2904 | static struct file_system_type tmpfs_fs_type = { | 2417 | static struct file_system_type shmem_fs_type = { |
2905 | .name = "tmpfs", | 2418 | .name = "tmpfs", |
2906 | .mount = ramfs_mount, | 2419 | .mount = ramfs_mount, |
2907 | .kill_sb = kill_litter_super, | 2420 | .kill_sb = kill_litter_super, |
2908 | }; | 2421 | }; |
2909 | 2422 | ||
2910 | int __init init_tmpfs(void) | 2423 | int __init shmem_init(void) |
2911 | { | 2424 | { |
2912 | BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); | 2425 | BUG_ON(register_filesystem(&shmem_fs_type) != 0); |
2913 | 2426 | ||
2914 | shm_mnt = kern_mount(&tmpfs_fs_type); | 2427 | shm_mnt = kern_mount(&shmem_fs_type); |
2915 | BUG_ON(IS_ERR(shm_mnt)); | 2428 | BUG_ON(IS_ERR(shm_mnt)); |
2916 | 2429 | ||
2917 | return 0; | 2430 | return 0; |
2918 | } | 2431 | } |
2919 | 2432 | ||
2920 | int shmem_unuse(swp_entry_t entry, struct page *page) | 2433 | int shmem_unuse(swp_entry_t swap, struct page *page) |
2921 | { | 2434 | { |
2922 | return 0; | 2435 | return 0; |
2923 | } | 2436 | } |
@@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) | |||
2927 | return 0; | 2440 | return 0; |
2928 | } | 2441 | } |
2929 | 2442 | ||
2930 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | 2443 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) |
2931 | { | 2444 | { |
2932 | truncate_inode_pages_range(inode->i_mapping, start, end); | 2445 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); |
2933 | } | 2446 | } |
2934 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | 2447 | EXPORT_SYMBOL_GPL(shmem_truncate_range); |
2935 | 2448 | ||
2936 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
2937 | /** | ||
2938 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file | ||
2939 | * @inode: the inode to be searched | ||
2940 | * @pgoff: the offset to be searched | ||
2941 | * @pagep: the pointer for the found page to be stored | ||
2942 | * @ent: the pointer for the found swap entry to be stored | ||
2943 | * | ||
2944 | * If a page is found, refcount of it is incremented. Callers should handle | ||
2945 | * these refcount. | ||
2946 | */ | ||
2947 | void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, | ||
2948 | struct page **pagep, swp_entry_t *ent) | ||
2949 | { | ||
2950 | struct page *page = NULL; | ||
2951 | |||
2952 | if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | ||
2953 | goto out; | ||
2954 | page = find_get_page(inode->i_mapping, pgoff); | ||
2955 | out: | ||
2956 | *pagep = page; | ||
2957 | *ent = (swp_entry_t){ .val = 0 }; | ||
2958 | } | ||
2959 | #endif | ||
2960 | |||
2961 | #define shmem_vm_ops generic_file_vm_ops | 2449 | #define shmem_vm_ops generic_file_vm_ops |
2962 | #define shmem_file_operations ramfs_file_operations | 2450 | #define shmem_file_operations ramfs_file_operations |
2963 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) | 2451 | #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) |
2964 | #define shmem_acct_size(flags, size) 0 | 2452 | #define shmem_acct_size(flags, size) 0 |
2965 | #define shmem_unacct_size(flags, size) do {} while (0) | 2453 | #define shmem_unacct_size(flags, size) do {} while (0) |
2966 | #define SHMEM_MAX_BYTES MAX_LFS_FILESIZE | ||
2967 | 2454 | ||
2968 | #endif /* CONFIG_SHMEM */ | 2455 | #endif /* CONFIG_SHMEM */ |
2969 | 2456 | ||
@@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags | |||
2987 | if (IS_ERR(shm_mnt)) | 2474 | if (IS_ERR(shm_mnt)) |
2988 | return (void *)shm_mnt; | 2475 | return (void *)shm_mnt; |
2989 | 2476 | ||
2990 | if (size < 0 || size > SHMEM_MAX_BYTES) | 2477 | if (size < 0 || size > MAX_LFS_FILESIZE) |
2991 | return ERR_PTR(-EINVAL); | 2478 | return ERR_PTR(-EINVAL); |
2992 | 2479 | ||
2993 | if (shmem_acct_size(flags, size)) | 2480 | if (shmem_acct_size(flags, size)) |
@@ -622,6 +622,51 @@ int slab_is_available(void) | |||
622 | static struct lock_class_key on_slab_l3_key; | 622 | static struct lock_class_key on_slab_l3_key; |
623 | static struct lock_class_key on_slab_alc_key; | 623 | static struct lock_class_key on_slab_alc_key; |
624 | 624 | ||
625 | static struct lock_class_key debugobj_l3_key; | ||
626 | static struct lock_class_key debugobj_alc_key; | ||
627 | |||
628 | static void slab_set_lock_classes(struct kmem_cache *cachep, | ||
629 | struct lock_class_key *l3_key, struct lock_class_key *alc_key, | ||
630 | int q) | ||
631 | { | ||
632 | struct array_cache **alc; | ||
633 | struct kmem_list3 *l3; | ||
634 | int r; | ||
635 | |||
636 | l3 = cachep->nodelists[q]; | ||
637 | if (!l3) | ||
638 | return; | ||
639 | |||
640 | lockdep_set_class(&l3->list_lock, l3_key); | ||
641 | alc = l3->alien; | ||
642 | /* | ||
643 | * FIXME: This check for BAD_ALIEN_MAGIC | ||
644 | * should go away when common slab code is taught to | ||
645 | * work even without alien caches. | ||
646 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC | ||
647 | * for alloc_alien_cache, | ||
648 | */ | ||
649 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | ||
650 | return; | ||
651 | for_each_node(r) { | ||
652 | if (alc[r]) | ||
653 | lockdep_set_class(&alc[r]->lock, alc_key); | ||
654 | } | ||
655 | } | ||
656 | |||
657 | static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) | ||
658 | { | ||
659 | slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); | ||
660 | } | ||
661 | |||
662 | static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) | ||
663 | { | ||
664 | int node; | ||
665 | |||
666 | for_each_online_node(node) | ||
667 | slab_set_debugobj_lock_classes_node(cachep, node); | ||
668 | } | ||
669 | |||
625 | static void init_node_lock_keys(int q) | 670 | static void init_node_lock_keys(int q) |
626 | { | 671 | { |
627 | struct cache_sizes *s = malloc_sizes; | 672 | struct cache_sizes *s = malloc_sizes; |
@@ -630,29 +675,14 @@ static void init_node_lock_keys(int q) | |||
630 | return; | 675 | return; |
631 | 676 | ||
632 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { | 677 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { |
633 | struct array_cache **alc; | ||
634 | struct kmem_list3 *l3; | 678 | struct kmem_list3 *l3; |
635 | int r; | ||
636 | 679 | ||
637 | l3 = s->cs_cachep->nodelists[q]; | 680 | l3 = s->cs_cachep->nodelists[q]; |
638 | if (!l3 || OFF_SLAB(s->cs_cachep)) | 681 | if (!l3 || OFF_SLAB(s->cs_cachep)) |
639 | continue; | 682 | continue; |
640 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | 683 | |
641 | alc = l3->alien; | 684 | slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, |
642 | /* | 685 | &on_slab_alc_key, q); |
643 | * FIXME: This check for BAD_ALIEN_MAGIC | ||
644 | * should go away when common slab code is taught to | ||
645 | * work even without alien caches. | ||
646 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC | ||
647 | * for alloc_alien_cache, | ||
648 | */ | ||
649 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | ||
650 | continue; | ||
651 | for_each_node(r) { | ||
652 | if (alc[r]) | ||
653 | lockdep_set_class(&alc[r]->lock, | ||
654 | &on_slab_alc_key); | ||
655 | } | ||
656 | } | 686 | } |
657 | } | 687 | } |
658 | 688 | ||
@@ -671,6 +701,14 @@ static void init_node_lock_keys(int q) | |||
671 | static inline void init_lock_keys(void) | 701 | static inline void init_lock_keys(void) |
672 | { | 702 | { |
673 | } | 703 | } |
704 | |||
705 | static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) | ||
706 | { | ||
707 | } | ||
708 | |||
709 | static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) | ||
710 | { | ||
711 | } | ||
674 | #endif | 712 | #endif |
675 | 713 | ||
676 | /* | 714 | /* |
@@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1264 | spin_unlock_irq(&l3->list_lock); | 1302 | spin_unlock_irq(&l3->list_lock); |
1265 | kfree(shared); | 1303 | kfree(shared); |
1266 | free_alien_cache(alien); | 1304 | free_alien_cache(alien); |
1305 | if (cachep->flags & SLAB_DEBUG_OBJECTS) | ||
1306 | slab_set_debugobj_lock_classes_node(cachep, node); | ||
1267 | } | 1307 | } |
1268 | init_node_lock_keys(node); | 1308 | init_node_lock_keys(node); |
1269 | 1309 | ||
@@ -1626,6 +1666,9 @@ void __init kmem_cache_init_late(void) | |||
1626 | { | 1666 | { |
1627 | struct kmem_cache *cachep; | 1667 | struct kmem_cache *cachep; |
1628 | 1668 | ||
1669 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1670 | init_lock_keys(); | ||
1671 | |||
1629 | /* 6) resize the head arrays to their final sizes */ | 1672 | /* 6) resize the head arrays to their final sizes */ |
1630 | mutex_lock(&cache_chain_mutex); | 1673 | mutex_lock(&cache_chain_mutex); |
1631 | list_for_each_entry(cachep, &cache_chain, next) | 1674 | list_for_each_entry(cachep, &cache_chain, next) |
@@ -1636,9 +1679,6 @@ void __init kmem_cache_init_late(void) | |||
1636 | /* Done! */ | 1679 | /* Done! */ |
1637 | g_cpucache_up = FULL; | 1680 | g_cpucache_up = FULL; |
1638 | 1681 | ||
1639 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1640 | init_lock_keys(); | ||
1641 | |||
1642 | /* | 1682 | /* |
1643 | * Register a cpu startup notifier callback that initializes | 1683 | * Register a cpu startup notifier callback that initializes |
1644 | * cpu_cache_get for all new cpus | 1684 | * cpu_cache_get for all new cpus |
@@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2426 | goto oops; | 2466 | goto oops; |
2427 | } | 2467 | } |
2428 | 2468 | ||
2469 | if (flags & SLAB_DEBUG_OBJECTS) { | ||
2470 | /* | ||
2471 | * Would deadlock through slab_destroy()->call_rcu()-> | ||
2472 | * debug_object_activate()->kmem_cache_alloc(). | ||
2473 | */ | ||
2474 | WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); | ||
2475 | |||
2476 | slab_set_debugobj_lock_classes(cachep); | ||
2477 | } | ||
2478 | |||
2429 | /* cache setup completed, link it into the list */ | 2479 | /* cache setup completed, link it into the list */ |
2430 | list_add(&cachep->next, &cache_chain); | 2480 | list_add(&cachep->next, &cache_chain); |
2431 | oops: | 2481 | oops: |
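The new kmem_cache_create() check encodes the reasoning given in its comment: with SLAB_DEBUG_OBJECTS, freeing a SLAB_DESTROY_BY_RCU slab would recurse back into the allocator via slab_destroy() -> call_rcu() -> debug_object_activate() -> kmem_cache_alloc(), so that flag combination now trips WARN_ON_ONCE(). A hedged kernel-context sketch of a cache creation that would be flagged; the cache name and object type are invented for illustration:

```c
/*
 * Kernel-context sketch: a cache whose flag combination the new check
 * rejects.  "example_obj" and its cache are hypothetical; the flags are
 * the point.
 */
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>

struct example_obj {
	int payload;
};

static struct kmem_cache *example_cachep;

static int __init example_cache_init(void)
{
	/* SLAB_DESTROY_BY_RCU together with SLAB_DEBUG_OBJECTS now hits the
	 * WARN_ON_ONCE() above, because freeing would re-enter the allocator. */
	example_cachep = kmem_cache_create("example_obj",
					   sizeof(struct example_obj), 0,
					   SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS,
					   NULL);
	return example_cachep ? 0 : -ENOMEM;
}
```

Since the check is WARN_ON_ONCE(), only the first offending cache creation is reported.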
@@ -3398,7 +3448,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3398 | cache_alloc_debugcheck_before(cachep, flags); | 3448 | cache_alloc_debugcheck_before(cachep, flags); |
3399 | local_irq_save(save_flags); | 3449 | local_irq_save(save_flags); |
3400 | 3450 | ||
3401 | if (nodeid == -1) | 3451 | if (nodeid == NUMA_NO_NODE) |
3402 | nodeid = slab_node; | 3452 | nodeid = slab_node; |
3403 | 3453 | ||
3404 | if (unlikely(!cachep->nodelists[nodeid])) { | 3454 | if (unlikely(!cachep->nodelists[nodeid])) { |
@@ -3929,7 +3979,7 @@ fail: | |||
3929 | 3979 | ||
3930 | struct ccupdate_struct { | 3980 | struct ccupdate_struct { |
3931 | struct kmem_cache *cachep; | 3981 | struct kmem_cache *cachep; |
3932 | struct array_cache *new[NR_CPUS]; | 3982 | struct array_cache *new[0]; |
3933 | }; | 3983 | }; |
3934 | 3984 | ||
3935 | static void do_ccupdate_local(void *info) | 3985 | static void do_ccupdate_local(void *info) |
@@ -3951,7 +4001,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3951 | struct ccupdate_struct *new; | 4001 | struct ccupdate_struct *new; |
3952 | int i; | 4002 | int i; |
3953 | 4003 | ||
3954 | new = kzalloc(sizeof(*new), gfp); | 4004 | new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *), |
4005 | gfp); | ||
3955 | if (!new) | 4006 | if (!new) |
3956 | return -ENOMEM; | 4007 | return -ENOMEM; |
3957 | 4008 | ||
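The ccupdate_struct change swaps a fixed NR_CPUS-sized array for a zero-length trailing array that do_tune_cpucache() sizes from nr_cpu_ids, so the temporary allocation scales with the CPUs that can actually exist rather than the compile-time maximum. A small runnable userspace sketch of the same trailing-array allocation pattern (standard C flexible array member instead of the kernel's [0] idiom; all names are made up):

```c
#include <stdio.h>
#include <stdlib.h>

/* Struct with a flexible array member, mirroring ccupdate_struct's layout. */
struct percpu_update {
	const char *name;
	void *slots[];			/* one pointer per CPU, sized at allocation time */
};

static struct percpu_update *percpu_update_alloc(const char *name, int nr_cpus)
{
	/* Same arithmetic as the kzalloc() above: header plus nr_cpus pointers. */
	struct percpu_update *u =
		calloc(1, sizeof(*u) + nr_cpus * sizeof(u->slots[0]));

	if (u)
		u->name = name;
	return u;
}

int main(void)
{
	int nr_cpus = 4;		/* stand-in for nr_cpu_ids */
	struct percpu_update *u = percpu_update_alloc("example", nr_cpus);

	if (!u)
		return 1;
	printf("%s: %d slots in %zu bytes\n", u->name, nr_cpus,
	       sizeof(*u) + nr_cpus * sizeof(u->slots[0]));
	free(u);
	return 0;
}
```

With CONFIG_NR_CPUS configured in the thousands but only a handful of CPUs present, the saving on each tuning operation can be substantial.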
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -675,7 +675,7 @@ static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) | |||
675 | return check_bytes8(start, value, bytes); | 675 | return check_bytes8(start, value, bytes); |
676 | 676 | ||
677 | value64 = value | value << 8 | value << 16 | value << 24; | 677 | value64 = value | value << 8 | value << 16 | value << 24; |
678 | value64 = value64 | value64 << 32; | 678 | value64 = (value64 & 0xffffffff) | value64 << 32; |
679 | prefix = 8 - ((unsigned long)start) % 8; | 679 | prefix = 8 - ((unsigned long)start) % 8; |
680 | 680 | ||
681 | if (prefix) { | 681 | if (prefix) { |
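The small-looking check_bytes() fix matters because value is a u8 that is promoted to a signed int before the shifts: for byte patterns with the top bit set (the slub red-zone poison bytes are in this range), the OR of the shifted terms is a negative int, and converting it to u64 sign-extends into the upper 32 bits, so the old value64 | value64 << 32 left 0xffffffff in the high half instead of repeating the byte. Masking the low 32 bits first restores the intended eight-byte pattern. A runnable userspace demonstration of the difference (this models the arithmetic, it is not the kernel function):

```c
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	uint8_t value = 0x80;		/* any byte with the top bit set shows the problem */

	/* The promoted-int arithmetic in the old code produced this 32-bit
	 * pattern as a *negative* int; converting it to a 64-bit unsigned
	 * value sign-extends (two's-complement machine assumed). */
	int32_t pattern32 = (int32_t)((uint32_t)value * 0x01010101u);
	uint64_t value64 = (uint64_t)pattern32;		/* 0xffffffff80808080 */

	uint64_t old_way = value64 | value64 << 32;			/* high half stuck at ffffffff */
	uint64_t new_way = (value64 & 0xffffffff) | value64 << 32;	/* 8080808080808080 */

	printf("sign-extended : 0x%016" PRIx64 "\n", value64);
	printf("old combine   : 0x%016" PRIx64 "\n", old_way);
	printf("fixed combine : 0x%016" PRIx64 "\n", new_way);
	return 0;
}
```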
@@ -1508,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n, | |||
1508 | struct page *page, int tail) | 1508 | struct page *page, int tail) |
1509 | { | 1509 | { |
1510 | n->nr_partial++; | 1510 | n->nr_partial++; |
1511 | if (tail) | 1511 | if (tail == DEACTIVATE_TO_TAIL) |
1512 | list_add_tail(&page->lru, &n->partial); | 1512 | list_add_tail(&page->lru, &n->partial); |
1513 | else | 1513 | else |
1514 | list_add(&page->lru, &n->partial); | 1514 | list_add(&page->lru, &n->partial); |
@@ -1755,13 +1755,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1755 | enum slab_modes l = M_NONE, m = M_NONE; | 1755 | enum slab_modes l = M_NONE, m = M_NONE; |
1756 | void *freelist; | 1756 | void *freelist; |
1757 | void *nextfree; | 1757 | void *nextfree; |
1758 | int tail = 0; | 1758 | int tail = DEACTIVATE_TO_HEAD; |
1759 | struct page new; | 1759 | struct page new; |
1760 | struct page old; | 1760 | struct page old; |
1761 | 1761 | ||
1762 | if (page->freelist) { | 1762 | if (page->freelist) { |
1763 | stat(s, DEACTIVATE_REMOTE_FREES); | 1763 | stat(s, DEACTIVATE_REMOTE_FREES); |
1764 | tail = 1; | 1764 | tail = DEACTIVATE_TO_TAIL; |
1765 | } | 1765 | } |
1766 | 1766 | ||
1767 | c->tid = next_tid(c->tid); | 1767 | c->tid = next_tid(c->tid); |
@@ -1828,7 +1828,7 @@ redo: | |||
1828 | 1828 | ||
1829 | new.frozen = 0; | 1829 | new.frozen = 0; |
1830 | 1830 | ||
1831 | if (!new.inuse && n->nr_partial < s->min_partial) | 1831 | if (!new.inuse && n->nr_partial > s->min_partial) |
1832 | m = M_FREE; | 1832 | m = M_FREE; |
1833 | else if (new.freelist) { | 1833 | else if (new.freelist) { |
1834 | m = M_PARTIAL; | 1834 | m = M_PARTIAL; |
@@ -1867,7 +1867,7 @@ redo: | |||
1867 | if (m == M_PARTIAL) { | 1867 | if (m == M_PARTIAL) { |
1868 | 1868 | ||
1869 | add_partial(n, page, tail); | 1869 | add_partial(n, page, tail); |
1870 | stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1870 | stat(s, tail); |
1871 | 1871 | ||
1872 | } else if (m == M_FULL) { | 1872 | } else if (m == M_FULL) { |
1873 | 1873 | ||
@@ -2351,7 +2351,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2351 | */ | 2351 | */ |
2352 | if (unlikely(!prior)) { | 2352 | if (unlikely(!prior)) { |
2353 | remove_full(s, page); | 2353 | remove_full(s, page); |
2354 | add_partial(n, page, 0); | 2354 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
2355 | stat(s, FREE_ADD_PARTIAL); | 2355 | stat(s, FREE_ADD_PARTIAL); |
2356 | } | 2356 | } |
2357 | } | 2357 | } |
@@ -2361,11 +2361,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2361 | slab_empty: | 2361 | slab_empty: |
2362 | if (prior) { | 2362 | if (prior) { |
2363 | /* | 2363 | /* |
2364 | * Slab still on the partial list. | 2364 | * Slab on the partial list. |
2365 | */ | 2365 | */ |
2366 | remove_partial(n, page); | 2366 | remove_partial(n, page); |
2367 | stat(s, FREE_REMOVE_PARTIAL); | 2367 | stat(s, FREE_REMOVE_PARTIAL); |
2368 | } | 2368 | } else |
2369 | /* Slab must be on the full list */ | ||
2370 | remove_full(s, page); | ||
2369 | 2371 | ||
2370 | spin_unlock_irqrestore(&n->list_lock, flags); | 2372 | spin_unlock_irqrestore(&n->list_lock, flags); |
2371 | stat(s, FREE_SLAB); | 2373 | stat(s, FREE_SLAB); |
@@ -2667,7 +2669,7 @@ static void early_kmem_cache_node_alloc(int node) | |||
2667 | init_kmem_cache_node(n, kmem_cache_node); | 2669 | init_kmem_cache_node(n, kmem_cache_node); |
2668 | inc_slabs_node(kmem_cache_node, node, page->objects); | 2670 | inc_slabs_node(kmem_cache_node, node, page->objects); |
2669 | 2671 | ||
2670 | add_partial(n, page, 0); | 2672 | add_partial(n, page, DEACTIVATE_TO_HEAD); |
2671 | } | 2673 | } |
2672 | 2674 | ||
2673 | static void free_kmem_cache_nodes(struct kmem_cache *s) | 2675 | static void free_kmem_cache_nodes(struct kmem_cache *s) |
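Throughout the slub.c hunks above, the add_partial() tail argument stops being a 0/1 flag and becomes the statistics item itself (DEACTIVATE_TO_HEAD or DEACTIVATE_TO_TAIL), so callers state their intent explicitly and deactivate_slab() can account with a plain stat(s, tail) instead of a ternary. A minimal runnable sketch of this "the flag is the counter index" idiom, with invented names rather than the slub internals:

```c
#include <stdio.h>

/* Stand-ins for the slub stat items: the flag passed around is the index. */
enum stat_item { TO_HEAD, TO_TAIL, NR_STAT_ITEMS };

static unsigned long stats[NR_STAT_ITEMS];

static void bump_stat(enum stat_item item)
{
	stats[item]++;				/* no "item ? TAIL : HEAD" translation needed */
}

static void add_partial(enum stat_item where)
{
	printf("queued at %s\n", where == TO_TAIL ? "tail" : "head");
	bump_stat(where);			/* the flag doubles as the counter to bump */
}

int main(void)
{
	add_partial(TO_HEAD);
	add_partial(TO_TAIL);
	printf("head=%lu tail=%lu\n", stats[TO_HEAD], stats[TO_TAIL]);
	return 0;
}
```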
diff --git a/mm/swapfile.c b/mm/swapfile.c index 1b8c33907242..17bc224bce68 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1924 | 1924 | ||
1925 | /* | 1925 | /* |
1926 | * Find out how many pages are allowed for a single swap | 1926 | * Find out how many pages are allowed for a single swap |
1927 | * device. There are two limiting factors: 1) the number of | 1927 | * device. There are three limiting factors: 1) the number |
1928 | * bits for the swap offset in the swp_entry_t type and | 1928 | * of bits for the swap offset in the swp_entry_t type, and |
1929 | * 2) the number of bits in the a swap pte as defined by | 1929 | * 2) the number of bits in the swap pte as defined by the |
1930 | * the different architectures. In order to find the | 1930 | * the different architectures, and 3) the number of free bits |
1931 | * largest possible bit mask a swap entry with swap type 0 | 1931 | * in an exceptional radix_tree entry. In order to find the |
1932 | * largest possible bit mask, a swap entry with swap type 0 | ||
1932 | * and swap offset ~0UL is created, encoded to a swap pte, | 1933 | * and swap offset ~0UL is created, encoded to a swap pte, |
1933 | * decoded to a swp_entry_t again and finally the swap | 1934 | * decoded to a swp_entry_t again, and finally the swap |
1934 | * offset is extracted. This will mask all the bits from | 1935 | * offset is extracted. This will mask all the bits from |
1935 | * the initial ~0UL mask that can't be encoded in either | 1936 | * the initial ~0UL mask that can't be encoded in either |
1936 | * the swp_entry_t or the architecture definition of a | 1937 | * the swp_entry_t or the architecture definition of a |
1937 | * swap pte. | 1938 | * swap pte. Then the same is done for a radix_tree entry. |
1938 | */ | 1939 | */ |
1939 | maxpages = swp_offset(pte_to_swp_entry( | 1940 | maxpages = swp_offset(pte_to_swp_entry( |
1940 | swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; | 1941 | swp_entry_to_pte(swp_entry(0, ~0UL)))); |
1942 | maxpages = swp_offset(radix_to_swp_entry( | ||
1943 | swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; | ||
1944 | |||
1941 | if (maxpages > swap_header->info.last_page) { | 1945 | if (maxpages > swap_header->info.last_page) { |
1942 | maxpages = swap_header->info.last_page + 1; | 1946 | maxpages = swap_header->info.last_page + 1; |
1943 | /* p->max is an unsigned int: don't overflow it */ | 1947 | /* p->max is an unsigned int: don't overflow it */ |
diff --git a/mm/truncate.c b/mm/truncate.c index 232eb2736a79..b40ac6d4e86e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, | |||
336 | unsigned long count = 0; | 336 | unsigned long count = 0; |
337 | int i; | 337 | int i; |
338 | 338 | ||
339 | /* | ||
340 | * Note: this function may get called on a shmem/tmpfs mapping: | ||
341 | * pagevec_lookup() might then return 0 prematurely (because it | ||
342 | * got a gangful of swap entries); but it's hardly worth worrying | ||
343 | * about - it can rarely have anything to free from such a mapping | ||
344 | * (most pages are dirty), and already skips over any difficulties. | ||
345 | */ | ||
346 | |||
339 | pagevec_init(&pvec, 0); | 347 | pagevec_init(&pvec, 0); |
340 | while (index <= end && pagevec_lookup(&pvec, mapping, index, | 348 | while (index <= end && pagevec_lookup(&pvec, mapping, index, |
341 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { | 349 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { |