path: root/mm
author     Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 20:19:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 20:19:28 -0400
commit     532bfc851a7475fb6a36c1e953aa395798a7cca7 (patch)
tree       a7892e5a31330dd59f31959efbe9fda1803784fd /mm
parent     0195c00244dc2e9f522475868fa278c473ba7339 (diff)
parent     8da00edc1069f01c34510fa405dc15d96c090a3f (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge third batch of patches from Andrew Morton:

 - Some MM stragglers

 - core SMP library cleanups (on_each_cpu_mask)

 - Some IPI optimisations

 - kexec

 - kdump

 - IPMI

 - the radix-tree iterator work

 - various other misc bits.

"That'll do for -rc1.  I still have ~10 patches for 3.4, will send those
along when they've baked a little more."

* emailed from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  backlight: fix typo in tosa_lcd.c
  crc32: add help text for the algorithm select option
  mm: move hugepage test examples to tools/testing/selftests/vm
  mm: move slabinfo.c to tools/vm
  mm: move page-types.c from Documentation to tools/vm
  selftests/Makefile: make `run_tests' depend on `all'
  selftests: launch individual selftests from the main Makefile
  radix-tree: use iterators in find_get_pages* functions
  radix-tree: rewrite gang lookup using iterator
  radix-tree: introduce bit-optimized iterator
  fs/proc/namespaces.c: prevent crash when ns_entries[] is empty
  nbd: rename the nbd_device variable from lo to nbd
  pidns: add reboot_pid_ns() to handle the reboot syscall
  sysctl: use bitmap library functions
  ipmi: use locks on watchdog timeout set on reboot
  ipmi: simplify locking
  ipmi: fix message handling during panics
  ipmi: use a tasklet for handling received messages
  ipmi: increase KCS timeouts
  ipmi: decrease the IPMI message transaction time in interrupt mode
  ...
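As background for the mm/filemap.c hunks below, the radix-tree iterator work replaces the old radix_tree_gang_lookup_slot() calls with a slot-walking macro. The following is a minimal sketch of that idiom, written against the 3.4-era API as merged here; count_present() is a hypothetical helper used only to illustrate the pattern and is not part of this merge.

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

/* Hypothetical example: count non-empty slots at or above 'first'. */
static unsigned long count_present(struct radix_tree_root *root,
                                   unsigned long first)
{
        struct radix_tree_iter iter;
        void **slot;
        unsigned long nr;

        rcu_read_lock();
restart:
        nr = 0;
        radix_tree_for_each_slot(slot, root, &iter, first) {
                void *entry = radix_tree_deref_slot(slot);

                if (!entry)
                        continue;
                if (radix_tree_deref_retry(entry)) {
                        /* slot moved under us: start over, dropping the partial count */
                        goto restart;
                }
                nr++;
        }
        rcu_read_unlock();
        return nr;
}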
Diffstat (limited to 'mm')
 -rw-r--r--   mm/filemap.c      86
 -rw-r--r--   mm/memcontrol.c    4
 -rw-r--r--   mm/page_alloc.c   44
 -rw-r--r--   mm/slub.c         10
 -rw-r--r--   mm/swapfile.c      3
 -rw-r--r--   mm/truncate.c     40
6 files changed, 136 insertions, 51 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c3811bc6b9e3..79c4b2b0b14e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -813,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
                            unsigned int nr_pages, struct page **pages)
 {
-        unsigned int i;
-        unsigned int ret;
-        unsigned int nr_found, nr_skip;
+        struct radix_tree_iter iter;
+        void **slot;
+        unsigned ret = 0;
+
+        if (unlikely(!nr_pages))
+                return 0;

         rcu_read_lock();
 restart:
-        nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                                (void ***)pages, NULL, start, nr_pages);
-        ret = 0;
-        nr_skip = 0;
-        for (i = 0; i < nr_found; i++) {
+        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
                 struct page *page;
 repeat:
-                page = radix_tree_deref_slot((void **)pages[i]);
+                page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
                         continue;

@@ -837,7 +836,7 @@ repeat:
                          * when entry at index 0 moves out of or back
                          * to root: none yet gotten, safe to restart.
                          */
-                        WARN_ON(start | i);
+                        WARN_ON(iter.index);
                         goto restart;
                 }
                 /*
@@ -845,7 +844,6 @@ repeat:
                          * here as an exceptional entry: so skip over it -
                          * we only reach this from invalidate_mapping_pages().
                          */
-                        nr_skip++;
                         continue;
                 }

@@ -853,21 +851,16 @@ repeat:
                         goto repeat;

                 /* Has the page moved? */
-                if (unlikely(page != *((void **)pages[i]))) {
+                if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }

                 pages[ret] = page;
-                ret++;
+                if (++ret == nr_pages)
+                        break;
         }

-        /*
-         * If all entries were removed before we could secure them,
-         * try again, because callers stop trying once 0 is returned.
-         */
-        if (unlikely(!ret && nr_found > nr_skip))
-                goto restart;
         rcu_read_unlock();
         return ret;
 }
@@ -887,21 +880,22 @@ repeat:
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                                unsigned int nr_pages, struct page **pages)
 {
-        unsigned int i;
-        unsigned int ret;
-        unsigned int nr_found;
+        struct radix_tree_iter iter;
+        void **slot;
+        unsigned int ret = 0;
+
+        if (unlikely(!nr_pages))
+                return 0;

         rcu_read_lock();
 restart:
-        nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                                (void ***)pages, NULL, index, nr_pages);
-        ret = 0;
-        for (i = 0; i < nr_found; i++) {
+        radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
                 struct page *page;
 repeat:
-                page = radix_tree_deref_slot((void **)pages[i]);
+                page = radix_tree_deref_slot(slot);
+                /* The hole, there no reason to continue */
                 if (unlikely(!page))
-                        continue;
+                        break;

                 if (radix_tree_exception(page)) {
                         if (radix_tree_deref_retry(page)) {
@@ -924,7 +918,7 @@ repeat:
                         goto repeat;

                 /* Has the page moved? */
-                if (unlikely(page != *((void **)pages[i]))) {
+                if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }
@@ -934,14 +928,14 @@ repeat:
                  * otherwise we can get both false positives and false
                  * negatives, which is just confusing to the caller.
                  */
-                if (page->mapping == NULL || page->index != index) {
+                if (page->mapping == NULL || page->index != iter.index) {
                         page_cache_release(page);
                         break;
                 }

                 pages[ret] = page;
-                ret++;
-                index++;
+                if (++ret == nr_pages)
+                        break;
         }
         rcu_read_unlock();
         return ret;
@@ -962,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                         int tag, unsigned int nr_pages, struct page **pages)
 {
-        unsigned int i;
-        unsigned int ret;
-        unsigned int nr_found;
+        struct radix_tree_iter iter;
+        void **slot;
+        unsigned ret = 0;
+
+        if (unlikely(!nr_pages))
+                return 0;

         rcu_read_lock();
 restart:
-        nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
-                                (void ***)pages, *index, nr_pages, tag);
-        ret = 0;
-        for (i = 0; i < nr_found; i++) {
+        radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                   &iter, *index, tag) {
                 struct page *page;
 repeat:
-                page = radix_tree_deref_slot((void **)pages[i]);
+                page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
                         continue;

@@ -998,21 +993,16 @@ repeat:
                         goto repeat;

                 /* Has the page moved? */
-                if (unlikely(page != *((void **)pages[i]))) {
+                if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }

                 pages[ret] = page;
-                ret++;
+                if (++ret == nr_pages)
+                        break;
         }

-        /*
-         * If all entries were removed before we could secure them,
-         * try again, because callers stop trying once 0 is returned.
-         */
-        if (unlikely(!ret && nr_found))
-                goto restart;
         rcu_read_unlock();

         if (ret)
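For reference, the "callers stop trying once 0 is returned" contract mentioned in the removed comments looks roughly like the loop below: a caller pulls pages in batches, records where to resume, and drops its references. The old gang-lookup code had to restart when every returned slot turned out stale so it would not return a spurious 0; the iterator simply keeps walking, so that fallback goes away. This is a hedged sketch of such a caller; walk_mapping() is an invented name, not a kernel function.

#include <linux/kernel.h>
#include <linux/pagemap.h>

/* Hypothetical example: visit every page currently cached in 'mapping'. */
static void walk_mapping(struct address_space *mapping)
{
        struct page *pages[16];
        pgoff_t index = 0;
        unsigned nr, i;

        while ((nr = find_get_pages(mapping, index, ARRAY_SIZE(pages), pages))) {
                for (i = 0; i < nr; i++) {
                        /* remember where to resume before dropping the reference */
                        index = pages[i]->index + 1;
                        page_cache_release(pages[i]);
                }
        }
}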
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b2ee6df0e9bb..7d698df4a067 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5306,6 +5306,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
                 return 0;
         }

+        if (pmd_trans_unstable(pmd))
+                return 0;
         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         for (; addr != end; pte++, addr += PAGE_SIZE)
                 if (get_mctgt_type(vma, addr, *pte, NULL))
@@ -5502,6 +5504,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                 return 0;
         }

+        if (pmd_trans_unstable(pmd))
+                return 0;
 retry:
         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         for (; addr != end; addr += PAGE_SIZE) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caea788628e4..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }

 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-        on_each_cpu(drain_local_pages, NULL, 1);
+        int cpu;
+        struct per_cpu_pageset *pcp;
+        struct zone *zone;
+
+        /*
+         * Allocate in the BSS so we wont require allocation in
+         * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+         */
+        static cpumask_t cpus_with_pcps;
+
+        /*
+         * We don't care about racing with CPU hotplug event
+         * as offline notification will cause the notified
+         * cpu to drain that CPU pcps and on_each_cpu_mask
+         * disables preemption as part of its processing
+         */
+        for_each_online_cpu(cpu) {
+                bool has_pcps = false;
+                for_each_populated_zone(zone) {
+                        pcp = per_cpu_ptr(zone->pageset, cpu);
+                        if (pcp->pcp.count) {
+                                has_pcps = true;
+                                break;
+                        }
+                }
+                if (has_pcps)
+                        cpumask_set_cpu(cpu, &cpus_with_pcps);
+                else
+                        cpumask_clear_cpu(cpu, &cpus_with_pcps);
+        }
+        on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }

 #ifdef CONFIG_HIBERNATION
@@ -2308,6 +2344,10 @@ rebalance:
         if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                 if (oom_killer_disabled)
                         goto nopage;
+                /* Coredumps can quickly deplete all memory reserves */
+                if ((current->flags & PF_DUMPCORE) &&
+                    !(gfp_mask & __GFP_NOFAIL))
+                        goto nopage;
                 page = __alloc_pages_may_oom(gfp_mask, order,
                                                 zonelist, high_zoneidx,
                                                 nodemask, preferred_zone,
diff --git a/mm/slub.c b/mm/slub.c
index 64d9966d16bc..ffe13fdf8144 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2035,9 +2035,17 @@ static void flush_cpu_slab(void *d)
         __flush_cpu_slab(s, smp_processor_id());
 }

+static bool has_cpu_slab(int cpu, void *info)
+{
+        struct kmem_cache *s = info;
+        struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+        return !!(c->page);
+}
+
 static void flush_all(struct kmem_cache *s)
 {
-        on_each_cpu(flush_cpu_slab, s, 1);
+        on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
 }

 /*
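The on_each_cpu_cond() call above comes from the SMP library cleanups in this same batch: a predicate is evaluated for every online CPU, and the IPI is sent only to CPUs for which it returns true. The sketch below illustrates that pairing, assuming the 3.4-era prototype on_each_cpu_cond(cond_func, func, info, wait, gfp_flags); the per-CPU variable and function names are invented for illustration.

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/gfp.h>

static DEFINE_PER_CPU(unsigned int, pending_work);

/* Predicate: does this CPU have anything queued? Runs without an IPI. */
static bool cpu_has_pending(int cpu, void *info)
{
        return per_cpu(pending_work, cpu) != 0;
}

/* IPI handler: runs only on the CPUs selected by the predicate. */
static void flush_pending(void *info)
{
        this_cpu_write(pending_work, 0);
}

static void flush_all_pending(void)
{
        on_each_cpu_cond(cpu_has_pending, flush_pending, NULL, 1, GFP_ATOMIC);
}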
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dae42f380d6e..fafc26d1b1dc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2022,6 +2022,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
         struct page *page = NULL;
         struct inode *inode = NULL;

+        if (swap_flags & ~SWAP_FLAGS_VALID)
+                return -EINVAL;
+
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;

diff --git a/mm/truncate.c b/mm/truncate.c
index 18aded3a89fc..61a183b89df6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -626,3 +626,43 @@ int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)

         return 0;
 }
+
+/**
+ * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
+ * @inode: inode
+ * @lstart: offset of beginning of hole
+ * @lend: offset of last byte of hole
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+        struct address_space *mapping = inode->i_mapping;
+        loff_t unmap_start = round_up(lstart, PAGE_SIZE);
+        loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
+        /*
+         * This rounding is currently just for example: unmap_mapping_range
+         * expands its hole outwards, whereas we want it to contract the hole
+         * inwards.  However, existing callers of truncate_pagecache_range are
+         * doing their own page rounding first; and truncate_inode_pages_range
+         * currently BUGs if lend is not pagealigned-1 (it handles partial
+         * page at start of hole, but not partial page at end of hole).  Note
+         * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+         */
+
+        /*
+         * Unlike in truncate_pagecache, unmap_mapping_range is called only
+         * once (before truncating pagecache), and without "even_cows" flag:
+         * hole-punching should not remove private COWed pages from the hole.
+         */
+        if ((u64)unmap_end > (u64)unmap_start)
+                unmap_mapping_range(mapping, unmap_start,
+                                    1 + unmap_end - unmap_start, 0);
+        truncate_inode_pages_range(mapping, lstart, lend);
+}
+EXPORT_SYMBOL(truncate_pagecache_range);
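As the kerneldoc above suggests, a filesystem's hole-punch path would drop the pagecache over the hole first and only then free the underlying blocks. This is a hedged sketch of such a caller; myfs_punch_hole() and myfs_free_blocks() are invented names for an imaginary filesystem and are not part of this merge.

#include <linux/fs.h>
#include <linux/mm.h>

/* Filesystem-specific block deallocation, assumed to exist elsewhere. */
static int myfs_free_blocks(struct inode *inode, loff_t offset, loff_t len);

/* Hypothetical hole-punch helper for an imaginary filesystem. */
static int myfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
        loff_t end = offset + len - 1;

        /* Keep pagecache coherent with disk: drop cached pages first... */
        truncate_pagecache_range(inode, offset, end);

        /* ...then deallocate the on-disk blocks. */
        return myfs_free_blocks(inode, offset, len);
}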