author     Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 20:19:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 20:19:28 -0400
commit     532bfc851a7475fb6a36c1e953aa395798a7cca7 (patch)
tree       a7892e5a31330dd59f31959efbe9fda1803784fd /mm
parent     0195c00244dc2e9f522475868fa278c473ba7339 (diff)
parent     8da00edc1069f01c34510fa405dc15d96c090a3f (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge third batch of patches from Andrew Morton:
- Some MM stragglers
- core SMP library cleanups (on_each_cpu_mask)
- Some IPI optimisations
- kexec
- kdump
- IPMI
- the radix-tree iterator work
- various other misc bits.
"That'll do for -rc1. I still have ~10 patches for 3.4, will send
those along when they've baked a little more."
* emailed from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
backlight: fix typo in tosa_lcd.c
crc32: add help text for the algorithm select option
mm: move hugepage test examples to tools/testing/selftests/vm
mm: move slabinfo.c to tools/vm
mm: move page-types.c from Documentation to tools/vm
selftests/Makefile: make `run_tests' depend on `all'
selftests: launch individual selftests from the main Makefile
radix-tree: use iterators in find_get_pages* functions
radix-tree: rewrite gang lookup using iterator
radix-tree: introduce bit-optimized iterator
fs/proc/namespaces.c: prevent crash when ns_entries[] is empty
nbd: rename the nbd_device variable from lo to nbd
pidns: add reboot_pid_ns() to handle the reboot syscall
sysctl: use bitmap library functions
ipmi: use locks on watchdog timeout set on reboot
ipmi: simplify locking
ipmi: fix message handling during panics
ipmi: use a tasklet for handling received messages
ipmi: increase KCS timeouts
ipmi: decrease the IPMI message transaction time in interrupt mode
...
Diffstat (limited to 'mm')

-rw-r--r--  mm/filemap.c     86
-rw-r--r--  mm/memcontrol.c   4
-rw-r--r--  mm/page_alloc.c  44
-rw-r--r--  mm/slub.c        10
-rw-r--r--  mm/swapfile.c     3
-rw-r--r--  mm/truncate.c    40

6 files changed, 136 insertions(+), 51 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c3811bc6b9e3..79c4b2b0b14e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -813,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
-	unsigned int ret;
-	unsigned int nr_found, nr_skip;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned ret = 0;
+
+	if (unlikely(!nr_pages))
+		return 0;
 
 	rcu_read_lock();
 restart:
-	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, NULL, start, nr_pages);
-	ret = 0;
-	nr_skip = 0;
-	for (i = 0; i < nr_found; i++) {
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 		struct page *page;
 repeat:
-		page = radix_tree_deref_slot((void **)pages[i]);
+		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
 			continue;
 
@@ -837,7 +836,7 @@ repeat:
 			 * when entry at index 0 moves out of or back
 			 * to root: none yet gotten, safe to restart.
 			 */
-			WARN_ON(start | i);
+			WARN_ON(iter.index);
 			goto restart;
 		}
 		/*
@@ -845,7 +844,6 @@ repeat:
 			 * here as an exceptional entry: so skip over it -
 			 * we only reach this from invalidate_mapping_pages().
 			 */
-			nr_skip++;
 			continue;
 		}
 
@@ -853,21 +851,16 @@ repeat:
 			goto repeat;
 
 		/* Has the page moved? */
-		if (unlikely(page != *((void **)pages[i]))) {
+		if (unlikely(page != *slot)) {
 			page_cache_release(page);
 			goto repeat;
 		}
 
 		pages[ret] = page;
-		ret++;
+		if (++ret == nr_pages)
+			break;
 	}
 
-	/*
-	 * If all entries were removed before we could secure them,
-	 * try again, because callers stop trying once 0 is returned.
-	 */
-	if (unlikely(!ret && nr_found > nr_skip))
-		goto restart;
 	rcu_read_unlock();
 	return ret;
 }
@@ -887,21 +880,22 @@ repeat:
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 			       unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
-	unsigned int ret;
-	unsigned int nr_found;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int ret = 0;
+
+	if (unlikely(!nr_pages))
+		return 0;
 
 	rcu_read_lock();
 restart:
-	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-			(void ***)pages, NULL, index, nr_pages);
-	ret = 0;
-	for (i = 0; i < nr_found; i++) {
+	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
 		struct page *page;
 repeat:
-		page = radix_tree_deref_slot((void **)pages[i]);
+		page = radix_tree_deref_slot(slot);
+		/* The hole, there no reason to continue */
 		if (unlikely(!page))
-			continue;
+			break;
 
 		if (radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
@@ -924,7 +918,7 @@ repeat:
 			goto repeat;
 
 		/* Has the page moved? */
-		if (unlikely(page != *((void **)pages[i]))) {
+		if (unlikely(page != *slot)) {
 			page_cache_release(page);
 			goto repeat;
 		}
@@ -934,14 +928,14 @@ repeat:
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page->index != index) {
+		if (page->mapping == NULL || page->index != iter.index) {
 			page_cache_release(page);
 			break;
 		}
 
 		pages[ret] = page;
-		ret++;
-		index++;
+		if (++ret == nr_pages)
+			break;
 	}
 	rcu_read_unlock();
 	return ret;
@@ -962,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
-	unsigned int ret;
-	unsigned int nr_found;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned ret = 0;
+
+	if (unlikely(!nr_pages))
+		return 0;
 
 	rcu_read_lock();
 restart:
-	nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
-				(void ***)pages, *index, nr_pages, tag);
-	ret = 0;
-	for (i = 0; i < nr_found; i++) {
+	radix_tree_for_each_tagged(slot, &mapping->page_tree,
+				   &iter, *index, tag) {
 		struct page *page;
 repeat:
-		page = radix_tree_deref_slot((void **)pages[i]);
+		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
 			continue;
 
@@ -998,21 +993,16 @@ repeat:
 			goto repeat;
 
 		/* Has the page moved? */
-		if (unlikely(page != *((void **)pages[i]))) {
+		if (unlikely(page != *slot)) {
 			page_cache_release(page);
 			goto repeat;
 		}
 
 		pages[ret] = page;
-		ret++;
+		if (++ret == nr_pages)
+			break;
 	}
 
-	/*
-	 * If all entries were removed before we could secure them,
-	 * try again, because callers stop trying once 0 is returned.
-	 */
-	if (unlikely(!ret && nr_found))
-		goto restart;
 	rcu_read_unlock();
 
 	if (ret)
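For readers new to the iterator API used above, here is a minimal sketch (not part of this commit) of the radix_tree_for_each_slot() pattern that replaces the radix_tree_gang_lookup_slot() temporary arrays. The helper name count_present() is invented for illustration, and the sketch omits the radix_tree_deref_retry()/radix_tree_exception() handling that the real find_get_pages* code still needs:

	#include <linux/radix-tree.h>
	#include <linux/rcupdate.h>

	/* Count populated slots from 'start' onwards; illustrative only. */
	static unsigned long count_present(struct radix_tree_root *root,
					   unsigned long start)
	{
		struct radix_tree_iter iter;	/* iter.index is the current index */
		void **slot;
		unsigned long nr = 0;

		rcu_read_lock();
		radix_tree_for_each_slot(slot, root, &iter, start) {
			/* Under RCU a slot can go NULL between lookup and deref. */
			if (radix_tree_deref_slot(slot))
				nr++;
		}
		rcu_read_unlock();

		return nr;
	}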
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b2ee6df0e9bb..7d698df4a067 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5306,6 +5306,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 		return 0;
 	}
 
+	if (pmd_trans_unstable(pmd))
+		return 0;
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
 		if (get_mctgt_type(vma, addr, *pte, NULL))
@@ -5502,6 +5504,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		return 0;
 	}
 
+	if (pmd_trans_unstable(pmd))
+		return 0;
 retry:
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; addr += PAGE_SIZE) {
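Both hunks add the same guard. As a rough sketch of the general shape (not code from the patch), a pte-range walker that has to coexist with transparent huge pages bails out when the pmd is not a stable pte table; walk_one_pte() below is a hypothetical stand-in for the per-pte work:

	#include <linux/mm.h>
	#include <asm/pgtable.h>

	/* Hypothetical per-pte work done by the walker. */
	static void walk_one_pte(struct vm_area_struct *vma, unsigned long addr,
				 pte_t pte);

	static int walk_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				  unsigned long addr, unsigned long end)
	{
		pte_t *pte;
		spinlock_t *ptl;

		/*
		 * A huge or freshly-split pmd has no pte page to map, so bail
		 * out rather than handing pte_offset_map_lock() a non-pte-table.
		 */
		if (pmd_trans_unstable(pmd))
			return 0;

		pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
		for (; addr != end; pte++, addr += PAGE_SIZE)
			walk_one_pte(vma, addr, *pte);
		pte_unmap_unlock(pte - 1, ptl);
		return 0;
	}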
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caea788628e4..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }
 
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	int cpu;
+	struct per_cpu_pageset *pcp;
+	struct zone *zone;
+
+	/*
+	 * Allocate in the BSS so we wont require allocation in
+	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+	 */
+	static cpumask_t cpus_with_pcps;
+
+	/*
+	 * We don't care about racing with CPU hotplug event
+	 * as offline notification will cause the notified
+	 * cpu to drain that CPU pcps and on_each_cpu_mask
+	 * disables preemption as part of its processing
+	 */
+	for_each_online_cpu(cpu) {
+		bool has_pcps = false;
+		for_each_populated_zone(zone) {
+			pcp = per_cpu_ptr(zone->pageset, cpu);
+			if (pcp->pcp.count) {
+				has_pcps = true;
+				break;
+			}
+		}
+		if (has_pcps)
+			cpumask_set_cpu(cpu, &cpus_with_pcps);
+		else
+			cpumask_clear_cpu(cpu, &cpus_with_pcps);
+	}
+	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -2308,6 +2344,10 @@ rebalance:
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		if (oom_killer_disabled)
 			goto nopage;
+		/* Coredumps can quickly deplete all memory reserves */
+		if ((current->flags & PF_DUMPCORE) &&
+		    !(gfp_mask & __GFP_NOFAIL))
+			goto nopage;
 		page = __alloc_pages_may_oom(gfp_mask, order,
 					zonelist, high_zoneidx,
 					nodemask, preferred_zone,
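drain_all_pages() above is one of the first users of the on_each_cpu_mask() helper introduced in this series: build a cpumask of CPUs that actually have work, then IPI only those. A minimal sketch of the pattern, with invented names (poke_cpu(), cpu_needs_poke(), poke_busy_cpus()); only the helper's signature is taken from the series:

	#include <linux/cpumask.h>
	#include <linux/smp.h>
	#include <linux/printk.h>

	static bool cpu_needs_poke(int cpu);	/* hypothetical predicate */

	static void poke_cpu(void *info)
	{
		pr_info("poked on cpu %d\n", smp_processor_id());
	}

	static void poke_busy_cpus(void)
	{
		/* static, so no allocation is needed on this path */
		static cpumask_t mask;
		int cpu;

		cpumask_clear(&mask);
		for_each_online_cpu(cpu)
			if (cpu_needs_poke(cpu))
				cpumask_set_cpu(cpu, &mask);

		/* IPI only the CPUs in 'mask' and wait for them to finish. */
		on_each_cpu_mask(&mask, poke_cpu, NULL, true);
	}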
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2035,9 +2035,17 @@ static void flush_cpu_slab(void *d)
 	__flush_cpu_slab(s, smp_processor_id());
 }
 
+static bool has_cpu_slab(int cpu, void *info)
+{
+	struct kmem_cache *s = info;
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+	return !!(c->page);
+}
+
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu(flush_cpu_slab, s, 1);
+	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
 }
 
 /*
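flush_all() now relies on on_each_cpu_cond(), also added in this series: the predicate runs on the calling CPU for every online CPU, and the IPI is sent only where it returns true (GFP_ATOMIC covers the helper's possible temporary cpumask allocation). A hedged sketch of the same pattern, with invented names (struct foo_state, foo_needs_work(), foo_do_work()):

	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <linux/gfp.h>

	struct foo_pcpu { bool dirty; };

	struct foo_state {
		struct foo_pcpu __percpu *pcpu;
	};

	/* Runs on the calling CPU: decide whether 'cpu' is worth an IPI. */
	static bool foo_needs_work(int cpu, void *info)
	{
		struct foo_state *st = info;

		return per_cpu_ptr(st->pcpu, cpu)->dirty;
	}

	/* Runs on each selected CPU, in IPI context. */
	static void foo_do_work(void *info)
	{
		struct foo_state *st = info;

		this_cpu_ptr(st->pcpu)->dirty = false;
	}

	static void foo_flush_dirty_cpus(struct foo_state *st)
	{
		on_each_cpu_cond(foo_needs_work, foo_do_work, st, true, GFP_ATOMIC);
	}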
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dae42f380d6e..fafc26d1b1dc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2022,6 +2022,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 
+	if (swap_flags & ~SWAP_FLAGS_VALID)
+		return -EINVAL;
+
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
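The effect is visible from userspace: an undefined flag bit now fails fast with EINVAL instead of being silently accepted. A small illustrative sketch; the device path is a placeholder, and 0x80000 is assumed here to be a bit outside the defined SWAP_FLAG_* set on this kernel:

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <sys/swap.h>

	int main(void)
	{
		/* 0x80000 is not a defined SWAP_FLAG_* bit, so the kernel is
		 * expected to reject it with EINVAL before doing anything else. */
		if (swapon("/dev/sdXN", 0x80000) != 0)
			fprintf(stderr, "swapon: %s\n", strerror(errno));
		return 0;
	}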
diff --git a/mm/truncate.c b/mm/truncate.c
index 18aded3a89fc..61a183b89df6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -626,3 +626,43 @@ int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 
 	return 0;
 }
+
+/**
+ * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
+ * @inode: inode
+ * @lstart: offset of beginning of hole
+ * @lend: offset of last byte of hole
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+	struct address_space *mapping = inode->i_mapping;
+	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
+	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
+	/*
+	 * This rounding is currently just for example: unmap_mapping_range
+	 * expands its hole outwards, whereas we want it to contract the hole
+	 * inwards. However, existing callers of truncate_pagecache_range are
+	 * doing their own page rounding first; and truncate_inode_pages_range
+	 * currently BUGs if lend is not pagealigned-1 (it handles partial
+	 * page at start of hole, but not partial page at end of hole). Note
+	 * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+	 */
+
+	/*
+	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
+	 * once (before truncating pagecache), and without "even_cows" flag:
+	 * hole-punching should not remove private COWed pages from the hole.
+	 */
+	if ((u64)unmap_end > (u64)unmap_start)
+		unmap_mapping_range(mapping, unmap_start,
+				    1 + unmap_end - unmap_start, 0);
+	truncate_inode_pages_range(mapping, lstart, lend);
+}
+EXPORT_SYMBOL(truncate_pagecache_range);
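truncate_pagecache_range() is exported for filesystem hole-punching paths. A rough sketch of a caller, assuming invented names (foo_punch_hole(), foo_free_blocks()); a real filesystem would also zero any partial pages at the edges and hold its usual inode and extent locks around this:

	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/pagemap.h>

	/* Hypothetical block-deallocation step for the range. */
	static int foo_free_blocks(struct inode *inode, loff_t start, loff_t len);

	static int foo_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	{
		/* Contract to whole pages; callers do their own rounding. */
		loff_t first = round_up(offset, PAGE_CACHE_SIZE);
		loff_t last = round_down(offset + len, PAGE_CACHE_SIZE) - 1;

		if (first > last)
			return 0;	/* hole smaller than one page: nothing cached to drop */

		/* Drop cached pages (without touching private COWs) ... */
		truncate_pagecache_range(inode, first, last);

		/* ... before the underlying blocks are released. */
		return foo_free_blocks(inode, first, 1 + last - first);
	}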