author     Thomas Gleixner <tglx@linutronix.de>    2009-07-28 18:00:16 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2009-07-28 18:00:16 -0400
commit     ba36d1d9dd11b98a0bdee1d15ef2a11148905805 (patch)
tree       7749d3ba1d71aaa62a8dab72cca8820e27af7069 /mm
parent     55f9e9a3b3a3229f0ee73c1c2f990785bbf2ff88 (diff)
parent     104f75cb1a751a023beddacf56ca6c19ed90ce6c (diff)
Merge branch 'rt/mm' into rt/base
Conflicts:
include/linux/percpu.h
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c |   7
-rw-r--r--  mm/memory.c     |   7
-rw-r--r--  mm/page_alloc.c | 205
-rw-r--r--  mm/quicklist.c  |  15
-rw-r--r--  mm/slab.c       | 581
-rw-r--r--  mm/swap.c       | 107
-rw-r--r--  mm/vmscan.c     |  10
-rw-r--r--  mm/vmstat.c     |  23
8 files changed, 718 insertions, 237 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e717964cb5a0..e5159e2ff807 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -948,13 +948,14 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
948 | goto done; | 948 | goto done; |
949 | 949 | ||
950 | /* | 950 | /* |
951 | * Preemption is already disabled, we don't need get_cpu() | 951 | * Preemption is already disabled, we don't need get_cpu(), but |
952 | * that's not true for RT :) | ||
952 | */ | 953 | */ |
953 | cpu = smp_processor_id(); | 954 | cpu = get_cpu(); |
954 | stat = &mem->stat; | 955 | stat = &mem->stat; |
955 | cpustat = &stat->cpustat[cpu]; | 956 | cpustat = &stat->cpustat[cpu]; |
956 | |||
957 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); | 957 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); |
958 | put_cpu(); | ||
958 | done: | 959 | done: |
959 | unlock_page_cgroup(pc); | 960 | unlock_page_cgroup(pc); |
960 | } | 961 | } |
diff --git a/mm/memory.c b/mm/memory.c
index 2d2fc7a3db52..f5579956fa4c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -922,10 +922,13 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
922 | return addr; | 922 | return addr; |
923 | } | 923 | } |
924 | 924 | ||
925 | #ifdef CONFIG_PREEMPT | 925 | #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_RT) |
926 | # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) | 926 | # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) |
927 | #else | 927 | #else |
928 | /* No preempt: go for improved straight-line efficiency */ | 928 | /* |
929 | * No preempt: go for improved straight-line efficiency | ||
930 | * on PREEMPT_RT this is not a critical latency-path. | ||
931 | */ | ||
929 | # define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) | 932 | # define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) |
930 | #endif | 933 | #endif |
931 | 934 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caa92689aac9..910b62810a1e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -161,6 +161,53 @@ static unsigned long __meminitdata dma_reserve;
161 | EXPORT_SYMBOL(movable_zone); | 161 | EXPORT_SYMBOL(movable_zone); |
162 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | 162 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ |
163 | 163 | ||
164 | #ifdef CONFIG_PREEMPT_RT | ||
165 | static DEFINE_PER_CPU_LOCKED(int, pcp_locks); | ||
166 | #endif | ||
167 | |||
168 | static inline void __lock_cpu_pcp(unsigned long *flags, int cpu) | ||
169 | { | ||
170 | #ifdef CONFIG_PREEMPT_RT | ||
171 | spin_lock(&__get_cpu_lock(pcp_locks, cpu)); | ||
172 | flags = 0; | ||
173 | #else | ||
174 | local_irq_save(*flags); | ||
175 | #endif | ||
176 | } | ||
177 | |||
178 | static inline void lock_cpu_pcp(unsigned long *flags, int *this_cpu) | ||
179 | { | ||
180 | #ifdef CONFIG_PREEMPT_RT | ||
181 | (void)get_cpu_var_locked(pcp_locks, this_cpu); | ||
182 | flags = 0; | ||
183 | #else | ||
184 | local_irq_save(*flags); | ||
185 | *this_cpu = smp_processor_id(); | ||
186 | #endif | ||
187 | } | ||
188 | |||
189 | static inline void unlock_cpu_pcp(unsigned long flags, int this_cpu) | ||
190 | { | ||
191 | #ifdef CONFIG_PREEMPT_RT | ||
192 | put_cpu_var_locked(pcp_locks, this_cpu); | ||
193 | #else | ||
194 | local_irq_restore(flags); | ||
195 | #endif | ||
196 | } | ||
197 | |||
198 | static struct per_cpu_pageset * | ||
199 | get_zone_pcp(struct zone *zone, unsigned long *flags, int *this_cpu) | ||
200 | { | ||
201 | lock_cpu_pcp(flags, this_cpu); | ||
202 | return zone_pcp(zone, *this_cpu); | ||
203 | } | ||
204 | |||
205 | static void | ||
206 | put_zone_pcp(struct zone *zone, unsigned long flags, int this_cpu) | ||
207 | { | ||
208 | unlock_cpu_pcp(flags, this_cpu); | ||
209 | } | ||
210 | |||
164 | #if MAX_NUMNODES > 1 | 211 | #if MAX_NUMNODES > 1 |
165 | int nr_node_ids __read_mostly = MAX_NUMNODES; | 212 | int nr_node_ids __read_mostly = MAX_NUMNODES; |
166 | int nr_online_nodes __read_mostly = 1; | 213 | int nr_online_nodes __read_mostly = 1; |
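The helpers added in the hunk above carry the core idea of the page_alloc.c conversion: on !PREEMPT_RT they collapse to local_irq_save()/local_irq_restore(), while on PREEMPT_RT every CPU's pageset is guarded by its own spinlock, which a remote CPU is also allowed to take. A rough user-space model of the PREEMPT_RT side follows; it is illustrative only, not part of the patch, and the pthread mapping, the NR_CPUS value and the simplified signatures are invented for the sketch.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

#define NR_CPUS 4

/* one lock per CPU, standing in for DEFINE_PER_CPU_LOCKED(int, pcp_locks) */
static pthread_mutex_t pcp_locks[NR_CPUS] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* lock_cpu_pcp(): find the current CPU and take its lock; the holder may
 * sleep, which local_irq_save() could never allow.  The real
 * get_cpu_var_locked() also copes with being migrated between the CPU
 * lookup and the lock acquisition; this toy ignores that. */
static int lock_cpu_pcp(void)
{
    int cpu = sched_getcpu();

    if (cpu < 0)
        cpu = 0;                /* sched_getcpu() can fail; toy fallback */
    cpu %= NR_CPUS;
    pthread_mutex_lock(&pcp_locks[cpu]);
    return cpu;                 /* caller hands this back to unlock_cpu_pcp() */
}

/* __lock_cpu_pcp(): any CPU may take any other CPU's lock; this is what
 * lets drain_pages() touch a remote pageset without sending an IPI. */
static void __lock_cpu_pcp(int cpu)
{
    pthread_mutex_lock(&pcp_locks[cpu]);
}

static void unlock_cpu_pcp(int cpu)
{
    pthread_mutex_unlock(&pcp_locks[cpu]);
}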
@@ -523,7 +570,9 @@ static inline int free_pages_check(struct page *page) | |||
523 | static void free_pages_bulk(struct zone *zone, int count, | 570 | static void free_pages_bulk(struct zone *zone, int count, |
524 | struct list_head *list, int order) | 571 | struct list_head *list, int order) |
525 | { | 572 | { |
526 | spin_lock(&zone->lock); | 573 | unsigned long flags; |
574 | |||
575 | spin_lock_irqsave(&zone->lock, flags); | ||
527 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); | 576 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); |
528 | zone->pages_scanned = 0; | 577 | zone->pages_scanned = 0; |
529 | 578 | ||
@@ -536,27 +585,31 @@ static void free_pages_bulk(struct zone *zone, int count, | |||
536 | /* have to delete it as __free_one_page list manipulates */ | 585 | /* have to delete it as __free_one_page list manipulates */ |
537 | list_del(&page->lru); | 586 | list_del(&page->lru); |
538 | __free_one_page(page, zone, order, page_private(page)); | 587 | __free_one_page(page, zone, order, page_private(page)); |
588 | #ifdef CONFIG_PREEMPT_RT | ||
589 | cond_resched_lock(&zone->lock); | ||
590 | #endif | ||
539 | } | 591 | } |
540 | spin_unlock(&zone->lock); | 592 | spin_unlock_irqrestore(&zone->lock, flags); |
541 | } | 593 | } |
542 | 594 | ||
543 | static void free_one_page(struct zone *zone, struct page *page, int order, | 595 | static void free_one_page(struct zone *zone, struct page *page, int order, |
544 | int migratetype) | 596 | int migratetype) |
545 | { | 597 | { |
546 | spin_lock(&zone->lock); | 598 | unsigned long flags; |
599 | |||
600 | spin_lock_irqsave(&zone->lock, flags); | ||
547 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); | 601 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); |
548 | zone->pages_scanned = 0; | 602 | zone->pages_scanned = 0; |
549 | 603 | ||
550 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); | 604 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); |
551 | __free_one_page(page, zone, order, migratetype); | 605 | __free_one_page(page, zone, order, migratetype); |
552 | spin_unlock(&zone->lock); | 606 | spin_unlock_irqrestore(&zone->lock, flags); |
553 | } | 607 | } |
554 | 608 | ||
555 | static void __free_pages_ok(struct page *page, unsigned int order) | 609 | static void __free_pages_ok(struct page *page, unsigned int order) |
556 | { | 610 | { |
557 | unsigned long flags; | 611 | unsigned long flags; |
558 | int i; | 612 | int i, this_cpu, bad = 0; |
559 | int bad = 0; | ||
560 | int wasMlocked = TestClearPageMlocked(page); | 613 | int wasMlocked = TestClearPageMlocked(page); |
561 | 614 | ||
562 | kmemcheck_free_shadow(page, order); | 615 | kmemcheck_free_shadow(page, order); |
@@ -574,13 +627,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
574 | arch_free_page(page, order); | 627 | arch_free_page(page, order); |
575 | kernel_map_pages(page, 1 << order, 0); | 628 | kernel_map_pages(page, 1 << order, 0); |
576 | 629 | ||
577 | local_irq_save(flags); | 630 | lock_cpu_pcp(&flags, &this_cpu); |
578 | if (unlikely(wasMlocked)) | 631 | if (unlikely(wasMlocked)) |
579 | free_page_mlock(page); | 632 | free_page_mlock(page); |
580 | __count_vm_events(PGFREE, 1 << order); | 633 | count_vm_events(PGFREE, 1 << order); |
634 | unlock_cpu_pcp(flags, this_cpu); | ||
581 | free_one_page(page_zone(page), page, order, | 635 | free_one_page(page_zone(page), page, order, |
582 | get_pageblock_migratetype(page)); | 636 | get_pageblock_migratetype(page)); |
583 | local_irq_restore(flags); | ||
584 | } | 637 | } |
585 | 638 | ||
586 | /* | 639 | /* |
@@ -910,6 +963,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
910 | return i; | 963 | return i; |
911 | } | 964 | } |
912 | 965 | ||
966 | static void | ||
967 | isolate_pcp_pages(int count, struct list_head *src, struct list_head *dst) | ||
968 | { | ||
969 | while (count--) { | ||
970 | struct page *page = list_last_entry(src, struct page, lru); | ||
971 | list_move(&page->lru, dst); | ||
972 | } | ||
973 | } | ||
974 | |||
975 | |||
913 | #ifdef CONFIG_NUMA | 976 | #ifdef CONFIG_NUMA |
914 | /* | 977 | /* |
915 | * Called from the vmstat counter updater to drain pagesets of this | 978 | * Called from the vmstat counter updater to drain pagesets of this |
@@ -921,17 +984,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
921 | */ | 984 | */ |
922 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | 985 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
923 | { | 986 | { |
987 | LIST_HEAD(free_list); | ||
924 | unsigned long flags; | 988 | unsigned long flags; |
925 | int to_drain; | 989 | int to_drain; |
990 | int this_cpu; | ||
926 | 991 | ||
927 | local_irq_save(flags); | 992 | lock_cpu_pcp(&flags, &this_cpu); |
928 | if (pcp->count >= pcp->batch) | 993 | if (pcp->count >= pcp->batch) |
929 | to_drain = pcp->batch; | 994 | to_drain = pcp->batch; |
930 | else | 995 | else |
931 | to_drain = pcp->count; | 996 | to_drain = pcp->count; |
932 | free_pages_bulk(zone, to_drain, &pcp->list, 0); | 997 | isolate_pcp_pages(to_drain, &pcp->list, &free_list); |
933 | pcp->count -= to_drain; | 998 | pcp->count -= to_drain; |
934 | local_irq_restore(flags); | 999 | unlock_cpu_pcp(flags, this_cpu); |
1000 | free_pages_bulk(zone, to_drain, &free_list, 0); | ||
935 | } | 1001 | } |
936 | #endif | 1002 | #endif |
937 | 1003 | ||
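drain_zone_pages() above shows the two-phase pattern that most of the page_alloc.c hunks repeat: detach a batch from the per-CPU list while the pcp lock is held, drop that lock, and only then hand the batch to free_pages_bulk(), which now takes zone->lock with spin_lock_irqsave() (a sleeping lock on PREEMPT_RT, so it must not be nested inside the pcp lock or an irq-off region). A minimal sketch of the detach step, using a toy singly linked list rather than the kernel's list API (names invented, not the patch's code):

#include <stddef.h>

struct node { struct node *next; };
struct toy_list { struct node *head; int count; };

/* Move up to 'count' nodes from 'src' to 'dst'.  In the patch this role is
 * played by isolate_pcp_pages(), called with the pcp lock held and touching
 * only per-CPU state; the expensive buddy free happens later, unlocked. */
static void isolate(struct toy_list *src, struct toy_list *dst, int count)
{
    while (count-- > 0 && src->head) {
        struct node *n = src->head;

        src->head = n->next;
        src->count--;
        n->next = dst->head;
        dst->head = n;
        dst->count++;
    }
}

free_hot_cold_page() and drain_pages() further down in this diff use the same isolate-then-free scheme.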
@@ -950,14 +1016,22 @@ static void drain_pages(unsigned int cpu) | |||
950 | for_each_populated_zone(zone) { | 1016 | for_each_populated_zone(zone) { |
951 | struct per_cpu_pageset *pset; | 1017 | struct per_cpu_pageset *pset; |
952 | struct per_cpu_pages *pcp; | 1018 | struct per_cpu_pages *pcp; |
1019 | LIST_HEAD(free_list); | ||
1020 | int count; | ||
953 | 1021 | ||
1022 | __lock_cpu_pcp(&flags, cpu); | ||
954 | pset = zone_pcp(zone, cpu); | 1023 | pset = zone_pcp(zone, cpu); |
955 | 1024 | if (!pset) { | |
1025 | unlock_cpu_pcp(flags, cpu); | ||
1026 | WARN_ON(1); | ||
1027 | continue; | ||
1028 | } | ||
956 | pcp = &pset->pcp; | 1029 | pcp = &pset->pcp; |
957 | local_irq_save(flags); | 1030 | isolate_pcp_pages(pcp->count, &pcp->list, &free_list); |
958 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | 1031 | count = pcp->count; |
959 | pcp->count = 0; | 1032 | pcp->count = 0; |
960 | local_irq_restore(flags); | 1033 | unlock_cpu_pcp(flags, cpu); |
1034 | free_pages_bulk(zone, count, &free_list, 0); | ||
961 | } | 1035 | } |
962 | } | 1036 | } |
963 | 1037 | ||
@@ -969,12 +1043,52 @@ void drain_local_pages(void *arg) | |||
969 | drain_pages(smp_processor_id()); | 1043 | drain_pages(smp_processor_id()); |
970 | } | 1044 | } |
971 | 1045 | ||
1046 | #ifdef CONFIG_PREEMPT_RT | ||
1047 | static void drain_local_pages_work(struct work_struct *wrk) | ||
1048 | { | ||
1049 | drain_pages(smp_processor_id()); | ||
1050 | } | ||
1051 | #endif | ||
1052 | |||
972 | /* | 1053 | /* |
973 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator | 1054 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator |
974 | */ | 1055 | */ |
975 | void drain_all_pages(void) | 1056 | void drain_all_pages(void) |
976 | { | 1057 | { |
1058 | #ifdef CONFIG_PREEMPT_RT | ||
1059 | /* | ||
1060 | * HACK!!!!! | ||
1061 | * For RT we can't use IPIs to run drain_local_pages, since | ||
1062 | * that code will call spin_locks that will now sleep. | ||
1063 | * But, schedule_on_each_cpu will call kzalloc, which will | ||
1064 | * call page_alloc which was what calls this. | ||
1065 | * | ||
1066 | * Luckily, there's a condition to get here, and that is if | ||
1067 | * the order passed in to alloc_pages is greater than 0 | ||
1068 | * (alloced more than a page size). The slabs only allocate | ||
1069 | * what is needed, and the allocation made by schedule_on_each_cpu | ||
1070 | * does an alloc of "sizeof(void *)*nr_cpu_ids". | ||
1071 | * | ||
1072 | * So we can safely call schedule_on_each_cpu if that number | ||
1073 | * is less than a page. Otherwise don't bother. At least warn of | ||
1074 | * this issue. | ||
1075 | * | ||
1076 | * And yes, this is one big hack. Please fix ;-) | ||
1077 | */ | ||
1078 | if (sizeof(void *)*nr_cpu_ids < PAGE_SIZE) | ||
1079 | schedule_on_each_cpu(drain_local_pages_work); | ||
1080 | else { | ||
1081 | static int once; | ||
1082 | if (!once) { | ||
1083 | printk(KERN_ERR "Can't drain all CPUS due to possible recursion\n"); | ||
1084 | once = 1; | ||
1085 | } | ||
1086 | drain_local_pages(NULL); | ||
1087 | } | ||
1088 | |||
1089 | #else | ||
977 | on_each_cpu(drain_local_pages, NULL, 1); | 1090 | on_each_cpu(drain_local_pages, NULL, 1); |
1091 | #endif | ||
978 | } | 1092 | } |
979 | 1093 | ||
980 | #ifdef CONFIG_HIBERNATION | 1094 | #ifdef CONFIG_HIBERNATION |
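The RT branch of drain_all_pages() above rests on a size argument: schedule_on_each_cpu() itself allocates about sizeof(void *) * nr_cpu_ids bytes, so as long as that stays below a page the nested allocation is order-0 and cannot recurse into the order > 0 path that called the drain. A worked example with assumed numbers (8-byte pointers, 4 KiB pages, 256 possible CPUs, none of which come from the patch):

#include <stdio.h>

int main(void)
{
    unsigned long page_size = 4096;     /* assumed PAGE_SIZE */
    unsigned long nr_cpu_ids = 256;     /* assumed CPU count  */
    unsigned long need = sizeof(void *) * nr_cpu_ids;

    /* 8 * 256 = 2048 < 4096, so the workqueue-based drain is used;
     * with 512 or more possible CPUs the fallback (local drain plus
     * the one-time printk warning) would kick in instead. */
    printf("need %lu bytes, PAGE_SIZE %lu -> %s\n", need, page_size,
           need < page_size ? "schedule_on_each_cpu" : "local drain only");
    return 0;
}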
@@ -1019,9 +1133,10 @@ void mark_free_pages(struct zone *zone) | |||
1019 | static void free_hot_cold_page(struct page *page, int cold) | 1133 | static void free_hot_cold_page(struct page *page, int cold) |
1020 | { | 1134 | { |
1021 | struct zone *zone = page_zone(page); | 1135 | struct zone *zone = page_zone(page); |
1136 | struct per_cpu_pageset *pset; | ||
1022 | struct per_cpu_pages *pcp; | 1137 | struct per_cpu_pages *pcp; |
1023 | unsigned long flags; | 1138 | unsigned long flags; |
1024 | int wasMlocked = TestClearPageMlocked(page); | 1139 | int count, this_cpu, wasMlocked = TestClearPageMlocked(page); |
1025 | 1140 | ||
1026 | kmemcheck_free_shadow(page, 0); | 1141 | kmemcheck_free_shadow(page, 0); |
1027 | 1142 | ||
@@ -1037,12 +1152,12 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1037 | arch_free_page(page, 0); | 1152 | arch_free_page(page, 0); |
1038 | kernel_map_pages(page, 1, 0); | 1153 | kernel_map_pages(page, 1, 0); |
1039 | 1154 | ||
1040 | pcp = &zone_pcp(zone, get_cpu())->pcp; | 1155 | pset = get_zone_pcp(zone, &flags, &this_cpu); |
1156 | pcp = &pset->pcp; | ||
1041 | set_page_private(page, get_pageblock_migratetype(page)); | 1157 | set_page_private(page, get_pageblock_migratetype(page)); |
1042 | local_irq_save(flags); | ||
1043 | if (unlikely(wasMlocked)) | 1158 | if (unlikely(wasMlocked)) |
1044 | free_page_mlock(page); | 1159 | free_page_mlock(page); |
1045 | __count_vm_event(PGFREE); | 1160 | count_vm_event(PGFREE); |
1046 | 1161 | ||
1047 | if (cold) | 1162 | if (cold) |
1048 | list_add_tail(&page->lru, &pcp->list); | 1163 | list_add_tail(&page->lru, &pcp->list); |
@@ -1050,11 +1165,15 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1050 | list_add(&page->lru, &pcp->list); | 1165 | list_add(&page->lru, &pcp->list); |
1051 | pcp->count++; | 1166 | pcp->count++; |
1052 | if (pcp->count >= pcp->high) { | 1167 | if (pcp->count >= pcp->high) { |
1053 | free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | 1168 | LIST_HEAD(free_list); |
1169 | |||
1170 | isolate_pcp_pages(pcp->batch, &pcp->list, &free_list); | ||
1054 | pcp->count -= pcp->batch; | 1171 | pcp->count -= pcp->batch; |
1055 | } | 1172 | count = pcp->batch; |
1056 | local_irq_restore(flags); | 1173 | put_zone_pcp(zone, flags, this_cpu); |
1057 | put_cpu(); | 1174 | free_pages_bulk(zone, count, &free_list, 0); |
1175 | } else | ||
1176 | put_zone_pcp(zone, flags, this_cpu); | ||
1058 | } | 1177 | } |
1059 | 1178 | ||
1060 | void free_hot_page(struct page *page) | 1179 | void free_hot_page(struct page *page) |
@@ -1108,15 +1227,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |||
1108 | unsigned long flags; | 1227 | unsigned long flags; |
1109 | struct page *page; | 1228 | struct page *page; |
1110 | int cold = !!(gfp_flags & __GFP_COLD); | 1229 | int cold = !!(gfp_flags & __GFP_COLD); |
1111 | int cpu; | 1230 | struct per_cpu_pageset *pset; |
1231 | int this_cpu; | ||
1112 | 1232 | ||
1113 | again: | 1233 | again: |
1114 | cpu = get_cpu(); | 1234 | pset = get_zone_pcp(zone, &flags, &this_cpu); |
1235 | |||
1115 | if (likely(order == 0)) { | 1236 | if (likely(order == 0)) { |
1116 | struct per_cpu_pages *pcp; | 1237 | struct per_cpu_pages *pcp = &pset->pcp; |
1117 | 1238 | ||
1118 | pcp = &zone_pcp(zone, cpu)->pcp; | ||
1119 | local_irq_save(flags); | ||
1120 | if (!pcp->count) { | 1239 | if (!pcp->count) { |
1121 | pcp->count = rmqueue_bulk(zone, 0, | 1240 | pcp->count = rmqueue_bulk(zone, 0, |
1122 | pcp->batch, &pcp->list, migratetype); | 1241 | pcp->batch, &pcp->list, migratetype); |
@@ -1158,7 +1277,7 @@ again: | |||
1158 | */ | 1277 | */ |
1159 | WARN_ON_ONCE(order > 1); | 1278 | WARN_ON_ONCE(order > 1); |
1160 | } | 1279 | } |
1161 | spin_lock_irqsave(&zone->lock, flags); | 1280 | spin_lock(&zone->lock); |
1162 | page = __rmqueue(zone, order, migratetype); | 1281 | page = __rmqueue(zone, order, migratetype); |
1163 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); | 1282 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); |
1164 | spin_unlock(&zone->lock); | 1283 | spin_unlock(&zone->lock); |
@@ -1168,8 +1287,7 @@ again: | |||
1168 | 1287 | ||
1169 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1288 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1170 | zone_statistics(preferred_zone, zone); | 1289 | zone_statistics(preferred_zone, zone); |
1171 | local_irq_restore(flags); | 1290 | put_zone_pcp(zone, flags, this_cpu); |
1172 | put_cpu(); | ||
1173 | 1291 | ||
1174 | VM_BUG_ON(bad_range(zone, page)); | 1292 | VM_BUG_ON(bad_range(zone, page)); |
1175 | if (prep_new_page(page, order, gfp_flags)) | 1293 | if (prep_new_page(page, order, gfp_flags)) |
@@ -1177,8 +1295,7 @@ again: | |||
1177 | return page; | 1295 | return page; |
1178 | 1296 | ||
1179 | failed: | 1297 | failed: |
1180 | local_irq_restore(flags); | 1298 | put_zone_pcp(zone, flags, this_cpu); |
1181 | put_cpu(); | ||
1182 | return NULL; | 1299 | return NULL; |
1183 | } | 1300 | } |
1184 | 1301 | ||
@@ -3036,7 +3153,23 @@ static inline void free_zone_pagesets(int cpu) | |||
3036 | struct zone *zone; | 3153 | struct zone *zone; |
3037 | 3154 | ||
3038 | for_each_zone(zone) { | 3155 | for_each_zone(zone) { |
3039 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | 3156 | unsigned long flags; |
3157 | struct per_cpu_pageset *pset; | ||
3158 | |||
3159 | /* | ||
3160 | * On PREEMPT_RT the allocator is preemptible, therefore | ||
3161 | * kstopmachine can preempt a process in the middle of an | ||
3162 | * allocation, freeing the pset underneath such a process | ||
3163 | * isn't a good idea. | ||
3164 | * | ||
3165 | * Take the per-cpu pcp lock to allow the task to complete | ||
3166 | * before we free it. New tasks will be held off by the | ||
3167 | * cpu_online() check in get_cpu_var_locked(). | ||
3168 | */ | ||
3169 | __lock_cpu_pcp(&flags, cpu); | ||
3170 | pset = zone_pcp(zone, cpu); | ||
3171 | zone_pcp(zone, cpu) = NULL; | ||
3172 | unlock_cpu_pcp(flags, cpu); | ||
3040 | 3173 | ||
3041 | /* Free per_cpu_pageset if it is slab allocated */ | 3174 | /* Free per_cpu_pageset if it is slab allocated */ |
3042 | if (pset != &boot_pageset[cpu]) | 3175 | if (pset != &boot_pageset[cpu]) |
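The free_zone_pagesets() hunk above closes a window that only exists because the allocator became preemptible: a task preempted in the middle of an allocation must not have its pageset freed underneath it, so the pointer is detached under the per-CPU pcp lock and only freed afterwards, and later users see NULL, which is what the new !pset check in drain_pages() earlier in this diff is for. The general shape of that teardown, as a hedged generic sketch rather than the patch's code:

#include <pthread.h>

/* Detach '*slot' under 'lock' and return the old value for the caller to
 * free.  Current holders of 'lock' finish before the swap; later users
 * observe NULL instead of a stale pointer. */
static void *detach_under_lock(pthread_mutex_t *lock, void **slot)
{
    void *old;

    pthread_mutex_lock(lock);
    old = *slot;
    *slot = NULL;
    pthread_mutex_unlock(lock);

    return old;
}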
diff --git a/mm/quicklist.c b/mm/quicklist.c
index e66d07d1b4ff..03341b014c2b 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -19,7 +19,7 @@
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/quicklist.h> | 20 | #include <linux/quicklist.h> |
21 | 21 | ||
22 | DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; | 22 | DEFINE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; |
23 | 23 | ||
24 | #define FRACTION_OF_NODE_MEM 16 | 24 | #define FRACTION_OF_NODE_MEM 16 |
25 | 25 | ||
@@ -66,17 +66,14 @@ void quicklist_trim(int nr, void (*dtor)(void *), | |||
66 | { | 66 | { |
67 | long pages_to_free; | 67 | long pages_to_free; |
68 | struct quicklist *q; | 68 | struct quicklist *q; |
69 | int cpu; | ||
69 | 70 | ||
70 | q = &get_cpu_var(quicklist)[nr]; | 71 | q = &get_cpu_var_locked(quicklist, &cpu)[nr]; |
71 | if (q->nr_pages > min_pages) { | 72 | if (q->nr_pages > min_pages) { |
72 | pages_to_free = min_pages_to_free(q, min_pages, max_free); | 73 | pages_to_free = min_pages_to_free(q, min_pages, max_free); |
73 | 74 | ||
74 | while (pages_to_free > 0) { | 75 | while (pages_to_free > 0) { |
75 | /* | 76 | void *p = __quicklist_alloc(q); |
76 | * We pass a gfp_t of 0 to quicklist_alloc here | ||
77 | * because we will never call into the page allocator. | ||
78 | */ | ||
79 | void *p = quicklist_alloc(nr, 0, NULL); | ||
80 | 77 | ||
81 | if (dtor) | 78 | if (dtor) |
82 | dtor(p); | 79 | dtor(p); |
@@ -84,7 +81,7 @@ void quicklist_trim(int nr, void (*dtor)(void *), | |||
84 | pages_to_free--; | 81 | pages_to_free--; |
85 | } | 82 | } |
86 | } | 83 | } |
87 | put_cpu_var(quicklist); | 84 | put_cpu_var_locked(quicklist, cpu); |
88 | } | 85 | } |
89 | 86 | ||
90 | unsigned long quicklist_total_size(void) | 87 | unsigned long quicklist_total_size(void) |
@@ -94,7 +91,7 @@ unsigned long quicklist_total_size(void) | |||
94 | struct quicklist *ql, *q; | 91 | struct quicklist *ql, *q; |
95 | 92 | ||
96 | for_each_online_cpu(cpu) { | 93 | for_each_online_cpu(cpu) { |
97 | ql = per_cpu(quicklist, cpu); | 94 | ql = per_cpu_var_locked(quicklist, cpu); |
98 | for (q = ql; q < ql + CONFIG_NR_QUICK; q++) | 95 | for (q = ql; q < ql + CONFIG_NR_QUICK; q++) |
99 | count += q->nr_pages; | 96 | count += q->nr_pages; |
100 | } | 97 | } |
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -121,6 +121,138 @@
121 | #include <asm/page.h> | 121 | #include <asm/page.h> |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * On !PREEMPT_RT, raw irq flags are used as a per-CPU locking | ||
125 | * mechanism. | ||
126 | * | ||
127 | * On PREEMPT_RT, we use per-CPU locks for this. That's why the | ||
128 | * calling convention is changed slightly: a new 'flags' argument | ||
129 | * is passed to 'irq disable/enable' - the PREEMPT_RT code stores | ||
130 | * the CPU number of the lock there. | ||
131 | */ | ||
132 | #ifndef CONFIG_PREEMPT_RT | ||
133 | |||
134 | # define slab_irq_disable(cpu) \ | ||
135 | do { local_irq_disable(); (cpu) = smp_processor_id(); } while (0) | ||
136 | # define slab_irq_enable(cpu) local_irq_enable() | ||
137 | |||
138 | static inline void slab_irq_disable_this_rt(int cpu) | ||
139 | { | ||
140 | } | ||
141 | |||
142 | static inline void slab_irq_enable_rt(int cpu) | ||
143 | { | ||
144 | } | ||
145 | |||
146 | # define slab_irq_save(flags, cpu) \ | ||
147 | do { local_irq_save(flags); (cpu) = smp_processor_id(); } while (0) | ||
148 | # define slab_irq_restore(flags, cpu) local_irq_restore(flags) | ||
149 | |||
150 | /* | ||
151 | * In the __GFP_WAIT case we enable/disable interrupts on !PREEMPT_RT, | ||
152 | * which has no per-CPU locking effect since we are holding the cache | ||
153 | * lock in that case already. | ||
154 | */ | ||
155 | static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) | ||
156 | { | ||
157 | if (flags & __GFP_WAIT) | ||
158 | local_irq_enable(); | ||
159 | } | ||
160 | |||
161 | static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) | ||
162 | { | ||
163 | if (flags & __GFP_WAIT) | ||
164 | local_irq_disable(); | ||
165 | } | ||
166 | |||
167 | # define slab_spin_lock_irq(lock, cpu) \ | ||
168 | do { spin_lock_irq(lock); (cpu) = smp_processor_id(); } while (0) | ||
169 | # define slab_spin_unlock_irq(lock, cpu) spin_unlock_irq(lock) | ||
170 | |||
171 | # define slab_spin_lock_irqsave(lock, flags, cpu) \ | ||
172 | do { spin_lock_irqsave(lock, flags); (cpu) = smp_processor_id(); } while (0) | ||
173 | # define slab_spin_unlock_irqrestore(lock, flags, cpu) \ | ||
174 | do { spin_unlock_irqrestore(lock, flags); } while (0) | ||
175 | |||
176 | #else /* CONFIG_PREEMPT_RT */ | ||
177 | |||
178 | /* | ||
179 | * Instead of serializing the per-cpu state by disabling interrupts we do so | ||
180 | * by a lock. This keeps the code preemptable - albeit at the cost of remote | ||
181 | * memory access when the task does get migrated away. | ||
182 | */ | ||
183 | DEFINE_PER_CPU_LOCKED(struct list_head, slab) = { 0, }; | ||
184 | |||
185 | static void _slab_irq_disable(int *cpu) | ||
186 | { | ||
187 | (void)get_cpu_var_locked(slab, cpu); | ||
188 | } | ||
189 | |||
190 | #define slab_irq_disable(cpu) _slab_irq_disable(&(cpu)) | ||
191 | |||
192 | static inline void slab_irq_enable(int cpu) | ||
193 | { | ||
194 | LIST_HEAD(list); | ||
195 | |||
196 | list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); | ||
197 | put_cpu_var_locked(slab, cpu); | ||
198 | |||
199 | while (!list_empty(&list)) { | ||
200 | struct page *page = list_first_entry(&list, struct page, lru); | ||
201 | list_del(&page->lru); | ||
202 | __free_pages(page, page->index); | ||
203 | } | ||
204 | } | ||
205 | |||
206 | static inline void slab_irq_disable_this_rt(int cpu) | ||
207 | { | ||
208 | spin_lock(&__get_cpu_lock(slab, cpu)); | ||
209 | } | ||
210 | |||
211 | static inline void slab_irq_enable_rt(int cpu) | ||
212 | { | ||
213 | LIST_HEAD(list); | ||
214 | |||
215 | list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); | ||
216 | spin_unlock(&__get_cpu_lock(slab, cpu)); | ||
217 | |||
218 | while (!list_empty(&list)) { | ||
219 | struct page *page = list_first_entry(&list, struct page, lru); | ||
220 | list_del(&page->lru); | ||
221 | __free_pages(page, page->index); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | # define slab_irq_save(flags, cpu) \ | ||
226 | do { slab_irq_disable(cpu); (void) (flags); } while (0) | ||
227 | # define slab_irq_restore(flags, cpu) \ | ||
228 | do { slab_irq_enable(cpu); (void) (flags); } while (0) | ||
229 | |||
230 | /* | ||
231 | * On PREEMPT_RT we have to drop the locks unconditionally to avoid lock | ||
232 | * recursion on the cache_grow()->alloc_slabmgmt() path. | ||
233 | */ | ||
234 | static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) | ||
235 | { | ||
236 | slab_irq_enable(*cpu); | ||
237 | } | ||
238 | |||
239 | static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) | ||
240 | { | ||
241 | slab_irq_disable(*cpu); | ||
242 | } | ||
243 | |||
244 | # define slab_spin_lock_irq(lock, cpu) \ | ||
245 | do { slab_irq_disable(cpu); spin_lock(lock); } while (0) | ||
246 | # define slab_spin_unlock_irq(lock, cpu) \ | ||
247 | do { spin_unlock(lock); slab_irq_enable(cpu); } while (0) | ||
248 | # define slab_spin_lock_irqsave(lock, flags, cpu) \ | ||
249 | do { slab_irq_disable(cpu); spin_lock_irqsave(lock, flags); } while (0) | ||
250 | # define slab_spin_unlock_irqrestore(lock, flags, cpu) \ | ||
251 | do { spin_unlock_irqrestore(lock, flags); slab_irq_enable(cpu); } while (0) | ||
252 | |||
253 | #endif /* CONFIG_PREEMPT_RT */ | ||
254 | |||
255 | /* | ||
124 | * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. | 256 | * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. |
125 | * 0 for faster, smaller code (especially in the critical paths). | 257 | * 0 for faster, smaller code (especially in the critical paths). |
126 | * | 258 | * |
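The block added above defines the calling convention used by the rest of the slab.c changes: slab_irq_disable()/slab_irq_enable() keep their old irq-off meaning on !PREEMPT_RT, while on PREEMPT_RT they take a per-CPU lock and, when that lock is released, free whatever pages kmem_freepages() queued on the per-CPU 'slab' list in the meantime (deferring the page free avoids recursing into the page allocator while the lock is held). A compact user-space model of the RT side, with invented names and malloc()/free() standing in for the buddy allocator; illustrative only, not part of the patch.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

#define NR_CPUS 4

struct deferred { struct deferred *next; void *addr; };

static pthread_mutex_t slab_lock[NR_CPUS] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static struct deferred *deferred_pages[NR_CPUS];

static int slab_irq_disable_model(void)
{
    int cpu = sched_getcpu();

    if (cpu < 0)
        cpu = 0;
    cpu %= NR_CPUS;
    pthread_mutex_lock(&slab_lock[cpu]);
    return cpu;
}

static void slab_irq_enable_model(int cpu)
{
    /* splice the deferred list while still holding the lock ... */
    struct deferred *list = deferred_pages[cpu];

    deferred_pages[cpu] = NULL;
    pthread_mutex_unlock(&slab_lock[cpu]);

    /* ... and free it afterwards, outside the per-CPU lock, mirroring
     * the list_splice_init() + __free_pages() loop in the patch */
    while (list) {
        struct deferred *d = list;

        list = d->next;
        free(d->addr);
        free(d);
    }
}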
@@ -316,7 +448,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | |||
316 | static int drain_freelist(struct kmem_cache *cache, | 448 | static int drain_freelist(struct kmem_cache *cache, |
317 | struct kmem_list3 *l3, int tofree); | 449 | struct kmem_list3 *l3, int tofree); |
318 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, | 450 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
319 | int node); | 451 | int node, int *this_cpu); |
320 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); | 452 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); |
321 | static void cache_reap(struct work_struct *unused); | 453 | static void cache_reap(struct work_struct *unused); |
322 | 454 | ||
@@ -687,9 +819,10 @@ int slab_is_available(void) | |||
687 | 819 | ||
688 | static DEFINE_PER_CPU(struct delayed_work, reap_work); | 820 | static DEFINE_PER_CPU(struct delayed_work, reap_work); |
689 | 821 | ||
690 | static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | 822 | static inline struct array_cache * |
823 | cpu_cache_get(struct kmem_cache *cachep, int this_cpu) | ||
691 | { | 824 | { |
692 | return cachep->array[smp_processor_id()]; | 825 | return cachep->array[this_cpu]; |
693 | } | 826 | } |
694 | 827 | ||
695 | static inline struct kmem_cache *__find_general_cachep(size_t size, | 828 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
@@ -930,7 +1063,7 @@ static int transfer_objects(struct array_cache *to, | |||
930 | #ifndef CONFIG_NUMA | 1063 | #ifndef CONFIG_NUMA |
931 | 1064 | ||
932 | #define drain_alien_cache(cachep, alien) do { } while (0) | 1065 | #define drain_alien_cache(cachep, alien) do { } while (0) |
933 | #define reap_alien(cachep, l3) do { } while (0) | 1066 | #define reap_alien(cachep, l3, this_cpu) 0 |
934 | 1067 | ||
935 | static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) | 1068 | static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
936 | { | 1069 | { |
@@ -941,27 +1074,28 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) | |||
941 | { | 1074 | { |
942 | } | 1075 | } |
943 | 1076 | ||
944 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1077 | static inline int |
1078 | cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) | ||
945 | { | 1079 | { |
946 | return 0; | 1080 | return 0; |
947 | } | 1081 | } |
948 | 1082 | ||
949 | static inline void *alternate_node_alloc(struct kmem_cache *cachep, | 1083 | static inline void *alternate_node_alloc(struct kmem_cache *cachep, |
950 | gfp_t flags) | 1084 | gfp_t flags, int *this_cpu) |
951 | { | 1085 | { |
952 | return NULL; | 1086 | return NULL; |
953 | } | 1087 | } |
954 | 1088 | ||
955 | static inline void *____cache_alloc_node(struct kmem_cache *cachep, | 1089 | static inline void *____cache_alloc_node(struct kmem_cache *cachep, |
956 | gfp_t flags, int nodeid) | 1090 | gfp_t flags, int nodeid, int *this_cpu) |
957 | { | 1091 | { |
958 | return NULL; | 1092 | return NULL; |
959 | } | 1093 | } |
960 | 1094 | ||
961 | #else /* CONFIG_NUMA */ | 1095 | #else /* CONFIG_NUMA */ |
962 | 1096 | ||
963 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 1097 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int, int *); |
964 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | 1098 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t, int *); |
965 | 1099 | ||
966 | static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) | 1100 | static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
967 | { | 1101 | { |
@@ -1002,7 +1136,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) | |||
1002 | } | 1136 | } |
1003 | 1137 | ||
1004 | static void __drain_alien_cache(struct kmem_cache *cachep, | 1138 | static void __drain_alien_cache(struct kmem_cache *cachep, |
1005 | struct array_cache *ac, int node) | 1139 | struct array_cache *ac, int node, |
1140 | int *this_cpu) | ||
1006 | { | 1141 | { |
1007 | struct kmem_list3 *rl3 = cachep->nodelists[node]; | 1142 | struct kmem_list3 *rl3 = cachep->nodelists[node]; |
1008 | 1143 | ||
@@ -1016,7 +1151,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
1016 | if (rl3->shared) | 1151 | if (rl3->shared) |
1017 | transfer_objects(rl3->shared, ac, ac->limit); | 1152 | transfer_objects(rl3->shared, ac, ac->limit); |
1018 | 1153 | ||
1019 | free_block(cachep, ac->entry, ac->avail, node); | 1154 | free_block(cachep, ac->entry, ac->avail, node, this_cpu); |
1020 | ac->avail = 0; | 1155 | ac->avail = 0; |
1021 | spin_unlock(&rl3->list_lock); | 1156 | spin_unlock(&rl3->list_lock); |
1022 | } | 1157 | } |
@@ -1025,38 +1160,42 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
1025 | /* | 1160 | /* |
1026 | * Called from cache_reap() to regularly drain alien caches round robin. | 1161 | * Called from cache_reap() to regularly drain alien caches round robin. |
1027 | */ | 1162 | */ |
1028 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | 1163 | static int |
1164 | reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) | ||
1029 | { | 1165 | { |
1030 | int node = __get_cpu_var(reap_node); | 1166 | int node = per_cpu(reap_node, *this_cpu); |
1031 | 1167 | ||
1032 | if (l3->alien) { | 1168 | if (l3->alien) { |
1033 | struct array_cache *ac = l3->alien[node]; | 1169 | struct array_cache *ac = l3->alien[node]; |
1034 | 1170 | ||
1035 | if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { | 1171 | if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { |
1036 | __drain_alien_cache(cachep, ac, node); | 1172 | __drain_alien_cache(cachep, ac, node, this_cpu); |
1037 | spin_unlock_irq(&ac->lock); | 1173 | spin_unlock_irq(&ac->lock); |
1174 | return 1; | ||
1038 | } | 1175 | } |
1039 | } | 1176 | } |
1177 | return 0; | ||
1040 | } | 1178 | } |
1041 | 1179 | ||
1042 | static void drain_alien_cache(struct kmem_cache *cachep, | 1180 | static void drain_alien_cache(struct kmem_cache *cachep, |
1043 | struct array_cache **alien) | 1181 | struct array_cache **alien) |
1044 | { | 1182 | { |
1045 | int i = 0; | 1183 | int i = 0, this_cpu; |
1046 | struct array_cache *ac; | 1184 | struct array_cache *ac; |
1047 | unsigned long flags; | 1185 | unsigned long flags; |
1048 | 1186 | ||
1049 | for_each_online_node(i) { | 1187 | for_each_online_node(i) { |
1050 | ac = alien[i]; | 1188 | ac = alien[i]; |
1051 | if (ac) { | 1189 | if (ac) { |
1052 | spin_lock_irqsave(&ac->lock, flags); | 1190 | slab_spin_lock_irqsave(&ac->lock, flags, this_cpu); |
1053 | __drain_alien_cache(cachep, ac, i); | 1191 | __drain_alien_cache(cachep, ac, i, &this_cpu); |
1054 | spin_unlock_irqrestore(&ac->lock, flags); | 1192 | slab_spin_unlock_irqrestore(&ac->lock, flags, this_cpu); |
1055 | } | 1193 | } |
1056 | } | 1194 | } |
1057 | } | 1195 | } |
1058 | 1196 | ||
1059 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1197 | static inline int |
1198 | cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) | ||
1060 | { | 1199 | { |
1061 | struct slab *slabp = virt_to_slab(objp); | 1200 | struct slab *slabp = virt_to_slab(objp); |
1062 | int nodeid = slabp->nodeid; | 1201 | int nodeid = slabp->nodeid; |
@@ -1064,7 +1203,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1064 | struct array_cache *alien = NULL; | 1203 | struct array_cache *alien = NULL; |
1065 | int node; | 1204 | int node; |
1066 | 1205 | ||
1067 | node = numa_node_id(); | 1206 | node = cpu_to_node(*this_cpu); |
1068 | 1207 | ||
1069 | /* | 1208 | /* |
1070 | * Make sure we are not freeing a object from another node to the array | 1209 | * Make sure we are not freeing a object from another node to the array |
@@ -1080,20 +1219,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1080 | spin_lock(&alien->lock); | 1219 | spin_lock(&alien->lock); |
1081 | if (unlikely(alien->avail == alien->limit)) { | 1220 | if (unlikely(alien->avail == alien->limit)) { |
1082 | STATS_INC_ACOVERFLOW(cachep); | 1221 | STATS_INC_ACOVERFLOW(cachep); |
1083 | __drain_alien_cache(cachep, alien, nodeid); | 1222 | __drain_alien_cache(cachep, alien, nodeid, this_cpu); |
1084 | } | 1223 | } |
1085 | alien->entry[alien->avail++] = objp; | 1224 | alien->entry[alien->avail++] = objp; |
1086 | spin_unlock(&alien->lock); | 1225 | spin_unlock(&alien->lock); |
1087 | } else { | 1226 | } else { |
1088 | spin_lock(&(cachep->nodelists[nodeid])->list_lock); | 1227 | spin_lock(&(cachep->nodelists[nodeid])->list_lock); |
1089 | free_block(cachep, &objp, 1, nodeid); | 1228 | free_block(cachep, &objp, 1, nodeid, this_cpu); |
1090 | spin_unlock(&(cachep->nodelists[nodeid])->list_lock); | 1229 | spin_unlock(&(cachep->nodelists[nodeid])->list_lock); |
1091 | } | 1230 | } |
1092 | return 1; | 1231 | return 1; |
1093 | } | 1232 | } |
1094 | #endif | 1233 | #endif |
1095 | 1234 | ||
1096 | static void __cpuinit cpuup_canceled(long cpu) | 1235 | static void __cpuinit cpuup_canceled(int cpu) |
1097 | { | 1236 | { |
1098 | struct kmem_cache *cachep; | 1237 | struct kmem_cache *cachep; |
1099 | struct kmem_list3 *l3 = NULL; | 1238 | struct kmem_list3 *l3 = NULL; |
@@ -1104,6 +1243,7 @@ static void __cpuinit cpuup_canceled(long cpu) | |||
1104 | struct array_cache *nc; | 1243 | struct array_cache *nc; |
1105 | struct array_cache *shared; | 1244 | struct array_cache *shared; |
1106 | struct array_cache **alien; | 1245 | struct array_cache **alien; |
1246 | int orig_cpu = cpu; | ||
1107 | 1247 | ||
1108 | /* cpu is dead; no one can alloc from it. */ | 1248 | /* cpu is dead; no one can alloc from it. */ |
1109 | nc = cachep->array[cpu]; | 1249 | nc = cachep->array[cpu]; |
@@ -1118,7 +1258,8 @@ static void __cpuinit cpuup_canceled(long cpu) | |||
1118 | /* Free limit for this kmem_list3 */ | 1258 | /* Free limit for this kmem_list3 */ |
1119 | l3->free_limit -= cachep->batchcount; | 1259 | l3->free_limit -= cachep->batchcount; |
1120 | if (nc) | 1260 | if (nc) |
1121 | free_block(cachep, nc->entry, nc->avail, node); | 1261 | free_block(cachep, nc->entry, nc->avail, node, |
1262 | &cpu); | ||
1122 | 1263 | ||
1123 | if (!cpus_empty(*mask)) { | 1264 | if (!cpus_empty(*mask)) { |
1124 | spin_unlock_irq(&l3->list_lock); | 1265 | spin_unlock_irq(&l3->list_lock); |
@@ -1128,7 +1269,7 @@ static void __cpuinit cpuup_canceled(long cpu) | |||
1128 | shared = l3->shared; | 1269 | shared = l3->shared; |
1129 | if (shared) { | 1270 | if (shared) { |
1130 | free_block(cachep, shared->entry, | 1271 | free_block(cachep, shared->entry, |
1131 | shared->avail, node); | 1272 | shared->avail, node, &cpu); |
1132 | l3->shared = NULL; | 1273 | l3->shared = NULL; |
1133 | } | 1274 | } |
1134 | 1275 | ||
@@ -1144,6 +1285,7 @@ static void __cpuinit cpuup_canceled(long cpu) | |||
1144 | } | 1285 | } |
1145 | free_array_cache: | 1286 | free_array_cache: |
1146 | kfree(nc); | 1287 | kfree(nc); |
1288 | BUG_ON(cpu != orig_cpu); | ||
1147 | } | 1289 | } |
1148 | /* | 1290 | /* |
1149 | * In the previous loop, all the objects were freed to | 1291 | * In the previous loop, all the objects were freed to |
@@ -1158,7 +1300,7 @@ free_array_cache: | |||
1158 | } | 1300 | } |
1159 | } | 1301 | } |
1160 | 1302 | ||
1161 | static int __cpuinit cpuup_prepare(long cpu) | 1303 | static int __cpuinit cpuup_prepare(int cpu) |
1162 | { | 1304 | { |
1163 | struct kmem_cache *cachep; | 1305 | struct kmem_cache *cachep; |
1164 | struct kmem_list3 *l3 = NULL; | 1306 | struct kmem_list3 *l3 = NULL; |
@@ -1266,10 +1408,19 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1266 | long cpu = (long)hcpu; | 1408 | long cpu = (long)hcpu; |
1267 | int err = 0; | 1409 | int err = 0; |
1268 | 1410 | ||
1411 | |||
1269 | switch (action) { | 1412 | switch (action) { |
1270 | case CPU_UP_PREPARE: | 1413 | case CPU_UP_PREPARE: |
1271 | case CPU_UP_PREPARE_FROZEN: | 1414 | case CPU_UP_PREPARE_FROZEN: |
1272 | mutex_lock(&cache_chain_mutex); | 1415 | mutex_lock(&cache_chain_mutex); |
1416 | /* | ||
1417 | * lock/unlock cycle to push any holders away -- no new ones | ||
1418 | * can come in due to the cpu still being offline. | ||
1419 | * | ||
1420 | * XXX -- weird case anyway, can it happen? | ||
1421 | */ | ||
1422 | slab_irq_disable_this_rt(cpu); | ||
1423 | slab_irq_enable_rt(cpu); | ||
1273 | err = cpuup_prepare(cpu); | 1424 | err = cpuup_prepare(cpu); |
1274 | mutex_unlock(&cache_chain_mutex); | 1425 | mutex_unlock(&cache_chain_mutex); |
1275 | break; | 1426 | break; |
@@ -1309,10 +1460,14 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1309 | case CPU_UP_CANCELED: | 1460 | case CPU_UP_CANCELED: |
1310 | case CPU_UP_CANCELED_FROZEN: | 1461 | case CPU_UP_CANCELED_FROZEN: |
1311 | mutex_lock(&cache_chain_mutex); | 1462 | mutex_lock(&cache_chain_mutex); |
1463 | slab_irq_disable_this_rt(cpu); | ||
1312 | cpuup_canceled(cpu); | 1464 | cpuup_canceled(cpu); |
1465 | slab_irq_enable_rt(cpu); | ||
1313 | mutex_unlock(&cache_chain_mutex); | 1466 | mutex_unlock(&cache_chain_mutex); |
1314 | break; | 1467 | break; |
1315 | } | 1468 | } |
1469 | |||
1470 | |||
1316 | return err ? NOTIFY_BAD : NOTIFY_OK; | 1471 | return err ? NOTIFY_BAD : NOTIFY_OK; |
1317 | } | 1472 | } |
1318 | 1473 | ||
@@ -1370,6 +1525,12 @@ void __init kmem_cache_init(void) | |||
1370 | int order; | 1525 | int order; |
1371 | int node; | 1526 | int node; |
1372 | 1527 | ||
1528 | #ifdef CONFIG_PREEMPT_RT | ||
1529 | for_each_possible_cpu(i) { | ||
1530 | INIT_LIST_HEAD(&__get_cpu_var_locked(slab, i)); | ||
1531 | } | ||
1532 | #endif | ||
1533 | |||
1373 | if (num_possible_nodes() == 1) | 1534 | if (num_possible_nodes() == 1) |
1374 | use_alien_caches = 0; | 1535 | use_alien_caches = 0; |
1375 | 1536 | ||
@@ -1499,32 +1660,34 @@ void __init kmem_cache_init(void) | |||
1499 | /* 4) Replace the bootstrap head arrays */ | 1660 | /* 4) Replace the bootstrap head arrays */ |
1500 | { | 1661 | { |
1501 | struct array_cache *ptr; | 1662 | struct array_cache *ptr; |
1663 | int cpu = smp_processor_id(); | ||
1502 | 1664 | ||
1503 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); | 1665 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1504 | 1666 | ||
1505 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); | 1667 | BUG_ON(cpu_cache_get(&cache_cache, cpu) != |
1506 | memcpy(ptr, cpu_cache_get(&cache_cache), | 1668 | &initarray_cache.cache); |
1669 | memcpy(ptr, cpu_cache_get(&cache_cache, cpu), | ||
1507 | sizeof(struct arraycache_init)); | 1670 | sizeof(struct arraycache_init)); |
1508 | /* | 1671 | /* |
1509 | * Do not assume that spinlocks can be initialized via memcpy: | 1672 | * Do not assume that spinlocks can be initialized via memcpy: |
1510 | */ | 1673 | */ |
1511 | spin_lock_init(&ptr->lock); | 1674 | spin_lock_init(&ptr->lock); |
1512 | 1675 | ||
1513 | cache_cache.array[smp_processor_id()] = ptr; | 1676 | cache_cache.array[cpu] = ptr; |
1514 | 1677 | ||
1515 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); | 1678 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1516 | 1679 | ||
1517 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) | 1680 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu) |
1518 | != &initarray_generic.cache); | 1681 | != &initarray_generic.cache); |
1519 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), | 1682 | memcpy(ptr, |
1683 | cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu), | ||
1520 | sizeof(struct arraycache_init)); | 1684 | sizeof(struct arraycache_init)); |
1521 | /* | 1685 | /* |
1522 | * Do not assume that spinlocks can be initialized via memcpy: | 1686 | * Do not assume that spinlocks can be initialized via memcpy: |
1523 | */ | 1687 | */ |
1524 | spin_lock_init(&ptr->lock); | 1688 | spin_lock_init(&ptr->lock); |
1525 | 1689 | ||
1526 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = | 1690 | malloc_sizes[INDEX_AC].cs_cachep->array[cpu] = ptr; |
1527 | ptr; | ||
1528 | } | 1691 | } |
1529 | /* 5) Replace the bootstrap kmem_list3's */ | 1692 | /* 5) Replace the bootstrap kmem_list3's */ |
1530 | { | 1693 | { |
@@ -1642,12 +1805,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1642 | /* | 1805 | /* |
1643 | * Interface to system's page release. | 1806 | * Interface to system's page release. |
1644 | */ | 1807 | */ |
1645 | static void kmem_freepages(struct kmem_cache *cachep, void *addr) | 1808 | static void kmem_freepages(struct kmem_cache *cachep, void *addr, int cpu) |
1646 | { | 1809 | { |
1647 | unsigned long i = (1 << cachep->gfporder); | 1810 | unsigned long i = (1 << cachep->gfporder); |
1648 | struct page *page = virt_to_page(addr); | 1811 | struct page *page, *basepage = virt_to_page(addr); |
1649 | const unsigned long nr_freed = i; | 1812 | const unsigned long nr_freed = i; |
1650 | 1813 | ||
1814 | page = basepage; | ||
1815 | |||
1651 | kmemcheck_free_shadow(page, cachep->gfporder); | 1816 | kmemcheck_free_shadow(page, cachep->gfporder); |
1652 | 1817 | ||
1653 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1818 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
@@ -1656,6 +1821,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1656 | else | 1821 | else |
1657 | sub_zone_page_state(page_zone(page), | 1822 | sub_zone_page_state(page_zone(page), |
1658 | NR_SLAB_UNRECLAIMABLE, nr_freed); | 1823 | NR_SLAB_UNRECLAIMABLE, nr_freed); |
1824 | |||
1659 | while (i--) { | 1825 | while (i--) { |
1660 | BUG_ON(!PageSlab(page)); | 1826 | BUG_ON(!PageSlab(page)); |
1661 | __ClearPageSlab(page); | 1827 | __ClearPageSlab(page); |
@@ -1663,6 +1829,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1663 | } | 1829 | } |
1664 | if (current->reclaim_state) | 1830 | if (current->reclaim_state) |
1665 | current->reclaim_state->reclaimed_slab += nr_freed; | 1831 | current->reclaim_state->reclaimed_slab += nr_freed; |
1832 | |||
1833 | #ifdef CONFIG_PREEMPT_RT | ||
1834 | if (cpu >= 0) { | ||
1835 | basepage->index = cachep->gfporder; | ||
1836 | list_add(&basepage->lru, &__get_cpu_var_locked(slab, cpu)); | ||
1837 | } else | ||
1838 | #endif | ||
1666 | free_pages((unsigned long)addr, cachep->gfporder); | 1839 | free_pages((unsigned long)addr, cachep->gfporder); |
1667 | } | 1840 | } |
1668 | 1841 | ||
@@ -1671,7 +1844,7 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
1671 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; | 1844 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; |
1672 | struct kmem_cache *cachep = slab_rcu->cachep; | 1845 | struct kmem_cache *cachep = slab_rcu->cachep; |
1673 | 1846 | ||
1674 | kmem_freepages(cachep, slab_rcu->addr); | 1847 | kmem_freepages(cachep, slab_rcu->addr, -1); |
1675 | if (OFF_SLAB(cachep)) | 1848 | if (OFF_SLAB(cachep)) |
1676 | kmem_cache_free(cachep->slabp_cache, slab_rcu); | 1849 | kmem_cache_free(cachep->slabp_cache, slab_rcu); |
1677 | } | 1850 | } |
@@ -1691,7 +1864,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | |||
1691 | 1864 | ||
1692 | *addr++ = 0x12345678; | 1865 | *addr++ = 0x12345678; |
1693 | *addr++ = caller; | 1866 | *addr++ = caller; |
1694 | *addr++ = smp_processor_id(); | 1867 | *addr++ = raw_smp_processor_id(); |
1695 | size -= 3 * sizeof(unsigned long); | 1868 | size -= 3 * sizeof(unsigned long); |
1696 | { | 1869 | { |
1697 | unsigned long *sptr = &caller; | 1870 | unsigned long *sptr = &caller; |
@@ -1881,6 +2054,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
1881 | } | 2054 | } |
1882 | #endif | 2055 | #endif |
1883 | 2056 | ||
2057 | static void | ||
2058 | __cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu); | ||
2059 | |||
2060 | |||
1884 | /** | 2061 | /** |
1885 | * slab_destroy - destroy and release all objects in a slab | 2062 | * slab_destroy - destroy and release all objects in a slab |
1886 | * @cachep: cache pointer being destroyed | 2063 | * @cachep: cache pointer being destroyed |
@@ -1890,7 +2067,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
1890 | * Before calling the slab must have been unlinked from the cache. The | 2067 | * Before calling the slab must have been unlinked from the cache. The |
1891 | * cache-lock is not held/needed. | 2068 | * cache-lock is not held/needed. |
1892 | */ | 2069 | */ |
1893 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 2070 | static void |
2071 | slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu) | ||
1894 | { | 2072 | { |
1895 | void *addr = slabp->s_mem - slabp->colouroff; | 2073 | void *addr = slabp->s_mem - slabp->colouroff; |
1896 | 2074 | ||
@@ -1903,9 +2081,13 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | |||
1903 | slab_rcu->addr = addr; | 2081 | slab_rcu->addr = addr; |
1904 | call_rcu(&slab_rcu->head, kmem_rcu_free); | 2082 | call_rcu(&slab_rcu->head, kmem_rcu_free); |
1905 | } else { | 2083 | } else { |
1906 | kmem_freepages(cachep, addr); | 2084 | kmem_freepages(cachep, addr, *this_cpu); |
1907 | if (OFF_SLAB(cachep)) | 2085 | if (OFF_SLAB(cachep)) { |
1908 | kmem_cache_free(cachep->slabp_cache, slabp); | 2086 | if (this_cpu) |
2087 | __cache_free(cachep->slabp_cache, slabp, this_cpu); | ||
2088 | else | ||
2089 | kmem_cache_free(cachep->slabp_cache, slabp); | ||
2090 | } | ||
1909 | } | 2091 | } |
1910 | } | 2092 | } |
1911 | 2093 | ||
@@ -2002,6 +2184,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2002 | 2184 | ||
2003 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | 2185 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) |
2004 | { | 2186 | { |
2187 | int this_cpu; | ||
2188 | |||
2005 | if (g_cpucache_up == FULL) | 2189 | if (g_cpucache_up == FULL) |
2006 | return enable_cpucache(cachep, gfp); | 2190 | return enable_cpucache(cachep, gfp); |
2007 | 2191 | ||
@@ -2045,10 +2229,12 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
2045 | jiffies + REAPTIMEOUT_LIST3 + | 2229 | jiffies + REAPTIMEOUT_LIST3 + |
2046 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 2230 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
2047 | 2231 | ||
2048 | cpu_cache_get(cachep)->avail = 0; | 2232 | this_cpu = raw_smp_processor_id(); |
2049 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | 2233 | |
2050 | cpu_cache_get(cachep)->batchcount = 1; | 2234 | cpu_cache_get(cachep, this_cpu)->avail = 0; |
2051 | cpu_cache_get(cachep)->touched = 0; | 2235 | cpu_cache_get(cachep, this_cpu)->limit = BOOT_CPUCACHE_ENTRIES; |
2236 | cpu_cache_get(cachep, this_cpu)->batchcount = 1; | ||
2237 | cpu_cache_get(cachep, this_cpu)->touched = 0; | ||
2052 | cachep->batchcount = 1; | 2238 | cachep->batchcount = 1; |
2053 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | 2239 | cachep->limit = BOOT_CPUCACHE_ENTRIES; |
2054 | return 0; | 2240 | return 0; |
@@ -2358,19 +2544,19 @@ EXPORT_SYMBOL(kmem_cache_create); | |||
2358 | #if DEBUG | 2544 | #if DEBUG |
2359 | static void check_irq_off(void) | 2545 | static void check_irq_off(void) |
2360 | { | 2546 | { |
2547 | /* | ||
2548 | * On PREEMPT_RT we use locks to protect the per-CPU lists, | ||
2549 | * and keep interrupts enabled. | ||
2550 | */ | ||
2551 | #ifndef CONFIG_PREEMPT_RT | ||
2361 | BUG_ON(!irqs_disabled()); | 2552 | BUG_ON(!irqs_disabled()); |
2553 | #endif | ||
2362 | } | 2554 | } |
2363 | 2555 | ||
2364 | static void check_irq_on(void) | 2556 | static void check_irq_on(void) |
2365 | { | 2557 | { |
2558 | #ifndef CONFIG_PREEMPT_RT | ||
2366 | BUG_ON(irqs_disabled()); | 2559 | BUG_ON(irqs_disabled()); |
2367 | } | ||
2368 | |||
2369 | static void check_spinlock_acquired(struct kmem_cache *cachep) | ||
2370 | { | ||
2371 | #ifdef CONFIG_SMP | ||
2372 | check_irq_off(); | ||
2373 | assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); | ||
2374 | #endif | 2560 | #endif |
2375 | } | 2561 | } |
2376 | 2562 | ||
@@ -2385,34 +2571,67 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | |||
2385 | #else | 2571 | #else |
2386 | #define check_irq_off() do { } while(0) | 2572 | #define check_irq_off() do { } while(0) |
2387 | #define check_irq_on() do { } while(0) | 2573 | #define check_irq_on() do { } while(0) |
2388 | #define check_spinlock_acquired(x) do { } while(0) | ||
2389 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2574 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
2390 | #endif | 2575 | #endif |
2391 | 2576 | ||
2392 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | 2577 | static int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
2393 | struct array_cache *ac, | 2578 | struct array_cache *ac, |
2394 | int force, int node); | 2579 | int force, int node); |
2395 | 2580 | ||
2396 | static void do_drain(void *arg) | 2581 | static void __do_drain(void *arg, int this_cpu) |
2397 | { | 2582 | { |
2398 | struct kmem_cache *cachep = arg; | 2583 | struct kmem_cache *cachep = arg; |
2584 | int node = cpu_to_node(this_cpu); | ||
2399 | struct array_cache *ac; | 2585 | struct array_cache *ac; |
2400 | int node = numa_node_id(); | ||
2401 | 2586 | ||
2402 | check_irq_off(); | 2587 | check_irq_off(); |
2403 | ac = cpu_cache_get(cachep); | 2588 | ac = cpu_cache_get(cachep, this_cpu); |
2404 | spin_lock(&cachep->nodelists[node]->list_lock); | 2589 | spin_lock(&cachep->nodelists[node]->list_lock); |
2405 | free_block(cachep, ac->entry, ac->avail, node); | 2590 | free_block(cachep, ac->entry, ac->avail, node, &this_cpu); |
2406 | spin_unlock(&cachep->nodelists[node]->list_lock); | 2591 | spin_unlock(&cachep->nodelists[node]->list_lock); |
2407 | ac->avail = 0; | 2592 | ac->avail = 0; |
2408 | } | 2593 | } |
2409 | 2594 | ||
2595 | #ifdef CONFIG_PREEMPT_RT | ||
2596 | static void do_drain(void *arg, int this_cpu) | ||
2597 | { | ||
2598 | __do_drain(arg, this_cpu); | ||
2599 | } | ||
2600 | #else | ||
2601 | static void do_drain(void *arg) | ||
2602 | { | ||
2603 | __do_drain(arg, smp_processor_id()); | ||
2604 | } | ||
2605 | #endif | ||
2606 | |||
2607 | #ifdef CONFIG_PREEMPT_RT | ||
2608 | /* | ||
2609 | * execute func() for all CPUs. On PREEMPT_RT we dont actually have | ||
2610 | * to run on the remote CPUs - we only have to take their CPU-locks. | ||
2611 | * (This is a rare operation, so cacheline bouncing is not an issue.) | ||
2612 | */ | ||
2613 | static void | ||
2614 | slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) | ||
2615 | { | ||
2616 | unsigned int i; | ||
2617 | |||
2618 | check_irq_on(); | ||
2619 | for_each_online_cpu(i) { | ||
2620 | spin_lock(&__get_cpu_lock(slab, i)); | ||
2621 | func(arg, i); | ||
2622 | spin_unlock(&__get_cpu_lock(slab, i)); | ||
2623 | } | ||
2624 | } | ||
2625 | #else | ||
2626 | # define slab_on_each_cpu(func, cachep) on_each_cpu(func, cachep, 1) | ||
2627 | #endif | ||
2628 | |||
2410 | static void drain_cpu_caches(struct kmem_cache *cachep) | 2629 | static void drain_cpu_caches(struct kmem_cache *cachep) |
2411 | { | 2630 | { |
2412 | struct kmem_list3 *l3; | 2631 | struct kmem_list3 *l3; |
2413 | int node; | 2632 | int node; |
2414 | 2633 | ||
2415 | on_each_cpu(do_drain, cachep, 1); | 2634 | slab_on_each_cpu(do_drain, cachep); |
2416 | check_irq_on(); | 2635 | check_irq_on(); |
2417 | for_each_online_node(node) { | 2636 | for_each_online_node(node) { |
2418 | l3 = cachep->nodelists[node]; | 2637 | l3 = cachep->nodelists[node]; |
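slab_on_each_cpu() above is the RT replacement for on_each_cpu(): because per-CPU slab state is now protected by a lock rather than by running on that CPU with interrupts off, draining a remote CPU's array cache only needs that CPU's lock, no IPI, and the callback may sleep. The split do_drain()/__do_drain() wrappers exist purely because the callback signature gains a this_cpu argument on RT. A self-contained sketch of the idea, mirroring the toy slab_lock[] from the earlier model (illustrative only, names invented):

#include <pthread.h>

#define NR_CPUS 4

static pthread_mutex_t slab_lock[NR_CPUS] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* Run 'func' once per CPU from the current context, holding that CPU's
 * lock so the callback sees a stable view of the remote per-CPU state,
 * just as do_drain() running locally with interrupts off would. */
static void slab_on_each_cpu_model(void (*func)(void *arg, int cpu), void *arg)
{
    int cpu;

    for (cpu = 0; cpu < NR_CPUS; cpu++) {
        pthread_mutex_lock(&slab_lock[cpu]);
        func(arg, cpu);
        pthread_mutex_unlock(&slab_lock[cpu]);
    }
}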
@@ -2437,16 +2656,16 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2437 | struct kmem_list3 *l3, int tofree) | 2656 | struct kmem_list3 *l3, int tofree) |
2438 | { | 2657 | { |
2439 | struct list_head *p; | 2658 | struct list_head *p; |
2440 | int nr_freed; | 2659 | int nr_freed, this_cpu; |
2441 | struct slab *slabp; | 2660 | struct slab *slabp; |
2442 | 2661 | ||
2443 | nr_freed = 0; | 2662 | nr_freed = 0; |
2444 | while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { | 2663 | while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { |
2445 | 2664 | ||
2446 | spin_lock_irq(&l3->list_lock); | 2665 | slab_spin_lock_irq(&l3->list_lock, this_cpu); |
2447 | p = l3->slabs_free.prev; | 2666 | p = l3->slabs_free.prev; |
2448 | if (p == &l3->slabs_free) { | 2667 | if (p == &l3->slabs_free) { |
2449 | spin_unlock_irq(&l3->list_lock); | 2668 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
2450 | goto out; | 2669 | goto out; |
2451 | } | 2670 | } |
2452 | 2671 | ||
@@ -2455,13 +2674,9 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2455 | BUG_ON(slabp->inuse); | 2674 | BUG_ON(slabp->inuse); |
2456 | #endif | 2675 | #endif |
2457 | list_del(&slabp->list); | 2676 | list_del(&slabp->list); |
2458 | /* | ||
2459 | * Safe to drop the lock. The slab is no longer linked | ||
2460 | * to the cache. | ||
2461 | */ | ||
2462 | l3->free_objects -= cache->num; | 2677 | l3->free_objects -= cache->num; |
2463 | spin_unlock_irq(&l3->list_lock); | 2678 | slab_destroy(cache, slabp, &this_cpu); |
2464 | slab_destroy(cache, slabp); | 2679 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
2465 | nr_freed++; | 2680 | nr_freed++; |
2466 | } | 2681 | } |
2467 | out: | 2682 | out: |
@@ -2725,8 +2940,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | |||
2725 | * Grow (by 1) the number of slabs within a cache. This is called by | 2940 | * Grow (by 1) the number of slabs within a cache. This is called by |
2726 | * kmem_cache_alloc() when there are no active objs left in a cache. | 2941 | * kmem_cache_alloc() when there are no active objs left in a cache. |
2727 | */ | 2942 | */ |
2728 | static int cache_grow(struct kmem_cache *cachep, | 2943 | static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
2729 | gfp_t flags, int nodeid, void *objp) | 2944 | void *objp, int *this_cpu) |
2730 | { | 2945 | { |
2731 | struct slab *slabp; | 2946 | struct slab *slabp; |
2732 | size_t offset; | 2947 | size_t offset; |
@@ -2754,8 +2969,7 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2754 | 2969 | ||
2755 | offset *= cachep->colour_off; | 2970 | offset *= cachep->colour_off; |
2756 | 2971 | ||
2757 | if (local_flags & __GFP_WAIT) | 2972 | slab_irq_enable_GFP_WAIT(local_flags, this_cpu); |
2758 | local_irq_enable(); | ||
2759 | 2973 | ||
2760 | /* | 2974 | /* |
2761 | * The test for missing atomic flag is performed here, rather than | 2975 | * The test for missing atomic flag is performed here, rather than |
@@ -2784,8 +2998,8 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2784 | 2998 | ||
2785 | cache_init_objs(cachep, slabp); | 2999 | cache_init_objs(cachep, slabp); |
2786 | 3000 | ||
2787 | if (local_flags & __GFP_WAIT) | 3001 | slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
2788 | local_irq_disable(); | 3002 | |
2789 | check_irq_off(); | 3003 | check_irq_off(); |
2790 | spin_lock(&l3->list_lock); | 3004 | spin_lock(&l3->list_lock); |
2791 | 3005 | ||
@@ -2796,10 +3010,9 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2796 | spin_unlock(&l3->list_lock); | 3010 | spin_unlock(&l3->list_lock); |
2797 | return 1; | 3011 | return 1; |
2798 | opps1: | 3012 | opps1: |
2799 | kmem_freepages(cachep, objp); | 3013 | kmem_freepages(cachep, objp, -1); |
2800 | failed: | 3014 | failed: |
2801 | if (local_flags & __GFP_WAIT) | 3015 | slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
2802 | local_irq_disable(); | ||
2803 | return 0; | 3016 | return 0; |
2804 | } | 3017 | } |
2805 | 3018 | ||
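
cache_grow() now brackets the page allocation with slab_irq_enable_GFP_WAIT()/slab_irq_disable_GFP_WAIT() instead of the removed open-coded `if (local_flags & __GFP_WAIT) local_irq_enable()/disable()` pair. The helper definitions are not part of this hunk; from the removed lines the !PREEMPT_RT variant must reduce to exactly that conditional, while on PREEMPT_RT it would presumably drop and re-take the per-CPU slab protection recorded via *this_cpu instead (that RT side is an assumption here). A stubbed, user-space sketch of the !PREEMPT_RT shape only, with local_irq_*() and the __GFP_WAIT value standing in for the real ones:

/* Sketch of the !PREEMPT_RT side of the helpers, inferred from the
 * removed open-coded lines; local_irq_*() are stubbed so the sketch
 * builds in user space, and __GFP_WAIT's value is illustrative. */
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_WAIT	0x10u

void local_irq_enable(void)  { puts("irqs on");  }
void local_irq_disable(void) { puts("irqs off"); }

#define slab_irq_enable_GFP_WAIT(flags, this_cpu)	\
	do {						\
		(void)(this_cpu);			\
		if ((flags) & __GFP_WAIT)		\
			local_irq_enable();		\
	} while (0)

#define slab_irq_disable_GFP_WAIT(flags, this_cpu)	\
	do {						\
		(void)(this_cpu);			\
		if ((flags) & __GFP_WAIT)		\
			local_irq_disable();		\
	} while (0)

int main(void)
{
	gfp_t local_flags = __GFP_WAIT;
	int this_cpu = 0;

	slab_irq_enable_GFP_WAIT(local_flags, &this_cpu);
	/* ... the real code allocates pages here ... */
	slab_irq_disable_GFP_WAIT(local_flags, &this_cpu);
	return 0;
}

Folding the __GFP_WAIT check into a helper lets the same call sites serve both configurations without further #ifdefs.
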
@@ -2921,7 +3134,8 @@ bad: | |||
2921 | #define check_slabp(x,y) do { } while(0) | 3134 | #define check_slabp(x,y) do { } while(0) |
2922 | #endif | 3135 | #endif |
2923 | 3136 | ||
2924 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | 3137 | static void * |
3138 | cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) | ||
2925 | { | 3139 | { |
2926 | int batchcount; | 3140 | int batchcount; |
2927 | struct kmem_list3 *l3; | 3141 | struct kmem_list3 *l3; |
@@ -2931,7 +3145,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2931 | retry: | 3145 | retry: |
2932 | check_irq_off(); | 3146 | check_irq_off(); |
2933 | node = numa_node_id(); | 3147 | node = numa_node_id(); |
2934 | ac = cpu_cache_get(cachep); | 3148 | ac = cpu_cache_get(cachep, *this_cpu); |
2935 | batchcount = ac->batchcount; | 3149 | batchcount = ac->batchcount; |
2936 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 3150 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
2937 | /* | 3151 | /* |
@@ -2941,7 +3155,7 @@ retry: | |||
2941 | */ | 3155 | */ |
2942 | batchcount = BATCHREFILL_LIMIT; | 3156 | batchcount = BATCHREFILL_LIMIT; |
2943 | } | 3157 | } |
2944 | l3 = cachep->nodelists[node]; | 3158 | l3 = cachep->nodelists[cpu_to_node(*this_cpu)]; |
2945 | 3159 | ||
2946 | BUG_ON(ac->avail > 0 || !l3); | 3160 | BUG_ON(ac->avail > 0 || !l3); |
2947 | spin_lock(&l3->list_lock); | 3161 | spin_lock(&l3->list_lock); |
@@ -2964,7 +3178,7 @@ retry: | |||
2964 | 3178 | ||
2965 | slabp = list_entry(entry, struct slab, list); | 3179 | slabp = list_entry(entry, struct slab, list); |
2966 | check_slabp(cachep, slabp); | 3180 | check_slabp(cachep, slabp); |
2967 | check_spinlock_acquired(cachep); | 3181 | check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu)); |
2968 | 3182 | ||
2969 | /* | 3183 | /* |
2970 | * The slab was either on partial or free list so | 3184 | * The slab was either on partial or free list so |
@@ -2978,8 +3192,9 @@ retry: | |||
2978 | STATS_INC_ACTIVE(cachep); | 3192 | STATS_INC_ACTIVE(cachep); |
2979 | STATS_SET_HIGH(cachep); | 3193 | STATS_SET_HIGH(cachep); |
2980 | 3194 | ||
2981 | ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, | 3195 | ac->entry[ac->avail++] = |
2982 | node); | 3196 | slab_get_obj(cachep, slabp, |
3197 | cpu_to_node(*this_cpu)); | ||
2983 | } | 3198 | } |
2984 | check_slabp(cachep, slabp); | 3199 | check_slabp(cachep, slabp); |
2985 | 3200 | ||
@@ -2998,10 +3213,10 @@ alloc_done: | |||
2998 | 3213 | ||
2999 | if (unlikely(!ac->avail)) { | 3214 | if (unlikely(!ac->avail)) { |
3000 | int x; | 3215 | int x; |
3001 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); | 3216 | x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu); |
3002 | 3217 | ||
3003 | /* cache_grow can reenable interrupts, then ac could change. */ | 3218 | /* cache_grow can reenable interrupts, then ac could change. */ |
3004 | ac = cpu_cache_get(cachep); | 3219 | ac = cpu_cache_get(cachep, *this_cpu); |
3005 | if (!x && ac->avail == 0) /* no objects in sight? abort */ | 3220 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
3006 | return NULL; | 3221 | return NULL; |
3007 | 3222 | ||
@@ -3088,21 +3303,22 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) | |||
3088 | return should_failslab(obj_size(cachep), flags); | 3303 | return should_failslab(obj_size(cachep), flags); |
3089 | } | 3304 | } |
3090 | 3305 | ||
3091 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3306 | static inline void * |
3307 | ____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) | ||
3092 | { | 3308 | { |
3093 | void *objp; | 3309 | void *objp; |
3094 | struct array_cache *ac; | 3310 | struct array_cache *ac; |
3095 | 3311 | ||
3096 | check_irq_off(); | 3312 | check_irq_off(); |
3097 | 3313 | ||
3098 | ac = cpu_cache_get(cachep); | 3314 | ac = cpu_cache_get(cachep, *this_cpu); |
3099 | if (likely(ac->avail)) { | 3315 | if (likely(ac->avail)) { |
3100 | STATS_INC_ALLOCHIT(cachep); | 3316 | STATS_INC_ALLOCHIT(cachep); |
3101 | ac->touched = 1; | 3317 | ac->touched = 1; |
3102 | objp = ac->entry[--ac->avail]; | 3318 | objp = ac->entry[--ac->avail]; |
3103 | } else { | 3319 | } else { |
3104 | STATS_INC_ALLOCMISS(cachep); | 3320 | STATS_INC_ALLOCMISS(cachep); |
3105 | objp = cache_alloc_refill(cachep, flags); | 3321 | objp = cache_alloc_refill(cachep, flags, this_cpu); |
3106 | } | 3322 | } |
3107 | /* | 3323 | /* |
3108 | * To avoid a false negative, if an object that is in one of the | 3324 | * To avoid a false negative, if an object that is in one of the |
@@ -3120,7 +3336,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3120 | * If we are in_interrupt, then process context, including cpusets and | 3336 | * If we are in_interrupt, then process context, including cpusets and |
3121 | * mempolicy, may not apply and should not be used for allocation policy. | 3337 | * mempolicy, may not apply and should not be used for allocation policy. |
3122 | */ | 3338 | */ |
3123 | static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | 3339 | static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags, |
3340 | int *this_cpu) | ||
3124 | { | 3341 | { |
3125 | int nid_alloc, nid_here; | 3342 | int nid_alloc, nid_here; |
3126 | 3343 | ||
@@ -3132,7 +3349,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3132 | else if (current->mempolicy) | 3349 | else if (current->mempolicy) |
3133 | nid_alloc = slab_node(current->mempolicy); | 3350 | nid_alloc = slab_node(current->mempolicy); |
3134 | if (nid_alloc != nid_here) | 3351 | if (nid_alloc != nid_here) |
3135 | return ____cache_alloc_node(cachep, flags, nid_alloc); | 3352 | return ____cache_alloc_node(cachep, flags, nid_alloc, this_cpu); |
3136 | return NULL; | 3353 | return NULL; |
3137 | } | 3354 | } |
3138 | 3355 | ||
@@ -3144,7 +3361,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3144 | * allocator to do its reclaim / fallback magic. We then insert the | 3361 | * allocator to do its reclaim / fallback magic. We then insert the |
3145 | * slab into the proper nodelist and then allocate from it. | 3362 | * slab into the proper nodelist and then allocate from it. |
3146 | */ | 3363 | */ |
3147 | static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) | 3364 | static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) |
3148 | { | 3365 | { |
3149 | struct zonelist *zonelist; | 3366 | struct zonelist *zonelist; |
3150 | gfp_t local_flags; | 3367 | gfp_t local_flags; |
@@ -3172,7 +3389,8 @@ retry: | |||
3172 | cache->nodelists[nid] && | 3389 | cache->nodelists[nid] && |
3173 | cache->nodelists[nid]->free_objects) { | 3390 | cache->nodelists[nid]->free_objects) { |
3174 | obj = ____cache_alloc_node(cache, | 3391 | obj = ____cache_alloc_node(cache, |
3175 | flags | GFP_THISNODE, nid); | 3392 | flags | GFP_THISNODE, nid, |
3393 | this_cpu); | ||
3176 | if (obj) | 3394 | if (obj) |
3177 | break; | 3395 | break; |
3178 | } | 3396 | } |
@@ -3185,20 +3403,21 @@ retry: | |||
3185 | * We may trigger various forms of reclaim on the allowed | 3403 | * We may trigger various forms of reclaim on the allowed |
3186 | * set and go into memory reserves if necessary. | 3404 | * set and go into memory reserves if necessary. |
3187 | */ | 3405 | */ |
3188 | if (local_flags & __GFP_WAIT) | 3406 | slab_irq_enable_GFP_WAIT(local_flags, this_cpu); |
3189 | local_irq_enable(); | 3407 | |
3190 | kmem_flagcheck(cache, flags); | 3408 | kmem_flagcheck(cache, flags); |
3191 | obj = kmem_getpages(cache, local_flags, numa_node_id()); | 3409 | obj = kmem_getpages(cache, local_flags, cpu_to_node(*this_cpu)); |
3192 | if (local_flags & __GFP_WAIT) | 3410 | |
3193 | local_irq_disable(); | 3411 | slab_irq_disable_GFP_WAIT(local_flags, this_cpu); |
3412 | |||
3194 | if (obj) { | 3413 | if (obj) { |
3195 | /* | 3414 | /* |
3196 | * Insert into the appropriate per node queues | 3415 | * Insert into the appropriate per node queues |
3197 | */ | 3416 | */ |
3198 | nid = page_to_nid(virt_to_page(obj)); | 3417 | nid = page_to_nid(virt_to_page(obj)); |
3199 | if (cache_grow(cache, flags, nid, obj)) { | 3418 | if (cache_grow(cache, flags, nid, obj, this_cpu)) { |
3200 | obj = ____cache_alloc_node(cache, | 3419 | obj = ____cache_alloc_node(cache, |
3201 | flags | GFP_THISNODE, nid); | 3420 | flags | GFP_THISNODE, nid, this_cpu); |
3202 | if (!obj) | 3421 | if (!obj) |
3203 | /* | 3422 | /* |
3204 | * Another processor may allocate the | 3423 | * Another processor may allocate the |
@@ -3219,7 +3438,7 @@ retry: | |||
3219 | * An interface to enable slab creation on nodeid | 3438 |
3220 | */ | 3439 | */ |
3221 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | 3440 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
3222 | int nodeid) | 3441 | int nodeid, int *this_cpu) |
3223 | { | 3442 | { |
3224 | struct list_head *entry; | 3443 | struct list_head *entry; |
3225 | struct slab *slabp; | 3444 | struct slab *slabp; |
@@ -3267,11 +3486,11 @@ retry: | |||
3267 | 3486 | ||
3268 | must_grow: | 3487 | must_grow: |
3269 | spin_unlock(&l3->list_lock); | 3488 | spin_unlock(&l3->list_lock); |
3270 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); | 3489 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL, this_cpu); |
3271 | if (x) | 3490 | if (x) |
3272 | goto retry; | 3491 | goto retry; |
3273 | 3492 | ||
3274 | return fallback_alloc(cachep, flags); | 3493 | return fallback_alloc(cachep, flags, this_cpu); |
3275 | 3494 | ||
3276 | done: | 3495 | done: |
3277 | return obj; | 3496 | return obj; |
@@ -3294,6 +3513,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3294 | void *caller) | 3513 | void *caller) |
3295 | { | 3514 | { |
3296 | unsigned long save_flags; | 3515 | unsigned long save_flags; |
3516 | int this_cpu, this_node; | ||
3297 | void *ptr; | 3517 | void *ptr; |
3298 | 3518 | ||
3299 | flags &= gfp_allowed_mask; | 3519 | flags &= gfp_allowed_mask; |
@@ -3304,32 +3524,34 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3304 | return NULL; | 3524 | return NULL; |
3305 | 3525 | ||
3306 | cache_alloc_debugcheck_before(cachep, flags); | 3526 | cache_alloc_debugcheck_before(cachep, flags); |
3307 | local_irq_save(save_flags); | ||
3308 | 3527 | ||
3528 | slab_irq_save(save_flags, this_cpu); | ||
3529 | |||
3530 | this_node = cpu_to_node(this_cpu); | ||
3309 | if (unlikely(nodeid == -1)) | 3531 | if (unlikely(nodeid == -1)) |
3310 | nodeid = numa_node_id(); | 3532 | nodeid = this_node; |
3311 | 3533 | ||
3312 | if (unlikely(!cachep->nodelists[nodeid])) { | 3534 | if (unlikely(!cachep->nodelists[nodeid])) { |
3313 | /* Node not bootstrapped yet */ | 3535 | /* Node not bootstrapped yet */ |
3314 | ptr = fallback_alloc(cachep, flags); | 3536 | ptr = fallback_alloc(cachep, flags, &this_cpu); |
3315 | goto out; | 3537 | goto out; |
3316 | } | 3538 | } |
3317 | 3539 | ||
3318 | if (nodeid == numa_node_id()) { | 3540 | if (nodeid == this_node) { |
3319 | /* | 3541 | /* |
3320 | * Use the locally cached objects if possible. | 3542 | * Use the locally cached objects if possible. |
3321 | * However ____cache_alloc does not allow fallback | 3543 | * However ____cache_alloc does not allow fallback |
3322 | * to other nodes. It may fail while we still have | 3544 | * to other nodes. It may fail while we still have |
3323 | * objects on other nodes available. | 3545 | * objects on other nodes available. |
3324 | */ | 3546 | */ |
3325 | ptr = ____cache_alloc(cachep, flags); | 3547 | ptr = ____cache_alloc(cachep, flags, &this_cpu); |
3326 | if (ptr) | 3548 | if (ptr) |
3327 | goto out; | 3549 | goto out; |
3328 | } | 3550 | } |
3329 | /* ___cache_alloc_node can fall back to other nodes */ | 3551 | /* ___cache_alloc_node can fall back to other nodes */ |
3330 | ptr = ____cache_alloc_node(cachep, flags, nodeid); | 3552 | ptr = ____cache_alloc_node(cachep, flags, nodeid, &this_cpu); |
3331 | out: | 3553 | out: |
3332 | local_irq_restore(save_flags); | 3554 | slab_irq_restore(save_flags, this_cpu); |
3333 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3555 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
3334 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, | 3556 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, |
3335 | flags); | 3557 | flags); |
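
The pattern running through all of these allocation paths is that the CPU number is sampled once, where slab_irq_save() establishes the per-CPU protection, and then handed down as `int *this_cpu` instead of each callee re-reading smp_processor_id()/numa_node_id(). On !PREEMPT_RT nothing changes semantically, since interrupts stay disabled across the section; on PREEMPT_RT the task remains preemptible and may migrate, so only the captured value names the CPU whose array_cache and lock are actually held. A user-space model of the idiom (sched_getcpu() and the mutex array are stand-ins, not the patch's primitives):

/* User-space model of the "sample the CPU once under the lock, pass it
 * down" idiom.  sched_getcpu() stands in for smp_processor_id(); the
 * mutex array stands in for the PREEMPT_RT per-CPU slab locks. */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NCPU 64

static pthread_mutex_t cpu_lock[NCPU];
static int cpu_counter[NCPU];

/* like slab_irq_save() on PREEMPT_RT: lock "this" CPU and report it */
static void model_lock(int *this_cpu)
{
	int cpu = sched_getcpu();

	if (cpu < 0)
		cpu = 0;
	*this_cpu = cpu % NCPU;
	pthread_mutex_lock(&cpu_lock[*this_cpu]);
}

static void model_unlock(int this_cpu)
{
	pthread_mutex_unlock(&cpu_lock[this_cpu]);
}

/* the callee uses the captured CPU and never re-reads the current one:
 * after a migration that would name a CPU whose lock is NOT held */
static void bump(int *this_cpu)
{
	cpu_counter[*this_cpu]++;
}

int main(void)
{
	int this_cpu, i;

	for (i = 0; i < NCPU; i++)
		pthread_mutex_init(&cpu_lock[i], NULL);

	model_lock(&this_cpu);
	bump(&this_cpu);
	model_unlock(this_cpu);

	printf("counter[%d] = %d\n", this_cpu, cpu_counter[this_cpu]);
	return 0;
}

The callee deliberately uses the value passed down; re-reading the current CPU after a migration would index per-CPU data whose lock is not held.
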
@@ -3344,33 +3566,33 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3344 | } | 3566 | } |
3345 | 3567 | ||
3346 | static __always_inline void * | 3568 | static __always_inline void * |
3347 | __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) | 3569 | __do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) |
3348 | { | 3570 | { |
3349 | void *objp; | 3571 | void *objp; |
3350 | 3572 | ||
3351 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { | 3573 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { |
3352 | objp = alternate_node_alloc(cache, flags); | 3574 | objp = alternate_node_alloc(cache, flags, this_cpu); |
3353 | if (objp) | 3575 | if (objp) |
3354 | goto out; | 3576 | goto out; |
3355 | } | 3577 | } |
3356 | objp = ____cache_alloc(cache, flags); | ||
3357 | 3578 | ||
3579 | objp = ____cache_alloc(cache, flags, this_cpu); | ||
3358 | /* | 3580 | /* |
3359 | * We may just have run out of memory on the local node. | 3581 | * We may just have run out of memory on the local node. |
3360 | * ____cache_alloc_node() knows how to locate memory on other nodes | 3582 | * ____cache_alloc_node() knows how to locate memory on other nodes |
3361 | */ | 3583 | */ |
3362 | if (!objp) | 3584 | if (!objp) |
3363 | objp = ____cache_alloc_node(cache, flags, numa_node_id()); | 3585 | objp = ____cache_alloc_node(cache, flags, |
3364 | 3586 | cpu_to_node(*this_cpu), this_cpu); | |
3365 | out: | 3587 | out: |
3366 | return objp; | 3588 | return objp; |
3367 | } | 3589 | } |
3368 | #else | 3590 | #else |
3369 | 3591 | ||
3370 | static __always_inline void * | 3592 | static __always_inline void * |
3371 | __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3593 | __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) |
3372 | { | 3594 | { |
3373 | return ____cache_alloc(cachep, flags); | 3595 | return ____cache_alloc(cachep, flags, this_cpu); |
3374 | } | 3596 | } |
3375 | 3597 | ||
3376 | #endif /* CONFIG_NUMA */ | 3598 | #endif /* CONFIG_NUMA */ |
@@ -3379,6 +3601,7 @@ static __always_inline void * | |||
3379 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 3601 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) |
3380 | { | 3602 | { |
3381 | unsigned long save_flags; | 3603 | unsigned long save_flags; |
3604 | int this_cpu; | ||
3382 | void *objp; | 3605 | void *objp; |
3383 | 3606 | ||
3384 | flags &= gfp_allowed_mask; | 3607 | flags &= gfp_allowed_mask; |
@@ -3389,9 +3612,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
3389 | return NULL; | 3612 | return NULL; |
3390 | 3613 | ||
3391 | cache_alloc_debugcheck_before(cachep, flags); | 3614 | cache_alloc_debugcheck_before(cachep, flags); |
3392 | local_irq_save(save_flags); | 3615 | slab_irq_save(save_flags, this_cpu); |
3393 | objp = __do_cache_alloc(cachep, flags); | 3616 | objp = __do_cache_alloc(cachep, flags, &this_cpu); |
3394 | local_irq_restore(save_flags); | 3617 | slab_irq_restore(save_flags, this_cpu); |
3395 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3618 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
3396 | kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, | 3619 | kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, |
3397 | flags); | 3620 | flags); |
@@ -3410,7 +3633,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
3410 | * Caller needs to acquire correct kmem_list's list_lock | 3633 | * Caller needs to acquire correct kmem_list's list_lock |
3411 | */ | 3634 | */ |
3412 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | 3635 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
3413 | int node) | 3636 | int node, int *this_cpu) |
3414 | { | 3637 | { |
3415 | int i; | 3638 | int i; |
3416 | struct kmem_list3 *l3; | 3639 | struct kmem_list3 *l3; |
@@ -3439,7 +3662,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
3439 | * a different cache, refer to comments before | 3662 | * a different cache, refer to comments before |
3440 | * alloc_slabmgmt. | 3663 | * alloc_slabmgmt. |
3441 | */ | 3664 | */ |
3442 | slab_destroy(cachep, slabp); | 3665 | slab_destroy(cachep, slabp, this_cpu); |
3443 | } else { | 3666 | } else { |
3444 | list_add(&slabp->list, &l3->slabs_free); | 3667 | list_add(&slabp->list, &l3->slabs_free); |
3445 | } | 3668 | } |
@@ -3453,11 +3676,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
3453 | } | 3676 | } |
3454 | } | 3677 | } |
3455 | 3678 | ||
3456 | static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | 3679 | static void |
3680 | cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac, int *this_cpu) | ||
3457 | { | 3681 | { |
3458 | int batchcount; | 3682 | int batchcount; |
3459 | struct kmem_list3 *l3; | 3683 | struct kmem_list3 *l3; |
3460 | int node = numa_node_id(); | 3684 | int node = cpu_to_node(*this_cpu); |
3461 | 3685 | ||
3462 | batchcount = ac->batchcount; | 3686 | batchcount = ac->batchcount; |
3463 | #if DEBUG | 3687 | #if DEBUG |
@@ -3479,7 +3703,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
3479 | } | 3703 | } |
3480 | } | 3704 | } |
3481 | 3705 | ||
3482 | free_block(cachep, ac->entry, batchcount, node); | 3706 | free_block(cachep, ac->entry, batchcount, node, this_cpu); |
3483 | free_done: | 3707 | free_done: |
3484 | #if STATS | 3708 | #if STATS |
3485 | { | 3709 | { |
@@ -3508,9 +3732,10 @@ free_done: | |||
3508 | * Release an obj back to its cache. If the obj has a constructed state, it must | 3732 | * Release an obj back to its cache. If the obj has a constructed state, it must |
3509 | * be in this state _before_ it is released. Called with disabled ints. | 3733 | * be in this state _before_ it is released. Called with disabled ints. |
3510 | */ | 3734 | */ |
3511 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3735 | static inline void |
3736 | __cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu) | ||
3512 | { | 3737 | { |
3513 | struct array_cache *ac = cpu_cache_get(cachep); | 3738 | struct array_cache *ac = cpu_cache_get(cachep, *this_cpu); |
3514 | 3739 | ||
3515 | check_irq_off(); | 3740 | check_irq_off(); |
3516 | kmemleak_free_recursive(objp, cachep->flags); | 3741 | kmemleak_free_recursive(objp, cachep->flags); |
@@ -3525,7 +3750,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3525 | * variable to skip the call, which is most likely to be present in | 3750 |
3526 | * the cache. | 3751 | * the cache. |
3527 | */ | 3752 | */ |
3528 | if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) | 3753 | if (nr_online_nodes > 1 && cache_free_alien(cachep, objp, this_cpu)) |
3529 | return; | 3754 | return; |
3530 | 3755 | ||
3531 | if (likely(ac->avail < ac->limit)) { | 3756 | if (likely(ac->avail < ac->limit)) { |
@@ -3534,7 +3759,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3534 | return; | 3759 | return; |
3535 | } else { | 3760 | } else { |
3536 | STATS_INC_FREEMISS(cachep); | 3761 | STATS_INC_FREEMISS(cachep); |
3537 | cache_flusharray(cachep, ac); | 3762 | cache_flusharray(cachep, ac, this_cpu); |
3538 | ac->entry[ac->avail++] = objp; | 3763 | ac->entry[ac->avail++] = objp; |
3539 | } | 3764 | } |
3540 | } | 3765 | } |
@@ -3733,13 +3958,14 @@ EXPORT_SYMBOL(__kmalloc); | |||
3733 | void kmem_cache_free(struct kmem_cache *cachep, void *objp) | 3958 | void kmem_cache_free(struct kmem_cache *cachep, void *objp) |
3734 | { | 3959 | { |
3735 | unsigned long flags; | 3960 | unsigned long flags; |
3961 | int this_cpu; | ||
3736 | 3962 | ||
3737 | local_irq_save(flags); | 3963 | slab_irq_save(flags, this_cpu); |
3738 | debug_check_no_locks_freed(objp, obj_size(cachep)); | 3964 | debug_check_no_locks_freed(objp, obj_size(cachep)); |
3739 | if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) | 3965 | if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) |
3740 | debug_check_no_obj_freed(objp, obj_size(cachep)); | 3966 | debug_check_no_obj_freed(objp, obj_size(cachep)); |
3741 | __cache_free(cachep, objp); | 3967 | __cache_free(cachep, objp, &this_cpu); |
3742 | local_irq_restore(flags); | 3968 | slab_irq_restore(flags, this_cpu); |
3743 | 3969 | ||
3744 | trace_kmem_cache_free(_RET_IP_, objp); | 3970 | trace_kmem_cache_free(_RET_IP_, objp); |
3745 | } | 3971 | } |
@@ -3758,18 +3984,19 @@ void kfree(const void *objp) | |||
3758 | { | 3984 | { |
3759 | struct kmem_cache *c; | 3985 | struct kmem_cache *c; |
3760 | unsigned long flags; | 3986 | unsigned long flags; |
3987 | int this_cpu; | ||
3761 | 3988 | ||
3762 | trace_kfree(_RET_IP_, objp); | 3989 | trace_kfree(_RET_IP_, objp); |
3763 | 3990 | ||
3764 | if (unlikely(ZERO_OR_NULL_PTR(objp))) | 3991 | if (unlikely(ZERO_OR_NULL_PTR(objp))) |
3765 | return; | 3992 | return; |
3766 | local_irq_save(flags); | 3993 | slab_irq_save(flags, this_cpu); |
3767 | kfree_debugcheck(objp); | 3994 | kfree_debugcheck(objp); |
3768 | c = virt_to_cache(objp); | 3995 | c = virt_to_cache(objp); |
3769 | debug_check_no_locks_freed(objp, obj_size(c)); | 3996 | debug_check_no_locks_freed(objp, obj_size(c)); |
3770 | debug_check_no_obj_freed(objp, obj_size(c)); | 3997 | debug_check_no_obj_freed(objp, obj_size(c)); |
3771 | __cache_free(c, (void *)objp); | 3998 | __cache_free(c, (void *)objp, &this_cpu); |
3772 | local_irq_restore(flags); | 3999 | slab_irq_restore(flags, this_cpu); |
3773 | } | 4000 | } |
3774 | EXPORT_SYMBOL(kfree); | 4001 | EXPORT_SYMBOL(kfree); |
3775 | 4002 | ||
@@ -3790,7 +4017,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); | |||
3790 | */ | 4017 | */ |
3791 | static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) | 4018 | static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
3792 | { | 4019 | { |
3793 | int node; | 4020 | int node, this_cpu; |
3794 | struct kmem_list3 *l3; | 4021 | struct kmem_list3 *l3; |
3795 | struct array_cache *new_shared; | 4022 | struct array_cache *new_shared; |
3796 | struct array_cache **new_alien = NULL; | 4023 | struct array_cache **new_alien = NULL; |
@@ -3818,11 +4045,11 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) | |||
3818 | if (l3) { | 4045 | if (l3) { |
3819 | struct array_cache *shared = l3->shared; | 4046 | struct array_cache *shared = l3->shared; |
3820 | 4047 | ||
3821 | spin_lock_irq(&l3->list_lock); | 4048 | slab_spin_lock_irq(&l3->list_lock, this_cpu); |
3822 | 4049 | ||
3823 | if (shared) | 4050 | if (shared) |
3824 | free_block(cachep, shared->entry, | 4051 | free_block(cachep, shared->entry, |
3825 | shared->avail, node); | 4052 | shared->avail, node, &this_cpu); |
3826 | 4053 | ||
3827 | l3->shared = new_shared; | 4054 | l3->shared = new_shared; |
3828 | if (!l3->alien) { | 4055 | if (!l3->alien) { |
@@ -3831,7 +4058,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) | |||
3831 | } | 4058 | } |
3832 | l3->free_limit = (1 + nr_cpus_node(node)) * | 4059 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3833 | cachep->batchcount + cachep->num; | 4060 | cachep->batchcount + cachep->num; |
3834 | spin_unlock_irq(&l3->list_lock); | 4061 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
3835 | kfree(shared); | 4062 | kfree(shared); |
3836 | free_alien_cache(new_alien); | 4063 | free_alien_cache(new_alien); |
3837 | continue; | 4064 | continue; |
@@ -3878,24 +4105,36 @@ struct ccupdate_struct { | |||
3878 | struct array_cache *new[NR_CPUS]; | 4105 | struct array_cache *new[NR_CPUS]; |
3879 | }; | 4106 | }; |
3880 | 4107 | ||
3881 | static void do_ccupdate_local(void *info) | 4108 | static void __do_ccupdate_local(void *info, int this_cpu) |
3882 | { | 4109 | { |
3883 | struct ccupdate_struct *new = info; | 4110 | struct ccupdate_struct *new = info; |
3884 | struct array_cache *old; | 4111 | struct array_cache *old; |
3885 | 4112 | ||
3886 | check_irq_off(); | 4113 | check_irq_off(); |
3887 | old = cpu_cache_get(new->cachep); | 4114 | old = cpu_cache_get(new->cachep, this_cpu); |
3888 | 4115 | ||
3889 | new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; | 4116 | new->cachep->array[this_cpu] = new->new[this_cpu]; |
3890 | new->new[smp_processor_id()] = old; | 4117 | new->new[this_cpu] = old; |
3891 | } | 4118 | } |
3892 | 4119 | ||
4120 | #ifdef CONFIG_PREEMPT_RT | ||
4121 | static void do_ccupdate_local(void *arg, int this_cpu) | ||
4122 | { | ||
4123 | __do_ccupdate_local(arg, this_cpu); | ||
4124 | } | ||
4125 | #else | ||
4126 | static void do_ccupdate_local(void *arg) | ||
4127 | { | ||
4128 | __do_ccupdate_local(arg, smp_processor_id()); | ||
4129 | } | ||
4130 | #endif | ||
4131 | |||
3893 | /* Always called with the cache_chain_mutex held */ | 4132 | /* Always called with the cache_chain_mutex held */ |
3894 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 4133 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3895 | int batchcount, int shared, gfp_t gfp) | 4134 | int batchcount, int shared, gfp_t gfp) |
3896 | { | 4135 | { |
3897 | struct ccupdate_struct *new; | 4136 | struct ccupdate_struct *new; |
3898 | int i; | 4137 | int i, this_cpu; |
3899 | 4138 | ||
3900 | new = kzalloc(sizeof(*new), gfp); | 4139 | new = kzalloc(sizeof(*new), gfp); |
3901 | if (!new) | 4140 | if (!new) |
@@ -3913,7 +4152,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3913 | } | 4152 | } |
3914 | new->cachep = cachep; | 4153 | new->cachep = cachep; |
3915 | 4154 | ||
3916 | on_each_cpu(do_ccupdate_local, (void *)new, 1); | 4155 | slab_on_each_cpu(do_ccupdate_local, (void *)new); |
3917 | 4156 | ||
3918 | check_irq_on(); | 4157 | check_irq_on(); |
3919 | cachep->batchcount = batchcount; | 4158 | cachep->batchcount = batchcount; |
@@ -3924,9 +4163,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3924 | struct array_cache *ccold = new->new[i]; | 4163 | struct array_cache *ccold = new->new[i]; |
3925 | if (!ccold) | 4164 | if (!ccold) |
3926 | continue; | 4165 | continue; |
3927 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 4166 | slab_spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, |
3928 | free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); | 4167 | this_cpu); |
3929 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 4168 | free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), |
4169 | &this_cpu); | ||
4170 | slab_spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, | ||
4171 | this_cpu); | ||
3930 | kfree(ccold); | 4172 | kfree(ccold); |
3931 | } | 4173 | } |
3932 | kfree(new); | 4174 | kfree(new); |
@@ -3991,29 +4233,31 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) | |||
3991 | * Drain an array if it contains any elements taking the l3 lock only if | 4233 | * Drain an array if it contains any elements taking the l3 lock only if |
3992 | * necessary. Note that the l3 listlock also protects the array_cache | 4234 | * necessary. Note that the l3 listlock also protects the array_cache |
3993 | * if drain_array() is used on the shared array. | 4235 | * if drain_array() is used on the shared array. |
4236 | * returns non-zero if some work is done | ||
3994 | */ | 4237 | */ |
3995 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | 4238 | int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
3996 | struct array_cache *ac, int force, int node) | 4239 | struct array_cache *ac, int force, int node) |
3997 | { | 4240 | { |
3998 | int tofree; | 4241 | int tofree, this_cpu; |
3999 | 4242 | ||
4000 | if (!ac || !ac->avail) | 4243 | if (!ac || !ac->avail) |
4001 | return; | 4244 | return 0; |
4002 | if (ac->touched && !force) { | 4245 | if (ac->touched && !force) { |
4003 | ac->touched = 0; | 4246 | ac->touched = 0; |
4004 | } else { | 4247 | } else { |
4005 | spin_lock_irq(&l3->list_lock); | 4248 | slab_spin_lock_irq(&l3->list_lock, this_cpu); |
4006 | if (ac->avail) { | 4249 | if (ac->avail) { |
4007 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 4250 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
4008 | if (tofree > ac->avail) | 4251 | if (tofree > ac->avail) |
4009 | tofree = (ac->avail + 1) / 2; | 4252 | tofree = (ac->avail + 1) / 2; |
4010 | free_block(cachep, ac->entry, tofree, node); | 4253 | free_block(cachep, ac->entry, tofree, node, &this_cpu); |
4011 | ac->avail -= tofree; | 4254 | ac->avail -= tofree; |
4012 | memmove(ac->entry, &(ac->entry[tofree]), | 4255 | memmove(ac->entry, &(ac->entry[tofree]), |
4013 | sizeof(void *) * ac->avail); | 4256 | sizeof(void *) * ac->avail); |
4014 | } | 4257 | } |
4015 | spin_unlock_irq(&l3->list_lock); | 4258 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
4016 | } | 4259 | } |
4260 | return 1; | ||
4017 | } | 4261 | } |
4018 | 4262 | ||
4019 | /** | 4263 | /** |
@@ -4030,10 +4274,11 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | |||
4030 | */ | 4274 | */ |
4031 | static void cache_reap(struct work_struct *w) | 4275 | static void cache_reap(struct work_struct *w) |
4032 | { | 4276 | { |
4277 | int this_cpu = raw_smp_processor_id(), node = cpu_to_node(this_cpu); | ||
4033 | struct kmem_cache *searchp; | 4278 | struct kmem_cache *searchp; |
4034 | struct kmem_list3 *l3; | 4279 | struct kmem_list3 *l3; |
4035 | int node = numa_node_id(); | ||
4036 | struct delayed_work *work = to_delayed_work(w); | 4280 | struct delayed_work *work = to_delayed_work(w); |
4281 | int work_done = 0; | ||
4037 | 4282 | ||
4038 | if (!mutex_trylock(&cache_chain_mutex)) | 4283 | if (!mutex_trylock(&cache_chain_mutex)) |
4039 | /* Give up. Setup the next iteration. */ | 4284 | /* Give up. Setup the next iteration. */ |
@@ -4049,9 +4294,12 @@ static void cache_reap(struct work_struct *w) | |||
4049 | */ | 4294 | */ |
4050 | l3 = searchp->nodelists[node]; | 4295 | l3 = searchp->nodelists[node]; |
4051 | 4296 | ||
4052 | reap_alien(searchp, l3); | 4297 | work_done += reap_alien(searchp, l3, &this_cpu); |
4298 | |||
4299 | node = cpu_to_node(this_cpu); | ||
4053 | 4300 | ||
4054 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); | 4301 | work_done += drain_array(searchp, l3, |
4302 | cpu_cache_get(searchp, this_cpu), 0, node); | ||
4055 | 4303 | ||
4056 | /* | 4304 | /* |
4057 | * These are racy checks but it does not matter | 4305 | * These are racy checks but it does not matter |
@@ -4062,7 +4310,7 @@ static void cache_reap(struct work_struct *w) | |||
4062 | 4310 | ||
4063 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 4311 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
4064 | 4312 | ||
4065 | drain_array(searchp, l3, l3->shared, 0, node); | 4313 | work_done += drain_array(searchp, l3, l3->shared, 0, node); |
4066 | 4314 | ||
4067 | if (l3->free_touched) | 4315 | if (l3->free_touched) |
4068 | l3->free_touched = 0; | 4316 | l3->free_touched = 0; |
@@ -4081,7 +4329,8 @@ next: | |||
4081 | next_reap_node(); | 4329 | next_reap_node(); |
4082 | out: | 4330 | out: |
4083 | /* Set up the next iteration */ | 4331 | /* Set up the next iteration */ |
4084 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); | 4332 | schedule_delayed_work(work, |
4333 | round_jiffies_relative((1+!work_done) * REAPTIMEOUT_CPUC)); | ||
4085 | } | 4334 | } |
4086 | 4335 | ||
4087 | #ifdef CONFIG_SLABINFO | 4336 | #ifdef CONFIG_SLABINFO |
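
drain_array() and reap_alien() now report whether they freed anything, and cache_reap() accumulates that into work_done, so the rescheduling at the end becomes adaptive: `(1 + !work_done) * REAPTIMEOUT_CPUC` keeps the normal period while the caches are still yielding objects and doubles it after an idle pass. A two-line illustration of the arithmetic (the REAPTIMEOUT_CPUC value below is only a stand-in for the demo):

/* The adaptive reap period: an idle pass doubles the delay.
 * REAPTIMEOUT_CPUC below is only a stand-in value for the demo. */
#include <stdio.h>

#define REAPTIMEOUT_CPUC 200	/* illustrative, e.g. 2*HZ with HZ=100 */

int main(void)
{
	int work_done;

	for (work_done = 0; work_done <= 2; work_done++)
		printf("work_done=%d -> next reap in %d jiffies\n",
		       work_done, (1 + !work_done) * REAPTIMEOUT_CPUC);
	return 0;
}
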
@@ -4140,7 +4389,7 @@ static int s_show(struct seq_file *m, void *p) | |||
4140 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; | 4389 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; |
4141 | const char *name; | 4390 | const char *name; |
4142 | char *error = NULL; | 4391 | char *error = NULL; |
4143 | int node; | 4392 | int this_cpu, node; |
4144 | struct kmem_list3 *l3; | 4393 | struct kmem_list3 *l3; |
4145 | 4394 | ||
4146 | active_objs = 0; | 4395 | active_objs = 0; |
@@ -4151,7 +4400,7 @@ static int s_show(struct seq_file *m, void *p) | |||
4151 | continue; | 4400 | continue; |
4152 | 4401 | ||
4153 | check_irq_on(); | 4402 | check_irq_on(); |
4154 | spin_lock_irq(&l3->list_lock); | 4403 | slab_spin_lock_irq(&l3->list_lock, this_cpu); |
4155 | 4404 | ||
4156 | list_for_each_entry(slabp, &l3->slabs_full, list) { | 4405 | list_for_each_entry(slabp, &l3->slabs_full, list) { |
4157 | if (slabp->inuse != cachep->num && !error) | 4406 | if (slabp->inuse != cachep->num && !error) |
@@ -4176,7 +4425,7 @@ static int s_show(struct seq_file *m, void *p) | |||
4176 | if (l3->shared) | 4425 | if (l3->shared) |
4177 | shared_avail += l3->shared->avail; | 4426 | shared_avail += l3->shared->avail; |
4178 | 4427 | ||
4179 | spin_unlock_irq(&l3->list_lock); | 4428 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
4180 | } | 4429 | } |
4181 | num_slabs += active_slabs; | 4430 | num_slabs += active_slabs; |
4182 | num_objs = num_slabs * cachep->num; | 4431 | num_objs = num_slabs * cachep->num; |
@@ -4386,7 +4635,7 @@ static int leaks_show(struct seq_file *m, void *p) | |||
4386 | struct kmem_list3 *l3; | 4635 | struct kmem_list3 *l3; |
4387 | const char *name; | 4636 | const char *name; |
4388 | unsigned long *n = m->private; | 4637 | unsigned long *n = m->private; |
4389 | int node; | 4638 | int node, this_cpu; |
4390 | int i; | 4639 | int i; |
4391 | 4640 | ||
4392 | if (!(cachep->flags & SLAB_STORE_USER)) | 4641 | if (!(cachep->flags & SLAB_STORE_USER)) |
@@ -4404,13 +4653,13 @@ static int leaks_show(struct seq_file *m, void *p) | |||
4404 | continue; | 4653 | continue; |
4405 | 4654 | ||
4406 | check_irq_on(); | 4655 | check_irq_on(); |
4407 | spin_lock_irq(&l3->list_lock); | 4656 | slab_spin_lock_irq(&l3->list_lock, this_cpu); |
4408 | 4657 | ||
4409 | list_for_each_entry(slabp, &l3->slabs_full, list) | 4658 | list_for_each_entry(slabp, &l3->slabs_full, list) |
4410 | handle_slab(n, cachep, slabp); | 4659 | handle_slab(n, cachep, slabp); |
4411 | list_for_each_entry(slabp, &l3->slabs_partial, list) | 4660 | list_for_each_entry(slabp, &l3->slabs_partial, list) |
4412 | handle_slab(n, cachep, slabp); | 4661 | handle_slab(n, cachep, slabp); |
4413 | spin_unlock_irq(&l3->list_lock); | 4662 | slab_spin_unlock_irq(&l3->list_lock, this_cpu); |
4414 | } | 4663 | } |
4415 | name = cachep->name; | 4664 | name = cachep->name; |
4416 | if (n[0] == n[1]) { | 4665 | if (n[0] == n[1]) { |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -30,15 +30,92 @@ | |||
30 | #include <linux/notifier.h> | 30 | #include <linux/notifier.h> |
31 | #include <linux/backing-dev.h> | 31 | #include <linux/backing-dev.h> |
32 | #include <linux/memcontrol.h> | 32 | #include <linux/memcontrol.h> |
33 | #include <linux/interrupt.h> | ||
33 | 34 | ||
34 | #include "internal.h" | 35 | #include "internal.h" |
35 | 36 | ||
36 | /* How many pages do we try to swap or page in/out together? */ | 37 | /* How many pages do we try to swap or page in/out together? */ |
37 | int page_cluster; | 38 | int page_cluster; |
38 | 39 | ||
40 | #ifdef CONFIG_PREEMPT_RT | ||
41 | /* | ||
42 | * On PREEMPT_RT we don't want to disable preemption for cpu variables. | ||
43 | * We grab a cpu and then use that cpu to lock the variables accordingly. | ||
44 | * | ||
45 | * (On !PREEMPT_RT this turns into normal preempt-off sections, as before.) | ||
46 | */ | ||
47 | static DEFINE_PER_CPU_LOCKED(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); | ||
48 | static DEFINE_PER_CPU_LOCKED(struct pagevec, lru_rotate_pvecs); | ||
49 | |||
50 | #define swap_get_cpu_var_irq_save(var, flags, cpu) \ | ||
51 | ({ \ | ||
52 | (void)flags; \ | ||
53 | &get_cpu_var_locked(var, &cpu); \ | ||
54 | }) | ||
55 | |||
56 | #define swap_put_cpu_var_irq_restore(var, flags, cpu) \ | ||
57 | put_cpu_var_locked(var, cpu) | ||
58 | |||
59 | #define swap_get_cpu_var(var, cpu) \ | ||
60 | &get_cpu_var_locked(var, &cpu) | ||
61 | |||
62 | #define swap_put_cpu_var(var, cpu) \ | ||
63 | put_cpu_var_locked(var, cpu) | ||
64 | |||
65 | #define swap_per_cpu_lock(var, cpu) \ | ||
66 | ({ \ | ||
67 | spin_lock(&__get_cpu_lock(var, cpu)); \ | ||
68 | &__get_cpu_var_locked(var, cpu); \ | ||
69 | }) | ||
70 | |||
71 | #define swap_per_cpu_unlock(var, cpu) \ | ||
72 | spin_unlock(&__get_cpu_lock(var, cpu)); | ||
73 | |||
74 | #define swap_get_cpu() raw_smp_processor_id() | ||
75 | |||
76 | #define swap_put_cpu() do { } while (0) | ||
77 | |||
78 | #define swap_irq_save(flags) do { (void)flags; } while (0) | ||
79 | |||
80 | #define swap_irq_restore(flags) do { (void)flags; } while (0) | ||
81 | |||
82 | #else | ||
83 | |||
39 | static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); | 84 | static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); |
40 | static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); | 85 | static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); |
41 | 86 | ||
87 | #define swap_get_cpu_var_irq_save(var, flags, cpu) \ | ||
88 | ({ \ | ||
89 | (void)cpu; \ | ||
90 | local_irq_save(flags); \ | ||
91 | &__get_cpu_var(var); \ | ||
92 | }) | ||
93 | |||
94 | #define swap_put_cpu_var_irq_restore(var, flags, cpu) \ | ||
95 | local_irq_restore(flags) | ||
96 | |||
97 | #define swap_get_cpu_var(var, cpu) \ | ||
98 | ({ \ | ||
99 | (void)cpu; \ | ||
100 | &get_cpu_var(var); \ | ||
101 | }) | ||
102 | |||
103 | #define swap_put_cpu_var(var, cpu) put_cpu_var(var) | ||
104 | |||
105 | #define swap_per_cpu_lock(var, cpu) &per_cpu(var, cpu) | ||
106 | |||
107 | #define swap_per_cpu_unlock(var, cpu) do { } while (0) | ||
108 | |||
109 | #define swap_get_cpu() get_cpu() | ||
110 | |||
111 | #define swap_put_cpu() put_cpu() | ||
112 | |||
113 | #define swap_irq_save(flags) local_irq_save(flags) | ||
114 | |||
115 | #define swap_irq_restore(flags) local_irq_restore(flags) | ||
116 | |||
117 | #endif | ||
118 | |||
42 | /* | 119 | /* |
43 | * This path almost never happens for VM activity - pages are normally | 120 | * This path almost never happens for VM activity - pages are normally |
44 | * freed via pagevecs. But it gets used by networking. | 121 | * freed via pagevecs. But it gets used by networking. |
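
The two macro families let mm/swap.c keep a single source text for both configurations: on !PREEMPT_RT swap_get_cpu_var_irq_save() is local_irq_save() plus __get_cpu_var(), exactly what the call sites used to open-code, while on PREEMPT_RT it becomes get_cpu_var_locked(), a per-CPU spinlock that leaves preemption and interrupts enabled, which is why swap_irq_save()/swap_irq_restore() collapse to no-ops there. A compact user-space model of the "one call site, two protection schemes" idea (the MODEL_PREEMPT_RT switch and all names are illustrative, not from the patch):

/* Model of the swap_get_cpu_var()/swap_put_cpu_var() duality: one call
 * site, two protection schemes selected at build time. */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NCPU 8

struct pagevec_model { int nr; };

static struct pagevec_model pvecs[NCPU];
static pthread_mutex_t pvec_lock[NCPU];

static int my_cpu(void)
{
	int cpu = sched_getcpu();

	return cpu < 0 ? 0 : cpu % NCPU;
}

#ifdef MODEL_PREEMPT_RT
/* "RT" flavour: lock this CPU's pagevec, stay preemptible throughout */
# define model_get_cpu_var(cpu) \
	({ (cpu) = my_cpu(); pthread_mutex_lock(&pvec_lock[(cpu)]); &pvecs[(cpu)]; })
# define model_put_cpu_var(cpu)	pthread_mutex_unlock(&pvec_lock[(cpu)])
#else
/* "!RT" flavour: just dereference this CPU's slot (the kernel would
 * additionally have preemption or interrupts disabled here) */
# define model_get_cpu_var(cpu)	({ (cpu) = my_cpu(); &pvecs[(cpu)]; })
# define model_put_cpu_var(cpu)	((void)(cpu))
#endif

int main(void)
{
	struct pagevec_model *pvec;
	int cpu, i;

	for (i = 0; i < NCPU; i++)
		pthread_mutex_init(&pvec_lock[i], NULL);

	pvec = model_get_cpu_var(cpu);	/* same line for both flavours */
	pvec->nr++;
	model_put_cpu_var(cpu);

	printf("pagevec of cpu %d holds %d page(s)\n", cpu, pvecs[cpu].nr);
	return 0;
}

Building with -DMODEL_PREEMPT_RT selects the locked flavour without touching the call site, which is what the macros buy the kernel code below.
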
@@ -141,13 +218,13 @@ void rotate_reclaimable_page(struct page *page) | |||
141 | !PageUnevictable(page) && PageLRU(page)) { | 218 | !PageUnevictable(page) && PageLRU(page)) { |
142 | struct pagevec *pvec; | 219 | struct pagevec *pvec; |
143 | unsigned long flags; | 220 | unsigned long flags; |
221 | int cpu; | ||
144 | 222 | ||
145 | page_cache_get(page); | 223 | page_cache_get(page); |
146 | local_irq_save(flags); | 224 | pvec = swap_get_cpu_var_irq_save(lru_rotate_pvecs, flags, cpu); |
147 | pvec = &__get_cpu_var(lru_rotate_pvecs); | ||
148 | if (!pagevec_add(pvec, page)) | 225 | if (!pagevec_add(pvec, page)) |
149 | pagevec_move_tail(pvec); | 226 | pagevec_move_tail(pvec); |
150 | local_irq_restore(flags); | 227 | swap_put_cpu_var_irq_restore(lru_rotate_pvecs, flags, cpu); |
151 | } | 228 | } |
152 | } | 229 | } |
153 | 230 | ||
@@ -216,12 +293,14 @@ EXPORT_SYMBOL(mark_page_accessed); | |||
216 | 293 | ||
217 | void __lru_cache_add(struct page *page, enum lru_list lru) | 294 | void __lru_cache_add(struct page *page, enum lru_list lru) |
218 | { | 295 | { |
219 | struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; | 296 | struct pagevec *pvec; |
297 | int cpu; | ||
220 | 298 | ||
299 | pvec = swap_get_cpu_var(lru_add_pvecs, cpu)[lru]; | ||
221 | page_cache_get(page); | 300 | page_cache_get(page); |
222 | if (!pagevec_add(pvec, page)) | 301 | if (!pagevec_add(pvec, page)) |
223 | ____pagevec_lru_add(pvec, lru); | 302 | ____pagevec_lru_add(pvec, lru); |
224 | put_cpu_var(lru_add_pvecs); | 303 | swap_put_cpu_var(lru_add_pvecs, cpu); |
225 | } | 304 | } |
226 | 305 | ||
227 | /** | 306 | /** |
@@ -271,31 +350,33 @@ void add_page_to_unevictable_list(struct page *page) | |||
271 | */ | 350 | */ |
272 | static void drain_cpu_pagevecs(int cpu) | 351 | static void drain_cpu_pagevecs(int cpu) |
273 | { | 352 | { |
274 | struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); | 353 | struct pagevec *pvecs, *pvec; |
275 | struct pagevec *pvec; | ||
276 | int lru; | 354 | int lru; |
277 | 355 | ||
356 | pvecs = swap_per_cpu_lock(lru_add_pvecs, cpu)[0]; | ||
278 | for_each_lru(lru) { | 357 | for_each_lru(lru) { |
279 | pvec = &pvecs[lru - LRU_BASE]; | 358 | pvec = &pvecs[lru - LRU_BASE]; |
280 | if (pagevec_count(pvec)) | 359 | if (pagevec_count(pvec)) |
281 | ____pagevec_lru_add(pvec, lru); | 360 | ____pagevec_lru_add(pvec, lru); |
282 | } | 361 | } |
362 | swap_per_cpu_unlock(lru_add_pvecs, cpu); | ||
283 | 363 | ||
284 | pvec = &per_cpu(lru_rotate_pvecs, cpu); | 364 | pvec = swap_per_cpu_lock(lru_rotate_pvecs, cpu); |
285 | if (pagevec_count(pvec)) { | 365 | if (pagevec_count(pvec)) { |
286 | unsigned long flags; | 366 | unsigned long flags; |
287 | 367 | ||
288 | /* No harm done if a racing interrupt already did this */ | 368 | /* No harm done if a racing interrupt already did this */ |
289 | local_irq_save(flags); | 369 | swap_irq_save(flags); |
290 | pagevec_move_tail(pvec); | 370 | pagevec_move_tail(pvec); |
291 | local_irq_restore(flags); | 371 | swap_irq_restore(flags); |
292 | } | 372 | } |
373 | swap_per_cpu_unlock(lru_rotate_pvecs, cpu); | ||
293 | } | 374 | } |
294 | 375 | ||
295 | void lru_add_drain(void) | 376 | void lru_add_drain(void) |
296 | { | 377 | { |
297 | drain_cpu_pagevecs(get_cpu()); | 378 | drain_cpu_pagevecs(swap_get_cpu()); |
298 | put_cpu(); | 379 | swap_put_cpu(); |
299 | } | 380 | } |
300 | 381 | ||
301 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | 382 | static void lru_add_drain_per_cpu(struct work_struct *dummy) |
@@ -369,7 +450,7 @@ void release_pages(struct page **pages, int nr, int cold) | |||
369 | } | 450 | } |
370 | __pagevec_free(&pages_to_free); | 451 | __pagevec_free(&pages_to_free); |
371 | pagevec_reinit(&pages_to_free); | 452 | pagevec_reinit(&pages_to_free); |
372 | } | 453 | } |
373 | } | 454 | } |
374 | if (zone) | 455 | if (zone) |
375 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 456 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index dea7abd31098..6911d54ff9c8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/file.h> | 23 | #include <linux/file.h> |
24 | #include <linux/writeback.h> | 24 | #include <linux/writeback.h> |
25 | #include <linux/blkdev.h> | 25 | #include <linux/blkdev.h> |
26 | #include <linux/interrupt.h> | ||
26 | #include <linux/buffer_head.h> /* for try_to_release_page(), | 27 | #include <linux/buffer_head.h> /* for try_to_release_page(), |
27 | buffer_heads_over_limit */ | 28 | buffer_heads_over_limit */ |
28 | #include <linux/mm_inline.h> | 29 | #include <linux/mm_inline.h> |
@@ -1118,7 +1119,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
1118 | } | 1119 | } |
1119 | 1120 | ||
1120 | nr_reclaimed += nr_freed; | 1121 | nr_reclaimed += nr_freed; |
1121 | local_irq_disable(); | 1122 | local_irq_disable_nort(); |
1122 | if (current_is_kswapd()) { | 1123 | if (current_is_kswapd()) { |
1123 | __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); | 1124 | __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); |
1124 | __count_vm_events(KSWAPD_STEAL, nr_freed); | 1125 | __count_vm_events(KSWAPD_STEAL, nr_freed); |
@@ -1159,9 +1160,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
1159 | } | 1160 | } |
1160 | } | 1161 | } |
1161 | } while (nr_scanned < max_scan); | 1162 | } while (nr_scanned < max_scan); |
1163 | /* | ||
1164 | * Non-PREEMPT_RT relies on IRQs-off protecting the page_states | ||
1165 | * per-CPU data. PREEMPT_RT has that data protected even in | ||
1166 | * __mod_page_state(), so no need to keep IRQs disabled. | ||
1167 | */ | ||
1162 | spin_unlock(&zone->lru_lock); | 1168 | spin_unlock(&zone->lru_lock); |
1163 | done: | 1169 | done: |
1164 | local_irq_enable(); | 1170 | local_irq_enable_nort(); |
1165 | pagevec_release(&pvec); | 1171 | pagevec_release(&pvec); |
1166 | return nr_reclaimed; | 1172 | return nr_reclaimed; |
1167 | } | 1173 | } |
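
shrink_inactive_list() switches to local_irq_disable_nort()/local_irq_enable_nort(). In the RT patch set the _nort suffix means "not on RT": the call is the ordinary IRQ toggle on !PREEMPT_RT and compiles away on PREEMPT_RT, where, as the added comment explains, the per-CPU page statistics are already protected by other means. A stubbed sketch of that convention (the stubs and the MODEL_PREEMPT_RT switch are for illustration, not the patch's definitions):

/* Sketch of the *_nort convention: a real IRQ toggle on !PREEMPT_RT,
 * a no-op on PREEMPT_RT.  local_irq_*() are stubbed for user space. */
#include <stdio.h>

void local_irq_disable(void) { puts("irqs off"); }
void local_irq_enable(void)  { puts("irqs on");  }

#ifdef MODEL_PREEMPT_RT
# define local_irq_disable_nort()	do { } while (0)
# define local_irq_enable_nort()	do { } while (0)
#else
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
#endif

int main(void)
{
	local_irq_disable_nort();
	puts("update per-CPU vm counters");
	local_irq_enable_nort();
	return 0;
}
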
diff --git a/mm/vmstat.c b/mm/vmstat.c index 138bed53706e..9f7c001f1820 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -149,17 +149,16 @@ static void refresh_zone_stat_thresholds(void) | |||
149 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | 149 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
150 | int delta) | 150 | int delta) |
151 | { | 151 | { |
152 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 152 | struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); |
153 | s8 *p = pcp->vm_stat_diff + item; | 153 | s8 *p = pcp->vm_stat_diff + item; |
154 | long x; | 154 | long x = delta + *p; |
155 | |||
156 | x = delta + *p; | ||
157 | 155 | ||
158 | if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { | 156 | if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { |
159 | zone_page_state_add(x, zone, item); | 157 | zone_page_state_add(x, zone, item); |
160 | x = 0; | 158 | x = 0; |
161 | } | 159 | } |
162 | *p = x; | 160 | *p = x; |
161 | put_cpu(); | ||
163 | } | 162 | } |
164 | EXPORT_SYMBOL(__mod_zone_page_state); | 163 | EXPORT_SYMBOL(__mod_zone_page_state); |
165 | 164 | ||
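
The mm/vmstat.c hunks replace smp_processor_id() with a get_cpu()/put_cpu() bracket around each counter update. The double-underscore variants assume their callers have already disabled preemption or interrupts, so on mainline the bracket is effectively free; on PREEMPT_RT the callers may be preemptible, and without it the task could migrate between computing the per-CPU pointer and writing through it, updating another CPU's vm_stat_diff. A schematic of the bracket (the stubs below only model the kernel primitives for the demo):

/* Schematic of the get_cpu()/put_cpu() bracket: the CPU id used for the
 * per-CPU write stays valid until put_cpu(). */
#include <stdio.h>

#define NR_CPUS 4

static int vm_stat_diff[NR_CPUS];
static int preempt_count;	/* get_cpu() disables preemption in the kernel */
static int current_cpu = 2;	/* pretend scheduler state */

static int get_cpu(void)
{
	preempt_count++;		/* no migration until put_cpu() */
	return current_cpu;
}

static void put_cpu(void)
{
	preempt_count--;
}

static void mod_state(int delta)
{
	int cpu = get_cpu();		/* stable CPU id ... */

	vm_stat_diff[cpu] += delta;	/* ... so this hits the right slot */
	put_cpu();
}

int main(void)
{
	mod_state(1);
	printf("cpu%d diff=%d, preempt_count=%d\n",
	       current_cpu, vm_stat_diff[current_cpu], preempt_count);
	return 0;
}
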
@@ -202,7 +201,7 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
202 | */ | 201 | */ |
203 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | 202 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
204 | { | 203 | { |
205 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 204 | struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); |
206 | s8 *p = pcp->vm_stat_diff + item; | 205 | s8 *p = pcp->vm_stat_diff + item; |
207 | 206 | ||
208 | (*p)++; | 207 | (*p)++; |
@@ -213,17 +212,28 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |||
213 | zone_page_state_add(*p + overstep, zone, item); | 212 | zone_page_state_add(*p + overstep, zone, item); |
214 | *p = -overstep; | 213 | *p = -overstep; |
215 | } | 214 | } |
215 | put_cpu(); | ||
216 | } | 216 | } |
217 | 217 | ||
218 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | 218 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) |
219 | { | 219 | { |
220 | #ifdef CONFIG_PREEMPT_RT | ||
221 | unsigned long flags; | ||
222 | struct zone *zone; | ||
223 | |||
224 | zone = page_zone(page); | ||
225 | local_irq_save(flags); | ||
226 | __inc_zone_state(zone, item); | ||
227 | local_irq_restore(flags); | ||
228 | #else | ||
220 | __inc_zone_state(page_zone(page), item); | 229 | __inc_zone_state(page_zone(page), item); |
230 | #endif | ||
221 | } | 231 | } |
222 | EXPORT_SYMBOL(__inc_zone_page_state); | 232 | EXPORT_SYMBOL(__inc_zone_page_state); |
223 | 233 | ||
224 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | 234 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) |
225 | { | 235 | { |
226 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 236 | struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); |
227 | s8 *p = pcp->vm_stat_diff + item; | 237 | s8 *p = pcp->vm_stat_diff + item; |
228 | 238 | ||
229 | (*p)--; | 239 | (*p)--; |
@@ -234,6 +244,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |||
234 | zone_page_state_add(*p - overstep, zone, item); | 244 | zone_page_state_add(*p - overstep, zone, item); |
235 | *p = overstep; | 245 | *p = overstep; |
236 | } | 246 | } |
247 | put_cpu(); | ||
237 | } | 248 | } |
238 | 249 | ||
239 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | 250 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) |