about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/backing-dev.c 8
-rw-r--r-- mm/huge_memory.c 16
-rw-r--r-- mm/hugetlb.c 3
-rw-r--r-- mm/migrate.c 2
-rw-r--r-- mm/nommu.c 2
-rw-r--r-- mm/oom_kill.c 5
-rw-r--r-- mm/page-writeback.c 23
-rw-r--r-- mm/page_alloc.c 10
-rw-r--r-- mm/percpu-vm.c 17
-rw-r--r-- mm/percpu.c 62
-rw-r--r-- mm/slab.c 5
-rw-r--r-- mm/slub.c 4
-rw-r--r-- mm/vmalloc.c 29
-rw-r--r-- mm/vmscan.c 26
14 files changed, 120 insertions, 92 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0860640378d..71034f41a2ba 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi)
724 724
725 bdi_unregister(bdi); 725 bdi_unregister(bdi);
726 726
727 /*
728 * If bdi_unregister() had already been called earlier, the
729 * wakeup_timer could still be armed because bdi_prune_sb()
730 * can race with the bdi_wakeup_thread_delayed() calls from
731 * __mark_inode_dirty().
732 */
733 del_timer_sync(&bdi->wb.wakeup_timer);
734
727 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) 735 for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
728 percpu_counter_destroy(&bdi->bdi_stat[i]); 736 percpu_counter_destroy(&bdi->bdi_stat[i]);
729 737
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
2259 2259
2260static void khugepaged_alloc_sleep(void) 2260static void khugepaged_alloc_sleep(void)
2261{ 2261{
2262 DEFINE_WAIT(wait); 2262 wait_event_freezable_timeout(khugepaged_wait, false,
2263 add_wait_queue(&khugepaged_wait, &wait); 2263 msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
2264 schedule_timeout_interruptible(
2265 msecs_to_jiffies(
2266 khugepaged_alloc_sleep_millisecs));
2267 remove_wait_queue(&khugepaged_wait, &wait);
2268} 2264}
2269 2265
2270#ifndef CONFIG_NUMA 2266#ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
2313 if (unlikely(kthread_should_stop())) 2309 if (unlikely(kthread_should_stop()))
2314 break; 2310 break;
2315 if (khugepaged_has_work()) { 2311 if (khugepaged_has_work()) {
2316 DEFINE_WAIT(wait);
2317 if (!khugepaged_scan_sleep_millisecs) 2312 if (!khugepaged_scan_sleep_millisecs)
2318 continue; 2313 continue;
2319 add_wait_queue(&khugepaged_wait, &wait); 2314 wait_event_freezable_timeout(khugepaged_wait, false,
2320 schedule_timeout_interruptible( 2315 msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
2321 msecs_to_jiffies(
2322 khugepaged_scan_sleep_millisecs));
2323 remove_wait_queue(&khugepaged_wait, &wait);
2324 } else if (khugepaged_enabled()) 2316 } else if (khugepaged_enabled())
2325 wait_event_freezable(khugepaged_wait, 2317 wait_event_freezable(khugepaged_wait,
2326 khugepaged_wait_event()); 2318 khugepaged_wait_event());
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dae27ba3be2c..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
576 __SetPageHead(page); 576 __SetPageHead(page);
577 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { 577 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
578 __SetPageTail(p); 578 __SetPageTail(p);
579 set_page_count(p, 0);
579 p->first_page = page; 580 p->first_page = page;
580 } 581 }
581} 582}
@@ -2422,6 +2423,8 @@ retry_avoidcopy:
2422 * anon_vma prepared. 2423 * anon_vma prepared.
2423 */ 2424 */
2424 if (unlikely(anon_vma_prepare(vma))) { 2425 if (unlikely(anon_vma_prepare(vma))) {
2426 page_cache_release(new_page);
2427 page_cache_release(old_page);
2425 /* Caller expects lock to be held */ 2428 /* Caller expects lock to be held */
2426 spin_lock(&mm->page_table_lock); 2429 spin_lock(&mm->page_table_lock);
2427 return VM_FAULT_OOM; 2430 return VM_FAULT_OOM;
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
871 871
872 if (anon_vma) 872 if (anon_vma)
873 put_anon_vma(anon_vma); 873 put_anon_vma(anon_vma);
874out:
875 unlock_page(hpage); 874 unlock_page(hpage);
876 875
876out:
877 if (rc != -EAGAIN) { 877 if (rc != -EAGAIN) {
878 list_del(&hpage->lru); 878 list_del(&hpage->lru);
879 put_page(hpage); 879 put_page(hpage);
diff --git a/mm/nommu.c b/mm/nommu.c
index 73419c55eda6..b982290fd962 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -454,7 +454,7 @@ void __attribute__((weak)) vmalloc_sync_all(void)
454 * between processes, it syncs the pagetable across all 454 * between processes, it syncs the pagetable across all
455 * processes. 455 * processes.
456 */ 456 */
457struct vm_struct *alloc_vm_area(size_t size) 457struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
458{ 458{
459 BUG(); 459 BUG();
460 return NULL; 460 return NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 471dedb463ab..76f2c5ae908e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -185,6 +185,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
185 if (!p) 185 if (!p)
186 return 0; 186 return 0;
187 187
188 if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
189 task_unlock(p);
190 return 0;
191 }
192
188 /* 193 /*
189 * The memory controller may have a limit of 0 bytes, so avoid a divide 194 * The memory controller may have a limit of 0 bytes, so avoid a divide
190 * by zero, if necessary. 195 * by zero, if necessary.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3278f005230..71252486bc6f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
128 * 128 *
129 */ 129 */
130static struct prop_descriptor vm_completions; 130static struct prop_descriptor vm_completions;
131static struct prop_descriptor vm_dirties;
132 131
133/* 132/*
134 * couple the period to the dirty_ratio: 133 * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
154{ 153{
155 int shift = calc_period_shift(); 154 int shift = calc_period_shift();
156 prop_change_shift(&vm_completions, shift); 155 prop_change_shift(&vm_completions, shift);
157 prop_change_shift(&vm_dirties, shift);
158 156
159 writeback_set_ratelimit(); 157 writeback_set_ratelimit();
160} 158}
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
235} 233}
236EXPORT_SYMBOL_GPL(bdi_writeout_inc); 234EXPORT_SYMBOL_GPL(bdi_writeout_inc);
237 235
238void task_dirty_inc(struct task_struct *tsk)
239{
240 prop_inc_single(&vm_dirties, &tsk->dirties);
241}
242
243/* 236/*
244 * Obtain an accurate fraction of the BDI's portion. 237 * Obtain an accurate fraction of the BDI's portion.
245 */ 238 */
@@ -1133,17 +1126,17 @@ pause:
1133 pages_dirtied, 1126 pages_dirtied,
1134 pause, 1127 pause,
1135 start_time); 1128 start_time);
1136 __set_current_state(TASK_UNINTERRUPTIBLE); 1129 __set_current_state(TASK_KILLABLE);
1137 io_schedule_timeout(pause); 1130 io_schedule_timeout(pause);
1138 1131
1139 dirty_thresh = hard_dirty_limit(dirty_thresh);
1140 /* 1132 /*
1141 * max-pause area. If dirty exceeded but still within this 1133 * This is typically equal to (nr_dirty < dirty_thresh) and can
1142 * area, no need to sleep for more than 200ms: (a) 8 pages per 1134 * also keep "1000+ dd on a slow USB stick" under control.
1143 * 200ms is typically more than enough to curb heavy dirtiers;
1144 * (b) the pause time limit makes the dirtiers more responsive.
1145 */ 1135 */
1146 if (nr_dirty < dirty_thresh) 1136 if (task_ratelimit)
1137 break;
1138
1139 if (fatal_signal_pending(current))
1147 break; 1140 break;
1148 } 1141 }
1149 1142
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void)
1395 1388
1396 shift = calc_period_shift(); 1389 shift = calc_period_shift();
1397 prop_descriptor_init(&vm_completions, shift); 1390 prop_descriptor_init(&vm_completions, shift);
1398 prop_descriptor_init(&vm_dirties, shift);
1399} 1391}
1400 1392
1401/** 1393/**
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
1724 __inc_zone_page_state(page, NR_DIRTIED); 1716 __inc_zone_page_state(page, NR_DIRTIED);
1725 __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); 1717 __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
1726 __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); 1718 __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
1727 task_dirty_inc(current);
1728 task_io_account_write(PAGE_CACHE_SIZE); 1719 task_io_account_write(PAGE_CACHE_SIZE);
1729 } 1720 }
1730} 1721}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
356 __SetPageHead(page); 356 __SetPageHead(page);
357 for (i = 1; i < nr_pages; i++) { 357 for (i = 1; i < nr_pages; i++) {
358 struct page *p = page + i; 358 struct page *p = page + i;
359
360 __SetPageTail(p); 359 __SetPageTail(p);
360 set_page_count(p, 0);
361 p->first_page = page; 361 p->first_page = page;
362 } 362 }
363} 363}
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
3377 unsigned long block_migratetype; 3377 unsigned long block_migratetype;
3378 int reserve; 3378 int reserve;
3379 3379
3380 /* Get the start pfn, end pfn and the number of blocks to reserve */ 3380 /*
3381 * Get the start pfn, end pfn and the number of blocks to reserve
3382 * We have to be careful to be aligned to pageblock_nr_pages to
3383 * make sure that we always check pfn_valid for the first page in
3384 * the block.
3385 */
3381 start_pfn = zone->zone_start_pfn; 3386 start_pfn = zone->zone_start_pfn;
3382 end_pfn = start_pfn + zone->spanned_pages; 3387 end_pfn = start_pfn + zone->spanned_pages;
3388 start_pfn = roundup(start_pfn, pageblock_nr_pages);
3383 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> 3389 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
3384 pageblock_order; 3390 pageblock_order;
3385 3391
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
50 50
51 if (!pages || !bitmap) { 51 if (!pages || !bitmap) {
52 if (may_alloc && !pages) 52 if (may_alloc && !pages)
53 pages = pcpu_mem_alloc(pages_size); 53 pages = pcpu_mem_zalloc(pages_size);
54 if (may_alloc && !bitmap) 54 if (may_alloc && !bitmap)
55 bitmap = pcpu_mem_alloc(bitmap_size); 55 bitmap = pcpu_mem_zalloc(bitmap_size);
56 if (!pages || !bitmap) 56 if (!pages || !bitmap)
57 return NULL; 57 return NULL;
58 } 58 }
59 59
60 memset(pages, 0, pages_size);
61 bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); 60 bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
62 61
63 *bitmapp = bitmap; 62 *bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
143 int page_start, int page_end) 142 int page_start, int page_end)
144{ 143{
145 flush_cache_vunmap( 144 flush_cache_vunmap(
146 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 145 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
147 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 146 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
148} 147}
149 148
150static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) 149static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
206 int page_start, int page_end) 205 int page_start, int page_end)
207{ 206{
208 flush_tlb_kernel_range( 207 flush_tlb_kernel_range(
209 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 208 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
210 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 209 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
211} 210}
212 211
213static int __pcpu_map_pages(unsigned long addr, struct page **pages, 212static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
284 int page_start, int page_end) 283 int page_start, int page_end)
285{ 284{
286 flush_cache_vmap( 285 flush_cache_vmap(
287 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 286 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
288 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 287 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
289} 288}
290 289
291/** 290/**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..3bb810a72006 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
116static int pcpu_nr_slots __read_mostly; 116static int pcpu_nr_slots __read_mostly;
117static size_t pcpu_chunk_struct_size __read_mostly; 117static size_t pcpu_chunk_struct_size __read_mostly;
118 118
119/* cpus with the lowest and highest unit numbers */ 119/* cpus with the lowest and highest unit addresses */
120static unsigned int pcpu_first_unit_cpu __read_mostly; 120static unsigned int pcpu_low_unit_cpu __read_mostly;
121static unsigned int pcpu_last_unit_cpu __read_mostly; 121static unsigned int pcpu_high_unit_cpu __read_mostly;
122 122
123/* the address of the first chunk which starts with the kernel static area */ 123/* the address of the first chunk which starts with the kernel static area */
124void *pcpu_base_addr __read_mostly; 124void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
273 (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) 273 (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
274 274
275/** 275/**
276 * pcpu_mem_alloc - allocate memory 276 * pcpu_mem_zalloc - allocate memory
277 * @size: bytes to allocate 277 * @size: bytes to allocate
278 * 278 *
279 * Allocate @size bytes. If @size is smaller than PAGE_SIZE, 279 * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
280 * kzalloc() is used; otherwise, vmalloc() is used. The returned 280 * kzalloc() is used; otherwise, vzalloc() is used. The returned
281 * memory is always zeroed. 281 * memory is always zeroed.
282 * 282 *
283 * CONTEXT: 283 * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
286 * RETURNS: 286 * RETURNS:
287 * Pointer to the allocated area on success, NULL on failure. 287 * Pointer to the allocated area on success, NULL on failure.
288 */ 288 */
289static void *pcpu_mem_alloc(size_t size) 289static void *pcpu_mem_zalloc(size_t size)
290{ 290{
291 if (WARN_ON_ONCE(!slab_is_available())) 291 if (WARN_ON_ONCE(!slab_is_available()))
292 return NULL; 292 return NULL;
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
302 * @ptr: memory to free 302 * @ptr: memory to free
303 * @size: size of the area 303 * @size: size of the area
304 * 304 *
305 * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). 305 * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
306 */ 306 */
307static void pcpu_mem_free(void *ptr, size_t size) 307static void pcpu_mem_free(void *ptr, size_t size)
308{ 308{
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
384 size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); 384 size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
385 unsigned long flags; 385 unsigned long flags;
386 386
387 new = pcpu_mem_alloc(new_size); 387 new = pcpu_mem_zalloc(new_size);
388 if (!new) 388 if (!new)
389 return -ENOMEM; 389 return -ENOMEM;
390 390
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
604{ 604{
605 struct pcpu_chunk *chunk; 605 struct pcpu_chunk *chunk;
606 606
607 chunk = pcpu_mem_alloc(pcpu_chunk_struct_size); 607 chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
608 if (!chunk) 608 if (!chunk)
609 return NULL; 609 return NULL;
610 610
611 chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); 611 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
612 sizeof(chunk->map[0]));
612 if (!chunk->map) { 613 if (!chunk->map) {
613 kfree(chunk); 614 kfree(chunk);
614 return NULL; 615 return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
977 * address. The caller is responsible for ensuring @addr stays valid 978 * address. The caller is responsible for ensuring @addr stays valid
978 * until this function finishes. 979 * until this function finishes.
979 * 980 *
981 * percpu allocator has special setup for the first chunk, which currently
982 * supports either embedding in linear address space or vmalloc mapping,
983 * and, from the second one, the backing allocator (currently either vm or
984 * km) provides translation.
985 *
986 * The addr can be translated simply without checking if it falls into the
987 * first chunk. But the current code reflects better how percpu allocator
988 * actually works, and the verification can discover both bugs in percpu
989 * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
990 * code.
991 *
980 * RETURNS: 992 * RETURNS:
981 * The physical address for @addr. 993 * The physical address for @addr.
982 */ 994 */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
984{ 996{
985 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); 997 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
986 bool in_first_chunk = false; 998 bool in_first_chunk = false;
987 unsigned long first_start, first_end; 999 unsigned long first_low, first_high;
988 unsigned int cpu; 1000 unsigned int cpu;
989 1001
990 /* 1002 /*
991 * The following test on first_start/end isn't strictly 1003 * The following test on unit_low/high isn't strictly
992 * necessary but will speed up lookups of addresses which 1004 * necessary but will speed up lookups of addresses which
993 * aren't in the first chunk. 1005 * aren't in the first chunk.
994 */ 1006 */
995 first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); 1007 first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
996 first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, 1008 first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
997 pcpu_unit_pages); 1009 pcpu_unit_pages);
998 if ((unsigned long)addr >= first_start && 1010 if ((unsigned long)addr >= first_low &&
999 (unsigned long)addr < first_end) { 1011 (unsigned long)addr < first_high) {
1000 for_each_possible_cpu(cpu) { 1012 for_each_possible_cpu(cpu) {
1001 void *start = per_cpu_ptr(base, cpu); 1013 void *start = per_cpu_ptr(base, cpu);
1002 1014
@@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1233 1245
1234 for (cpu = 0; cpu < nr_cpu_ids; cpu++) 1246 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
1235 unit_map[cpu] = UINT_MAX; 1247 unit_map[cpu] = UINT_MAX;
1236 pcpu_first_unit_cpu = NR_CPUS; 1248
1249 pcpu_low_unit_cpu = NR_CPUS;
1250 pcpu_high_unit_cpu = NR_CPUS;
1237 1251
1238 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { 1252 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
1239 const struct pcpu_group_info *gi = &ai->groups[group]; 1253 const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1253 unit_map[cpu] = unit + i; 1267 unit_map[cpu] = unit + i;
1254 unit_off[cpu] = gi->base_offset + i * ai->unit_size; 1268 unit_off[cpu] = gi->base_offset + i * ai->unit_size;
1255 1269
1256 if (pcpu_first_unit_cpu == NR_CPUS) 1270 /* determine low/high unit_cpu */
1257 pcpu_first_unit_cpu = cpu; 1271 if (pcpu_low_unit_cpu == NR_CPUS ||
1258 pcpu_last_unit_cpu = cpu; 1272 unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
1273 pcpu_low_unit_cpu = cpu;
1274 if (pcpu_high_unit_cpu == NR_CPUS ||
1275 unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
1276 pcpu_high_unit_cpu = cpu;
1259 } 1277 }
1260 } 1278 }
1261 pcpu_nr_units = unit; 1279 pcpu_nr_units = unit;
@@ -1889,7 +1907,7 @@ void __init percpu_init_late(void)
1889 1907
1890 BUILD_BUG_ON(size > PAGE_SIZE); 1908 BUILD_BUG_ON(size > PAGE_SIZE);
1891 1909
1892 map = pcpu_mem_alloc(size); 1910 map = pcpu_mem_zalloc(size);
1893 BUG_ON(!map); 1911 BUG_ON(!map);
1894 1912
1895 spin_lock_irqsave(&pcpu_lock, flags); 1913 spin_lock_irqsave(&pcpu_lock, flags);
diff --git a/mm/slab.c b/mm/slab.c
index a7f9c244aac6..4ef42baf66f0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -597,6 +597,7 @@ static enum {
597 PARTIAL_AC, 597 PARTIAL_AC,
598 PARTIAL_L3, 598 PARTIAL_L3,
599 EARLY, 599 EARLY,
600 LATE,
600 FULL 601 FULL
601} g_cpucache_up; 602} g_cpucache_up;
602 603
@@ -673,7 +674,7 @@ static void init_node_lock_keys(int q)
673{ 674{
674 struct cache_sizes *s = malloc_sizes; 675 struct cache_sizes *s = malloc_sizes;
675 676
676 if (g_cpucache_up != FULL) 677 if (g_cpucache_up < LATE)
677 return; 678 return;
678 679
679 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { 680 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1680,6 +1681,8 @@ void __init kmem_cache_init_late(void)
1680{ 1681{
1681 struct kmem_cache *cachep; 1682 struct kmem_cache *cachep;
1682 1683
1684 g_cpucache_up = LATE;
1685
1683 /* Annotate slab for lockdep -- annotate the malloc caches */ 1686 /* Annotate slab for lockdep -- annotate the malloc caches */
1684 init_lock_keys(); 1687 init_lock_keys();
1685 1688
diff --git a/mm/slub.c b/mm/slub.c
index a47df0aa5d36..19436f538760 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3043,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
3043 * per node list when we run out of per cpu objects. We only fetch 50% 3043 * per node list when we run out of per cpu objects. We only fetch 50%
3044 * to keep some capacity around for frees. 3044 * to keep some capacity around for frees.
3045 */ 3045 */
3046 if (s->size >= PAGE_SIZE) 3046 if (kmem_cache_debug(s))
3047 s->cpu_partial = 0;
3048 else if (s->size >= PAGE_SIZE)
3047 s->cpu_partial = 2; 3049 s->cpu_partial = 2;
3048 else if (s->size >= 1024) 3050 else if (s->size >= 1024)
3049 s->cpu_partial = 6; 3051 s->cpu_partial = 6;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b669aa6f6caf..1d8b32f07139 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
1633 goto fail; 1633 goto fail;
1634 1634
1635 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); 1635 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1636 if (!addr)
1637 return NULL;
1636 1638
1637 /* 1639 /*
1638 * In this function, newly allocated vm_struct is not added 1640 * In this function, newly allocated vm_struct is not added
@@ -2141,23 +2143,30 @@ void __attribute__((weak)) vmalloc_sync_all(void)
2141 2143
2142static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) 2144static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2143{ 2145{
2144 /* apply_to_page_range() does all the hard work. */ 2146 pte_t ***p = data;
2147
2148 if (p) {
2149 *(*p) = pte;
2150 (*p)++;
2151 }
2145 return 0; 2152 return 0;
2146} 2153}
2147 2154
2148/** 2155/**
2149 * alloc_vm_area - allocate a range of kernel address space 2156 * alloc_vm_area - allocate a range of kernel address space
2150 * @size: size of the area 2157 * @size: size of the area
2158 * @ptes: returns the PTEs for the address space
2151 * 2159 *
2152 * Returns: NULL on failure, vm_struct on success 2160 * Returns: NULL on failure, vm_struct on success
2153 * 2161 *
2154 * This function reserves a range of kernel address space, and 2162 * This function reserves a range of kernel address space, and
2155 * allocates pagetables to map that range. No actual mappings 2163 * allocates pagetables to map that range. No actual mappings
2156 * are created. If the kernel address space is not shared 2164 * are created.
2157 * between processes, it syncs the pagetable across all 2165 *
2158 * processes. 2166 * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
2167 * allocated for the VM area are returned.
2159 */ 2168 */
2160struct vm_struct *alloc_vm_area(size_t size) 2169struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2161{ 2170{
2162 struct vm_struct *area; 2171 struct vm_struct *area;
2163 2172
@@ -2171,19 +2180,11 @@ struct vm_struct *alloc_vm_area(size_t size)
2171 * of kernel virtual address space and mapped into init_mm. 2180 * of kernel virtual address space and mapped into init_mm.
2172 */ 2181 */
2173 if (apply_to_page_range(&init_mm, (unsigned long)area->addr, 2182 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2174 area->size, f, NULL)) { 2183 size, f, ptes ? &ptes : NULL)) {
2175 free_vm_area(area); 2184 free_vm_area(area);
2176 return NULL; 2185 return NULL;
2177 } 2186 }
2178 2187
2179 /*
2180 * If the allocated address space is passed to a hypercall
2181 * before being used then we cannot rely on a page fault to
2182 * trigger an update of the page tables. So sync all the page
2183 * tables here.
2184 */
2185 vmalloc_sync_all();
2186
2187 return area; 2188 return area;
2188} 2189}
2189EXPORT_SYMBOL_GPL(alloc_vm_area); 2190EXPORT_SYMBOL_GPL(alloc_vm_area);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
183 */ 183 */
184void register_shrinker(struct shrinker *shrinker) 184void register_shrinker(struct shrinker *shrinker)
185{ 185{
186 shrinker->nr = 0; 186 atomic_long_set(&shrinker->nr_in_batch, 0);
187 down_write(&shrinker_rwsem); 187 down_write(&shrinker_rwsem);
188 list_add_tail(&shrinker->list, &shrinker_list); 188 list_add_tail(&shrinker->list, &shrinker_list);
189 up_write(&shrinker_rwsem); 189 up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
247 247
248 list_for_each_entry(shrinker, &shrinker_list, list) { 248 list_for_each_entry(shrinker, &shrinker_list, list) {
249 unsigned long long delta; 249 unsigned long long delta;
250 unsigned long total_scan; 250 long total_scan;
251 unsigned long max_pass; 251 long max_pass;
252 int shrink_ret = 0; 252 int shrink_ret = 0;
253 long nr; 253 long nr;
254 long new_nr; 254 long new_nr;
255 long batch_size = shrinker->batch ? shrinker->batch 255 long batch_size = shrinker->batch ? shrinker->batch
256 : SHRINK_BATCH; 256 : SHRINK_BATCH;
257 257
258 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
259 if (max_pass <= 0)
260 continue;
261
258 /* 262 /*
259 * copy the current shrinker scan count into a local variable 263 * copy the current shrinker scan count into a local variable
260 * and zero it so that other concurrent shrinker invocations 264 * and zero it so that other concurrent shrinker invocations
261 * don't also do this scanning work. 265 * don't also do this scanning work.
262 */ 266 */
263 do { 267 nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
264 nr = shrinker->nr;
265 } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
266 268
267 total_scan = nr; 269 total_scan = nr;
268 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
269 delta = (4 * nr_pages_scanned) / shrinker->seeks; 270 delta = (4 * nr_pages_scanned) / shrinker->seeks;
270 delta *= max_pass; 271 delta *= max_pass;
271 do_div(delta, lru_pages + 1); 272 do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
325 * manner that handles concurrent updates. If we exhausted the 326 * manner that handles concurrent updates. If we exhausted the
326 * scan, there is no need to do an update. 327 * scan, there is no need to do an update.
327 */ 328 */
328 do { 329 if (total_scan > 0)
329 nr = shrinker->nr; 330 new_nr = atomic_long_add_return(total_scan,
330 new_nr = total_scan + nr; 331 &shrinker->nr_in_batch);
331 if (total_scan <= 0) 332 else
332 break; 333 new_nr = atomic_long_read(&shrinker->nr_in_batch);
333 } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
334 334
335 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); 335 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
336 } 336 }