author	Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
committer	Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
commit	b00f4dc5ff022cb9cbaffd376d9454d7fa1e496f (patch)
tree	40f1b232e2f1e8ac365317a14fdcbcb331722b46 /mm
parent	1eac8111e0763853266a171ce11214da3a347a0a (diff)
parent	b9e26dfdad5a4f9cbdaacafac6998614cc9c41bc (diff)
Merge branch 'master' into pm-sleep
* master: (848 commits)
  SELinux: Fix RCU deref check warning in sel_netport_insert()
  binary_sysctl(): fix memory leak
  mm/vmalloc.c: remove static declaration of va from __get_vm_area_node
  ipmi_watchdog: restore settings when BMC reset
  oom: fix integer overflow of points in oom_badness
  memcg: keep root group unchanged if creation fails
  nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()
  nilfs2: unbreak compat ioctl
  cpusets: stall when updating mems_allowed for mempolicy or disjoint nodemask
  evm: prevent racing during tfm allocation
  evm: key must be set once during initialization
  mmc: vub300: fix type of firmware_rom_wait_states module parameter
  Revert "mmc: enable runtime PM by default"
  mmc: sdhci: remove "state" argument from sdhci_suspend_host
  x86, dumpstack: Fix code bytes breakage due to missing KERN_CONT
  IB/qib: Correct sense on freectxts increment and decrement
  RDMA/cma: Verify private data length
  cgroups: fix a css_set not found bug in cgroup_attach_proc
  oprofile: Fix uninitialized memory access when writing to writing to oprofilefs
  Revert "xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel"
  ...

Conflicts:
	kernel/cgroup_freezer.c
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap.c	6
-rw-r--r--	mm/huge_memory.c	16
-rw-r--r--	mm/hugetlb.c	1
-rw-r--r--	mm/memcontrol.c	3
-rw-r--r--	mm/migrate.c	2
-rw-r--r--	mm/oom_kill.c	2
-rw-r--r--	mm/page-writeback.c	32
-rw-r--r--	mm/page_alloc.c	10
-rw-r--r--	mm/percpu-vm.c	17
-rw-r--r--	mm/percpu.c	68
-rw-r--r--	mm/slab.c	5
-rw-r--r--	mm/slub.c	42
-rw-r--r--	mm/vmalloc.c	4
-rw-r--r--	mm/vmscan.c	26
14 files changed, 146 insertions, 88 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e0..c106d3b3cc64 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
 						iov_iter_count(i));
 
 again:
-
 	/*
 	 * Bring in the user page that we will copy from _first_.
 	 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
 		written += copied;
 
 		balance_dirty_pages_ratelimited(mapping);
-
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
 	} while (iov_iter_count(i));
 
 	return written ? written : status;
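
The added check lets the buffered-write loop give up with -EINTR once the task has received a fatal signal, instead of continuing to copy and throttle. A rough userspace analogue of the same pattern, purely illustrative and not the kernel code, polls a flag set from a signal handler inside a long copy loop:

/* Illustrative userspace analogue of the check added above: a long copy
 * loop polls a flag set from a signal handler and bails out with -EINTR
 * instead of grinding on after the caller has been told to stop. */
#include <signal.h>
#include <stdio.h>
#include <errno.h>

static volatile sig_atomic_t got_fatal_signal;

static void on_term(int sig) { (void)sig; got_fatal_signal = 1; }

static long copy_loop(long chunks)
{
	long written = 0, status = 0;

	signal(SIGTERM, on_term);
	while (chunks--) {
		written++;			/* pretend one chunk was copied */
		if (got_fatal_signal) {		/* like fatal_signal_pending(current) */
			status = -EINTR;
			break;
		}
	}
	return written ? written : status;	/* partial progress wins over the error */
}

int main(void)
{
	/* with no SIGTERM delivered this simply copies everything */
	printf("copied %ld chunks\n", copy_loop(1000));
	return 0;
}
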
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
 
 static void khugepaged_alloc_sleep(void)
 {
-	DEFINE_WAIT(wait);
-	add_wait_queue(&khugepaged_wait, &wait);
-	schedule_timeout_interruptible(
-		msecs_to_jiffies(
-			khugepaged_alloc_sleep_millisecs));
-	remove_wait_queue(&khugepaged_wait, &wait);
+	wait_event_freezable_timeout(khugepaged_wait, false,
+			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
 #ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
 		if (unlikely(kthread_should_stop()))
 			break;
 		if (khugepaged_has_work()) {
-			DEFINE_WAIT(wait);
 			if (!khugepaged_scan_sleep_millisecs)
 				continue;
-			add_wait_queue(&khugepaged_wait, &wait);
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(
-					khugepaged_scan_sleep_millisecs));
-			remove_wait_queue(&khugepaged_wait, &wait);
+			wait_event_freezable_timeout(khugepaged_wait, false,
+			    msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
 		} else if (khugepaged_enabled())
 			wait_event_freezable(khugepaged_wait,
 					     khugepaged_wait_event());
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb28a5f9db8d..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
+		set_page_count(p, 0);
 		p->first_page = page;
 	}
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..b63f5f7dfa07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		int cpu;
 		enable_swap_cgroup();
 		parent = NULL;
-		root_mem_cgroup = memcg;
 		if (mem_cgroup_soft_limit_tree_init())
 			goto free_out;
+		root_mem_cgroup = memcg;
 		for_each_possible_cpu(cpu) {
 			struct memcg_stock_pcp *stock =
 					&per_cpu(memcg_stock, cpu);
@@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	return &memcg->css;
 free_out:
 	__mem_cgroup_free(memcg);
-	root_mem_cgroup = NULL;
 	return ERR_PTR(error);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
 	if (anon_vma)
 		put_anon_vma(anon_vma);
-out:
 	unlock_page(hpage);
 
+out:
 	if (rc != -EAGAIN) {
 		list_del(&hpage->lru);
 		put_page(hpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3134ee2fb2e8..eeb27e27dce3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 			  const nodemask_t *nodemask, unsigned long totalpages)
 {
-	int points;
+	long points;
 
 	if (oom_unkillable_task(p, mem, nodemask))
 		return 0;
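
Widening points from int to long matters because the badness score is derived from page counts scaled against totalpages, and on very large machines those counts no longer fit in 32 bits. A small illustrative program (made-up sizes, assumes a 64-bit long, not taken from the source) shows how quickly a 32-bit value runs out:

/* Illustrative only, not kernel code.  With 32 TB of RAM there are
 * 2^33 four-KiB pages; a task mapping half of that already exceeds
 * INT_MAX, so a signed int score would wrap before the heuristic is
 * finished (the wrapped value is implementation-defined). */
#include <stdio.h>
#include <limits.h>

int main(void)
{
	long totalpages = (32L << 40) / 4096;	/* 32 TB of 4 KiB pages (LP64 assumed) */
	long rss = totalpages / 2;		/* hypothetical task touching half of RAM */

	int points32 = (int)rss;		/* overflows: rss > INT_MAX */
	long points64 = rss;			/* fits comfortably in a long */

	printf("rss=%ld int=%d long=%ld (INT_MAX=%d)\n",
	       rss, points32, points64, INT_MAX);
	return 0;
}
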
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 71252486bc6f..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	 */
 	if (unlikely(bdi_thresh > thresh))
 		bdi_thresh = thresh;
+	/*
+	 * It's very possible that bdi_thresh is close to 0 not because the
+	 * device is slow, but that it has remained inactive for long time.
+	 * Honour such devices a reasonable good (hopefully IO efficient)
+	 * threshold, so that the occasional writes won't be blocked and active
+	 * writes can rampup the threshold quickly.
+	 */
 	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
 	/*
 	 * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
 	 *
 	 * 8 serves as the safety ratio.
 	 */
-	if (bdi_dirty)
-		t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+	t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
 	/*
 	 * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
 		if (task_ratelimit)
 			break;
 
+		/*
+		 * In the case of an unresponding NFS server and the NFS dirty
+		 * pages exceeds dirty_thresh, give the other good bdi's a pipe
+		 * to go through, so that tasks on them still remain responsive.
+		 *
+		 * In theory 1 page is enough to keep the comsumer-producer
+		 * pipe going: the flusher cleans 1 page => the task dirties 1
+		 * more page. However bdi_dirty has accounting errors. So use
+		 * the larger and more IO friendly bdi_stat_error.
+		 */
+		if (bdi_dirty <= bdi_stat_error(bdi))
+			break;
+
 		if (fatal_signal_pending(current))
 			break;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
-
 		__SetPageTail(p);
+		set_page_count(p, 0);
 		p->first_page = page;
 	}
 }
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	unsigned long block_migratetype;
 	int reserve;
 
-	/* Get the start pfn, end pfn and the number of blocks to reserve */
+	/*
+	 * Get the start pfn, end pfn and the number of blocks to reserve
+	 * We have to be careful to be aligned to pageblock_nr_pages to
+	 * make sure that we always check pfn_valid for the first page in
+	 * the block.
+	 */
 	start_pfn = zone->zone_start_pfn;
 	end_pfn = start_pfn + zone->spanned_pages;
+	start_pfn = roundup(start_pfn, pageblock_nr_pages);
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
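
The added roundup() matters because, as the new comment explains, the loop that walks the reserve (outside this hunk) only checks pfn_valid() on the first pfn of each pageblock, so the walk has to begin on a pageblock boundary. A quick illustration of the arithmetic with made-up numbers, not values from the source:

/* Illustrative arithmetic only.  roundup(x, y) bumps x up to the next
 * multiple of y, so an unaligned zone start pfn is moved to the next
 * pageblock boundary before the pageblock-sized walk starts. */
#include <stdio.h>

#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

int main(void)
{
	unsigned long pageblock_nr_pages = 512;	/* e.g. 2 MiB blocks of 4 KiB pages */
	unsigned long zone_start_pfn = 1000;	/* hypothetical unaligned zone start */

	unsigned long start_pfn = roundup(zone_start_pfn, pageblock_nr_pages);

	/* prints 1000 -> 1024: the walk now begins on a pageblock boundary */
	printf("zone_start_pfn=%lu -> start_pfn=%lu\n", zone_start_pfn, start_pfn);
	return 0;
}
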
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
 
 	if (!pages || !bitmap) {
 		if (may_alloc && !pages)
-			pages = pcpu_mem_alloc(pages_size);
+			pages = pcpu_mem_zalloc(pages_size);
 		if (may_alloc && !bitmap)
-			bitmap = pcpu_mem_alloc(bitmap_size);
+			bitmap = pcpu_mem_zalloc(bitmap_size);
 		if (!pages || !bitmap)
 			return NULL;
 	}
 
-	memset(pages, 0, pages_size);
 	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
 
 	*bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
 				 int page_start, int page_end)
 {
 	flush_cache_vunmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
 				 int page_start, int page_end)
 {
 	flush_tlb_kernel_range(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 				 int page_start, int page_end)
 {
 	flush_cache_vmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 /**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..716eb4acf2fc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
 
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
 	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
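
As the updated kerneldoc above says, the renamed helper always hands back zeroed memory, using kzalloc() for sub-page sizes and vzalloc() for anything larger, which is what lets the memset() in percpu-vm.c go away. A sketch of what the body reduces to, reconstructed from that comment rather than quoted verbatim from the source:

/* Sketch reconstructed from the kerneldoc above; an approximation of
 * pcpu_mem_zalloc(), not a verbatim copy of the kernel source. */
static void *pcpu_mem_zalloc(size_t size)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);	/* small: slab, already zeroed */
	else
		return vzalloc(size);			/* large: vmalloc, already zeroed */
}
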
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
  *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
 	unsigned long flags;
 
-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
 	if (!new)
 		return -ENOMEM;
 
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
 	if (!chunk->map) {
 		kfree(chunk);
 		return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
  * address.  The caller is responsible for ensuring @addr stays valid
  * until this function finishes.
  *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
  * RETURNS:
  * The physical address for @addr.
  */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
 	unsigned int cpu;
 
 	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
 	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
 				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
 			void *start = per_cpu_ptr(base, cpu);
 
@@ -1011,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 		if (!is_vmalloc_addr(addr))
 			return __pa(addr);
 		else
-			return page_to_phys(vmalloc_to_page(addr));
+			return page_to_phys(vmalloc_to_page(addr)) +
+			       offset_in_page(addr);
 	} else
-		return page_to_phys(pcpu_addr_to_page(addr));
+		return page_to_phys(pcpu_addr_to_page(addr)) +
+		       offset_in_page(addr);
 }
 
 /**
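
The offset_in_page() additions fix a truncation: page_to_phys() only yields the physical address of the page containing addr, so any pointer that is not page-aligned used to lose its low bits. A small illustration of the arithmetic with made-up addresses, not values from the source:

/* Illustrative only: mimics the fix above with plain integers.  The
 * physical page base on its own silently rounds the result down to the
 * page boundary; the in-page offset has to be added back. */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define PAGE_SIZE	4096ULL

int main(void)
{
	uint64_t addr      = 0xffffc90000123abcULL;	/* hypothetical vmalloc address */
	uint64_t page_phys = 0x12345000ULL;		/* hypothetical page frame base */
	uint64_t offset    = addr & (PAGE_SIZE - 1);	/* what offset_in_page() returns */

	/* old: 0x12345000, new: 0x12345abc */
	printf("old=%#" PRIx64 " new=%#" PRIx64 "\n", page_phys, page_phys + offset);
	return 0;
}
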
@@ -1233,7 +1247,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;
 
 	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
 		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1269,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 			unit_map[cpu] = unit + i;
 			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
 
-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
 		}
 	}
 	pcpu_nr_units = unit;
@@ -1889,7 +1909,7 @@ void __init percpu_init_late(void)
 
 	BUILD_BUG_ON(size > PAGE_SIZE);
 
-	map = pcpu_mem_alloc(size);
+	map = pcpu_mem_zalloc(size);
 	BUG_ON(!map);
 
 	spin_lock_irqsave(&pcpu_lock, flags);
diff --git a/mm/slab.c b/mm/slab.c
index 708efe886154..83311c9aaf9d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -595,6 +595,7 @@ static enum {
 	PARTIAL_AC,
 	PARTIAL_L3,
 	EARLY,
+	LATE,
 	FULL
 } g_cpucache_up;
 
@@ -671,7 +672,7 @@ static void init_node_lock_keys(int q)
 {
 	struct cache_sizes *s = malloc_sizes;
 
-	if (g_cpucache_up != FULL)
+	if (g_cpucache_up < LATE)
 		return;
 
 	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void)
 {
 	struct kmem_cache *cachep;
 
+	g_cpucache_up = LATE;
+
 	/* Annotate slab for lockdep -- annotate the malloc caches */
 	init_lock_keys();
 
diff --git a/mm/slub.c b/mm/slub.c
index 7d2a996c307e..ed3334d9b6da 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
 	struct kmem_cache_node *n = NULL;
 	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-	struct page *page;
+	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
 		enum slab_modes { M_PARTIAL, M_FREE };
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
 			if (l == M_PARTIAL)
 				remove_partial(n, page);
 			else
-				add_partial(n, page, 1);
+				add_partial(n, page,
+					DEACTIVATE_TO_TAIL);
 
 			l = m;
 		}
@@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
1915 "unfreezing slab")); 1916 "unfreezing slab"));
1916 1917
1917 if (m == M_FREE) { 1918 if (m == M_FREE) {
1918 stat(s, DEACTIVATE_EMPTY); 1919 page->next = discard_page;
1919 discard_slab(s, page); 1920 discard_page = page;
1920 stat(s, FREE_SLAB);
1921 } 1921 }
1922 } 1922 }
1923 1923
1924 if (n) 1924 if (n)
1925 spin_unlock(&n->list_lock); 1925 spin_unlock(&n->list_lock);
1926
1927 while (discard_page) {
1928 page = discard_page;
1929 discard_page = discard_page->next;
1930
1931 stat(s, DEACTIVATE_EMPTY);
1932 discard_slab(s, page);
1933 stat(s, FREE_SLAB);
1934 }
1926} 1935}
1927 1936
1928/* 1937/*
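
The restructuring above chains empty slabs onto a local discard_page list while n->list_lock is held and only calls discard_slab() after the lock has been dropped, so the slab-freeing path no longer runs under the node list lock. A generic sketch of the same "collect under the lock, free after unlocking" pattern, with a hypothetical struct item rather than kernel types:

/* Generic illustration of the pattern introduced above (hypothetical
 * types, not kernel code): entries that become freeable while a lock is
 * held are chained onto a local list and released only after unlocking,
 * so the potentially expensive free path never runs under the lock. */
#include <stdlib.h>
#include <pthread.h>

struct item {
	struct item *next;
	int freeable;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void drain(struct item **head)
{
	struct item *discard_list = NULL, *kept = NULL, *it;

	pthread_mutex_lock(&list_lock);
	while ((it = *head)) {
		*head = it->next;
		if (it->freeable) {
			it->next = discard_list;	/* defer the free */
			discard_list = it;
		} else {
			it->next = kept;		/* keep everything else */
			kept = it;
		}
	}
	*head = kept;
	pthread_mutex_unlock(&list_lock);

	while (discard_list) {				/* free outside the lock */
		it = discard_list;
		discard_list = discard_list->next;
		free(it);
	}
}

int main(void)
{
	struct item *head = NULL;

	for (int i = 0; i < 4; i++) {			/* build a small list */
		struct item *it = calloc(1, sizeof(*it));
		it->freeable = i & 1;
		it->next = head;
		head = it;
	}
	drain(&head);					/* frees the freeable entries */

	while (head) {					/* clean up the rest */
		struct item *it = head;
		head = head->next;
		free(it);
	}
	return 0;
}
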
@@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->pobjects = pobjects;
 		page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
 	stat(s, CPU_PARTIAL_FREE);
 	return pobjects;
 }
@@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
 		for_each_possible_cpu(cpu) {
 			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = ACCESS_ONCE(c->node);
 			struct page *page;
 
-			if (!c || c->node < 0)
+			if (node < 0)
 				continue;
-
-			if (c->page) {
+			page = ACCESS_ONCE(c->page);
+			if (page) {
 				if (flags & SO_TOTAL)
-					x = c->page->objects;
+					x = page->objects;
 				else if (flags & SO_OBJECTS)
-					x = c->page->inuse;
+					x = page->inuse;
 				else
 					x = 1;
 
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
 			page = c->partial;
 
 			if (page) {
 				x = page->pobjects;
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
 		}
 	}
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3231bf332878..27be2f0d4cb7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1290,7 +1290,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 		unsigned long align, unsigned long flags, unsigned long start,
 		unsigned long end, int node, gfp_t gfp_mask, void *caller)
 {
-	static struct vmap_area *va;
+	struct vmap_area *va;
 	struct vm_struct *area;
 
 	BUG_ON(in_interrupt());
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		goto fail;
 
 	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	if (!addr)
+		return NULL;
 
 	/*
 	 * In this function, newly allocated vm_struct is not added
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-	shrinker->nr = 0;
+	atomic_long_set(&shrinker->nr_in_batch, 0);
 	down_write(&shrinker_rwsem);
 	list_add_tail(&shrinker->list, &shrinker_list);
 	up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
-		unsigned long total_scan;
-		unsigned long max_pass;
+		long total_scan;
+		long max_pass;
 		int shrink_ret = 0;
 		long nr;
 		long new_nr;
 		long batch_size = shrinker->batch ? shrinker->batch
 						  : SHRINK_BATCH;
 
+		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+		if (max_pass <= 0)
+			continue;
+
 		/*
 		 * copy the current shrinker scan count into a local variable
 		 * and zero it so that other concurrent shrinker invocations
 		 * don't also do this scanning work.
 		 */
-		do {
-			nr = shrinker->nr;
-		} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
 		total_scan = nr;
-		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
 		delta = (4 * nr_pages_scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		 * manner that handles concurrent updates. If we exhausted the
 		 * scan, there is no need to do an update.
 		 */
-		do {
-			nr = shrinker->nr;
-			new_nr = total_scan + nr;
-			if (total_scan <= 0)
-				break;
-		} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+		if (total_scan > 0)
+			new_nr = atomic_long_add_return(total_scan,
+					&shrinker->nr_in_batch);
+		else
+			new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
 		trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
 	}
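
The conversion from the open-coded cmpxchg() loops to atomic_long_xchg() and atomic_long_add_return() keeps the same batching semantics: each caller atomically claims the whole accumulated deferral count (zeroing it so concurrent callers do not scan the same work twice) and later adds back whatever it did not get through. A hedged C11 sketch of that claim-and-return pattern with standard atomics, illustrative rather than the kernel implementation:

/* Illustrative C11 sketch of the claim/return pattern above, not the
 * kernel implementation.  One counter accumulates deferred work; each
 * worker atomically takes the whole batch and later adds back the part
 * it did not scan. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long nr_in_batch;

static long claim_batch(void)
{
	/* like atomic_long_xchg(&shrinker->nr_in_batch, 0) */
	return atomic_exchange(&nr_in_batch, 0);
}

static long return_unscanned(long leftover)
{
	if (leftover > 0)
		/* like atomic_long_add_return(leftover, &nr_in_batch) */
		return atomic_fetch_add(&nr_in_batch, leftover) + leftover;
	/* nothing to give back: just read the current value */
	return atomic_load(&nr_in_batch);
}

int main(void)
{
	atomic_store(&nr_in_batch, 100);	/* pretend 100 objects were deferred */

	long total_scan = claim_batch();	/* this worker now owns all 100 */
	long scanned = 60;			/* ...but only got through 60 */

	long new_nr = return_unscanned(total_scan - scanned);
	printf("claimed=%ld new_nr=%ld\n", total_scan, new_nr);	/* 100, 40 */
	return 0;
}
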