author     Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
committer  Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
commit     b00f4dc5ff022cb9cbaffd376d9454d7fa1e496f
tree       40f1b232e2f1e8ac365317a14fdcbcb331722b46 /mm
parent     1eac8111e0763853266a171ce11214da3a347a0a
parent     b9e26dfdad5a4f9cbdaacafac6998614cc9c41bc
Merge branch 'master' into pm-sleep
* master: (848 commits)
SELinux: Fix RCU deref check warning in sel_netport_insert()
binary_sysctl(): fix memory leak
mm/vmalloc.c: remove static declaration of va from __get_vm_area_node
ipmi_watchdog: restore settings when BMC reset
oom: fix integer overflow of points in oom_badness
memcg: keep root group unchanged if creation fails
nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()
nilfs2: unbreak compat ioctl
cpusets: stall when updating mems_allowed for mempolicy or disjoint nodemask
evm: prevent racing during tfm allocation
evm: key must be set once during initialization
mmc: vub300: fix type of firmware_rom_wait_states module parameter
Revert "mmc: enable runtime PM by default"
mmc: sdhci: remove "state" argument from sdhci_suspend_host
x86, dumpstack: Fix code bytes breakage due to missing KERN_CONT
IB/qib: Correct sense on freectxts increment and decrement
RDMA/cma: Verify private data length
cgroups: fix a css_set not found bug in cgroup_attach_proc
oprofile: Fix uninitialized memory access when writing to oprofilefs
Revert "xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel"
...
Conflicts:
kernel/cgroup_freezer.c
Diffstat (limited to 'mm')

 mm/filemap.c        |  6
 mm/huge_memory.c    | 16
 mm/hugetlb.c        |  1
 mm/memcontrol.c     |  3
 mm/migrate.c        |  2
 mm/oom_kill.c       |  2
 mm/page-writeback.c | 32
 mm/page_alloc.c     | 10
 mm/percpu-vm.c      | 17
 mm/percpu.c         | 68
 mm/slab.c           |  5
 mm/slub.c           | 42
 mm/vmalloc.c        |  4
 mm/vmscan.c         | 26
 14 files changed, 146 insertions(+), 88 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e0..c106d3b3cc64 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
						iov_iter_count(i));
 
 again:
-
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
		written += copied;
 
		balance_dirty_pages_ratelimited(mapping);
-
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
	} while (iov_iter_count(i));
 
	return written ? written : status;
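
The filemap.c change makes the buffered-write loop honour SIGKILL: a fatally-signalled writer now stops generating dirty pages instead of looping (and being throttled) until the whole iovec is consumed. A minimal sketch of the resulting control flow, using the identifiers from the hunk above:

	do {
		/* ... copy up to one page from the iovec, update written ... */
		balance_dirty_pages_ratelimited(mapping);
		if (fatal_signal_pending(current)) {
			status = -EINTR;	/* killed: stop dirtying */
			break;
		}
	} while (iov_iter_count(i));
	return written ? written : status;	/* partial writes still count */

Note the return convention: bytes already written are reported even when the loop is cut short, so only a write that made no progress at all fails with -EINTR.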
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
 
 static void khugepaged_alloc_sleep(void)
 {
-	DEFINE_WAIT(wait);
-	add_wait_queue(&khugepaged_wait, &wait);
-	schedule_timeout_interruptible(
-		msecs_to_jiffies(
-			khugepaged_alloc_sleep_millisecs));
-	remove_wait_queue(&khugepaged_wait, &wait);
+	wait_event_freezable_timeout(khugepaged_wait, false,
+			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
 #ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
		if (unlikely(kthread_should_stop()))
			break;
		if (khugepaged_has_work()) {
-			DEFINE_WAIT(wait);
			if (!khugepaged_scan_sleep_millisecs)
				continue;
-			add_wait_queue(&khugepaged_wait, &wait);
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(
-					khugepaged_scan_sleep_millisecs));
-			remove_wait_queue(&khugepaged_wait, &wait);
+			wait_event_freezable_timeout(khugepaged_wait, false,
+			    msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
		} else if (khugepaged_enabled())
			wait_event_freezable(khugepaged_wait,
					     khugepaged_wait_event());
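
Both khugepaged hunks collapse an open-coded add_wait_queue()/schedule_timeout_interruptible()/remove_wait_queue() sequence into wait_event_freezable_timeout(). Beyond the line count, the helper enters the freezer, which is presumably why this matters to the pm-sleep branch being merged into: khugepaged can now be frozen for suspend while it sleeps. A side-by-side sketch (kernel-style, not standalone; ms stands in for the two *_millisecs tunables):

	/* before: plain interruptible sleep, invisible to the freezer */
	DEFINE_WAIT(wait);
	add_wait_queue(&khugepaged_wait, &wait);
	schedule_timeout_interruptible(msecs_to_jiffies(ms));
	remove_wait_queue(&khugepaged_wait, &wait);

	/* after: a pure timeout ("false" condition) that is freezable */
	wait_event_freezable_timeout(khugepaged_wait, false,
				     msecs_to_jiffies(ms));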
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb28a5f9db8d..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
	__SetPageHead(page);
	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
		__SetPageTail(p);
+		set_page_count(p, 0);
		p->first_page = page;
	}
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..b63f5f7dfa07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
		int cpu;
		enable_swap_cgroup();
		parent = NULL;
-		root_mem_cgroup = memcg;
		if (mem_cgroup_soft_limit_tree_init())
			goto free_out;
+		root_mem_cgroup = memcg;
		for_each_possible_cpu(cpu) {
			struct memcg_stock_pcp *stock =
				&per_cpu(memcg_stock, cpu);
@@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
	return &memcg->css;
 free_out:
	__mem_cgroup_free(memcg);
-	root_mem_cgroup = NULL;
	return ERR_PTR(error);
 }
 
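
The memcontrol.c change is a publish-after-init reordering: root_mem_cgroup is assigned only once mem_cgroup_soft_limit_tree_init() has succeeded, so the free_out error path no longer has to unpublish a half-created root group. Reduced to its essentials:

	if (mem_cgroup_soft_limit_tree_init())
		goto free_out;			/* global never set */
	root_mem_cgroup = memcg;		/* publish last */
	/* ... per-cpu stock setup ... */
 free_out:
	__mem_cgroup_free(memcg);		/* no root_mem_cgroup = NULL needed */
	return ERR_PTR(error);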
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
	if (anon_vma)
		put_anon_vma(anon_vma);
-out:
	unlock_page(hpage);
 
+out:
	if (rc != -EAGAIN) {
		list_del(&hpage->lru);
		put_page(hpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3134ee2fb2e8..eeb27e27dce3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
		      const nodemask_t *nodemask, unsigned long totalpages)
 {
-	int points;
+	long points;
 
	if (oom_unkillable_task(p, mem, nodemask))
		return 0;
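
The oom_kill.c one-liner widens points from int to long: the badness score is computed in pages, and the intermediate sums can exceed INT_MAX on large-memory machines before the value is normalised. A toy userspace illustration of the truncation (illustrative numbers only, assuming an LP64 target where long is 64-bit):

	#include <stdio.h>

	int main(void)
	{
		long totalpages = 1L << 31;		/* 8 TiB of 4 KiB pages */
		long score = totalpages + totalpages / 2;	/* > INT_MAX */
		int as_int = (int)score;		/* what "int points" held */

		printf("long: %ld  int: %d\n", score, as_int);
		return 0;				/* the int wraps negative */
	}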
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 71252486bc6f..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
	 */
	if (unlikely(bdi_thresh > thresh))
		bdi_thresh = thresh;
+	/*
+	 * It's very possible that bdi_thresh is close to 0 not because the
+	 * device is slow, but that it has remained inactive for long time.
+	 * Honour such devices a reasonable good (hopefully IO efficient)
+	 * threshold, so that the occasional writes won't be blocked and active
+	 * writes can rampup the threshold quickly.
+	 */
	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
	/*
	 * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
	 *
	 * 8 serves as the safety ratio.
	 */
-	if (bdi_dirty)
-		t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+	t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
	/*
	 * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
		if (task_ratelimit)
			break;
 
+		/*
+		 * In the case of an unresponding NFS server and the NFS dirty
+		 * pages exceeds dirty_thresh, give the other good bdi's a pipe
+		 * to go through, so that tasks on them still remain responsive.
+		 *
+		 * In theory 1 page is enough to keep the comsumer-producer
+		 * pipe going: the flusher cleans 1 page => the task dirties 1
+		 * more page. However bdi_dirty has accounting errors.  So use
+		 * the larger and more IO friendly bdi_stat_error.
+		 */
+		if (bdi_dirty <= bdi_stat_error(bdi))
+			break;
+
		if (fatal_signal_pending(current))
			break;
	}
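
For scale on the bdi_max_pause() clamp above: it bounds a single sleep so that at most roughly 1/8 of the bdi's dirty pages could be written back during the pause (the 8 being the safety ratio the comment mentions). A back-of-envelope calculation in plain C, with illustrative numbers only:

	#include <stdio.h>

	int main(void)
	{
		long hz = 100;		/* jiffies per second */
		long bw = 2560;		/* ~10 MB/s as 4 KiB pages per second */
		long bdi_dirty = 512;	/* dirty pages on this device */
		long t = 200;		/* cap computed earlier, in jiffies */
		long clamp = bdi_dirty * hz / (8 * bw + 1);

		if (clamp < t)		/* t = min(t, bdi_dirty*HZ/(8*bw+1)) */
			t = clamp;
		printf("max pause: %ld jiffies (~%ld ms)\n", t, t * 1000 / hz);
		return 0;		/* ~2 jiffies here */
	}

Dropping the old if (bdi_dirty) guard means a bdi with no dirty pages now simply yields a zero cap from this term rather than skipping the clamp.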
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
	__SetPageHead(page);
	for (i = 1; i < nr_pages; i++) {
		struct page *p = page + i;
-
		__SetPageTail(p);
+		set_page_count(p, 0);
		p->first_page = page;
	}
 }
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
	unsigned long block_migratetype;
	int reserve;
 
-	/* Get the start pfn, end pfn and the number of blocks to reserve */
+	/*
+	 * Get the start pfn, end pfn and the number of blocks to reserve
+	 * We have to be careful to be aligned to pageblock_nr_pages to
+	 * make sure that we always check pfn_valid for the first page in
+	 * the block.
+	 */
	start_pfn = zone->zone_start_pfn;
	end_pfn = start_pfn + zone->spanned_pages;
+	start_pfn = roundup(start_pfn, pageblock_nr_pages);
	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
							pageblock_order;
 
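
The set_page_count(p, 0) additions in hugetlb.c and page_alloc.c establish one invariant for gigantic and regular compound pages alike: only the head page carries the allocation's reference count, while every tail page has a count of zero and a first_page back-pointer to the head. A hypothetical debug helper (not part of the patch, written against the 2011-era struct page, and valid for regular compound pages where the mem_map is contiguous; a gigantic-page variant would walk with mem_map_next() as the hugetlb hunk does):

	static void check_compound_invariant(struct page *head, unsigned long order)
	{
		unsigned long i;

		VM_BUG_ON(!PageHead(head));
		for (i = 1; i < (1UL << order); i++) {
			struct page *p = head + i;

			VM_BUG_ON(!PageTail(p));
			VM_BUG_ON(page_count(p) != 0);	/* the new set_page_count() */
			VM_BUG_ON(p->first_page != head);
		}
	}

The second page_alloc.c hunk is independent: it rounds start_pfn up to a pageblock boundary so that setup_zone_migrate_reserve() always runs its pfn_valid() check against the first page of each block.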
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
 
	if (!pages || !bitmap) {
		if (may_alloc && !pages)
-			pages = pcpu_mem_alloc(pages_size);
+			pages = pcpu_mem_zalloc(pages_size);
		if (may_alloc && !bitmap)
-			bitmap = pcpu_mem_alloc(bitmap_size);
+			bitmap = pcpu_mem_zalloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}
 
-	memset(pages, 0, pages_size);
	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
 
	*bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
 {
	flush_cache_vunmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
 {
	flush_tlb_kernel_range(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
 {
	flush_cache_vmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 /**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..716eb4acf2fc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
 
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
  *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
	unsigned long flags;
 
-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
	if (!new)
		return -ENOMEM;
 
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
	struct pcpu_chunk *chunk;
 
-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
	if (!chunk)
		return NULL;
 
-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
	if (!chunk->map) {
		kfree(chunk);
		return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
  * address.  The caller is responsible for ensuring @addr stays valid
  * until this function finishes.
  *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
  * RETURNS:
  * The physical address for @addr.
  */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
	unsigned int cpu;
 
	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
-				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+				     pcpu_unit_pages);
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);
 
@@ -1011,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
-			return page_to_phys(vmalloc_to_page(addr));
+			return page_to_phys(vmalloc_to_page(addr)) +
+			       offset_in_page(addr);
	} else
-		return page_to_phys(pcpu_addr_to_page(addr));
+		return page_to_phys(pcpu_addr_to_page(addr)) +
+		       offset_in_page(addr);
 }
 
 /**
@@ -1233,7 +1247,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;
 
	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1269,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
 
-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;
@@ -1889,7 +1909,7 @@ void __init percpu_init_late(void)
 
		BUILD_BUG_ON(size > PAGE_SIZE);
 
-		map = pcpu_mem_alloc(size);
+		map = pcpu_mem_zalloc(size);
		BUG_ON(!map);
 
		spin_lock_irqsave(&pcpu_lock, flags);
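
Two behavioural fixes hide among the pcpu_mem_zalloc() renames in percpu.c: per_cpu_ptr_to_phys() used to return the physical address of the backing page, silently dropping the offset within that page, and the first-chunk fast-path bounds are now the lowest/highest unit addresses rather than the first/last unit numbers (the two can differ once groups have different base offsets). The offset bug in miniature, as plain C with toy addresses:

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		uintptr_t addr = 0xffff880012345678UL;	/* some percpu address */
		uintptr_t page_phys = 0x12345000UL;	/* phys of backing page */

		uintptr_t old = page_phys;		/* low bits lost */
		uintptr_t fixed = page_phys +
			(addr & (PAGE_SIZE - 1));	/* + offset_in_page() */

		printf("old=%#lx fixed=%#lx\n", (unsigned long)old,
		       (unsigned long)fixed);
		return 0;
	}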
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -595,6 +595,7 @@ static enum {
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,
+	LATE,
	FULL
 } g_cpucache_up;
 
@@ -671,7 +672,7 @@ static void init_node_lock_keys(int q)
 {
	struct cache_sizes *s = malloc_sizes;
 
-	if (g_cpucache_up != FULL)
+	if (g_cpucache_up < LATE)
		return;
 
	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void)
 {
	struct kmem_cache *cachep;
 
+	g_cpucache_up = LATE;
+
	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();
 
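
The new LATE state exists so that init_lock_keys(), called from kmem_cache_init_late(), actually does something: the old g_cpucache_up != FULL test made it a no-op there, because FULL is only reached later in boot. Since the enumerators are declared in boot order, the relaxed < LATE comparison reads as "the malloc caches are not ready yet". A toy model of the gating in plain C:

	#include <stdio.h>

	enum cache_up { NONE, PARTIAL_AC, PARTIAL_L3, EARLY, LATE, FULL };

	static enum cache_up g_cpucache_up = EARLY;

	static void init_node_lock_keys(void)
	{
		if (g_cpucache_up < LATE) {	/* was: != FULL, a no-op here */
			puts("skipped: caches not ready");
			return;
		}
		puts("lockdep keys annotated");
	}

	int main(void)
	{
		init_node_lock_keys();	/* skipped while EARLY */
		g_cpucache_up = LATE;	/* as kmem_cache_init_late() now does */
		init_node_lock_keys();	/* runs */
		return 0;
	}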
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
	struct kmem_cache_node *n = NULL;
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-	struct page *page;
+	struct page *page, *discard_page = NULL;
 
	while ((page = c->partial)) {
		enum slab_modes { M_PARTIAL, M_FREE };
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
				if (l == M_PARTIAL)
					remove_partial(n, page);
				else
-					add_partial(n, page, 1);
+					add_partial(n, page,
+						DEACTIVATE_TO_TAIL);
 
				l = m;
			}
@@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
				"unfreezing slab"));
 
		if (m == M_FREE) {
-			stat(s, DEACTIVATE_EMPTY);
-			discard_slab(s, page);
-			stat(s, FREE_SLAB);
+			page->next = discard_page;
+			discard_page = page;
		}
	}
 
	if (n)
		spin_unlock(&n->list_lock);
+
+	while (discard_page) {
+		page = discard_page;
+		discard_page = discard_page->next;
+
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, page);
+		stat(s, FREE_SLAB);
+	}
 }
 
 /*
@@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
		page->pobjects = pobjects;
		page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	stat(s, CPU_PARTIAL_FREE);
	return pobjects;
 }
@@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = ACCESS_ONCE(c->node);
			struct page *page;
 
-			if (!c || c->node < 0)
+			if (node < 0)
				continue;
-
-			if (c->page) {
+			page = ACCESS_ONCE(c->page);
+			if (page) {
				if (flags & SO_TOTAL)
-					x = c->page->objects;
+					x = page->objects;
				else if (flags & SO_OBJECTS)
-					x = c->page->inuse;
+					x = page->inuse;
				else
					x = 1;
 
				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
			}
			page = c->partial;
 
			if (page) {
				x = page->pobjects;
				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
		}
	}
 
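
The unfreeze_partials() rework is a classic defer-free pattern: empty slabs discovered while n->list_lock is held are chained onto a local discard_page list through page->next, and only passed to discard_slab() after the lock is dropped, keeping the critical section short. A userspace sketch of the pattern, with a pthread mutex standing in for the node's list_lock:

	#include <pthread.h>
	#include <stdlib.h>

	struct page_like {
		struct page_like *next;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	void drain(struct page_like *partial)
	{
		struct page_like *discard = NULL, *p;

		pthread_mutex_lock(&list_lock);
		while ((p = partial) != NULL) {
			partial = p->next;
			p->next = discard;	/* page->next = discard_page */
			discard = p;
		}
		pthread_mutex_unlock(&list_lock);

		while ((p = discard) != NULL) {	/* free outside the lock */
			discard = p->next;
			free(p);
		}
	}

The put_cpu_partial() hunk separately swaps this_cpu_cmpxchg() for irqsafe_cpu_cmpxchg(), presumably because the partial-list update can race with interrupt context on architectures where the former is not irq-safe.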
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3231bf332878..27be2f0d4cb7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1290,7 +1290,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
 {
-	static struct vmap_area *va;
+	struct vmap_area *va;
	struct vm_struct *area;
 
	BUG_ON(in_interrupt());
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
		goto fail;
 
	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	if (!addr)
+		return NULL;
 
	/*
	 * In this function, newly allocated vm_struct is not added
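
Both vmalloc.c changes are small but real: the stray static on va turned a per-call local into shared global state, racing between concurrent __get_vm_area_node() callers, and __vmalloc_node_range() now bails out as soon as __vmalloc_area_node() fails rather than continuing to operate on an area whose pages could not be allocated. The early-return shape, in miniature:

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
	if (!addr)
		return NULL;	/* allocation failed; do not touch area further */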
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-	shrinker->nr = 0;
+	atomic_long_set(&shrinker->nr_in_batch, 0);
	down_write(&shrinker_rwsem);
	list_add_tail(&shrinker->list, &shrinker_list);
	up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 
	list_for_each_entry(shrinker, &shrinker_list, list) {
		unsigned long long delta;
-		unsigned long total_scan;
-		unsigned long max_pass;
+		long total_scan;
+		long max_pass;
		int shrink_ret = 0;
		long nr;
		long new_nr;
		long batch_size = shrinker->batch ? shrinker->batch
						  : SHRINK_BATCH;
 
+		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+		if (max_pass <= 0)
+			continue;
+
		/*
		 * copy the current shrinker scan count into a local variable
		 * and zero it so that other concurrent shrinker invocations
		 * don't also do this scanning work.
		 */
-		do {
-			nr = shrinker->nr;
-		} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
		total_scan = nr;
-		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
		delta = (4 * nr_pages_scanned) / shrinker->seeks;
		delta *= max_pass;
		do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
		 * manner that handles concurrent updates. If we exhausted the
		 * scan, there is no need to do an update.
		 */
-		do {
-			nr = shrinker->nr;
-			new_nr = total_scan + nr;
-			if (total_scan <= 0)
-				break;
-		} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+		if (total_scan > 0)
+			new_nr = atomic_long_add_return(total_scan,
+					&shrinker->nr_in_batch);
+		else
+			new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
		trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
	}
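
The vmscan.c conversion replaces the hand-rolled cmpxchg() retry loops on shrinker->nr with an atomic_long_t named nr_in_batch: claiming the whole deferred count becomes a single exchange with zero, and handing back the unscanned remainder becomes an add_return. It also queries do_shrinker_shrink() up front and skips shrinkers that report nothing (or an error) to scan. The equivalent shape in portable C11 atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	int main(void)
	{
		atomic_long nr_in_batch = 42;	/* deferred scan count */

		/* claim the whole batch: replaces the cmpxchg retry loop */
		long nr = atomic_exchange(&nr_in_batch, 0);
		long total_scan = nr - 10;	/* pretend we scanned 10 objects */
		long new_nr;

		if (total_scan > 0)		/* hand leftovers back atomically */
			new_nr = atomic_fetch_add(&nr_in_batch, total_scan)
				 + total_scan;	/* fetch_add returns the old value */
		else
			new_nr = atomic_load(&nr_in_batch);

		printf("claimed=%ld new_nr=%ld\n", nr, new_nr);
		return 0;
	}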