diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 8 | ||||
-rw-r--r-- | mm/huge_memory.c | 16 | ||||
-rw-r--r-- | mm/hugetlb.c | 3 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 5 | ||||
-rw-r--r-- | mm/page-writeback.c | 23 | ||||
-rw-r--r-- | mm/page_alloc.c | 10 | ||||
-rw-r--r-- | mm/percpu-vm.c | 17 | ||||
-rw-r--r-- | mm/percpu.c | 62 | ||||
-rw-r--r-- | mm/slab.c | 5 | ||||
-rw-r--r-- | mm/slub.c | 4 | ||||
-rw-r--r-- | mm/vmalloc.c | 29 | ||||
-rw-r--r-- | mm/vmscan.c | 26 |
14 files changed, 120 insertions, 92 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a0860640378d..71034f41a2ba 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
724 | 724 | ||
725 | bdi_unregister(bdi); | 725 | bdi_unregister(bdi); |
726 | 726 | ||
727 | /* | ||
728 | * If bdi_unregister() had already been called earlier, the | ||
729 | * wakeup_timer could still be armed because bdi_prune_sb() | ||
730 | * can race with the bdi_wakeup_thread_delayed() calls from | ||
731 | * __mark_inode_dirty(). | ||
732 | */ | ||
733 | del_timer_sync(&bdi->wb.wakeup_timer); | ||
734 | |||
727 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 735 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
728 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 736 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
729 | 737 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4298abaae153..36b3d988b4ef 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage) | |||
2259 | 2259 | ||
2260 | static void khugepaged_alloc_sleep(void) | 2260 | static void khugepaged_alloc_sleep(void) |
2261 | { | 2261 | { |
2262 | DEFINE_WAIT(wait); | 2262 | wait_event_freezable_timeout(khugepaged_wait, false, |
2263 | add_wait_queue(&khugepaged_wait, &wait); | 2263 | msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); |
2264 | schedule_timeout_interruptible( | ||
2265 | msecs_to_jiffies( | ||
2266 | khugepaged_alloc_sleep_millisecs)); | ||
2267 | remove_wait_queue(&khugepaged_wait, &wait); | ||
2268 | } | 2264 | } |
2269 | 2265 | ||
2270 | #ifndef CONFIG_NUMA | 2266 | #ifndef CONFIG_NUMA |
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void) | |||
2313 | if (unlikely(kthread_should_stop())) | 2309 | if (unlikely(kthread_should_stop())) |
2314 | break; | 2310 | break; |
2315 | if (khugepaged_has_work()) { | 2311 | if (khugepaged_has_work()) { |
2316 | DEFINE_WAIT(wait); | ||
2317 | if (!khugepaged_scan_sleep_millisecs) | 2312 | if (!khugepaged_scan_sleep_millisecs) |
2318 | continue; | 2313 | continue; |
2319 | add_wait_queue(&khugepaged_wait, &wait); | 2314 | wait_event_freezable_timeout(khugepaged_wait, false, |
2320 | schedule_timeout_interruptible( | 2315 | msecs_to_jiffies(khugepaged_scan_sleep_millisecs)); |
2321 | msecs_to_jiffies( | ||
2322 | khugepaged_scan_sleep_millisecs)); | ||
2323 | remove_wait_queue(&khugepaged_wait, &wait); | ||
2324 | } else if (khugepaged_enabled()) | 2316 | } else if (khugepaged_enabled()) |
2325 | wait_event_freezable(khugepaged_wait, | 2317 | wait_event_freezable(khugepaged_wait, |
2326 | khugepaged_wait_event()); | 2318 | khugepaged_wait_event()); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dae27ba3be2c..73f17c0293c0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) | |||
576 | __SetPageHead(page); | 576 | __SetPageHead(page); |
577 | for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { | 577 | for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { |
578 | __SetPageTail(p); | 578 | __SetPageTail(p); |
579 | set_page_count(p, 0); | ||
579 | p->first_page = page; | 580 | p->first_page = page; |
580 | } | 581 | } |
581 | } | 582 | } |
@@ -2422,6 +2423,8 @@ retry_avoidcopy: | |||
2422 | * anon_vma prepared. | 2423 | * anon_vma prepared. |
2423 | */ | 2424 | */ |
2424 | if (unlikely(anon_vma_prepare(vma))) { | 2425 | if (unlikely(anon_vma_prepare(vma))) { |
2426 | page_cache_release(new_page); | ||
2427 | page_cache_release(old_page); | ||
2425 | /* Caller expects lock to be held */ | 2428 | /* Caller expects lock to be held */ |
2426 | spin_lock(&mm->page_table_lock); | 2429 | spin_lock(&mm->page_table_lock); |
2427 | return VM_FAULT_OOM; | 2430 | return VM_FAULT_OOM; |
diff --git a/mm/migrate.c b/mm/migrate.c index 578e29174fa6..177aca424a06 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, | |||
871 | 871 | ||
872 | if (anon_vma) | 872 | if (anon_vma) |
873 | put_anon_vma(anon_vma); | 873 | put_anon_vma(anon_vma); |
874 | out: | ||
875 | unlock_page(hpage); | 874 | unlock_page(hpage); |
876 | 875 | ||
876 | out: | ||
877 | if (rc != -EAGAIN) { | 877 | if (rc != -EAGAIN) { |
878 | list_del(&hpage->lru); | 878 | list_del(&hpage->lru); |
879 | put_page(hpage); | 879 | put_page(hpage); |
diff --git a/mm/nommu.c b/mm/nommu.c index 73419c55eda6..b982290fd962 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -454,7 +454,7 @@ void __attribute__((weak)) vmalloc_sync_all(void) | |||
454 | * between processes, it syncs the pagetable across all | 454 | * between processes, it syncs the pagetable across all |
455 | * processes. | 455 | * processes. |
456 | */ | 456 | */ |
457 | struct vm_struct *alloc_vm_area(size_t size) | 457 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) |
458 | { | 458 | { |
459 | BUG(); | 459 | BUG(); |
460 | return NULL; | 460 | return NULL; |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 471dedb463ab..76f2c5ae908e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -185,6 +185,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, | |||
185 | if (!p) | 185 | if (!p) |
186 | return 0; | 186 | return 0; |
187 | 187 | ||
188 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
189 | task_unlock(p); | ||
190 | return 0; | ||
191 | } | ||
192 | |||
188 | /* | 193 | /* |
189 | * The memory controller may have a limit of 0 bytes, so avoid a divide | 194 | * The memory controller may have a limit of 0 bytes, so avoid a divide |
190 | * by zero, if necessary. | 195 | * by zero, if necessary. |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a3278f005230..71252486bc6f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit; | |||
128 | * | 128 | * |
129 | */ | 129 | */ |
130 | static struct prop_descriptor vm_completions; | 130 | static struct prop_descriptor vm_completions; |
131 | static struct prop_descriptor vm_dirties; | ||
132 | 131 | ||
133 | /* | 132 | /* |
134 | * couple the period to the dirty_ratio: | 133 | * couple the period to the dirty_ratio: |
@@ -154,7 +153,6 @@ static void update_completion_period(void) | |||
154 | { | 153 | { |
155 | int shift = calc_period_shift(); | 154 | int shift = calc_period_shift(); |
156 | prop_change_shift(&vm_completions, shift); | 155 | prop_change_shift(&vm_completions, shift); |
157 | prop_change_shift(&vm_dirties, shift); | ||
158 | 156 | ||
159 | writeback_set_ratelimit(); | 157 | writeback_set_ratelimit(); |
160 | } | 158 | } |
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) | |||
235 | } | 233 | } |
236 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); | 234 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); |
237 | 235 | ||
238 | void task_dirty_inc(struct task_struct *tsk) | ||
239 | { | ||
240 | prop_inc_single(&vm_dirties, &tsk->dirties); | ||
241 | } | ||
242 | |||
243 | /* | 236 | /* |
244 | * Obtain an accurate fraction of the BDI's portion. | 237 | * Obtain an accurate fraction of the BDI's portion. |
245 | */ | 238 | */ |
@@ -1133,17 +1126,17 @@ pause: | |||
1133 | pages_dirtied, | 1126 | pages_dirtied, |
1134 | pause, | 1127 | pause, |
1135 | start_time); | 1128 | start_time); |
1136 | __set_current_state(TASK_UNINTERRUPTIBLE); | 1129 | __set_current_state(TASK_KILLABLE); |
1137 | io_schedule_timeout(pause); | 1130 | io_schedule_timeout(pause); |
1138 | 1131 | ||
1139 | dirty_thresh = hard_dirty_limit(dirty_thresh); | ||
1140 | /* | 1132 | /* |
1141 | * max-pause area. If dirty exceeded but still within this | 1133 | * This is typically equal to (nr_dirty < dirty_thresh) and can |
1142 | * area, no need to sleep for more than 200ms: (a) 8 pages per | 1134 | * also keep "1000+ dd on a slow USB stick" under control. |
1143 | * 200ms is typically more than enough to curb heavy dirtiers; | ||
1144 | * (b) the pause time limit makes the dirtiers more responsive. | ||
1145 | */ | 1135 | */ |
1146 | if (nr_dirty < dirty_thresh) | 1136 | if (task_ratelimit) |
1137 | break; | ||
1138 | |||
1139 | if (fatal_signal_pending(current)) | ||
1147 | break; | 1140 | break; |
1148 | } | 1141 | } |
1149 | 1142 | ||
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void) | |||
1395 | 1388 | ||
1396 | shift = calc_period_shift(); | 1389 | shift = calc_period_shift(); |
1397 | prop_descriptor_init(&vm_completions, shift); | 1390 | prop_descriptor_init(&vm_completions, shift); |
1398 | prop_descriptor_init(&vm_dirties, shift); | ||
1399 | } | 1391 | } |
1400 | 1392 | ||
1401 | /** | 1393 | /** |
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
1724 | __inc_zone_page_state(page, NR_DIRTIED); | 1716 | __inc_zone_page_state(page, NR_DIRTIED); |
1725 | __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); | 1717 | __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); |
1726 | __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); | 1718 | __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); |
1727 | task_dirty_inc(current); | ||
1728 | task_io_account_write(PAGE_CACHE_SIZE); | 1719 | task_io_account_write(PAGE_CACHE_SIZE); |
1729 | } | 1720 | } |
1730 | } | 1721 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9dd443d89d8b..2b8ba3aebf6e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order) | |||
356 | __SetPageHead(page); | 356 | __SetPageHead(page); |
357 | for (i = 1; i < nr_pages; i++) { | 357 | for (i = 1; i < nr_pages; i++) { |
358 | struct page *p = page + i; | 358 | struct page *p = page + i; |
359 | |||
360 | __SetPageTail(p); | 359 | __SetPageTail(p); |
360 | set_page_count(p, 0); | ||
361 | p->first_page = page; | 361 | p->first_page = page; |
362 | } | 362 | } |
363 | } | 363 | } |
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
3377 | unsigned long block_migratetype; | 3377 | unsigned long block_migratetype; |
3378 | int reserve; | 3378 | int reserve; |
3379 | 3379 | ||
3380 | /* Get the start pfn, end pfn and the number of blocks to reserve */ | 3380 | /* |
3381 | * Get the start pfn, end pfn and the number of blocks to reserve | ||
3382 | * We have to be careful to be aligned to pageblock_nr_pages to | ||
3383 | * make sure that we always check pfn_valid for the first page in | ||
3384 | * the block. | ||
3385 | */ | ||
3381 | start_pfn = zone->zone_start_pfn; | 3386 | start_pfn = zone->zone_start_pfn; |
3382 | end_pfn = start_pfn + zone->spanned_pages; | 3387 | end_pfn = start_pfn + zone->spanned_pages; |
3388 | start_pfn = roundup(start_pfn, pageblock_nr_pages); | ||
3383 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> | 3389 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> |
3384 | pageblock_order; | 3390 | pageblock_order; |
3385 | 3391 | ||
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index ea534960a04b..12a48a88c0d8 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c | |||
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | |||
50 | 50 | ||
51 | if (!pages || !bitmap) { | 51 | if (!pages || !bitmap) { |
52 | if (may_alloc && !pages) | 52 | if (may_alloc && !pages) |
53 | pages = pcpu_mem_alloc(pages_size); | 53 | pages = pcpu_mem_zalloc(pages_size); |
54 | if (may_alloc && !bitmap) | 54 | if (may_alloc && !bitmap) |
55 | bitmap = pcpu_mem_alloc(bitmap_size); | 55 | bitmap = pcpu_mem_zalloc(bitmap_size); |
56 | if (!pages || !bitmap) | 56 | if (!pages || !bitmap) |
57 | return NULL; | 57 | return NULL; |
58 | } | 58 | } |
59 | 59 | ||
60 | memset(pages, 0, pages_size); | ||
61 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); | 60 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); |
62 | 61 | ||
63 | *bitmapp = bitmap; | 62 | *bitmapp = bitmap; |
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | |||
143 | int page_start, int page_end) | 142 | int page_start, int page_end) |
144 | { | 143 | { |
145 | flush_cache_vunmap( | 144 | flush_cache_vunmap( |
146 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 145 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
147 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 146 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
148 | } | 147 | } |
149 | 148 | ||
150 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | 149 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) |
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | |||
206 | int page_start, int page_end) | 205 | int page_start, int page_end) |
207 | { | 206 | { |
208 | flush_tlb_kernel_range( | 207 | flush_tlb_kernel_range( |
209 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 208 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
210 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 209 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
211 | } | 210 | } |
212 | 211 | ||
213 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | 212 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, |
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | |||
284 | int page_start, int page_end) | 283 | int page_start, int page_end) |
285 | { | 284 | { |
286 | flush_cache_vmap( | 285 | flush_cache_vmap( |
287 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 286 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
288 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 287 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
289 | } | 288 | } |
290 | 289 | ||
291 | /** | 290 | /** |
diff --git a/mm/percpu.c b/mm/percpu.c index bf80e55dbed7..3bb810a72006 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly; | |||
116 | static int pcpu_nr_slots __read_mostly; | 116 | static int pcpu_nr_slots __read_mostly; |
117 | static size_t pcpu_chunk_struct_size __read_mostly; | 117 | static size_t pcpu_chunk_struct_size __read_mostly; |
118 | 118 | ||
119 | /* cpus with the lowest and highest unit numbers */ | 119 | /* cpus with the lowest and highest unit addresses */ |
120 | static unsigned int pcpu_first_unit_cpu __read_mostly; | 120 | static unsigned int pcpu_low_unit_cpu __read_mostly; |
121 | static unsigned int pcpu_last_unit_cpu __read_mostly; | 121 | static unsigned int pcpu_high_unit_cpu __read_mostly; |
122 | 122 | ||
123 | /* the address of the first chunk which starts with the kernel static area */ | 123 | /* the address of the first chunk which starts with the kernel static area */ |
124 | void *pcpu_base_addr __read_mostly; | 124 | void *pcpu_base_addr __read_mostly; |
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) | 273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) |
274 | 274 | ||
275 | /** | 275 | /** |
276 | * pcpu_mem_alloc - allocate memory | 276 | * pcpu_mem_zalloc - allocate memory |
277 | * @size: bytes to allocate | 277 | * @size: bytes to allocate |
278 | * | 278 | * |
279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, | 279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, |
280 | * kzalloc() is used; otherwise, vmalloc() is used. The returned | 280 | * kzalloc() is used; otherwise, vzalloc() is used. The returned |
281 | * memory is always zeroed. | 281 | * memory is always zeroed. |
282 | * | 282 | * |
283 | * CONTEXT: | 283 | * CONTEXT: |
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
286 | * RETURNS: | 286 | * RETURNS: |
287 | * Pointer to the allocated area on success, NULL on failure. | 287 | * Pointer to the allocated area on success, NULL on failure. |
288 | */ | 288 | */ |
289 | static void *pcpu_mem_alloc(size_t size) | 289 | static void *pcpu_mem_zalloc(size_t size) |
290 | { | 290 | { |
291 | if (WARN_ON_ONCE(!slab_is_available())) | 291 | if (WARN_ON_ONCE(!slab_is_available())) |
292 | return NULL; | 292 | return NULL; |
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size) | |||
302 | * @ptr: memory to free | 302 | * @ptr: memory to free |
303 | * @size: size of the area | 303 | * @size: size of the area |
304 | * | 304 | * |
305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). | 305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). |
306 | */ | 306 | */ |
307 | static void pcpu_mem_free(void *ptr, size_t size) | 307 | static void pcpu_mem_free(void *ptr, size_t size) |
308 | { | 308 | { |
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | |||
384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | 384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); |
385 | unsigned long flags; | 385 | unsigned long flags; |
386 | 386 | ||
387 | new = pcpu_mem_alloc(new_size); | 387 | new = pcpu_mem_zalloc(new_size); |
388 | if (!new) | 388 | if (!new) |
389 | return -ENOMEM; | 389 | return -ENOMEM; |
390 | 390 | ||
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) | |||
604 | { | 604 | { |
605 | struct pcpu_chunk *chunk; | 605 | struct pcpu_chunk *chunk; |
606 | 606 | ||
607 | chunk = pcpu_mem_alloc(pcpu_chunk_struct_size); | 607 | chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size); |
608 | if (!chunk) | 608 | if (!chunk) |
609 | return NULL; | 609 | return NULL; |
610 | 610 | ||
611 | chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); | 611 | chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * |
612 | sizeof(chunk->map[0])); | ||
612 | if (!chunk->map) { | 613 | if (!chunk->map) { |
613 | kfree(chunk); | 614 | kfree(chunk); |
614 | return NULL; | 615 | return NULL; |
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr) | |||
977 | * address. The caller is responsible for ensuring @addr stays valid | 978 | * address. The caller is responsible for ensuring @addr stays valid |
978 | * until this function finishes. | 979 | * until this function finishes. |
979 | * | 980 | * |
981 | * percpu allocator has special setup for the first chunk, which currently | ||
982 | * supports either embedding in linear address space or vmalloc mapping, | ||
983 | * and, from the second one, the backing allocator (currently either vm or | ||
984 | * km) provides translation. | ||
985 | * | ||
986 | * The addr can be translated simply without checking if it falls into the | ||
987 | * first chunk. But the current code reflects better how percpu allocator | ||
988 | * actually works, and the verification can discover both bugs in percpu | ||
989 | * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current | ||
990 | * code. | ||
991 | * | ||
980 | * RETURNS: | 992 | * RETURNS: |
981 | * The physical address for @addr. | 993 | * The physical address for @addr. |
982 | */ | 994 | */ |
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) | |||
984 | { | 996 | { |
985 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | 997 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
986 | bool in_first_chunk = false; | 998 | bool in_first_chunk = false; |
987 | unsigned long first_start, first_end; | 999 | unsigned long first_low, first_high; |
988 | unsigned int cpu; | 1000 | unsigned int cpu; |
989 | 1001 | ||
990 | /* | 1002 | /* |
991 | * The following test on first_start/end isn't strictly | 1003 | * The following test on first_low/high isn't strictly |
992 | * necessary but will speed up lookups of addresses which | 1004 | * necessary but will speed up lookups of addresses which |
993 | * aren't in the first chunk. | 1005 | * aren't in the first chunk. |
994 | */ | 1006 | */ |
995 | first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); | 1007 | first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); |
996 | first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, | 1008 | first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, |
997 | pcpu_unit_pages); | 1009 | pcpu_unit_pages); |
998 | if ((unsigned long)addr >= first_start && | 1010 | if ((unsigned long)addr >= first_low && |
999 | (unsigned long)addr < first_end) { | 1011 | (unsigned long)addr < first_high) { |
1000 | for_each_possible_cpu(cpu) { | 1012 | for_each_possible_cpu(cpu) { |
1001 | void *start = per_cpu_ptr(base, cpu); | 1013 | void *start = per_cpu_ptr(base, cpu); |
1002 | 1014 | ||
@@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1233 | 1245 | ||
1234 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | 1246 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
1235 | unit_map[cpu] = UINT_MAX; | 1247 | unit_map[cpu] = UINT_MAX; |
1236 | pcpu_first_unit_cpu = NR_CPUS; | 1248 | |
1249 | pcpu_low_unit_cpu = NR_CPUS; | ||
1250 | pcpu_high_unit_cpu = NR_CPUS; | ||
1237 | 1251 | ||
1238 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | 1252 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
1239 | const struct pcpu_group_info *gi = &ai->groups[group]; | 1253 | const struct pcpu_group_info *gi = &ai->groups[group]; |
@@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1253 | unit_map[cpu] = unit + i; | 1267 | unit_map[cpu] = unit + i; |
1254 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; | 1268 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; |
1255 | 1269 | ||
1256 | if (pcpu_first_unit_cpu == NR_CPUS) | 1270 | /* determine low/high unit_cpu */ |
1257 | pcpu_first_unit_cpu = cpu; | 1271 | if (pcpu_low_unit_cpu == NR_CPUS || |
1258 | pcpu_last_unit_cpu = cpu; | 1272 | unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) |
1273 | pcpu_low_unit_cpu = cpu; | ||
1274 | if (pcpu_high_unit_cpu == NR_CPUS || | ||
1275 | unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) | ||
1276 | pcpu_high_unit_cpu = cpu; | ||
1259 | } | 1277 | } |
1260 | } | 1278 | } |
1261 | pcpu_nr_units = unit; | 1279 | pcpu_nr_units = unit; |
@@ -1889,7 +1907,7 @@ void __init percpu_init_late(void) | |||
1889 | 1907 | ||
1890 | BUILD_BUG_ON(size > PAGE_SIZE); | 1908 | BUILD_BUG_ON(size > PAGE_SIZE); |
1891 | 1909 | ||
1892 | map = pcpu_mem_alloc(size); | 1910 | map = pcpu_mem_zalloc(size); |
1893 | BUG_ON(!map); | 1911 | BUG_ON(!map); |
1894 | 1912 | ||
1895 | spin_lock_irqsave(&pcpu_lock, flags); | 1913 | spin_lock_irqsave(&pcpu_lock, flags); |
@@ -597,6 +597,7 @@ static enum { | |||
597 | PARTIAL_AC, | 597 | PARTIAL_AC, |
598 | PARTIAL_L3, | 598 | PARTIAL_L3, |
599 | EARLY, | 599 | EARLY, |
600 | LATE, | ||
600 | FULL | 601 | FULL |
601 | } g_cpucache_up; | 602 | } g_cpucache_up; |
602 | 603 | ||
@@ -673,7 +674,7 @@ static void init_node_lock_keys(int q) | |||
673 | { | 674 | { |
674 | struct cache_sizes *s = malloc_sizes; | 675 | struct cache_sizes *s = malloc_sizes; |
675 | 676 | ||
676 | if (g_cpucache_up != FULL) | 677 | if (g_cpucache_up < LATE) |
677 | return; | 678 | return; |
678 | 679 | ||
679 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { | 680 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { |
@@ -1680,6 +1681,8 @@ void __init kmem_cache_init_late(void) | |||
1680 | { | 1681 | { |
1681 | struct kmem_cache *cachep; | 1682 | struct kmem_cache *cachep; |
1682 | 1683 | ||
1684 | g_cpucache_up = LATE; | ||
1685 | |||
1683 | /* Annotate slab for lockdep -- annotate the malloc caches */ | 1686 | /* Annotate slab for lockdep -- annotate the malloc caches */ |
1684 | init_lock_keys(); | 1687 | init_lock_keys(); |
1685 | 1688 | ||
@@ -3043,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s, | |||
3043 | * per node list when we run out of per cpu objects. We only fetch 50% | 3043 | * per node list when we run out of per cpu objects. We only fetch 50% |
3044 | * to keep some capacity around for frees. | 3044 | * to keep some capacity around for frees. |
3045 | */ | 3045 | */ |
3046 | if (s->size >= PAGE_SIZE) | 3046 | if (kmem_cache_debug(s)) |
3047 | s->cpu_partial = 0; | ||
3048 | else if (s->size >= PAGE_SIZE) | ||
3047 | s->cpu_partial = 2; | 3049 | s->cpu_partial = 2; |
3048 | else if (s->size >= 1024) | 3050 | else if (s->size >= 1024) |
3049 | s->cpu_partial = 6; | 3051 | s->cpu_partial = 6; |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b669aa6f6caf..1d8b32f07139 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, | |||
1633 | goto fail; | 1633 | goto fail; |
1634 | 1634 | ||
1635 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); | 1635 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); |
1636 | if (!addr) | ||
1637 | return NULL; | ||
1636 | 1638 | ||
1637 | /* | 1639 | /* |
1638 | * In this function, newly allocated vm_struct is not added | 1640 | * In this function, newly allocated vm_struct is not added |
@@ -2141,23 +2143,30 @@ void __attribute__((weak)) vmalloc_sync_all(void) | |||
2141 | 2143 | ||
2142 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) | 2144 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) |
2143 | { | 2145 | { |
2144 | /* apply_to_page_range() does all the hard work. */ | 2146 | pte_t ***p = data; |
2147 | |||
2148 | if (p) { | ||
2149 | *(*p) = pte; | ||
2150 | (*p)++; | ||
2151 | } | ||
2145 | return 0; | 2152 | return 0; |
2146 | } | 2153 | } |
2147 | 2154 | ||
2148 | /** | 2155 | /** |
2149 | * alloc_vm_area - allocate a range of kernel address space | 2156 | * alloc_vm_area - allocate a range of kernel address space |
2150 | * @size: size of the area | 2157 | * @size: size of the area |
2158 | * @ptes: returns the PTEs for the address space | ||
2151 | * | 2159 | * |
2152 | * Returns: NULL on failure, vm_struct on success | 2160 | * Returns: NULL on failure, vm_struct on success |
2153 | * | 2161 | * |
2154 | * This function reserves a range of kernel address space, and | 2162 | * This function reserves a range of kernel address space, and |
2155 | * allocates pagetables to map that range. No actual mappings | 2163 | * allocates pagetables to map that range. No actual mappings |
2156 | * are created. If the kernel address space is not shared | 2164 | * are created. |
2157 | * between processes, it syncs the pagetable across all | 2165 | * |
2158 | * processes. | 2166 | * If @ptes is non-NULL, pointers to the PTEs (in init_mm) |
2167 | * allocated for the VM area are returned. | ||
2159 | */ | 2168 | */ |
2160 | struct vm_struct *alloc_vm_area(size_t size) | 2169 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) |
2161 | { | 2170 | { |
2162 | struct vm_struct *area; | 2171 | struct vm_struct *area; |
2163 | 2172 | ||
@@ -2171,19 +2180,11 @@ struct vm_struct *alloc_vm_area(size_t size) | |||
2171 | * of kernel virtual address space and mapped into init_mm. | 2180 | * of kernel virtual address space and mapped into init_mm. |
2172 | */ | 2181 | */ |
2173 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, | 2182 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, |
2174 | area->size, f, NULL)) { | 2183 | size, f, ptes ? &ptes : NULL)) { |
2175 | free_vm_area(area); | 2184 | free_vm_area(area); |
2176 | return NULL; | 2185 | return NULL; |
2177 | } | 2186 | } |
2178 | 2187 | ||
2179 | /* | ||
2180 | * If the allocated address space is passed to a hypercall | ||
2181 | * before being used then we cannot rely on a page fault to | ||
2182 | * trigger an update of the page tables. So sync all the page | ||
2183 | * tables here. | ||
2184 | */ | ||
2185 | vmalloc_sync_all(); | ||
2186 | |||
2187 | return area; | 2188 | return area; |
2188 | } | 2189 | } |
2189 | EXPORT_SYMBOL_GPL(alloc_vm_area); | 2190 | EXPORT_SYMBOL_GPL(alloc_vm_area); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a1893c050795..f54a05b7a61d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone, | |||
183 | */ | 183 | */ |
184 | void register_shrinker(struct shrinker *shrinker) | 184 | void register_shrinker(struct shrinker *shrinker) |
185 | { | 185 | { |
186 | shrinker->nr = 0; | 186 | atomic_long_set(&shrinker->nr_in_batch, 0); |
187 | down_write(&shrinker_rwsem); | 187 | down_write(&shrinker_rwsem); |
188 | list_add_tail(&shrinker->list, &shrinker_list); | 188 | list_add_tail(&shrinker->list, &shrinker_list); |
189 | up_write(&shrinker_rwsem); | 189 | up_write(&shrinker_rwsem); |
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
247 | 247 | ||
248 | list_for_each_entry(shrinker, &shrinker_list, list) { | 248 | list_for_each_entry(shrinker, &shrinker_list, list) { |
249 | unsigned long long delta; | 249 | unsigned long long delta; |
250 | unsigned long total_scan; | 250 | long total_scan; |
251 | unsigned long max_pass; | 251 | long max_pass; |
252 | int shrink_ret = 0; | 252 | int shrink_ret = 0; |
253 | long nr; | 253 | long nr; |
254 | long new_nr; | 254 | long new_nr; |
255 | long batch_size = shrinker->batch ? shrinker->batch | 255 | long batch_size = shrinker->batch ? shrinker->batch |
256 | : SHRINK_BATCH; | 256 | : SHRINK_BATCH; |
257 | 257 | ||
258 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
259 | if (max_pass <= 0) | ||
260 | continue; | ||
261 | |||
258 | /* | 262 | /* |
259 | * copy the current shrinker scan count into a local variable | 263 | * copy the current shrinker scan count into a local variable |
260 | * and zero it so that other concurrent shrinker invocations | 264 | * and zero it so that other concurrent shrinker invocations |
261 | * don't also do this scanning work. | 265 | * don't also do this scanning work. |
262 | */ | 266 | */ |
263 | do { | 267 | nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); |
264 | nr = shrinker->nr; | ||
265 | } while (cmpxchg(&shrinker->nr, nr, 0) != nr); | ||
266 | 268 | ||
267 | total_scan = nr; | 269 | total_scan = nr; |
268 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
269 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | 270 | delta = (4 * nr_pages_scanned) / shrinker->seeks; |
270 | delta *= max_pass; | 271 | delta *= max_pass; |
271 | do_div(delta, lru_pages + 1); | 272 | do_div(delta, lru_pages + 1); |
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
325 | * manner that handles concurrent updates. If we exhausted the | 326 | * manner that handles concurrent updates. If we exhausted the |
326 | * scan, there is no need to do an update. | 327 | * scan, there is no need to do an update. |
327 | */ | 328 | */ |
328 | do { | 329 | if (total_scan > 0) |
329 | nr = shrinker->nr; | 330 | new_nr = atomic_long_add_return(total_scan, |
330 | new_nr = total_scan + nr; | 331 | &shrinker->nr_in_batch); |
331 | if (total_scan <= 0) | 332 | else |
332 | break; | 333 | new_nr = atomic_long_read(&shrinker->nr_in_batch); |
333 | } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr); | ||
334 | 334 | ||
335 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | 335 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); |
336 | } | 336 | } |