diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/backing-dev.c | 8 | ||||
| -rw-r--r-- | mm/huge_memory.c | 16 | ||||
| -rw-r--r-- | mm/hugetlb.c | 3 | ||||
| -rw-r--r-- | mm/migrate.c | 2 | ||||
| -rw-r--r-- | mm/nommu.c | 2 | ||||
| -rw-r--r-- | mm/oom_kill.c | 5 | ||||
| -rw-r--r-- | mm/page-writeback.c | 23 | ||||
| -rw-r--r-- | mm/page_alloc.c | 10 | ||||
| -rw-r--r-- | mm/percpu-vm.c | 17 | ||||
| -rw-r--r-- | mm/percpu.c | 62 | ||||
| -rw-r--r-- | mm/slab.c | 5 | ||||
| -rw-r--r-- | mm/slub.c | 42 | ||||
| -rw-r--r-- | mm/vmalloc.c | 29 | ||||
| -rw-r--r-- | mm/vmscan.c | 26 |
14 files changed, 143 insertions, 107 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a0860640378d..71034f41a2ba 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
| @@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
| 724 | 724 | ||
| 725 | bdi_unregister(bdi); | 725 | bdi_unregister(bdi); |
| 726 | 726 | ||
| 727 | /* | ||
| 728 | * If bdi_unregister() had already been called earlier, the | ||
| 729 | * wakeup_timer could still be armed because bdi_prune_sb() | ||
| 730 | * can race with the bdi_wakeup_thread_delayed() calls from | ||
| 731 | * __mark_inode_dirty(). | ||
| 732 | */ | ||
| 733 | del_timer_sync(&bdi->wb.wakeup_timer); | ||
| 734 | |||
| 727 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 735 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
| 728 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 736 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
| 729 | 737 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4298abaae153..36b3d988b4ef 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage) | |||
| 2259 | 2259 | ||
| 2260 | static void khugepaged_alloc_sleep(void) | 2260 | static void khugepaged_alloc_sleep(void) |
| 2261 | { | 2261 | { |
| 2262 | DEFINE_WAIT(wait); | 2262 | wait_event_freezable_timeout(khugepaged_wait, false, |
| 2263 | add_wait_queue(&khugepaged_wait, &wait); | 2263 | msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); |
| 2264 | schedule_timeout_interruptible( | ||
| 2265 | msecs_to_jiffies( | ||
| 2266 | khugepaged_alloc_sleep_millisecs)); | ||
| 2267 | remove_wait_queue(&khugepaged_wait, &wait); | ||
| 2268 | } | 2264 | } |
| 2269 | 2265 | ||
| 2270 | #ifndef CONFIG_NUMA | 2266 | #ifndef CONFIG_NUMA |
| @@ -2313,14 +2309,10 @@ static void khugepaged_loop(void) | |||
| 2313 | if (unlikely(kthread_should_stop())) | 2309 | if (unlikely(kthread_should_stop())) |
| 2314 | break; | 2310 | break; |
| 2315 | if (khugepaged_has_work()) { | 2311 | if (khugepaged_has_work()) { |
| 2316 | DEFINE_WAIT(wait); | ||
| 2317 | if (!khugepaged_scan_sleep_millisecs) | 2312 | if (!khugepaged_scan_sleep_millisecs) |
| 2318 | continue; | 2313 | continue; |
| 2319 | add_wait_queue(&khugepaged_wait, &wait); | 2314 | wait_event_freezable_timeout(khugepaged_wait, false, |
| 2320 | schedule_timeout_interruptible( | 2315 | msecs_to_jiffies(khugepaged_scan_sleep_millisecs)); |
| 2321 | msecs_to_jiffies( | ||
| 2322 | khugepaged_scan_sleep_millisecs)); | ||
| 2323 | remove_wait_queue(&khugepaged_wait, &wait); | ||
| 2324 | } else if (khugepaged_enabled()) | 2316 | } else if (khugepaged_enabled()) |
| 2325 | wait_event_freezable(khugepaged_wait, | 2317 | wait_event_freezable(khugepaged_wait, |
| 2326 | khugepaged_wait_event()); | 2318 | khugepaged_wait_event()); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dae27ba3be2c..73f17c0293c0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) | |||
| 576 | __SetPageHead(page); | 576 | __SetPageHead(page); |
| 577 | for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { | 577 | for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { |
| 578 | __SetPageTail(p); | 578 | __SetPageTail(p); |
| 579 | set_page_count(p, 0); | ||
| 579 | p->first_page = page; | 580 | p->first_page = page; |
| 580 | } | 581 | } |
| 581 | } | 582 | } |
| @@ -2422,6 +2423,8 @@ retry_avoidcopy: | |||
| 2422 | * anon_vma prepared. | 2423 | * anon_vma prepared. |
| 2423 | */ | 2424 | */ |
| 2424 | if (unlikely(anon_vma_prepare(vma))) { | 2425 | if (unlikely(anon_vma_prepare(vma))) { |
| 2426 | page_cache_release(new_page); | ||
| 2427 | page_cache_release(old_page); | ||
| 2425 | /* Caller expects lock to be held */ | 2428 | /* Caller expects lock to be held */ |
| 2426 | spin_lock(&mm->page_table_lock); | 2429 | spin_lock(&mm->page_table_lock); |
| 2427 | return VM_FAULT_OOM; | 2430 | return VM_FAULT_OOM; |
diff --git a/mm/migrate.c b/mm/migrate.c index 578e29174fa6..177aca424a06 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, | |||
| 871 | 871 | ||
| 872 | if (anon_vma) | 872 | if (anon_vma) |
| 873 | put_anon_vma(anon_vma); | 873 | put_anon_vma(anon_vma); |
| 874 | out: | ||
| 875 | unlock_page(hpage); | 874 | unlock_page(hpage); |
| 876 | 875 | ||
| 876 | out: | ||
| 877 | if (rc != -EAGAIN) { | 877 | if (rc != -EAGAIN) { |
| 878 | list_del(&hpage->lru); | 878 | list_del(&hpage->lru); |
| 879 | put_page(hpage); | 879 | put_page(hpage); |
diff --git a/mm/nommu.c b/mm/nommu.c index 73419c55eda6..b982290fd962 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
| @@ -454,7 +454,7 @@ void __attribute__((weak)) vmalloc_sync_all(void) | |||
| 454 | * between processes, it syncs the pagetable across all | 454 | * between processes, it syncs the pagetable across all |
| 455 | * processes. | 455 | * processes. |
| 456 | */ | 456 | */ |
| 457 | struct vm_struct *alloc_vm_area(size_t size) | 457 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) |
| 458 | { | 458 | { |
| 459 | BUG(); | 459 | BUG(); |
| 460 | return NULL; | 460 | return NULL; |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 471dedb463ab..76f2c5ae908e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -185,6 +185,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, | |||
| 185 | if (!p) | 185 | if (!p) |
| 186 | return 0; | 186 | return 0; |
| 187 | 187 | ||
| 188 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
| 189 | task_unlock(p); | ||
| 190 | return 0; | ||
| 191 | } | ||
| 192 | |||
| 188 | /* | 193 | /* |
| 189 | * The memory controller may have a limit of 0 bytes, so avoid a divide | 194 | * The memory controller may have a limit of 0 bytes, so avoid a divide |
| 190 | * by zero, if necessary. | 195 | * by zero, if necessary. |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a3278f005230..71252486bc6f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -128,7 +128,6 @@ unsigned long global_dirty_limit; | |||
| 128 | * | 128 | * |
| 129 | */ | 129 | */ |
| 130 | static struct prop_descriptor vm_completions; | 130 | static struct prop_descriptor vm_completions; |
| 131 | static struct prop_descriptor vm_dirties; | ||
| 132 | 131 | ||
| 133 | /* | 132 | /* |
| 134 | * couple the period to the dirty_ratio: | 133 | * couple the period to the dirty_ratio: |
| @@ -154,7 +153,6 @@ static void update_completion_period(void) | |||
| 154 | { | 153 | { |
| 155 | int shift = calc_period_shift(); | 154 | int shift = calc_period_shift(); |
| 156 | prop_change_shift(&vm_completions, shift); | 155 | prop_change_shift(&vm_completions, shift); |
| 157 | prop_change_shift(&vm_dirties, shift); | ||
| 158 | 156 | ||
| 159 | writeback_set_ratelimit(); | 157 | writeback_set_ratelimit(); |
| 160 | } | 158 | } |
| @@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) | |||
| 235 | } | 233 | } |
| 236 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); | 234 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); |
| 237 | 235 | ||
| 238 | void task_dirty_inc(struct task_struct *tsk) | ||
| 239 | { | ||
| 240 | prop_inc_single(&vm_dirties, &tsk->dirties); | ||
| 241 | } | ||
| 242 | |||
| 243 | /* | 236 | /* |
| 244 | * Obtain an accurate fraction of the BDI's portion. | 237 | * Obtain an accurate fraction of the BDI's portion. |
| 245 | */ | 238 | */ |
| @@ -1133,17 +1126,17 @@ pause: | |||
| 1133 | pages_dirtied, | 1126 | pages_dirtied, |
| 1134 | pause, | 1127 | pause, |
| 1135 | start_time); | 1128 | start_time); |
| 1136 | __set_current_state(TASK_UNINTERRUPTIBLE); | 1129 | __set_current_state(TASK_KILLABLE); |
| 1137 | io_schedule_timeout(pause); | 1130 | io_schedule_timeout(pause); |
| 1138 | 1131 | ||
| 1139 | dirty_thresh = hard_dirty_limit(dirty_thresh); | ||
| 1140 | /* | 1132 | /* |
| 1141 | * max-pause area. If dirty exceeded but still within this | 1133 | * This is typically equal to (nr_dirty < dirty_thresh) and can |
| 1142 | * area, no need to sleep for more than 200ms: (a) 8 pages per | 1134 | * also keep "1000+ dd on a slow USB stick" under control. |
| 1143 | * 200ms is typically more than enough to curb heavy dirtiers; | ||
| 1144 | * (b) the pause time limit makes the dirtiers more responsive. | ||
| 1145 | */ | 1135 | */ |
| 1146 | if (nr_dirty < dirty_thresh) | 1136 | if (task_ratelimit) |
| 1137 | break; | ||
| 1138 | |||
| 1139 | if (fatal_signal_pending(current)) | ||
| 1147 | break; | 1140 | break; |
| 1148 | } | 1141 | } |
| 1149 | 1142 | ||
| @@ -1395,7 +1388,6 @@ void __init page_writeback_init(void) | |||
| 1395 | 1388 | ||
| 1396 | shift = calc_period_shift(); | 1389 | shift = calc_period_shift(); |
| 1397 | prop_descriptor_init(&vm_completions, shift); | 1390 | prop_descriptor_init(&vm_completions, shift); |
| 1398 | prop_descriptor_init(&vm_dirties, shift); | ||
| 1399 | } | 1391 | } |
| 1400 | 1392 | ||
| 1401 | /** | 1393 | /** |
| @@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
| 1724 | __inc_zone_page_state(page, NR_DIRTIED); | 1716 | __inc_zone_page_state(page, NR_DIRTIED); |
| 1725 | __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); | 1717 | __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); |
| 1726 | __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); | 1718 | __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); |
| 1727 | task_dirty_inc(current); | ||
| 1728 | task_io_account_write(PAGE_CACHE_SIZE); | 1719 | task_io_account_write(PAGE_CACHE_SIZE); |
| 1729 | } | 1720 | } |
| 1730 | } | 1721 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9dd443d89d8b..2b8ba3aebf6e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order) | |||
| 356 | __SetPageHead(page); | 356 | __SetPageHead(page); |
| 357 | for (i = 1; i < nr_pages; i++) { | 357 | for (i = 1; i < nr_pages; i++) { |
| 358 | struct page *p = page + i; | 358 | struct page *p = page + i; |
| 359 | |||
| 360 | __SetPageTail(p); | 359 | __SetPageTail(p); |
| 360 | set_page_count(p, 0); | ||
| 361 | p->first_page = page; | 361 | p->first_page = page; |
| 362 | } | 362 | } |
| 363 | } | 363 | } |
| @@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
| 3377 | unsigned long block_migratetype; | 3377 | unsigned long block_migratetype; |
| 3378 | int reserve; | 3378 | int reserve; |
| 3379 | 3379 | ||
| 3380 | /* Get the start pfn, end pfn and the number of blocks to reserve */ | 3380 | /* |
| 3381 | * Get the start pfn, end pfn and the number of blocks to reserve. | ||
| 3382 | * We have to be careful to be aligned to pageblock_nr_pages to | ||
| 3383 | * make sure that we always check pfn_valid for the first page in | ||
| 3384 | * the block. | ||
| 3385 | */ | ||
| 3381 | start_pfn = zone->zone_start_pfn; | 3386 | start_pfn = zone->zone_start_pfn; |
| 3382 | end_pfn = start_pfn + zone->spanned_pages; | 3387 | end_pfn = start_pfn + zone->spanned_pages; |
| 3388 | start_pfn = roundup(start_pfn, pageblock_nr_pages); | ||
| 3383 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> | 3389 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> |
| 3384 | pageblock_order; | 3390 | pageblock_order; |
| 3385 | 3391 | ||
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index ea534960a04b..12a48a88c0d8 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c | |||
| @@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | |||
| 50 | 50 | ||
| 51 | if (!pages || !bitmap) { | 51 | if (!pages || !bitmap) { |
| 52 | if (may_alloc && !pages) | 52 | if (may_alloc && !pages) |
| 53 | pages = pcpu_mem_alloc(pages_size); | 53 | pages = pcpu_mem_zalloc(pages_size); |
| 54 | if (may_alloc && !bitmap) | 54 | if (may_alloc && !bitmap) |
| 55 | bitmap = pcpu_mem_alloc(bitmap_size); | 55 | bitmap = pcpu_mem_zalloc(bitmap_size); |
| 56 | if (!pages || !bitmap) | 56 | if (!pages || !bitmap) |
| 57 | return NULL; | 57 | return NULL; |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | memset(pages, 0, pages_size); | ||
| 61 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); | 60 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); |
| 62 | 61 | ||
| 63 | *bitmapp = bitmap; | 62 | *bitmapp = bitmap; |
| @@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | |||
| 143 | int page_start, int page_end) | 142 | int page_start, int page_end) |
| 144 | { | 143 | { |
| 145 | flush_cache_vunmap( | 144 | flush_cache_vunmap( |
| 146 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 145 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
| 147 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 146 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
| 148 | } | 147 | } |
| 149 | 148 | ||
| 150 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | 149 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) |
| @@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | |||
| 206 | int page_start, int page_end) | 205 | int page_start, int page_end) |
| 207 | { | 206 | { |
| 208 | flush_tlb_kernel_range( | 207 | flush_tlb_kernel_range( |
| 209 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 208 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
| 210 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 209 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
| 211 | } | 210 | } |
| 212 | 211 | ||
| 213 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | 212 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, |
| @@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | |||
| 284 | int page_start, int page_end) | 283 | int page_start, int page_end) |
| 285 | { | 284 | { |
| 286 | flush_cache_vmap( | 285 | flush_cache_vmap( |
| 287 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | 286 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
| 288 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | 287 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); |
| 289 | } | 288 | } |
| 290 | 289 | ||
| 291 | /** | 290 | /** |
diff --git a/mm/percpu.c b/mm/percpu.c index bf80e55dbed7..3bb810a72006 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly; | |||
| 116 | static int pcpu_nr_slots __read_mostly; | 116 | static int pcpu_nr_slots __read_mostly; |
| 117 | static size_t pcpu_chunk_struct_size __read_mostly; | 117 | static size_t pcpu_chunk_struct_size __read_mostly; |
| 118 | 118 | ||
| 119 | /* cpus with the lowest and highest unit numbers */ | 119 | /* cpus with the lowest and highest unit addresses */ |
| 120 | static unsigned int pcpu_first_unit_cpu __read_mostly; | 120 | static unsigned int pcpu_low_unit_cpu __read_mostly; |
| 121 | static unsigned int pcpu_last_unit_cpu __read_mostly; | 121 | static unsigned int pcpu_high_unit_cpu __read_mostly; |
| 122 | 122 | ||
| 123 | /* the address of the first chunk which starts with the kernel static area */ | 123 | /* the address of the first chunk which starts with the kernel static area */ |
| 124 | void *pcpu_base_addr __read_mostly; | 124 | void *pcpu_base_addr __read_mostly; |
| @@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
| 273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) | 273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) |
| 274 | 274 | ||
| 275 | /** | 275 | /** |
| 276 | * pcpu_mem_alloc - allocate memory | 276 | * pcpu_mem_zalloc - allocate memory |
| 277 | * @size: bytes to allocate | 277 | * @size: bytes to allocate |
| 278 | * | 278 | * |
| 279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, | 279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, |
| 280 | * kzalloc() is used; otherwise, vmalloc() is used. The returned | 280 | * kzalloc() is used; otherwise, vzalloc() is used. The returned |
| 281 | * memory is always zeroed. | 281 | * memory is always zeroed. |
| 282 | * | 282 | * |
| 283 | * CONTEXT: | 283 | * CONTEXT: |
| @@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
| 286 | * RETURNS: | 286 | * RETURNS: |
| 287 | * Pointer to the allocated area on success, NULL on failure. | 287 | * Pointer to the allocated area on success, NULL on failure. |
| 288 | */ | 288 | */ |
| 289 | static void *pcpu_mem_alloc(size_t size) | 289 | static void *pcpu_mem_zalloc(size_t size) |
| 290 | { | 290 | { |
| 291 | if (WARN_ON_ONCE(!slab_is_available())) | 291 | if (WARN_ON_ONCE(!slab_is_available())) |
| 292 | return NULL; | 292 | return NULL; |
| @@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size) | |||
| 302 | * @ptr: memory to free | 302 | * @ptr: memory to free |
| 303 | * @size: size of the area | 303 | * @size: size of the area |
| 304 | * | 304 | * |
| 305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). | 305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). |
| 306 | */ | 306 | */ |
| 307 | static void pcpu_mem_free(void *ptr, size_t size) | 307 | static void pcpu_mem_free(void *ptr, size_t size) |
| 308 | { | 308 | { |
| @@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | |||
| 384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | 384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); |
| 385 | unsigned long flags; | 385 | unsigned long flags; |
| 386 | 386 | ||
| 387 | new = pcpu_mem_alloc(new_size); | 387 | new = pcpu_mem_zalloc(new_size); |
| 388 | if (!new) | 388 | if (!new) |
| 389 | return -ENOMEM; | 389 | return -ENOMEM; |
| 390 | 390 | ||
| @@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) | |||
| 604 | { | 604 | { |
| 605 | struct pcpu_chunk *chunk; | 605 | struct pcpu_chunk *chunk; |
| 606 | 606 | ||
| 607 | chunk = pcpu_mem_alloc(pcpu_chunk_struct_size); | 607 | chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size); |
| 608 | if (!chunk) | 608 | if (!chunk) |
| 609 | return NULL; | 609 | return NULL; |
| 610 | 610 | ||
| 611 | chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); | 611 | chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * |
| 612 | sizeof(chunk->map[0])); | ||
| 612 | if (!chunk->map) { | 613 | if (!chunk->map) { |
| 613 | kfree(chunk); | 614 | kfree(chunk); |
| 614 | return NULL; | 615 | return NULL; |
| @@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr) | |||
| 977 | * address. The caller is responsible for ensuring @addr stays valid | 978 | * address. The caller is responsible for ensuring @addr stays valid |
| 978 | * until this function finishes. | 979 | * until this function finishes. |
| 979 | * | 980 | * |
| 981 | * percpu allocator has special setup for the first chunk, which currently | ||
| 982 | * supports either embedding in linear address space or vmalloc mapping, | ||
| 983 | * and, from the second one, the backing allocator (currently either vm or | ||
| 984 | * km) provides translation. | ||
| 985 | * | ||
| 986 | * The addr can be translated simply without checking if it falls into the | ||
| 987 | * first chunk. But the current code reflects better how percpu allocator | ||
| 988 | * actually works, and the verification can discover both bugs in percpu | ||
| 989 | * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current | ||
| 990 | * code. | ||
| 991 | * | ||
| 980 | * RETURNS: | 992 | * RETURNS: |
| 981 | * The physical address for @addr. | 993 | * The physical address for @addr. |
| 982 | */ | 994 | */ |
| @@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) | |||
| 984 | { | 996 | { |
| 985 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | 997 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
| 986 | bool in_first_chunk = false; | 998 | bool in_first_chunk = false; |
| 987 | unsigned long first_start, first_end; | 999 | unsigned long first_low, first_high; |
| 988 | unsigned int cpu; | 1000 | unsigned int cpu; |
| 989 | 1001 | ||
| 990 | /* | 1002 | /* |
| 991 | * The following test on first_start/end isn't strictly | 1003 | * The following test on first_low/high isn't strictly |
| 992 | * necessary but will speed up lookups of addresses which | 1004 | * necessary but will speed up lookups of addresses which |
| 993 | * aren't in the first chunk. | 1005 | * aren't in the first chunk. |
| 994 | */ | 1006 | */ |
| 995 | first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); | 1007 | first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); |
| 996 | first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, | 1008 | first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, |
| 997 | pcpu_unit_pages); | 1009 | pcpu_unit_pages); |
| 998 | if ((unsigned long)addr >= first_start && | 1010 | if ((unsigned long)addr >= first_low && |
| 999 | (unsigned long)addr < first_end) { | 1011 | (unsigned long)addr < first_high) { |
| 1000 | for_each_possible_cpu(cpu) { | 1012 | for_each_possible_cpu(cpu) { |
| 1001 | void *start = per_cpu_ptr(base, cpu); | 1013 | void *start = per_cpu_ptr(base, cpu); |
| 1002 | 1014 | ||
| @@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1233 | 1245 | ||
| 1234 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | 1246 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
| 1235 | unit_map[cpu] = UINT_MAX; | 1247 | unit_map[cpu] = UINT_MAX; |
| 1236 | pcpu_first_unit_cpu = NR_CPUS; | 1248 | |
| 1249 | pcpu_low_unit_cpu = NR_CPUS; | ||
| 1250 | pcpu_high_unit_cpu = NR_CPUS; | ||
| 1237 | 1251 | ||
| 1238 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | 1252 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
| 1239 | const struct pcpu_group_info *gi = &ai->groups[group]; | 1253 | const struct pcpu_group_info *gi = &ai->groups[group]; |
| @@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1253 | unit_map[cpu] = unit + i; | 1267 | unit_map[cpu] = unit + i; |
| 1254 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; | 1268 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; |
| 1255 | 1269 | ||
| 1256 | if (pcpu_first_unit_cpu == NR_CPUS) | 1270 | /* determine low/high unit_cpu */ |
| 1257 | pcpu_first_unit_cpu = cpu; | 1271 | if (pcpu_low_unit_cpu == NR_CPUS || |
| 1258 | pcpu_last_unit_cpu = cpu; | 1272 | unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) |
| 1273 | pcpu_low_unit_cpu = cpu; | ||
| 1274 | if (pcpu_high_unit_cpu == NR_CPUS || | ||
| 1275 | unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) | ||
| 1276 | pcpu_high_unit_cpu = cpu; | ||
| 1259 | } | 1277 | } |
| 1260 | } | 1278 | } |
| 1261 | pcpu_nr_units = unit; | 1279 | pcpu_nr_units = unit; |
| @@ -1889,7 +1907,7 @@ void __init percpu_init_late(void) | |||
| 1889 | 1907 | ||
| 1890 | BUILD_BUG_ON(size > PAGE_SIZE); | 1908 | BUILD_BUG_ON(size > PAGE_SIZE); |
| 1891 | 1909 | ||
| 1892 | map = pcpu_mem_alloc(size); | 1910 | map = pcpu_mem_zalloc(size); |
| 1893 | BUG_ON(!map); | 1911 | BUG_ON(!map); |
| 1894 | 1912 | ||
| 1895 | spin_lock_irqsave(&pcpu_lock, flags); | 1913 | spin_lock_irqsave(&pcpu_lock, flags); |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
| @@ -595,6 +595,7 @@ static enum { | |||
| 595 | PARTIAL_AC, | 595 | PARTIAL_AC, |
| 596 | PARTIAL_L3, | 596 | PARTIAL_L3, |
| 597 | EARLY, | 597 | EARLY, |
| 598 | LATE, | ||
| 598 | FULL | 599 | FULL |
| 599 | } g_cpucache_up; | 600 | } g_cpucache_up; |
| 600 | 601 | ||
| @@ -671,7 +672,7 @@ static void init_node_lock_keys(int q) | |||
| 671 | { | 672 | { |
| 672 | struct cache_sizes *s = malloc_sizes; | 673 | struct cache_sizes *s = malloc_sizes; |
| 673 | 674 | ||
| 674 | if (g_cpucache_up != FULL) | 675 | if (g_cpucache_up < LATE) |
| 675 | return; | 676 | return; |
| 676 | 677 | ||
| 677 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { | 678 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { |
| @@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void) | |||
| 1666 | { | 1667 | { |
| 1667 | struct kmem_cache *cachep; | 1668 | struct kmem_cache *cachep; |
| 1668 | 1669 | ||
| 1670 | g_cpucache_up = LATE; | ||
| 1671 | |||
| 1669 | /* Annotate slab for lockdep -- annotate the malloc caches */ | 1672 | /* Annotate slab for lockdep -- annotate the malloc caches */ |
| 1670 | init_lock_keys(); | 1673 | init_lock_keys(); |
| 1671 | 1674 | ||
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
| @@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s) | |||
| 1862 | { | 1862 | { |
| 1863 | struct kmem_cache_node *n = NULL; | 1863 | struct kmem_cache_node *n = NULL; |
| 1864 | struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); | 1864 | struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); |
| 1865 | struct page *page; | 1865 | struct page *page, *discard_page = NULL; |
| 1866 | 1866 | ||
| 1867 | while ((page = c->partial)) { | 1867 | while ((page = c->partial)) { |
| 1868 | enum slab_modes { M_PARTIAL, M_FREE }; | 1868 | enum slab_modes { M_PARTIAL, M_FREE }; |
| @@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s) | |||
| 1904 | if (l == M_PARTIAL) | 1904 | if (l == M_PARTIAL) |
| 1905 | remove_partial(n, page); | 1905 | remove_partial(n, page); |
| 1906 | else | 1906 | else |
| 1907 | add_partial(n, page, 1); | 1907 | add_partial(n, page, |
| 1908 | DEACTIVATE_TO_TAIL); | ||
| 1908 | 1909 | ||
| 1909 | l = m; | 1910 | l = m; |
| 1910 | } | 1911 | } |
| @@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s) | |||
| 1915 | "unfreezing slab")); | 1916 | "unfreezing slab")); |
| 1916 | 1917 | ||
| 1917 | if (m == M_FREE) { | 1918 | if (m == M_FREE) { |
| 1918 | stat(s, DEACTIVATE_EMPTY); | 1919 | page->next = discard_page; |
| 1919 | discard_slab(s, page); | 1920 | discard_page = page; |
| 1920 | stat(s, FREE_SLAB); | ||
| 1921 | } | 1921 | } |
| 1922 | } | 1922 | } |
| 1923 | 1923 | ||
| 1924 | if (n) | 1924 | if (n) |
| 1925 | spin_unlock(&n->list_lock); | 1925 | spin_unlock(&n->list_lock); |
| 1926 | |||
| 1927 | while (discard_page) { | ||
| 1928 | page = discard_page; | ||
| 1929 | discard_page = discard_page->next; | ||
| 1930 | |||
| 1931 | stat(s, DEACTIVATE_EMPTY); | ||
| 1932 | discard_slab(s, page); | ||
| 1933 | stat(s, FREE_SLAB); | ||
| 1934 | } | ||
| 1926 | } | 1935 | } |
| 1927 | 1936 | ||
| 1928 | /* | 1937 | /* |
| @@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |||
| 1969 | page->pobjects = pobjects; | 1978 | page->pobjects = pobjects; |
| 1970 | page->next = oldpage; | 1979 | page->next = oldpage; |
| 1971 | 1980 | ||
| 1972 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); | 1981 | } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); |
| 1973 | stat(s, CPU_PARTIAL_FREE); | 1982 | stat(s, CPU_PARTIAL_FREE); |
| 1974 | return pobjects; | 1983 | return pobjects; |
| 1975 | } | 1984 | } |
| @@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
| 4435 | 4444 | ||
| 4436 | for_each_possible_cpu(cpu) { | 4445 | for_each_possible_cpu(cpu) { |
| 4437 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 4446 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
| 4447 | int node = ACCESS_ONCE(c->node); | ||
| 4438 | struct page *page; | 4448 | struct page *page; |
| 4439 | 4449 | ||
| 4440 | if (!c || c->node < 0) | 4450 | if (node < 0) |
| 4441 | continue; | 4451 | continue; |
| 4442 | 4452 | page = ACCESS_ONCE(c->page); | |
| 4443 | if (c->page) { | 4453 | if (page) { |
| 4444 | if (flags & SO_TOTAL) | 4454 | if (flags & SO_TOTAL) |
| 4445 | x = c->page->objects; | 4455 | x = page->objects; |
| 4446 | else if (flags & SO_OBJECTS) | 4456 | else if (flags & SO_OBJECTS) |
| 4447 | x = c->page->inuse; | 4457 | x = page->inuse; |
| 4448 | else | 4458 | else |
| 4449 | x = 1; | 4459 | x = 1; |
| 4450 | 4460 | ||
| 4451 | total += x; | 4461 | total += x; |
| 4452 | nodes[c->node] += x; | 4462 | nodes[node] += x; |
| 4453 | } | 4463 | } |
| 4454 | page = c->partial; | 4464 | page = c->partial; |
| 4455 | 4465 | ||
| 4456 | if (page) { | 4466 | if (page) { |
| 4457 | x = page->pobjects; | 4467 | x = page->pobjects; |
| 4458 | total += x; | 4468 | total += x; |
| 4459 | nodes[c->node] += x; | 4469 | nodes[node] += x; |
| 4460 | } | 4470 | } |
| 4461 | per_cpu[c->node]++; | 4471 | per_cpu[node]++; |
| 4462 | } | 4472 | } |
| 4463 | } | 4473 | } |
| 4464 | 4474 | ||
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b669aa6f6caf..1d8b32f07139 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, | |||
| 1633 | goto fail; | 1633 | goto fail; |
| 1634 | 1634 | ||
| 1635 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); | 1635 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); |
| 1636 | if (!addr) | ||
| 1637 | return NULL; | ||
| 1636 | 1638 | ||
| 1637 | /* | 1639 | /* |
| 1638 | * In this function, newly allocated vm_struct is not added | 1640 | * In this function, newly allocated vm_struct is not added |
| @@ -2141,23 +2143,30 @@ void __attribute__((weak)) vmalloc_sync_all(void) | |||
| 2141 | 2143 | ||
| 2142 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) | 2144 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) |
| 2143 | { | 2145 | { |
| 2144 | /* apply_to_page_range() does all the hard work. */ | 2146 | pte_t ***p = data; |
| 2147 | |||
| 2148 | if (p) { | ||
| 2149 | *(*p) = pte; | ||
| 2150 | (*p)++; | ||
| 2151 | } | ||
| 2145 | return 0; | 2152 | return 0; |
| 2146 | } | 2153 | } |
| 2147 | 2154 | ||
| 2148 | /** | 2155 | /** |
| 2149 | * alloc_vm_area - allocate a range of kernel address space | 2156 | * alloc_vm_area - allocate a range of kernel address space |
| 2150 | * @size: size of the area | 2157 | * @size: size of the area |
| 2158 | * @ptes: returns the PTEs for the address space | ||
| 2151 | * | 2159 | * |
| 2152 | * Returns: NULL on failure, vm_struct on success | 2160 | * Returns: NULL on failure, vm_struct on success |
| 2153 | * | 2161 | * |
| 2154 | * This function reserves a range of kernel address space, and | 2162 | * This function reserves a range of kernel address space, and |
| 2155 | * allocates pagetables to map that range. No actual mappings | 2163 | * allocates pagetables to map that range. No actual mappings |
| 2156 | * are created. If the kernel address space is not shared | 2164 | * are created. |
| 2157 | * between processes, it syncs the pagetable across all | 2165 | * |
| 2158 | * processes. | 2166 | * If @ptes is non-NULL, pointers to the PTEs (in init_mm) |
| 2167 | * allocated for the VM area are returned. | ||
| 2159 | */ | 2168 | */ |
| 2160 | struct vm_struct *alloc_vm_area(size_t size) | 2169 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) |
| 2161 | { | 2170 | { |
| 2162 | struct vm_struct *area; | 2171 | struct vm_struct *area; |
| 2163 | 2172 | ||
| @@ -2171,19 +2180,11 @@ struct vm_struct *alloc_vm_area(size_t size) | |||
| 2171 | * of kernel virtual address space and mapped into init_mm. | 2180 | * of kernel virtual address space and mapped into init_mm. |
| 2172 | */ | 2181 | */ |
| 2173 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, | 2182 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, |
| 2174 | area->size, f, NULL)) { | 2183 | size, f, ptes ? &ptes : NULL)) { |
| 2175 | free_vm_area(area); | 2184 | free_vm_area(area); |
| 2176 | return NULL; | 2185 | return NULL; |
| 2177 | } | 2186 | } |
| 2178 | 2187 | ||
| 2179 | /* | ||
| 2180 | * If the allocated address space is passed to a hypercall | ||
| 2181 | * before being used then we cannot rely on a page fault to | ||
| 2182 | * trigger an update of the page tables. So sync all the page | ||
| 2183 | * tables here. | ||
| 2184 | */ | ||
| 2185 | vmalloc_sync_all(); | ||
| 2186 | |||
| 2187 | return area; | 2188 | return area; |
| 2188 | } | 2189 | } |
| 2189 | EXPORT_SYMBOL_GPL(alloc_vm_area); | 2190 | EXPORT_SYMBOL_GPL(alloc_vm_area); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a1893c050795..f54a05b7a61d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone, | |||
| 183 | */ | 183 | */ |
| 184 | void register_shrinker(struct shrinker *shrinker) | 184 | void register_shrinker(struct shrinker *shrinker) |
| 185 | { | 185 | { |
| 186 | shrinker->nr = 0; | 186 | atomic_long_set(&shrinker->nr_in_batch, 0); |
| 187 | down_write(&shrinker_rwsem); | 187 | down_write(&shrinker_rwsem); |
| 188 | list_add_tail(&shrinker->list, &shrinker_list); | 188 | list_add_tail(&shrinker->list, &shrinker_list); |
| 189 | up_write(&shrinker_rwsem); | 189 | up_write(&shrinker_rwsem); |
| @@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
| 247 | 247 | ||
| 248 | list_for_each_entry(shrinker, &shrinker_list, list) { | 248 | list_for_each_entry(shrinker, &shrinker_list, list) { |
| 249 | unsigned long long delta; | 249 | unsigned long long delta; |
| 250 | unsigned long total_scan; | 250 | long total_scan; |
| 251 | unsigned long max_pass; | 251 | long max_pass; |
| 252 | int shrink_ret = 0; | 252 | int shrink_ret = 0; |
| 253 | long nr; | 253 | long nr; |
| 254 | long new_nr; | 254 | long new_nr; |
| 255 | long batch_size = shrinker->batch ? shrinker->batch | 255 | long batch_size = shrinker->batch ? shrinker->batch |
| 256 | : SHRINK_BATCH; | 256 | : SHRINK_BATCH; |
| 257 | 257 | ||
| 258 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
| 259 | if (max_pass <= 0) | ||
| 260 | continue; | ||
| 261 | |||
| 258 | /* | 262 | /* |
| 259 | * copy the current shrinker scan count into a local variable | 263 | * copy the current shrinker scan count into a local variable |
| 260 | * and zero it so that other concurrent shrinker invocations | 264 | * and zero it so that other concurrent shrinker invocations |
| 261 | * don't also do this scanning work. | 265 | * don't also do this scanning work. |
| 262 | */ | 266 | */ |
| 263 | do { | 267 | nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); |
| 264 | nr = shrinker->nr; | ||
| 265 | } while (cmpxchg(&shrinker->nr, nr, 0) != nr); | ||
| 266 | 268 | ||
| 267 | total_scan = nr; | 269 | total_scan = nr; |
| 268 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
| 269 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | 270 | delta = (4 * nr_pages_scanned) / shrinker->seeks; |
| 270 | delta *= max_pass; | 271 | delta *= max_pass; |
| 271 | do_div(delta, lru_pages + 1); | 272 | do_div(delta, lru_pages + 1); |
| @@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
| 325 | * manner that handles concurrent updates. If we exhausted the | 326 | * manner that handles concurrent updates. If we exhausted the |
| 326 | * scan, there is no need to do an update. | 327 | * scan, there is no need to do an update. |
| 327 | */ | 328 | */ |
| 328 | do { | 329 | if (total_scan > 0) |
| 329 | nr = shrinker->nr; | 330 | new_nr = atomic_long_add_return(total_scan, |
| 330 | new_nr = total_scan + nr; | 331 | &shrinker->nr_in_batch); |
| 331 | if (total_scan <= 0) | 332 | else |
| 332 | break; | 333 | new_nr = atomic_long_read(&shrinker->nr_in_batch); |
| 333 | } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr); | ||
| 334 | 334 | ||
| 335 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | 335 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); |
| 336 | } | 336 | } |
