about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/fadvise.c 11
-rw-r--r-- mm/hugetlb.c 42
-rw-r--r-- mm/kasan/kasan.c 4
-rw-r--r-- mm/memcontrol.c 2
-rw-r--r-- mm/page-writeback.c 21
-rw-r--r-- mm/percpu.c 73
-rw-r--r-- mm/swap.c 20
-rw-r--r-- mm/swap_state.c 5
8 files changed, 133 insertions, 45 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c
index b8024fa7101d..6c707bfe02fd 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -126,6 +126,17 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
126 */ 126 */
127 start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; 127 start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
128 end_index = (endbyte >> PAGE_SHIFT); 128 end_index = (endbyte >> PAGE_SHIFT);
129 if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
130 /* First page is tricky as 0 - 1 = -1, but pgoff_t
131 * is unsigned, so the end_index >= start_index
132 * check below would be true and we'll discard the whole
133 * file cache which is not what was asked.
134 */
135 if (end_index == 0)
136 break;
137
138 end_index--;
139 }
129 140
130 if (end_index >= start_index) { 141 if (end_index >= start_index) {
131 unsigned long count = invalidate_mapping_pages(mapping, 142 unsigned long count = invalidate_mapping_pages(mapping,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d26162e81fea..388c2bb9b55c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
832 * Only the process that called mmap() has reserves for 832 * Only the process that called mmap() has reserves for
833 * private mappings. 833 * private mappings.
834 */ 834 */
835 if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) 835 if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
836 return true; 836 /*
837 * Like the shared case above, a hole punch or truncate
838 * could have been performed on the private mapping.
839 * Examine the value of chg to determine if reserves
840 * actually exist or were previously consumed.
841 * Very Subtle - The value of chg comes from a previous
842 * call to vma_needs_reserves(). The reserve map for
843 * private mappings has different (opposite) semantics
844 * than that of shared mappings. vma_needs_reserves()
845 * has already taken this difference in semantics into
846 * account. Therefore, the meaning of chg is the same
847 * as in the shared case above. Code could easily be
848 * combined, but keeping it separate draws attention to
849 * subtle differences.
850 */
851 if (chg)
852 return false;
853 else
854 return true;
855 }
837 856
838 return false; 857 return false;
839} 858}
@@ -1816,6 +1835,25 @@ static long __vma_reservation_common(struct hstate *h,
1816 1835
1817 if (vma->vm_flags & VM_MAYSHARE) 1836 if (vma->vm_flags & VM_MAYSHARE)
1818 return ret; 1837 return ret;
1838 else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
1839 /*
1840 * In most cases, reserves always exist for private mappings.
1841 * However, a file associated with mapping could have been
1842 * hole punched or truncated after reserves were consumed.
1843 * A subsequent fault on such a range will not use reserves.
1844 * Subtle - The reserve map for private mappings has the
1845 * opposite meaning than that of shared mappings. If NO
1846 * entry is in the reserve map, it means a reservation exists.
1847 * If an entry exists in the reserve map, it means the
1848 * reservation has already been consumed. As a result, the
1849 * return value of this routine is the opposite of the
1850 * value returned from reserve map manipulation routines above.
1851 */
1852 if (ret)
1853 return 0;
1854 else
1855 return 1;
1856 }
1819 else 1857 else
1820 return ret < 0 ? ret : 0; 1858 return ret < 0 ? ret : 0;
1821} 1859}
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 18b6a2b8d183..28439acda6ec 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -763,8 +763,8 @@ static int kasan_mem_notifier(struct notifier_block *nb,
763 763
764static int __init kasan_memhotplug_init(void) 764static int __init kasan_memhotplug_init(void)
765{ 765{
766 pr_err("WARNING: KASAN doesn't support memory hot-add\n"); 766 pr_info("WARNING: KASAN doesn't support memory hot-add\n");
767 pr_err("Memory hot-add will be disabled\n"); 767 pr_info("Memory hot-add will be disabled\n");
768 768
769 hotplug_memory_notifier(kasan_mem_notifier, 0); 769 hotplug_memory_notifier(kasan_mem_notifier, 0);
770 770
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 58c69c94402a..75e74408cc8f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1608,7 +1608,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
1608 1608
1609static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) 1609static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
1610{ 1610{
1611 if (!current->memcg_may_oom || current->memcg_in_oom) 1611 if (!current->memcg_may_oom)
1612 return; 1612 return;
1613 /* 1613 /*
1614 * We are in the middle of the charge context here, so we 1614 * We are in the middle of the charge context here, so we
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9956fdee8f5..e2481949494c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -373,8 +373,9 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
373 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc); 373 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
374 unsigned long bytes = vm_dirty_bytes; 374 unsigned long bytes = vm_dirty_bytes;
375 unsigned long bg_bytes = dirty_background_bytes; 375 unsigned long bg_bytes = dirty_background_bytes;
376 unsigned long ratio = vm_dirty_ratio; 376 /* convert ratios to per-PAGE_SIZE for higher precision */
377 unsigned long bg_ratio = dirty_background_ratio; 377 unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
378 unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
378 unsigned long thresh; 379 unsigned long thresh;
379 unsigned long bg_thresh; 380 unsigned long bg_thresh;
380 struct task_struct *tsk; 381 struct task_struct *tsk;
@@ -386,26 +387,28 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
386 /* 387 /*
387 * The byte settings can't be applied directly to memcg 388 * The byte settings can't be applied directly to memcg
388 * domains. Convert them to ratios by scaling against 389 * domains. Convert them to ratios by scaling against
389 * globally available memory. 390 * globally available memory. As the ratios are in
391 * per-PAGE_SIZE, they can be obtained by dividing bytes by
392 * number of pages.
390 */ 393 */
391 if (bytes) 394 if (bytes)
392 ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 / 395 ratio = min(DIV_ROUND_UP(bytes, global_avail),
393 global_avail, 100UL); 396 PAGE_SIZE);
394 if (bg_bytes) 397 if (bg_bytes)
395 bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 / 398 bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
396 global_avail, 100UL); 399 PAGE_SIZE);
397 bytes = bg_bytes = 0; 400 bytes = bg_bytes = 0;
398 } 401 }
399 402
400 if (bytes) 403 if (bytes)
401 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE); 404 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
402 else 405 else
403 thresh = (ratio * available_memory) / 100; 406 thresh = (ratio * available_memory) / PAGE_SIZE;
404 407
405 if (bg_bytes) 408 if (bg_bytes)
406 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE); 409 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
407 else 410 else
408 bg_thresh = (bg_ratio * available_memory) / 100; 411 bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
409 412
410 if (bg_thresh >= thresh) 413 if (bg_thresh >= thresh)
411 bg_thresh = thresh / 2; 414 bg_thresh = thresh / 2;
diff --git a/mm/percpu.c b/mm/percpu.c
index 0c59684f1ff2..9903830aaebb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -112,7 +112,7 @@ struct pcpu_chunk {
112 int map_used; /* # of map entries used before the sentry */ 112 int map_used; /* # of map entries used before the sentry */
113 int map_alloc; /* # of map entries allocated */ 113 int map_alloc; /* # of map entries allocated */
114 int *map; /* allocation map */ 114 int *map; /* allocation map */
115 struct work_struct map_extend_work;/* async ->map[] extension */ 115 struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
116 116
117 void *data; /* chunk data */ 117 void *data; /* chunk data */
118 int first_free; /* no free below this */ 118 int first_free; /* no free below this */
@@ -162,10 +162,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
162static int pcpu_reserved_chunk_limit; 162static int pcpu_reserved_chunk_limit;
163 163
164static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ 164static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
165static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ 165static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
166 166
167static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ 167static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
168 168
169/* chunks which need their map areas extended, protected by pcpu_lock */
170static LIST_HEAD(pcpu_map_extend_chunks);
171
169/* 172/*
170 * The number of empty populated pages, protected by pcpu_lock. The 173 * The number of empty populated pages, protected by pcpu_lock. The
171 * reserved chunk doesn't contribute to the count. 174 * reserved chunk doesn't contribute to the count.
@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
395{ 398{
396 int margin, new_alloc; 399 int margin, new_alloc;
397 400
401 lockdep_assert_held(&pcpu_lock);
402
398 if (is_atomic) { 403 if (is_atomic) {
399 margin = 3; 404 margin = 3;
400 405
401 if (chunk->map_alloc < 406 if (chunk->map_alloc <
402 chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && 407 chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
403 pcpu_async_enabled) 408 if (list_empty(&chunk->map_extend_list)) {
404 schedule_work(&chunk->map_extend_work); 409 list_add_tail(&chunk->map_extend_list,
410 &pcpu_map_extend_chunks);
411 pcpu_schedule_balance_work();
412 }
413 }
405 } else { 414 } else {
406 margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; 415 margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
407 } 416 }
@@ -435,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
435 size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); 444 size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
436 unsigned long flags; 445 unsigned long flags;
437 446
447 lockdep_assert_held(&pcpu_alloc_mutex);
448
438 new = pcpu_mem_zalloc(new_size); 449 new = pcpu_mem_zalloc(new_size);
439 if (!new) 450 if (!new)
440 return -ENOMEM; 451 return -ENOMEM;
@@ -467,20 +478,6 @@ out_unlock:
467 return 0; 478 return 0;
468} 479}
469 480
470static void pcpu_map_extend_workfn(struct work_struct *work)
471{
472 struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
473 map_extend_work);
474 int new_alloc;
475
476 spin_lock_irq(&pcpu_lock);
477 new_alloc = pcpu_need_to_extend(chunk, false);
478 spin_unlock_irq(&pcpu_lock);
479
480 if (new_alloc)
481 pcpu_extend_area_map(chunk, new_alloc);
482}
483
484/** 481/**
485 * pcpu_fit_in_area - try to fit the requested allocation in a candidate area 482 * pcpu_fit_in_area - try to fit the requested allocation in a candidate area
486 * @chunk: chunk the candidate area belongs to 483 * @chunk: chunk the candidate area belongs to
@@ -740,7 +737,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
740 chunk->map_used = 1; 737 chunk->map_used = 1;
741 738
742 INIT_LIST_HEAD(&chunk->list); 739 INIT_LIST_HEAD(&chunk->list);
743 INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn); 740 INIT_LIST_HEAD(&chunk->map_extend_list);
744 chunk->free_size = pcpu_unit_size; 741 chunk->free_size = pcpu_unit_size;
745 chunk->contig_hint = pcpu_unit_size; 742 chunk->contig_hint = pcpu_unit_size;
746 743
@@ -895,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
895 return NULL; 892 return NULL;
896 } 893 }
897 894
895 if (!is_atomic)
896 mutex_lock(&pcpu_alloc_mutex);
897
898 spin_lock_irqsave(&pcpu_lock, flags); 898 spin_lock_irqsave(&pcpu_lock, flags);
899 899
900 /* serve reserved allocations from the reserved chunk if available */ 900 /* serve reserved allocations from the reserved chunk if available */
@@ -967,12 +967,9 @@ restart:
967 if (is_atomic) 967 if (is_atomic)
968 goto fail; 968 goto fail;
969 969
970 mutex_lock(&pcpu_alloc_mutex);
971
972 if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { 970 if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
973 chunk = pcpu_create_chunk(); 971 chunk = pcpu_create_chunk();
974 if (!chunk) { 972 if (!chunk) {
975 mutex_unlock(&pcpu_alloc_mutex);
976 err = "failed to allocate new chunk"; 973 err = "failed to allocate new chunk";
977 goto fail; 974 goto fail;
978 } 975 }
@@ -983,7 +980,6 @@ restart:
983 spin_lock_irqsave(&pcpu_lock, flags); 980 spin_lock_irqsave(&pcpu_lock, flags);
984 } 981 }
985 982
986 mutex_unlock(&pcpu_alloc_mutex);
987 goto restart; 983 goto restart;
988 984
989area_found: 985area_found:
@@ -993,8 +989,6 @@ area_found:
993 if (!is_atomic) { 989 if (!is_atomic) {
994 int page_start, page_end, rs, re; 990 int page_start, page_end, rs, re;
995 991
996 mutex_lock(&pcpu_alloc_mutex);
997
998 page_start = PFN_DOWN(off); 992 page_start = PFN_DOWN(off);
999 page_end = PFN_UP(off + size); 993 page_end = PFN_UP(off + size);
1000 994
@@ -1005,7 +999,6 @@ area_found:
1005 999
1006 spin_lock_irqsave(&pcpu_lock, flags); 1000 spin_lock_irqsave(&pcpu_lock, flags);
1007 if (ret) { 1001 if (ret) {
1008 mutex_unlock(&pcpu_alloc_mutex);
1009 pcpu_free_area(chunk, off, &occ_pages); 1002 pcpu_free_area(chunk, off, &occ_pages);
1010 err = "failed to populate"; 1003 err = "failed to populate";
1011 goto fail_unlock; 1004 goto fail_unlock;
@@ -1045,6 +1038,8 @@ fail:
1045 /* see the flag handling in pcpu_balance_workfn() */ 1038 /* see the flag handling in pcpu_balance_workfn() */
1046 pcpu_atomic_alloc_failed = true; 1039 pcpu_atomic_alloc_failed = true;
1047 pcpu_schedule_balance_work(); 1040 pcpu_schedule_balance_work();
1041 } else {
1042 mutex_unlock(&pcpu_alloc_mutex);
1048 } 1043 }
1049 return NULL; 1044 return NULL;
1050} 1045}
@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
1129 if (chunk == list_first_entry(free_head, struct pcpu_chunk, list)) 1124 if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
1130 continue; 1125 continue;
1131 1126
1127 list_del_init(&chunk->map_extend_list);
1132 list_move(&chunk->list, &to_free); 1128 list_move(&chunk->list, &to_free);
1133 } 1129 }
1134 1130
@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
1146 pcpu_destroy_chunk(chunk); 1142 pcpu_destroy_chunk(chunk);
1147 } 1143 }
1148 1144
1145 /* service chunks which requested async area map extension */
1146 do {
1147 int new_alloc = 0;
1148
1149 spin_lock_irq(&pcpu_lock);
1150
1151 chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
1152 struct pcpu_chunk, map_extend_list);
1153 if (chunk) {
1154 list_del_init(&chunk->map_extend_list);
1155 new_alloc = pcpu_need_to_extend(chunk, false);
1156 }
1157
1158 spin_unlock_irq(&pcpu_lock);
1159
1160 if (new_alloc)
1161 pcpu_extend_area_map(chunk, new_alloc);
1162 } while (chunk);
1163
1149 /* 1164 /*
1150 * Ensure there are certain number of free populated pages for 1165 * Ensure there are certain number of free populated pages for
1151 * atomic allocs. Fill up from the most packed so that atomic 1166 * atomic allocs. Fill up from the most packed so that atomic
@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1644 */ 1659 */
1645 schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); 1660 schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
1646 INIT_LIST_HEAD(&schunk->list); 1661 INIT_LIST_HEAD(&schunk->list);
1647 INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn); 1662 INIT_LIST_HEAD(&schunk->map_extend_list);
1648 schunk->base_addr = base_addr; 1663 schunk->base_addr = base_addr;
1649 schunk->map = smap; 1664 schunk->map = smap;
1650 schunk->map_alloc = ARRAY_SIZE(smap); 1665 schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1673 if (dyn_size) { 1688 if (dyn_size) {
1674 dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); 1689 dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
1675 INIT_LIST_HEAD(&dchunk->list); 1690 INIT_LIST_HEAD(&dchunk->list);
1676 INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn); 1691 INIT_LIST_HEAD(&dchunk->map_extend_list);
1677 dchunk->base_addr = base_addr; 1692 dchunk->base_addr = base_addr;
1678 dchunk->map = dmap; 1693 dchunk->map = dmap;
1679 dchunk->map_alloc = ARRAY_SIZE(dmap); 1694 dchunk->map_alloc = ARRAY_SIZE(dmap);
diff --git a/mm/swap.c b/mm/swap.c
index 95916142fc46..59f5fafa6e1f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -667,6 +667,24 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
667 667
668static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); 668static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
669 669
670/*
671 * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
672 * workqueue, aiding in getting memory freed.
673 */
674static struct workqueue_struct *lru_add_drain_wq;
675
676static int __init lru_init(void)
677{
678 lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
679
680 if (WARN(!lru_add_drain_wq,
681 "Failed to create workqueue lru_add_drain_wq"))
682 return -ENOMEM;
683
684 return 0;
685}
686early_initcall(lru_init);
687
670void lru_add_drain_all(void) 688void lru_add_drain_all(void)
671{ 689{
672 static DEFINE_MUTEX(lock); 690 static DEFINE_MUTEX(lock);
@@ -686,7 +704,7 @@ void lru_add_drain_all(void)
686 pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || 704 pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
687 need_activate_page_drain(cpu)) { 705 need_activate_page_drain(cpu)) {
688 INIT_WORK(work, lru_add_drain_per_cpu); 706 INIT_WORK(work, lru_add_drain_per_cpu);
689 schedule_work_on(cpu, work); 707 queue_work_on(cpu, lru_add_drain_wq, work);
690 cpumask_set_cpu(cpu, &has_work); 708 cpumask_set_cpu(cpu, &has_work);
691 } 709 }
692 } 710 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0d457e7db8d6..c99463ac02fb 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page)
252void free_page_and_swap_cache(struct page *page) 252void free_page_and_swap_cache(struct page *page)
253{ 253{
254 free_swap_cache(page); 254 free_swap_cache(page);
255 put_page(page); 255 if (is_huge_zero_page(page))
256 put_huge_zero_page();
257 else
258 put_page(page);
256} 259}
257 260
258/* 261/*