Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c         13
-rw-r--r--  mm/huge_memory.c     16
-rw-r--r--  mm/hugetlb.c          1
-rw-r--r--  mm/memcontrol.c       3
-rw-r--r--  mm/migrate.c          2
-rw-r--r--  mm/oom_kill.c         2
-rw-r--r--  mm/page-writeback.c  32
-rw-r--r--  mm/page_alloc.c      10
-rw-r--r--  mm/percpu.c           6
-rw-r--r--  mm/slab.c             5
-rw-r--r--  mm/vmalloc.c          4
-rw-r--r--  mm/vmscan.c          26
12 files changed, 74 insertions, 46 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e0..5f0a3c91fdac 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1828,7 +1828,7 @@ repeat:
                 page = __page_cache_alloc(gfp | __GFP_COLD);
                 if (!page)
                         return ERR_PTR(-ENOMEM);
-                err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+                err = add_to_page_cache_lru(page, mapping, index, gfp);
                 if (unlikely(err)) {
                         page_cache_release(page);
                         if (err == -EEXIST)
@@ -1925,10 +1925,7 @@ static struct page *wait_on_page_read(struct page *page)
  * @gfp:	the page allocator flags to use if allocating
  *
  * This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
  *
  * If the page does not get brought uptodate, return -EIO.
  */
@@ -2407,7 +2404,6 @@ static ssize_t generic_perform_write(struct file *file,
                                                 iov_iter_count(i));
 
 again:
-
                 /*
                  * Bring in the user page that we will copy from _first_.
                  * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2459,10 @@ again:
                 written += copied;
 
                 balance_dirty_pages_ratelimited(mapping);
-
+                if (fatal_signal_pending(current)) {
+                        status = -EINTR;
+                        break;
+                }
         } while (iov_iter_count(i));
 
         return written ? written : status;
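
A note on the first hunk: before this change, read_cache_page_gfp() allocated the page with the caller's gfp mask but then inserted it into the page cache with GFP_KERNEL. A minimal sketch of a hypothetical caller that relies on the mask being honoured (the GFP_NOFS-style restriction below is illustrative and not taken from this patch):

        /* Hypothetical caller: read a page while holding locks that make
         * filesystem re-entry unsafe, so clear __GFP_FS from the mask.
         * With this fix the same mask is also used for the page-cache
         * insertion done by add_to_page_cache_lru(). */
        struct page *page = read_cache_page_gfp(mapping, index,
                                mapping_gfp_mask(mapping) & ~__GFP_FS);
        if (IS_ERR(page))
                return PTR_ERR(page);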
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
 
 static void khugepaged_alloc_sleep(void)
 {
-        DEFINE_WAIT(wait);
-        add_wait_queue(&khugepaged_wait, &wait);
-        schedule_timeout_interruptible(
-                msecs_to_jiffies(
-                        khugepaged_alloc_sleep_millisecs));
-        remove_wait_queue(&khugepaged_wait, &wait);
+        wait_event_freezable_timeout(khugepaged_wait, false,
+                        msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
 #ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
                 if (unlikely(kthread_should_stop()))
                         break;
                 if (khugepaged_has_work()) {
-                        DEFINE_WAIT(wait);
                         if (!khugepaged_scan_sleep_millisecs)
                                 continue;
-                        add_wait_queue(&khugepaged_wait, &wait);
-                        schedule_timeout_interruptible(
-                                msecs_to_jiffies(
-                                        khugepaged_scan_sleep_millisecs));
-                        remove_wait_queue(&khugepaged_wait, &wait);
+                        wait_event_freezable_timeout(khugepaged_wait, false,
+                            msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
                 } else if (khugepaged_enabled())
                         wait_event_freezable(khugepaged_wait,
                                              khugepaged_wait_event());
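
Both khugepaged hunks replace an open-coded interruptible sleep with wait_event_freezable_timeout(), whose practical difference is that the sleeping thread cooperates with the freezer during suspend/hibernate. A rough, simplified sketch of what the helper amounts to for the alloc-sleep case (not the actual macro expansion):

        /* Simplified sketch: sleep interruptibly on khugepaged_wait for up
         * to the configured timeout, then enter the freezer if a freeze is
         * pending. The removed open-coded version never called
         * try_to_freeze(), so khugepaged could hold up suspend. */
        long timeout = msecs_to_jiffies(khugepaged_alloc_sleep_millisecs);
        DEFINE_WAIT(wait);

        prepare_to_wait(&khugepaged_wait, &wait, TASK_INTERRUPTIBLE);
        schedule_timeout(timeout);
        finish_wait(&khugepaged_wait, &wait);
        try_to_freeze();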
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb28a5f9db8d..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
         __SetPageHead(page);
         for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                 __SetPageTail(p);
+                set_page_count(p, 0);
                 p->first_page = page;
         }
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..b63f5f7dfa07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                 int cpu;
                 enable_swap_cgroup();
                 parent = NULL;
-                root_mem_cgroup = memcg;
                 if (mem_cgroup_soft_limit_tree_init())
                         goto free_out;
+                root_mem_cgroup = memcg;
                 for_each_possible_cpu(cpu) {
                         struct memcg_stock_pcp *stock =
                                                 &per_cpu(memcg_stock, cpu);
@@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
         return &memcg->css;
 free_out:
         __mem_cgroup_free(memcg);
-        root_mem_cgroup = NULL;
         return ERR_PTR(error);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
         if (anon_vma)
                 put_anon_vma(anon_vma);
-out:
         unlock_page(hpage);
 
+out:
         if (rc != -EAGAIN) {
                 list_del(&hpage->lru);
                 put_page(hpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 76f2c5ae908e..069b64e521fc 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                          const nodemask_t *nodemask, unsigned long totalpages)
 {
-        int points;
+        long points;
 
         if (oom_unkillable_task(p, mem, nodemask))
                 return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 71252486bc6f..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
          */
         if (unlikely(bdi_thresh > thresh))
                 bdi_thresh = thresh;
+        /*
+         * It's very possible that bdi_thresh is close to 0 not because the
+         * device is slow, but that it has remained inactive for long time.
+         * Honour such devices a reasonable good (hopefully IO efficient)
+         * threshold, so that the occasional writes won't be blocked and active
+         * writes can rampup the threshold quickly.
+         */
         bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
         /*
          * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
          *
          * 8 serves as the safety ratio.
          */
-        if (bdi_dirty)
-                t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+        t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
         /*
          * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
                 if (task_ratelimit)
                         break;
 
+                /*
+                 * In the case of an unresponding NFS server and the NFS dirty
+                 * pages exceeds dirty_thresh, give the other good bdi's a pipe
+                 * to go through, so that tasks on them still remain responsive.
+                 *
+                 * In theory 1 page is enough to keep the comsumer-producer
+                 * pipe going: the flusher cleans 1 page => the task dirties 1
+                 * more page. However bdi_dirty has accounting errors. So use
+                 * the larger and more IO friendly bdi_stat_error.
+                 */
+                if (bdi_dirty <= bdi_stat_error(bdi))
+                        break;
+
                 if (fatal_signal_pending(current))
                         break;
         }
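
The new comment above the bdi_thresh floor in bdi_position_ratio() is easiest to see with numbers. A worked example with illustrative values (not taken from the patch):

        /* Illustrative values only: global limit 51200 pages (200 MiB with
         * 4 KiB pages), 10240 pages currently dirty, and an idle bdi whose
         * own threshold has decayed to ~0. The floor grants it
         * (51200 - 10240) / 8 = 5120 pages (20 MiB), so occasional writes
         * are not stalled while the bdi's threshold ramps back up. */
        unsigned long limit = 51200, dirty = 10240, bdi_thresh = 0;

        bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);	/* -> 5120 */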
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
         __SetPageHead(page);
         for (i = 1; i < nr_pages; i++) {
                 struct page *p = page + i;
-
                 __SetPageTail(p);
+                set_page_count(p, 0);
                 p->first_page = page;
         }
 }
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
         unsigned long block_migratetype;
         int reserve;
 
-        /* Get the start pfn, end pfn and the number of blocks to reserve */
+        /*
+         * Get the start pfn, end pfn and the number of blocks to reserve
+         * We have to be careful to be aligned to pageblock_nr_pages to
+         * make sure that we always check pfn_valid for the first page in
+         * the block.
+         */
         start_pfn = zone->zone_start_pfn;
         end_pfn = start_pfn + zone->spanned_pages;
+        start_pfn = roundup(start_pfn, pageblock_nr_pages);
         reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                         pageblock_order;
 
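
The start_pfn round-up matters because the loop below it walks the zone in pageblock_nr_pages steps and treats every visited pfn as a block's first page. An illustrative example (the numbers are hypothetical):

        /* Hypothetical zone: pageblock_nr_pages == 512 and the zone starts
         * at pfn 1000. Stepping by 512 from 1000 would visit 1000, 1512,
         * 2024, ... which are not block-aligned, so the pfn_valid() and
         * pfn_to_page() checks would not be made on each block's first
         * page. Rounding the start up keeps every step aligned. */
        unsigned long start_pfn = 1000;

        start_pfn = roundup(start_pfn, 512);	/* -> 1024, then 1536, 2048, ... */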
diff --git a/mm/percpu.c b/mm/percpu.c
index 3bb810a72006..716eb4acf2fc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1023,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
                 if (!is_vmalloc_addr(addr))
                         return __pa(addr);
                 else
-                        return page_to_phys(vmalloc_to_page(addr));
+                        return page_to_phys(vmalloc_to_page(addr)) +
+                               offset_in_page(addr);
         } else
-                return page_to_phys(pcpu_addr_to_page(addr));
+                return page_to_phys(pcpu_addr_to_page(addr)) +
+                       offset_in_page(addr);
 }
 
 /**
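
This per_cpu_ptr_to_phys() change restores the sub-page offset that page_to_phys() alone discards. A small illustration with made-up addresses:

        /* Made-up addresses: addr sits 0xa40 bytes into its page, and the
         * page's physical address is 0x1c0d000. page_to_phys() alone would
         * return 0x1c0d000 (the start of the page); adding
         * offset_in_page(addr) == (addr & ~PAGE_MASK) == 0xa40 yields the
         * object's real physical address, 0x1c0da40. */
        phys_addr_t phys = page_to_phys(vmalloc_to_page(addr)) +
                           offset_in_page(addr);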
diff --git a/mm/slab.c b/mm/slab.c
index 708efe886154..83311c9aaf9d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -595,6 +595,7 @@ static enum {
         PARTIAL_AC,
         PARTIAL_L3,
         EARLY,
+        LATE,
         FULL
 } g_cpucache_up;
 
@@ -671,7 +672,7 @@ static void init_node_lock_keys(int q)
 {
         struct cache_sizes *s = malloc_sizes;
 
-        if (g_cpucache_up != FULL)
+        if (g_cpucache_up < LATE)
                 return;
 
         for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void)
 {
         struct kmem_cache *cachep;
 
+        g_cpucache_up = LATE;
+
         /* Annotate slab for lockdep -- annotate the malloc caches */
         init_lock_keys();
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e583f770dfee..21fdf46ad5aa 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1315,7 +1315,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
                 unsigned long align, unsigned long flags, unsigned long start,
                 unsigned long end, int node, gfp_t gfp_mask, void *caller)
 {
-        static struct vmap_area *va;
+        struct vmap_area *va;
         struct vm_struct *area;
 
         BUG_ON(in_interrupt());
@@ -1658,6 +1658,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
                 goto fail;
 
         addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+        if (!addr)
+                return NULL;
 
         /*
          * In this function, newly allocated vm_struct is not added
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-        shrinker->nr = 0;
+        atomic_long_set(&shrinker->nr_in_batch, 0);
         down_write(&shrinker_rwsem);
         list_add_tail(&shrinker->list, &shrinker_list);
         up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 
         list_for_each_entry(shrinker, &shrinker_list, list) {
                 unsigned long long delta;
-                unsigned long total_scan;
-                unsigned long max_pass;
+                long total_scan;
+                long max_pass;
                 int shrink_ret = 0;
                 long nr;
                 long new_nr;
                 long batch_size = shrinker->batch ? shrinker->batch
                                                   : SHRINK_BATCH;
 
+                max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+                if (max_pass <= 0)
+                        continue;
+
                 /*
                  * copy the current shrinker scan count into a local variable
                  * and zero it so that other concurrent shrinker invocations
                  * don't also do this scanning work.
                  */
-                do {
-                        nr = shrinker->nr;
-                } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+                nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
                 total_scan = nr;
-                max_pass = do_shrinker_shrink(shrinker, shrink, 0);
                 delta = (4 * nr_pages_scanned) / shrinker->seeks;
                 delta *= max_pass;
                 do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
                  * manner that handles concurrent updates. If we exhausted the
                  * scan, there is no need to do an update.
                  */
-                do {
-                        nr = shrinker->nr;
-                        new_nr = total_scan + nr;
-                        if (total_scan <= 0)
-                                break;
-                } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+                if (total_scan > 0)
+                        new_nr = atomic_long_add_return(total_scan,
+                                        &shrinker->nr_in_batch);
+                else
+                        new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
                 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
         }
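
The vmscan hunks convert the deferred shrinker count from an open-coded cmpxchg loop on shrinker->nr to an atomic_long_t (nr_in_batch), which makes the take/return pattern explicit. A simplified sketch of that pattern (not the shrink_slab() code itself; the real code uses atomic_long_add_return() so it can also hand the new value to the tracepoint):

        /* Take the entire deferred batch atomically; a concurrent shrinker
         * racing here sees zero and skips the work. */
        long nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
        long total_scan = nr;	/* plus the newly computed delta in the real code */

        /* ... call do_shrinker_shrink() in batch_size chunks, decrementing
         * total_scan by the amount actually scanned ... */

        /* Put back whatever was not scanned, again without a cmpxchg loop. */
        if (total_scan > 0)
                atomic_long_add(total_scan, &shrinker->nr_in_batch);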