Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig          |  12
-rw-r--r--  mm/Makefile         |   4
-rw-r--r--  mm/backing-dev.c    |  18
-rw-r--r--  mm/filemap.c        |   2
-rw-r--r--  mm/filemap_xip.c    |   2
-rw-r--r--  mm/hugetlb.c        |   2
-rw-r--r--  mm/kmemleak.c       |   5
-rw-r--r--  mm/ksm.c            |  10
-rw-r--r--  mm/memcontrol.c     | 127
-rw-r--r--  mm/memory-failure.c |  59
-rw-r--r--  mm/memory.c         |  14
-rw-r--r--  mm/mempolicy.c      |  13
-rw-r--r--  mm/mmap.c           |   2
-rw-r--r--  mm/nommu.c          |  50
-rw-r--r--  mm/page-writeback.c |  33
-rw-r--r--  mm/page_alloc.c     |   3
-rw-r--r--  mm/percpu.c         | 112
-rw-r--r--  mm/rmap.c           |   4
-rw-r--r--  mm/shmem.c          |   9
-rw-r--r--  mm/swapfile.c       |  15
-rw-r--r--  mm/vmalloc.c        |  50
-rw-r--r--  mm/vmscan.c         |  22
22 files changed, 326 insertions, 242 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 247760729593..fd3386242cf0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
| @@ -67,7 +67,7 @@ config DISCONTIGMEM | |||
| 67 | 67 | ||
| 68 | config SPARSEMEM | 68 | config SPARSEMEM |
| 69 | def_bool y | 69 | def_bool y |
| 70 | depends on SPARSEMEM_MANUAL | 70 | depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL |
| 71 | 71 | ||
| 72 | config FLATMEM | 72 | config FLATMEM |
| 73 | def_bool y | 73 | def_bool y |
| @@ -129,7 +129,7 @@ config MEMORY_HOTPLUG | |||
| 129 | bool "Allow for memory hot-add" | 129 | bool "Allow for memory hot-add" |
| 130 | depends on SPARSEMEM || X86_64_ACPI_NUMA | 130 | depends on SPARSEMEM || X86_64_ACPI_NUMA |
| 131 | depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG | 131 | depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG |
| 132 | depends on (IA64 || X86 || PPC64 || SUPERH || S390) | 132 | depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) |
| 133 | 133 | ||
| 134 | comment "Memory hotplug is currently incompatible with Software Suspend" | 134 | comment "Memory hotplug is currently incompatible with Software Suspend" |
| 135 | depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390 | 135 | depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390 |
| @@ -224,7 +224,9 @@ config KSM | |||
| 224 | the many instances by a single resident page with that content, so | 224 | the many instances by a single resident page with that content, so |
| 225 | saving memory until one or another app needs to modify the content. | 225 | saving memory until one or another app needs to modify the content. |
| 226 | Recommended for use with KVM, or with other duplicative applications. | 226 | Recommended for use with KVM, or with other duplicative applications. |
| 227 | See Documentation/vm/ksm.txt for more information. | 227 | See Documentation/vm/ksm.txt for more information: KSM is inactive |
| 228 | until a program has madvised that an area is MADV_MERGEABLE, and | ||
| 229 | root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set). | ||
| 228 | 230 | ||
| 229 | config DEFAULT_MMAP_MIN_ADDR | 231 | config DEFAULT_MMAP_MIN_ADDR |
| 230 | int "Low address space to protect from user allocation" | 232 | int "Low address space to protect from user allocation" |
| @@ -244,10 +246,12 @@ config DEFAULT_MMAP_MIN_ADDR | |||
| 244 | This value can be changed after boot using the | 246 | This value can be changed after boot using the |
| 245 | /proc/sys/vm/mmap_min_addr tunable. | 247 | /proc/sys/vm/mmap_min_addr tunable. |
| 246 | 248 | ||
| 249 | config ARCH_SUPPORTS_MEMORY_FAILURE | ||
| 250 | bool | ||
| 247 | 251 | ||
| 248 | config MEMORY_FAILURE | 252 | config MEMORY_FAILURE |
| 249 | depends on MMU | 253 | depends on MMU |
| 250 | depends on X86_MCE | 254 | depends on ARCH_SUPPORTS_MEMORY_FAILURE |
| 251 | bool "Enable recovery from hardware memory errors" | 255 | bool "Enable recovery from hardware memory errors" |
| 252 | help | 256 | help |
| 253 | Enables code to recover from some memory failures on systems | 257 | Enables code to recover from some memory failures on systems |
diff --git a/mm/Makefile b/mm/Makefile
index 515fd793c17f..ebf849042ed3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
| @@ -5,14 +5,14 @@ | |||
| 5 | mmu-y := nommu.o | 5 | mmu-y := nommu.o |
| 6 | mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ | 6 | mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ |
| 7 | mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ | 7 | mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ |
| 8 | vmalloc.o | 8 | vmalloc.o pagewalk.o |
| 9 | 9 | ||
| 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ |
| 11 | maccess.o page_alloc.o page-writeback.o \ | 11 | maccess.o page_alloc.o page-writeback.o \ |
| 12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
| 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
| 14 | page_isolation.o mm_init.o mmu_context.o \ | 14 | page_isolation.o mm_init.o mmu_context.o \ |
| 15 | pagewalk.o $(mmu-y) | 15 | $(mmu-y) |
| 16 | obj-y += init-mm.o | 16 | obj-y += init-mm.o |
| 17 | 17 | ||
| 18 | obj-$(CONFIG_BOUNCE) += bounce.o | 18 | obj-$(CONFIG_BOUNCE) += bounce.o |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 3d3accb1f800..1065b715ef64 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
| @@ -92,7 +92,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) | |||
| 92 | "BdiDirtyThresh: %8lu kB\n" | 92 | "BdiDirtyThresh: %8lu kB\n" |
| 93 | "DirtyThresh: %8lu kB\n" | 93 | "DirtyThresh: %8lu kB\n" |
| 94 | "BackgroundThresh: %8lu kB\n" | 94 | "BackgroundThresh: %8lu kB\n" |
| 95 | "WriteBack threads:%8lu\n" | 95 | "WritebackThreads: %8lu\n" |
| 96 | "b_dirty: %8lu\n" | 96 | "b_dirty: %8lu\n" |
| 97 | "b_io: %8lu\n" | 97 | "b_io: %8lu\n" |
| 98 | "b_more_io: %8lu\n" | 98 | "b_more_io: %8lu\n" |
| @@ -610,6 +610,21 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) | |||
| 610 | kthread_stop(wb->task); | 610 | kthread_stop(wb->task); |
| 611 | } | 611 | } |
| 612 | 612 | ||
| 613 | /* | ||
| 614 | * This bdi is going away now, make sure that no super_blocks point to it | ||
| 615 | */ | ||
| 616 | static void bdi_prune_sb(struct backing_dev_info *bdi) | ||
| 617 | { | ||
| 618 | struct super_block *sb; | ||
| 619 | |||
| 620 | spin_lock(&sb_lock); | ||
| 621 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
| 622 | if (sb->s_bdi == bdi) | ||
| 623 | sb->s_bdi = NULL; | ||
| 624 | } | ||
| 625 | spin_unlock(&sb_lock); | ||
| 626 | } | ||
| 627 | |||
| 613 | void bdi_unregister(struct backing_dev_info *bdi) | 628 | void bdi_unregister(struct backing_dev_info *bdi) |
| 614 | { | 629 | { |
| 615 | if (bdi->dev) { | 630 | if (bdi->dev) { |
| @@ -682,6 +697,7 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
| 682 | spin_unlock(&inode_lock); | 697 | spin_unlock(&inode_lock); |
| 683 | } | 698 | } |
| 684 | 699 | ||
| 700 | bdi_prune_sb(bdi); | ||
| 685 | bdi_unregister(bdi); | 701 | bdi_unregister(bdi); |
| 686 | 702 | ||
| 687 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 703 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
diff --git a/mm/filemap.c b/mm/filemap.c
index 6c84e598b4a9..ef169f37156d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
| @@ -1611,7 +1611,7 @@ page_not_uptodate: | |||
| 1611 | } | 1611 | } |
| 1612 | EXPORT_SYMBOL(filemap_fault); | 1612 | EXPORT_SYMBOL(filemap_fault); |
| 1613 | 1613 | ||
| 1614 | struct vm_operations_struct generic_file_vm_ops = { | 1614 | const struct vm_operations_struct generic_file_vm_ops = { |
| 1615 | .fault = filemap_fault, | 1615 | .fault = filemap_fault, |
| 1616 | }; | 1616 | }; |
| 1617 | 1617 | ||
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 427dfe3ce78c..1888b2d71bb8 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
| @@ -296,7 +296,7 @@ out: | |||
| 296 | } | 296 | } |
| 297 | } | 297 | } |
| 298 | 298 | ||
| 299 | static struct vm_operations_struct xip_file_vm_ops = { | 299 | static const struct vm_operations_struct xip_file_vm_ops = { |
| 300 | .fault = xip_file_fault, | 300 | .fault = xip_file_fault, |
| 301 | }; | 301 | }; |
| 302 | 302 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6f048fcc749c..5d7601b02874 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
| @@ -1721,7 +1721,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 1721 | return 0; | 1721 | return 0; |
| 1722 | } | 1722 | } |
| 1723 | 1723 | ||
| 1724 | struct vm_operations_struct hugetlb_vm_ops = { | 1724 | const struct vm_operations_struct hugetlb_vm_ops = { |
| 1725 | .fault = hugetlb_vm_op_fault, | 1725 | .fault = hugetlb_vm_op_fault, |
| 1726 | .open = hugetlb_vm_op_open, | 1726 | .open = hugetlb_vm_op_open, |
| 1727 | .close = hugetlb_vm_op_close, | 1727 | .close = hugetlb_vm_op_close, |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 4ea4510e2996..8bf765c4f58d 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
| @@ -833,12 +833,15 @@ static void early_alloc(struct early_log *log) | |||
| 833 | */ | 833 | */ |
| 834 | rcu_read_lock(); | 834 | rcu_read_lock(); |
| 835 | object = create_object((unsigned long)log->ptr, log->size, | 835 | object = create_object((unsigned long)log->ptr, log->size, |
| 836 | log->min_count, GFP_KERNEL); | 836 | log->min_count, GFP_ATOMIC); |
| 837 | if (!object) | ||
| 838 | goto out; | ||
| 837 | spin_lock_irqsave(&object->lock, flags); | 839 | spin_lock_irqsave(&object->lock, flags); |
| 838 | for (i = 0; i < log->trace_len; i++) | 840 | for (i = 0; i < log->trace_len; i++) |
| 839 | object->trace[i] = log->trace[i]; | 841 | object->trace[i] = log->trace[i]; |
| 840 | object->trace_len = log->trace_len; | 842 | object->trace_len = log->trace_len; |
| 841 | spin_unlock_irqrestore(&object->lock, flags); | 843 | spin_unlock_irqrestore(&object->lock, flags); |
| 844 | out: | ||
| 842 | rcu_read_unlock(); | 845 | rcu_read_unlock(); |
| 843 | } | 846 | } |
| 844 | 847 | ||
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
| @@ -184,11 +184,6 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock); | |||
| 184 | sizeof(struct __struct), __alignof__(struct __struct),\ | 184 | sizeof(struct __struct), __alignof__(struct __struct),\ |
| 185 | (__flags), NULL) | 185 | (__flags), NULL) |
| 186 | 186 | ||
| 187 | static void __init ksm_init_max_kernel_pages(void) | ||
| 188 | { | ||
| 189 | ksm_max_kernel_pages = nr_free_buffer_pages() / 4; | ||
| 190 | } | ||
| 191 | |||
| 192 | static int __init ksm_slab_init(void) | 187 | static int __init ksm_slab_init(void) |
| 193 | { | 188 | { |
| 194 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); | 189 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); |
| @@ -1673,7 +1668,7 @@ static int __init ksm_init(void) | |||
| 1673 | struct task_struct *ksm_thread; | 1668 | struct task_struct *ksm_thread; |
| 1674 | int err; | 1669 | int err; |
| 1675 | 1670 | ||
| 1676 | ksm_init_max_kernel_pages(); | 1671 | ksm_max_kernel_pages = totalram_pages / 4; |
| 1677 | 1672 | ||
| 1678 | err = ksm_slab_init(); | 1673 | err = ksm_slab_init(); |
| 1679 | if (err) | 1674 | if (err) |
| @@ -1697,6 +1692,9 @@ static int __init ksm_init(void) | |||
| 1697 | kthread_stop(ksm_thread); | 1692 | kthread_stop(ksm_thread); |
| 1698 | goto out_free2; | 1693 | goto out_free2; |
| 1699 | } | 1694 | } |
| 1695 | #else | ||
| 1696 | ksm_run = KSM_RUN_MERGE; /* no way for user to start it */ | ||
| 1697 | |||
| 1700 | #endif /* CONFIG_SYSFS */ | 1698 | #endif /* CONFIG_SYSFS */ |
| 1701 | 1699 | ||
| 1702 | return 0; | 1700 | return 0; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2b98a6875c0..f99f5991d6bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
| @@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page) | |||
| 313 | static void | 313 | static void |
| 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, |
| 315 | struct mem_cgroup_per_zone *mz, | 315 | struct mem_cgroup_per_zone *mz, |
| 316 | struct mem_cgroup_tree_per_zone *mctz) | 316 | struct mem_cgroup_tree_per_zone *mctz, |
| 317 | unsigned long long new_usage_in_excess) | ||
| 317 | { | 318 | { |
| 318 | struct rb_node **p = &mctz->rb_root.rb_node; | 319 | struct rb_node **p = &mctz->rb_root.rb_node; |
| 319 | struct rb_node *parent = NULL; | 320 | struct rb_node *parent = NULL; |
| @@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | |||
| 322 | if (mz->on_tree) | 323 | if (mz->on_tree) |
| 323 | return; | 324 | return; |
| 324 | 325 | ||
| 325 | mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 326 | mz->usage_in_excess = new_usage_in_excess; |
| 327 | if (!mz->usage_in_excess) | ||
| 328 | return; | ||
| 326 | while (*p) { | 329 | while (*p) { |
| 327 | parent = *p; | 330 | parent = *p; |
| 328 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | 331 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, |
| @@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | |||
| 353 | } | 356 | } |
| 354 | 357 | ||
| 355 | static void | 358 | static void |
| 356 | mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
| 357 | struct mem_cgroup_per_zone *mz, | ||
| 358 | struct mem_cgroup_tree_per_zone *mctz) | ||
| 359 | { | ||
| 360 | spin_lock(&mctz->lock); | ||
| 361 | __mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
| 362 | spin_unlock(&mctz->lock); | ||
| 363 | } | ||
| 364 | |||
| 365 | static void | ||
| 366 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | 359 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, |
| 367 | struct mem_cgroup_per_zone *mz, | 360 | struct mem_cgroup_per_zone *mz, |
| 368 | struct mem_cgroup_tree_per_zone *mctz) | 361 | struct mem_cgroup_tree_per_zone *mctz) |
| @@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | |||
| 392 | 385 | ||
| 393 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) | 386 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) |
| 394 | { | 387 | { |
| 395 | unsigned long long prev_usage_in_excess, new_usage_in_excess; | 388 | unsigned long long excess; |
| 396 | bool updated_tree = false; | ||
| 397 | struct mem_cgroup_per_zone *mz; | 389 | struct mem_cgroup_per_zone *mz; |
| 398 | struct mem_cgroup_tree_per_zone *mctz; | 390 | struct mem_cgroup_tree_per_zone *mctz; |
| 399 | 391 | int nid = page_to_nid(page); | |
| 400 | mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); | 392 | int zid = page_zonenum(page); |
| 401 | mctz = soft_limit_tree_from_page(page); | 393 | mctz = soft_limit_tree_from_page(page); |
| 402 | 394 | ||
| 403 | /* | 395 | /* |
| 404 | * We do updates in lazy mode, mem's are removed | 396 | * Necessary to update all ancestors when hierarchy is used. |
| 405 | * lazily from the per-zone, per-node rb tree | 397 | * because their event counter is not touched. |
| 406 | */ | 398 | */ |
| 407 | prev_usage_in_excess = mz->usage_in_excess; | 399 | for (; mem; mem = parent_mem_cgroup(mem)) { |
| 408 | 400 | mz = mem_cgroup_zoneinfo(mem, nid, zid); | |
| 409 | new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 401 | excess = res_counter_soft_limit_excess(&mem->res); |
| 410 | if (prev_usage_in_excess) { | 402 | /* |
| 411 | mem_cgroup_remove_exceeded(mem, mz, mctz); | 403 | * We have to update the tree if mz is on RB-tree or |
| 412 | updated_tree = true; | 404 | * mem is over its softlimit. |
| 413 | } | 405 | */ |
| 414 | if (!new_usage_in_excess) | 406 | if (excess || mz->on_tree) { |
| 415 | goto done; | 407 | spin_lock(&mctz->lock); |
| 416 | mem_cgroup_insert_exceeded(mem, mz, mctz); | 408 | /* if on-tree, remove it */ |
| 417 | 409 | if (mz->on_tree) | |
| 418 | done: | 410 | __mem_cgroup_remove_exceeded(mem, mz, mctz); |
| 419 | if (updated_tree) { | 411 | /* |
| 420 | spin_lock(&mctz->lock); | 412 | * Insert again. mz->usage_in_excess will be updated. |
| 421 | mz->usage_in_excess = new_usage_in_excess; | 413 | * If excess is 0, no tree ops. |
| 422 | spin_unlock(&mctz->lock); | 414 | */ |
| 415 | __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); | ||
| 416 | spin_unlock(&mctz->lock); | ||
| 417 | } | ||
| 423 | } | 418 | } |
| 424 | } | 419 | } |
| 425 | 420 | ||
| @@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone * | |||
| 447 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 442 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) |
| 448 | { | 443 | { |
| 449 | struct rb_node *rightmost = NULL; | 444 | struct rb_node *rightmost = NULL; |
| 450 | struct mem_cgroup_per_zone *mz = NULL; | 445 | struct mem_cgroup_per_zone *mz; |
| 451 | 446 | ||
| 452 | retry: | 447 | retry: |
| 448 | mz = NULL; | ||
| 453 | rightmost = rb_last(&mctz->rb_root); | 449 | rightmost = rb_last(&mctz->rb_root); |
| 454 | if (!rightmost) | 450 | if (!rightmost) |
| 455 | goto done; /* Nothing to reclaim from */ | 451 | goto done; /* Nothing to reclaim from */ |
| @@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1270 | gfp_t gfp_mask, struct mem_cgroup **memcg, | 1266 | gfp_t gfp_mask, struct mem_cgroup **memcg, |
| 1271 | bool oom, struct page *page) | 1267 | bool oom, struct page *page) |
| 1272 | { | 1268 | { |
| 1273 | struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; | 1269 | struct mem_cgroup *mem, *mem_over_limit; |
| 1274 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1270 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
| 1275 | struct res_counter *fail_res, *soft_fail_res = NULL; | 1271 | struct res_counter *fail_res; |
| 1276 | 1272 | ||
| 1277 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1273 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
| 1278 | /* Don't account this! */ | 1274 | /* Don't account this! */ |
| @@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1304 | 1300 | ||
| 1305 | if (mem_cgroup_is_root(mem)) | 1301 | if (mem_cgroup_is_root(mem)) |
| 1306 | goto done; | 1302 | goto done; |
| 1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | 1303 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); |
| 1308 | &soft_fail_res); | ||
| 1309 | if (likely(!ret)) { | 1304 | if (likely(!ret)) { |
| 1310 | if (!do_swap_account) | 1305 | if (!do_swap_account) |
| 1311 | break; | 1306 | break; |
| 1312 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1307 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, |
| 1313 | &fail_res, NULL); | 1308 | &fail_res); |
| 1314 | if (likely(!ret)) | 1309 | if (likely(!ret)) |
| 1315 | break; | 1310 | break; |
| 1316 | /* mem+swap counter fails */ | 1311 | /* mem+swap counter fails */ |
| 1317 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1312 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1318 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1313 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
| 1319 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1314 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
| 1320 | memsw); | 1315 | memsw); |
| @@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1353 | } | 1348 | } |
| 1354 | } | 1349 | } |
| 1355 | /* | 1350 | /* |
| 1356 | * Insert just the ancestor, we should trickle down to the correct | 1351 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
| 1357 | * cgroup for reclaim, since the other nodes will be below their | 1352 | * if they exceeds softlimit. |
| 1358 | * soft limit | ||
| 1359 | */ | 1353 | */ |
| 1360 | if (soft_fail_res) { | 1354 | if (mem_cgroup_soft_limit_check(mem)) |
| 1361 | mem_over_soft_limit = | 1355 | mem_cgroup_update_tree(mem, page); |
| 1362 | mem_cgroup_from_res_counter(soft_fail_res, res); | ||
| 1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | ||
| 1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); | ||
| 1365 | } | ||
| 1366 | done: | 1356 | done: |
| 1367 | return 0; | 1357 | return 0; |
| 1368 | nomem: | 1358 | nomem: |
| @@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
| 1437 | if (unlikely(PageCgroupUsed(pc))) { | 1427 | if (unlikely(PageCgroupUsed(pc))) { |
| 1438 | unlock_page_cgroup(pc); | 1428 | unlock_page_cgroup(pc); |
| 1439 | if (!mem_cgroup_is_root(mem)) { | 1429 | if (!mem_cgroup_is_root(mem)) { |
| 1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1430 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1441 | if (do_swap_account) | 1431 | if (do_swap_account) |
| 1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | 1432 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1443 | NULL); | ||
| 1444 | } | 1433 | } |
| 1445 | css_put(&mem->css); | 1434 | css_put(&mem->css); |
| 1446 | return; | 1435 | return; |
| @@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
| 1519 | goto out; | 1508 | goto out; |
| 1520 | 1509 | ||
| 1521 | if (!mem_cgroup_is_root(from)) | 1510 | if (!mem_cgroup_is_root(from)) |
| 1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | 1511 | res_counter_uncharge(&from->res, PAGE_SIZE); |
| 1523 | mem_cgroup_charge_statistics(from, pc, false); | 1512 | mem_cgroup_charge_statistics(from, pc, false); |
| 1524 | 1513 | ||
| 1525 | page = pc->page; | 1514 | page = pc->page; |
| @@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
| 1539 | } | 1528 | } |
| 1540 | 1529 | ||
| 1541 | if (do_swap_account && !mem_cgroup_is_root(from)) | 1530 | if (do_swap_account && !mem_cgroup_is_root(from)) |
| 1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); | 1531 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
| 1543 | css_put(&from->css); | 1532 | css_put(&from->css); |
| 1544 | 1533 | ||
| 1545 | css_get(&to->css); | 1534 | css_get(&to->css); |
| @@ -1610,9 +1599,9 @@ uncharge: | |||
| 1610 | css_put(&parent->css); | 1599 | css_put(&parent->css); |
| 1611 | /* uncharge if move fails */ | 1600 | /* uncharge if move fails */ |
| 1612 | if (!mem_cgroup_is_root(parent)) { | 1601 | if (!mem_cgroup_is_root(parent)) { |
| 1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); | 1602 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
| 1614 | if (do_swap_account) | 1603 | if (do_swap_account) |
| 1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | 1604 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
| 1616 | } | 1605 | } |
| 1617 | return ret; | 1606 | return ret; |
| 1618 | } | 1607 | } |
| @@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
| 1803 | * calling css_tryget | 1792 | * calling css_tryget |
| 1804 | */ | 1793 | */ |
| 1805 | if (!mem_cgroup_is_root(memcg)) | 1794 | if (!mem_cgroup_is_root(memcg)) |
| 1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | 1795 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
| 1807 | NULL); | ||
| 1808 | mem_cgroup_swap_statistics(memcg, false); | 1796 | mem_cgroup_swap_statistics(memcg, false); |
| 1809 | mem_cgroup_put(memcg); | 1797 | mem_cgroup_put(memcg); |
| 1810 | } | 1798 | } |
| @@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
| 1831 | if (!mem) | 1819 | if (!mem) |
| 1832 | return; | 1820 | return; |
| 1833 | if (!mem_cgroup_is_root(mem)) { | 1821 | if (!mem_cgroup_is_root(mem)) { |
| 1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1822 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1835 | if (do_swap_account) | 1823 | if (do_swap_account) |
| 1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1824 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1837 | } | 1825 | } |
| 1838 | css_put(&mem->css); | 1826 | css_put(&mem->css); |
| 1839 | } | 1827 | } |
| @@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1848 | struct page_cgroup *pc; | 1836 | struct page_cgroup *pc; |
| 1849 | struct mem_cgroup *mem = NULL; | 1837 | struct mem_cgroup *mem = NULL; |
| 1850 | struct mem_cgroup_per_zone *mz; | 1838 | struct mem_cgroup_per_zone *mz; |
| 1851 | bool soft_limit_excess = false; | ||
| 1852 | 1839 | ||
| 1853 | if (mem_cgroup_disabled()) | 1840 | if (mem_cgroup_disabled()) |
| 1854 | return NULL; | 1841 | return NULL; |
| @@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1888 | } | 1875 | } |
| 1889 | 1876 | ||
| 1890 | if (!mem_cgroup_is_root(mem)) { | 1877 | if (!mem_cgroup_is_root(mem)) { |
| 1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); | 1878 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1892 | if (do_swap_account && | 1879 | if (do_swap_account && |
| 1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1880 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) |
| 1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1881 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1895 | } | 1882 | } |
| 1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1883 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
| 1897 | mem_cgroup_swap_statistics(mem, true); | 1884 | mem_cgroup_swap_statistics(mem, true); |
| @@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1908 | mz = page_cgroup_zoneinfo(pc); | 1895 | mz = page_cgroup_zoneinfo(pc); |
| 1909 | unlock_page_cgroup(pc); | 1896 | unlock_page_cgroup(pc); |
| 1910 | 1897 | ||
| 1911 | if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) | 1898 | if (mem_cgroup_soft_limit_check(mem)) |
| 1912 | mem_cgroup_update_tree(mem, page); | 1899 | mem_cgroup_update_tree(mem, page); |
| 1913 | /* at swapout, this memcg will be accessed to record to swap */ | 1900 | /* at swapout, this memcg will be accessed to record to swap */ |
| 1914 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1901 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
| @@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
| 1986 | * This memcg can be obsolete one. We avoid calling css_tryget | 1973 | * This memcg can be obsolete one. We avoid calling css_tryget |
| 1987 | */ | 1974 | */ |
| 1988 | if (!mem_cgroup_is_root(memcg)) | 1975 | if (!mem_cgroup_is_root(memcg)) |
| 1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1976 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
| 1990 | mem_cgroup_swap_statistics(memcg, false); | 1977 | mem_cgroup_swap_statistics(memcg, false); |
| 1991 | mem_cgroup_put(memcg); | 1978 | mem_cgroup_put(memcg); |
| 1992 | } | 1979 | } |
| @@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2233 | unsigned long reclaimed; | 2220 | unsigned long reclaimed; |
| 2234 | int loop = 0; | 2221 | int loop = 0; |
| 2235 | struct mem_cgroup_tree_per_zone *mctz; | 2222 | struct mem_cgroup_tree_per_zone *mctz; |
| 2223 | unsigned long long excess; | ||
| 2236 | 2224 | ||
| 2237 | if (order > 0) | 2225 | if (order > 0) |
| 2238 | return 0; | 2226 | return 0; |
| @@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2284 | break; | 2272 | break; |
| 2285 | } while (1); | 2273 | } while (1); |
| 2286 | } | 2274 | } |
| 2287 | mz->usage_in_excess = | ||
| 2288 | res_counter_soft_limit_excess(&mz->mem->res); | ||
| 2289 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | 2275 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); |
| 2276 | excess = res_counter_soft_limit_excess(&mz->mem->res); | ||
| 2290 | /* | 2277 | /* |
| 2291 | * One school of thought says that we should not add | 2278 | * One school of thought says that we should not add |
| 2292 | * back the node to the tree if reclaim returns 0. | 2279 | * back the node to the tree if reclaim returns 0. |
| @@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2295 | * memory to reclaim from. Consider this as a longer | 2282 | * memory to reclaim from. Consider this as a longer |
| 2296 | * term TODO. | 2283 | * term TODO. |
| 2297 | */ | 2284 | */ |
| 2298 | if (mz->usage_in_excess) | 2285 | /* If excess == 0, no tree ops */ |
| 2299 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); | 2286 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess); |
| 2300 | spin_unlock(&mctz->lock); | 2287 | spin_unlock(&mctz->lock); |
| 2301 | css_put(&mz->mem->css); | 2288 | css_put(&mz->mem->css); |
| 2302 | loop++; | 2289 | loop++; |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 729d4b15b645..dacc64183874 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
| 36 | #include <linux/page-flags.h> | 36 | #include <linux/page-flags.h> |
| 37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
| 38 | #include <linux/ksm.h> | ||
| 38 | #include <linux/rmap.h> | 39 | #include <linux/rmap.h> |
| 39 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
| 40 | #include <linux/swap.h> | 41 | #include <linux/swap.h> |
| @@ -370,9 +371,6 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) | |||
| 370 | int ret = FAILED; | 371 | int ret = FAILED; |
| 371 | struct address_space *mapping; | 372 | struct address_space *mapping; |
| 372 | 373 | ||
| 373 | if (!isolate_lru_page(p)) | ||
| 374 | page_cache_release(p); | ||
| 375 | |||
| 376 | /* | 374 | /* |
| 377 | * For anonymous pages we're done the only reference left | 375 | * For anonymous pages we're done the only reference left |
| 378 | * should be the one m_f() holds. | 376 | * should be the one m_f() holds. |
| @@ -498,30 +496,18 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) | |||
| 498 | */ | 496 | */ |
| 499 | static int me_swapcache_dirty(struct page *p, unsigned long pfn) | 497 | static int me_swapcache_dirty(struct page *p, unsigned long pfn) |
| 500 | { | 498 | { |
| 501 | int ret = FAILED; | ||
| 502 | |||
| 503 | ClearPageDirty(p); | 499 | ClearPageDirty(p); |
| 504 | /* Trigger EIO in shmem: */ | 500 | /* Trigger EIO in shmem: */ |
| 505 | ClearPageUptodate(p); | 501 | ClearPageUptodate(p); |
| 506 | 502 | ||
| 507 | if (!isolate_lru_page(p)) { | 503 | return DELAYED; |
| 508 | page_cache_release(p); | ||
| 509 | ret = DELAYED; | ||
| 510 | } | ||
| 511 | |||
| 512 | return ret; | ||
| 513 | } | 504 | } |
| 514 | 505 | ||
| 515 | static int me_swapcache_clean(struct page *p, unsigned long pfn) | 506 | static int me_swapcache_clean(struct page *p, unsigned long pfn) |
| 516 | { | 507 | { |
| 517 | int ret = FAILED; | ||
| 518 | |||
| 519 | if (!isolate_lru_page(p)) { | ||
| 520 | page_cache_release(p); | ||
| 521 | ret = RECOVERED; | ||
| 522 | } | ||
| 523 | delete_from_swap_cache(p); | 508 | delete_from_swap_cache(p); |
| 524 | return ret; | 509 | |
| 510 | return RECOVERED; | ||
| 525 | } | 511 | } |
| 526 | 512 | ||
| 527 | /* | 513 | /* |
| @@ -611,8 +597,6 @@ static struct page_state { | |||
| 611 | { 0, 0, "unknown page state", me_unknown }, | 597 | { 0, 0, "unknown page state", me_unknown }, |
| 612 | }; | 598 | }; |
| 613 | 599 | ||
| 614 | #undef lru | ||
| 615 | |||
| 616 | static void action_result(unsigned long pfn, char *msg, int result) | 600 | static void action_result(unsigned long pfn, char *msg, int result) |
| 617 | { | 601 | { |
| 618 | struct page *page = NULL; | 602 | struct page *page = NULL; |
| @@ -629,13 +613,16 @@ static int page_action(struct page_state *ps, struct page *p, | |||
| 629 | unsigned long pfn, int ref) | 613 | unsigned long pfn, int ref) |
| 630 | { | 614 | { |
| 631 | int result; | 615 | int result; |
| 616 | int count; | ||
| 632 | 617 | ||
| 633 | result = ps->action(p, pfn); | 618 | result = ps->action(p, pfn); |
| 634 | action_result(pfn, ps->msg, result); | 619 | action_result(pfn, ps->msg, result); |
| 635 | if (page_count(p) != 1 + ref) | 620 | |
| 621 | count = page_count(p) - 1 - ref; | ||
| 622 | if (count != 0) | ||
| 636 | printk(KERN_ERR | 623 | printk(KERN_ERR |
| 637 | "MCE %#lx: %s page still referenced by %d users\n", | 624 | "MCE %#lx: %s page still referenced by %d users\n", |
| 638 | pfn, ps->msg, page_count(p) - 1); | 625 | pfn, ps->msg, count); |
| 639 | 626 | ||
| 640 | /* Could do more checks here if page looks ok */ | 627 | /* Could do more checks here if page looks ok */ |
| 641 | /* | 628 | /* |
| @@ -661,12 +648,9 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
| 661 | int i; | 648 | int i; |
| 662 | int kill = 1; | 649 | int kill = 1; |
| 663 | 650 | ||
| 664 | if (PageReserved(p) || PageCompound(p) || PageSlab(p)) | 651 | if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p)) |
| 665 | return; | 652 | return; |
| 666 | 653 | ||
| 667 | if (!PageLRU(p)) | ||
| 668 | lru_add_drain_all(); | ||
| 669 | |||
| 670 | /* | 654 | /* |
| 671 | * This check implies we don't kill processes if their pages | 655 | * This check implies we don't kill processes if their pages |
| 672 | * are in the swap cache early. Those are always late kills. | 656 | * are in the swap cache early. Those are always late kills. |
| @@ -738,6 +722,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
| 738 | 722 | ||
| 739 | int __memory_failure(unsigned long pfn, int trapno, int ref) | 723 | int __memory_failure(unsigned long pfn, int trapno, int ref) |
| 740 | { | 724 | { |
| 725 | unsigned long lru_flag; | ||
| 741 | struct page_state *ps; | 726 | struct page_state *ps; |
| 742 | struct page *p; | 727 | struct page *p; |
| 743 | int res; | 728 | int res; |
| @@ -775,6 +760,24 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) | |||
| 775 | } | 760 | } |
| 776 | 761 | ||
| 777 | /* | 762 | /* |
| 763 | * We ignore non-LRU pages for good reasons. | ||
| 764 | * - PG_locked is only well defined for LRU pages and a few others | ||
| 765 | * - to avoid races with __set_page_locked() | ||
| 766 | * - to avoid races with __SetPageSlab*() (and more non-atomic ops) | ||
| 767 | * The check (unnecessarily) ignores LRU pages being isolated and | ||
| 768 | * walked by the page reclaim code, however that's not a big loss. | ||
| 769 | */ | ||
| 770 | if (!PageLRU(p)) | ||
| 771 | lru_add_drain_all(); | ||
| 772 | lru_flag = p->flags & lru; | ||
| 773 | if (isolate_lru_page(p)) { | ||
| 774 | action_result(pfn, "non LRU", IGNORED); | ||
| 775 | put_page(p); | ||
| 776 | return -EBUSY; | ||
| 777 | } | ||
| 778 | page_cache_release(p); | ||
| 779 | |||
| 780 | /* | ||
| 778 | * Lock the page and wait for writeback to finish. | 781 | * Lock the page and wait for writeback to finish. |
| 779 | * It's very difficult to mess with pages currently under IO | 782 | * It's very difficult to mess with pages currently under IO |
| 780 | * and in many cases impossible, so we just avoid it here. | 783 | * and in many cases impossible, so we just avoid it here. |
| @@ -790,7 +793,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) | |||
| 790 | /* | 793 | /* |
| 791 | * Torn down by someone else? | 794 | * Torn down by someone else? |
| 792 | */ | 795 | */ |
| 793 | if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { | 796 | if ((lru_flag & lru) && !PageSwapCache(p) && p->mapping == NULL) { |
| 794 | action_result(pfn, "already truncated LRU", IGNORED); | 797 | action_result(pfn, "already truncated LRU", IGNORED); |
| 795 | res = 0; | 798 | res = 0; |
| 796 | goto out; | 799 | goto out; |
| @@ -798,7 +801,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) | |||
| 798 | 801 | ||
| 799 | res = -EBUSY; | 802 | res = -EBUSY; |
| 800 | for (ps = error_states;; ps++) { | 803 | for (ps = error_states;; ps++) { |
| 801 | if ((p->flags & ps->mask) == ps->res) { | 804 | if (((p->flags | lru_flag)& ps->mask) == ps->res) { |
| 802 | res = page_action(ps, p, pfn, ref); | 805 | res = page_action(ps, p, pfn, ref); |
| 803 | break; | 806 | break; |
| 804 | } | 807 | } |
diff --git a/mm/memory.c b/mm/memory.c
index 7e91b5f9f690..6ab19dd4a199 100644
--- a/mm/memory.c
+++ b/mm/memory.c
| @@ -641,6 +641,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
| 641 | pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, | 641 | pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, |
| 642 | unsigned long addr, unsigned long end) | 642 | unsigned long addr, unsigned long end) |
| 643 | { | 643 | { |
| 644 | pte_t *orig_src_pte, *orig_dst_pte; | ||
| 644 | pte_t *src_pte, *dst_pte; | 645 | pte_t *src_pte, *dst_pte; |
| 645 | spinlock_t *src_ptl, *dst_ptl; | 646 | spinlock_t *src_ptl, *dst_ptl; |
| 646 | int progress = 0; | 647 | int progress = 0; |
| @@ -654,6 +655,8 @@ again: | |||
| 654 | src_pte = pte_offset_map_nested(src_pmd, addr); | 655 | src_pte = pte_offset_map_nested(src_pmd, addr); |
| 655 | src_ptl = pte_lockptr(src_mm, src_pmd); | 656 | src_ptl = pte_lockptr(src_mm, src_pmd); |
| 656 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | 657 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); |
| 658 | orig_src_pte = src_pte; | ||
| 659 | orig_dst_pte = dst_pte; | ||
| 657 | arch_enter_lazy_mmu_mode(); | 660 | arch_enter_lazy_mmu_mode(); |
| 658 | 661 | ||
| 659 | do { | 662 | do { |
| @@ -677,9 +680,9 @@ again: | |||
| 677 | 680 | ||
| 678 | arch_leave_lazy_mmu_mode(); | 681 | arch_leave_lazy_mmu_mode(); |
| 679 | spin_unlock(src_ptl); | 682 | spin_unlock(src_ptl); |
| 680 | pte_unmap_nested(src_pte - 1); | 683 | pte_unmap_nested(orig_src_pte); |
| 681 | add_mm_rss(dst_mm, rss[0], rss[1]); | 684 | add_mm_rss(dst_mm, rss[0], rss[1]); |
| 682 | pte_unmap_unlock(dst_pte - 1, dst_ptl); | 685 | pte_unmap_unlock(orig_dst_pte, dst_ptl); |
| 683 | cond_resched(); | 686 | cond_resched(); |
| 684 | if (addr != end) | 687 | if (addr != end) |
| 685 | goto again; | 688 | goto again; |
| @@ -1820,10 +1823,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
| 1820 | token = pmd_pgtable(*pmd); | 1823 | token = pmd_pgtable(*pmd); |
| 1821 | 1824 | ||
| 1822 | do { | 1825 | do { |
| 1823 | err = fn(pte, token, addr, data); | 1826 | err = fn(pte++, token, addr, data); |
| 1824 | if (err) | 1827 | if (err) |
| 1825 | break; | 1828 | break; |
| 1826 | } while (pte++, addr += PAGE_SIZE, addr != end); | 1829 | } while (addr += PAGE_SIZE, addr != end); |
| 1827 | 1830 | ||
| 1828 | arch_leave_lazy_mmu_mode(); | 1831 | arch_leave_lazy_mmu_mode(); |
| 1829 | 1832 | ||
| @@ -2539,7 +2542,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2539 | } else if (PageHWPoison(page)) { | 2542 | } else if (PageHWPoison(page)) { |
| 2540 | ret = VM_FAULT_HWPOISON; | 2543 | ret = VM_FAULT_HWPOISON; |
| 2541 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2544 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
| 2542 | goto out; | 2545 | goto out_release; |
| 2543 | } | 2546 | } |
| 2544 | 2547 | ||
| 2545 | lock_page(page); | 2548 | lock_page(page); |
| @@ -2611,6 +2614,7 @@ out_nomap: | |||
| 2611 | pte_unmap_unlock(page_table, ptl); | 2614 | pte_unmap_unlock(page_table, ptl); |
| 2612 | out_page: | 2615 | out_page: |
| 2613 | unlock_page(page); | 2616 | unlock_page(page); |
| 2617 | out_release: | ||
| 2614 | page_cache_release(page); | 2618 | page_cache_release(page); |
| 2615 | return ret; | 2619 | return ret; |
| 2616 | } | 2620 | } |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7dd9d9f80694..4545d5944243 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
| @@ -1024,7 +1024,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
| 1024 | 1024 | ||
| 1025 | err = migrate_prep(); | 1025 | err = migrate_prep(); |
| 1026 | if (err) | 1026 | if (err) |
| 1027 | return err; | 1027 | goto mpol_out; |
| 1028 | } | 1028 | } |
| 1029 | { | 1029 | { |
| 1030 | NODEMASK_SCRATCH(scratch); | 1030 | NODEMASK_SCRATCH(scratch); |
| @@ -1039,10 +1039,9 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
| 1039 | err = -ENOMEM; | 1039 | err = -ENOMEM; |
| 1040 | NODEMASK_SCRATCH_FREE(scratch); | 1040 | NODEMASK_SCRATCH_FREE(scratch); |
| 1041 | } | 1041 | } |
| 1042 | if (err) { | 1042 | if (err) |
| 1043 | mpol_put(new); | 1043 | goto mpol_out; |
| 1044 | return err; | 1044 | |
| 1045 | } | ||
| 1046 | vma = check_range(mm, start, end, nmask, | 1045 | vma = check_range(mm, start, end, nmask, |
| 1047 | flags | MPOL_MF_INVERT, &pagelist); | 1046 | flags | MPOL_MF_INVERT, &pagelist); |
| 1048 | 1047 | ||
| @@ -1058,9 +1057,11 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
| 1058 | 1057 | ||
| 1059 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) | 1058 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) |
| 1060 | err = -EIO; | 1059 | err = -EIO; |
| 1061 | } | 1060 | } else |
| 1061 | putback_lru_pages(&pagelist); | ||
| 1062 | 1062 | ||
| 1063 | up_write(&mm->mmap_sem); | 1063 | up_write(&mm->mmap_sem); |
| 1064 | mpol_out: | ||
| 1064 | mpol_put(new); | 1065 | mpol_put(new); |
| 1065 | return err; | 1066 | return err; |
| 1066 | } | 1067 | } |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
| @@ -2282,7 +2282,7 @@ static void special_mapping_close(struct vm_area_struct *vma) | |||
| 2282 | { | 2282 | { |
| 2283 | } | 2283 | } |
| 2284 | 2284 | ||
| 2285 | static struct vm_operations_struct special_mapping_vmops = { | 2285 | static const struct vm_operations_struct special_mapping_vmops = { |
| 2286 | .close = special_mapping_close, | 2286 | .close = special_mapping_close, |
| 2287 | .fault = special_mapping_fault, | 2287 | .fault = special_mapping_fault, |
| 2288 | }; | 2288 | }; |
diff --git a/mm/nommu.c b/mm/nommu.c
index 56a446f05971..9876fa0c3ad3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
| @@ -79,7 +79,7 @@ static struct kmem_cache *vm_region_jar; | |||
| 79 | struct rb_root nommu_region_tree = RB_ROOT; | 79 | struct rb_root nommu_region_tree = RB_ROOT; |
| 80 | DECLARE_RWSEM(nommu_region_sem); | 80 | DECLARE_RWSEM(nommu_region_sem); |
| 81 | 81 | ||
| 82 | struct vm_operations_struct generic_file_vm_ops = { | 82 | const struct vm_operations_struct generic_file_vm_ops = { |
| 83 | }; | 83 | }; |
| 84 | 84 | ||
| 85 | /* | 85 | /* |
| @@ -826,7 +826,7 @@ static int validate_mmap_request(struct file *file, | |||
| 826 | int ret; | 826 | int ret; |
| 827 | 827 | ||
| 828 | /* do the simple checks first */ | 828 | /* do the simple checks first */ |
| 829 | if (flags & MAP_FIXED || addr) { | 829 | if (flags & MAP_FIXED) { |
| 830 | printk(KERN_DEBUG | 830 | printk(KERN_DEBUG |
| 831 | "%d: Can't do fixed-address/overlay mmap of RAM\n", | 831 | "%d: Can't do fixed-address/overlay mmap of RAM\n", |
| 832 | current->pid); | 832 | current->pid); |
| @@ -1034,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) | |||
| 1034 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1034 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
| 1035 | if (ret == 0) { | 1035 | if (ret == 0) { |
| 1036 | vma->vm_region->vm_top = vma->vm_region->vm_end; | 1036 | vma->vm_region->vm_top = vma->vm_region->vm_end; |
| 1037 | return ret; | 1037 | return 0; |
| 1038 | } | 1038 | } |
| 1039 | if (ret != -ENOSYS) | 1039 | if (ret != -ENOSYS) |
| 1040 | return ret; | 1040 | return ret; |
| @@ -1051,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) | |||
| 1051 | */ | 1051 | */ |
| 1052 | static int do_mmap_private(struct vm_area_struct *vma, | 1052 | static int do_mmap_private(struct vm_area_struct *vma, |
| 1053 | struct vm_region *region, | 1053 | struct vm_region *region, |
| 1054 | unsigned long len) | 1054 | unsigned long len, |
| 1055 | unsigned long capabilities) | ||
| 1055 | { | 1056 | { |
| 1056 | struct page *pages; | 1057 | struct page *pages; |
| 1057 | unsigned long total, point, n, rlen; | 1058 | unsigned long total, point, n, rlen; |
| @@ -1062,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma, | |||
| 1062 | * shared mappings on devices or memory | 1063 | * shared mappings on devices or memory |
| 1063 | * - VM_MAYSHARE will be set if it may attempt to share | 1064 | * - VM_MAYSHARE will be set if it may attempt to share |
| 1064 | */ | 1065 | */ |
| 1065 | if (vma->vm_file) { | 1066 | if (capabilities & BDI_CAP_MAP_DIRECT) { |
| 1066 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1067 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
| 1067 | if (ret == 0) { | 1068 | if (ret == 0) { |
| 1068 | /* shouldn't return success if we're not sharing */ | 1069 | /* shouldn't return success if we're not sharing */ |
| 1069 | BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); | 1070 | BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); |
| 1070 | vma->vm_region->vm_top = vma->vm_region->vm_end; | 1071 | vma->vm_region->vm_top = vma->vm_region->vm_end; |
| 1071 | return ret; | 1072 | return 0; |
| 1072 | } | 1073 | } |
| 1073 | if (ret != -ENOSYS) | 1074 | if (ret != -ENOSYS) |
| 1074 | return ret; | 1075 | return ret; |
| @@ -1181,9 +1182,6 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1181 | 1182 | ||
| 1182 | kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); | 1183 | kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); |
| 1183 | 1184 | ||
| 1184 | if (!(flags & MAP_FIXED)) | ||
| 1185 | addr = round_hint_to_min(addr); | ||
| 1186 | |||
| 1187 | /* decide whether we should attempt the mapping, and if so what sort of | 1185 | /* decide whether we should attempt the mapping, and if so what sort of |
| 1188 | * mapping */ | 1186 | * mapping */ |
| 1189 | ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, | 1187 | ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, |
| @@ -1193,6 +1191,9 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1193 | return ret; | 1191 | return ret; |
| 1194 | } | 1192 | } |
| 1195 | 1193 | ||
| 1194 | /* we ignore the address hint */ | ||
| 1195 | addr = 0; | ||
| 1196 | |||
| 1196 | /* we've determined that we can make the mapping, now translate what we | 1197 | /* we've determined that we can make the mapping, now translate what we |
| 1197 | * now know into VMA flags */ | 1198 | * now know into VMA flags */ |
| 1198 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); | 1199 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); |
| @@ -1306,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1306 | * - this is the hook for quasi-memory character devices to | 1307 | * - this is the hook for quasi-memory character devices to |
| 1307 | * tell us the location of a shared mapping | 1308 | * tell us the location of a shared mapping |
| 1308 | */ | 1309 | */ |
| 1309 | if (file && file->f_op->get_unmapped_area) { | 1310 | if (capabilities & BDI_CAP_MAP_DIRECT) { |
| 1310 | addr = file->f_op->get_unmapped_area(file, addr, len, | 1311 | addr = file->f_op->get_unmapped_area(file, addr, len, |
| 1311 | pgoff, flags); | 1312 | pgoff, flags); |
| 1312 | if (IS_ERR((void *) addr)) { | 1313 | if (IS_ERR((void *) addr)) { |
| @@ -1330,15 +1331,17 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1330 | } | 1331 | } |
| 1331 | 1332 | ||
| 1332 | vma->vm_region = region; | 1333 | vma->vm_region = region; |
| 1333 | add_nommu_region(region); | ||
| 1334 | 1334 | ||
| 1335 | /* set up the mapping */ | 1335 | /* set up the mapping |
| 1336 | * - the region is filled in if BDI_CAP_MAP_DIRECT is still set | ||
| 1337 | */ | ||
| 1336 | if (file && vma->vm_flags & VM_SHARED) | 1338 | if (file && vma->vm_flags & VM_SHARED) |
| 1337 | ret = do_mmap_shared_file(vma); | 1339 | ret = do_mmap_shared_file(vma); |
| 1338 | else | 1340 | else |
| 1339 | ret = do_mmap_private(vma, region, len); | 1341 | ret = do_mmap_private(vma, region, len, capabilities); |
| 1340 | if (ret < 0) | 1342 | if (ret < 0) |
| 1341 | goto error_put_region; | 1343 | goto error_just_free; |
| 1344 | add_nommu_region(region); | ||
| 1342 | 1345 | ||
| 1343 | /* okay... we have a mapping; now we have to register it */ | 1346 | /* okay... we have a mapping; now we have to register it */ |
| 1344 | result = vma->vm_start; | 1347 | result = vma->vm_start; |
| @@ -1356,25 +1359,14 @@ share: | |||
| 1356 | kleave(" = %lx", result); | 1359 | kleave(" = %lx", result); |
| 1357 | return result; | 1360 | return result; |
| 1358 | 1361 | ||
| 1359 | error_put_region: | ||
| 1360 | __put_nommu_region(region); | ||
| 1361 | if (vma) { | ||
| 1362 | if (vma->vm_file) { | ||
| 1363 | fput(vma->vm_file); | ||
| 1364 | if (vma->vm_flags & VM_EXECUTABLE) | ||
| 1365 | removed_exe_file_vma(vma->vm_mm); | ||
| 1366 | } | ||
| 1367 | kmem_cache_free(vm_area_cachep, vma); | ||
| 1368 | } | ||
| 1369 | kleave(" = %d [pr]", ret); | ||
| 1370 | return ret; | ||
| 1371 | |||
| 1372 | error_just_free: | 1362 | error_just_free: |
| 1373 | up_write(&nommu_region_sem); | 1363 | up_write(&nommu_region_sem); |
| 1374 | error: | 1364 | error: |
| 1375 | fput(region->vm_file); | 1365 | if (region->vm_file) |
| 1366 | fput(region->vm_file); | ||
| 1376 | kmem_cache_free(vm_region_jar, region); | 1367 | kmem_cache_free(vm_region_jar, region); |
| 1377 | fput(vma->vm_file); | 1368 | if (vma->vm_file) |
| 1369 | fput(vma->vm_file); | ||
| 1378 | if (vma->vm_flags & VM_EXECUTABLE) | 1370 | if (vma->vm_flags & VM_EXECUTABLE) |
| 1379 | removed_exe_file_vma(vma->vm_mm); | 1371 | removed_exe_file_vma(vma->vm_mm); |
| 1380 | kmem_cache_free(vm_area_cachep, vma); | 1372 | kmem_cache_free(vm_area_cachep, vma); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d99664e8607e..2c5d79236ead 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
| @@ -44,18 +44,21 @@ static long ratelimit_pages = 32; | |||
| 44 | /* | 44 | /* |
| 45 | * When balance_dirty_pages decides that the caller needs to perform some | 45 | * When balance_dirty_pages decides that the caller needs to perform some |
| 46 | * non-background writeback, this is how many pages it will attempt to write. | 46 | * non-background writeback, this is how many pages it will attempt to write. |
| 47 | * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably | 47 | * It should be somewhat larger than dirtied pages to ensure that reasonably |
| 48 | * large amounts of I/O are submitted. | 48 | * large amounts of I/O are submitted. |
| 49 | */ | 49 | */ |
| 50 | static inline long sync_writeback_pages(void) | 50 | static inline long sync_writeback_pages(unsigned long dirtied) |
| 51 | { | 51 | { |
| 52 | return ratelimit_pages + ratelimit_pages / 2; | 52 | if (dirtied < ratelimit_pages) |
| 53 | dirtied = ratelimit_pages; | ||
| 54 | |||
| 55 | return dirtied + dirtied / 2; | ||
| 53 | } | 56 | } |
| 54 | 57 | ||
| 55 | /* The following parameters are exported via /proc/sys/vm */ | 58 | /* The following parameters are exported via /proc/sys/vm */ |
| 56 | 59 | ||
| 57 | /* | 60 | /* |
| 58 | * Start background writeback (via pdflush) at this percentage | 61 | * Start background writeback (via writeback threads) at this percentage |
| 59 | */ | 62 | */ |
| 60 | int dirty_background_ratio = 10; | 63 | int dirty_background_ratio = 10; |
| 61 | 64 | ||
| @@ -474,10 +477,11 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, | |||
| 474 | * balance_dirty_pages() must be called by processes which are generating dirty | 477 | * balance_dirty_pages() must be called by processes which are generating dirty |
| 475 | * data. It looks at the number of dirty pages in the machine and will force | 478 | * data. It looks at the number of dirty pages in the machine and will force |
| 476 | * the caller to perform writeback if the system is over `vm_dirty_ratio'. | 479 | * the caller to perform writeback if the system is over `vm_dirty_ratio'. |
| 477 | * If we're over `background_thresh' then pdflush is woken to perform some | 480 | * If we're over `background_thresh' then the writeback threads are woken to |
| 478 | * writeout. | 481 | * perform some writeout. |
| 479 | */ | 482 | */ |
| 480 | static void balance_dirty_pages(struct address_space *mapping) | 483 | static void balance_dirty_pages(struct address_space *mapping, |
| 484 | unsigned long write_chunk) | ||
| 481 | { | 485 | { |
| 482 | long nr_reclaimable, bdi_nr_reclaimable; | 486 | long nr_reclaimable, bdi_nr_reclaimable; |
| 483 | long nr_writeback, bdi_nr_writeback; | 487 | long nr_writeback, bdi_nr_writeback; |
| @@ -485,7 +489,6 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
| 485 | unsigned long dirty_thresh; | 489 | unsigned long dirty_thresh; |
| 486 | unsigned long bdi_thresh; | 490 | unsigned long bdi_thresh; |
| 487 | unsigned long pages_written = 0; | 491 | unsigned long pages_written = 0; |
| 488 | unsigned long write_chunk = sync_writeback_pages(); | ||
| 489 | unsigned long pause = 1; | 492 | unsigned long pause = 1; |
| 490 | 493 | ||
| 491 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 494 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
| @@ -563,7 +566,8 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
| 563 | if (pages_written >= write_chunk) | 566 | if (pages_written >= write_chunk) |
| 564 | break; /* We've done our duty */ | 567 | break; /* We've done our duty */ |
| 565 | 568 | ||
| 566 | schedule_timeout_interruptible(pause); | 569 | __set_current_state(TASK_INTERRUPTIBLE); |
| 570 | io_schedule_timeout(pause); | ||
| 567 | 571 | ||
| 568 | /* | 572 | /* |
| 569 | * Increase the delay for each loop, up to our previous | 573 | * Increase the delay for each loop, up to our previous |
| @@ -579,7 +583,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
| 579 | bdi->dirty_exceeded = 0; | 583 | bdi->dirty_exceeded = 0; |
| 580 | 584 | ||
| 581 | if (writeback_in_progress(bdi)) | 585 | if (writeback_in_progress(bdi)) |
| 582 | return; /* pdflush is already working this queue */ | 586 | return; |
| 583 | 587 | ||
| 584 | /* | 588 | /* |
| 585 | * In laptop mode, we wait until hitting the higher threshold before | 589 | * In laptop mode, we wait until hitting the higher threshold before |
| @@ -590,10 +594,10 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
| 590 | * background_thresh, to keep the amount of dirty memory low. | 594 | * background_thresh, to keep the amount of dirty memory low. |
| 591 | */ | 595 | */ |
| 592 | if ((laptop_mode && pages_written) || | 596 | if ((laptop_mode && pages_written) || |
| 593 | (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) | 597 | (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) |
| 594 | + global_page_state(NR_UNSTABLE_NFS)) | 598 | + global_page_state(NR_UNSTABLE_NFS)) |
| 595 | > background_thresh))) | 599 | > background_thresh))) |
| 596 | bdi_start_writeback(bdi, nr_writeback); | 600 | bdi_start_writeback(bdi, NULL, 0); |
| 597 | } | 601 | } |
| 598 | 602 | ||
| 599 | void set_page_dirty_balance(struct page *page, int page_mkwrite) | 603 | void set_page_dirty_balance(struct page *page, int page_mkwrite) |
| @@ -640,9 +644,10 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, | |||
| 640 | p = &__get_cpu_var(bdp_ratelimits); | 644 | p = &__get_cpu_var(bdp_ratelimits); |
| 641 | *p += nr_pages_dirtied; | 645 | *p += nr_pages_dirtied; |
| 642 | if (unlikely(*p >= ratelimit)) { | 646 | if (unlikely(*p >= ratelimit)) { |
| 647 | ratelimit = sync_writeback_pages(*p); | ||
| 643 | *p = 0; | 648 | *p = 0; |
| 644 | preempt_enable(); | 649 | preempt_enable(); |
| 645 | balance_dirty_pages(mapping); | 650 | balance_dirty_pages(mapping, ratelimit); |
| 646 | return; | 651 | return; |
| 647 | } | 652 | } |
| 648 | preempt_enable(); | 653 | preempt_enable(); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bf720550b44d..cdcedf661616 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
| @@ -2183,7 +2183,7 @@ void show_free_areas(void) | |||
| 2183 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" | 2183 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" |
| 2184 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" | 2184 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" |
| 2185 | " unevictable:%lu" | 2185 | " unevictable:%lu" |
| 2186 | " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" | 2186 | " dirty:%lu writeback:%lu unstable:%lu\n" |
| 2187 | " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" | 2187 | " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" |
| 2188 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", | 2188 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", |
| 2189 | global_page_state(NR_ACTIVE_ANON), | 2189 | global_page_state(NR_ACTIVE_ANON), |
| @@ -2196,7 +2196,6 @@ void show_free_areas(void) | |||
| 2196 | global_page_state(NR_FILE_DIRTY), | 2196 | global_page_state(NR_FILE_DIRTY), |
| 2197 | global_page_state(NR_WRITEBACK), | 2197 | global_page_state(NR_WRITEBACK), |
| 2198 | global_page_state(NR_UNSTABLE_NFS), | 2198 | global_page_state(NR_UNSTABLE_NFS), |
| 2199 | nr_blockdev_pages(), | ||
| 2200 | global_page_state(NR_FREE_PAGES), | 2199 | global_page_state(NR_FREE_PAGES), |
| 2201 | global_page_state(NR_SLAB_RECLAIMABLE), | 2200 | global_page_state(NR_SLAB_RECLAIMABLE), |
| 2202 | global_page_state(NR_SLAB_UNRECLAIMABLE), | 2201 | global_page_state(NR_SLAB_UNRECLAIMABLE), |
diff --git a/mm/percpu.c b/mm/percpu.c index 43d8cacfdaa5..d90797160c2a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -153,7 +153,10 @@ static int pcpu_reserved_chunk_limit; | |||
| 153 | * | 153 | * |
| 154 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | 154 | * During allocation, pcpu_alloc_mutex is kept locked all the time and |
| 155 | * pcpu_lock is grabbed and released as necessary. All actual memory | 155 | * pcpu_lock is grabbed and released as necessary. All actual memory |
| 156 | * allocations are done using GFP_KERNEL with pcpu_lock released. | 156 | * allocations are done using GFP_KERNEL with pcpu_lock released. In |
| 157 | * general, percpu memory can't be allocated with irq off but | ||
| 158 | * irqsave/restore are still used in alloc path so that it can be used | ||
| 159 | * from early init path - sched_init() specifically. | ||
| 157 | * | 160 | * |
| 158 | * Free path accesses and alters only the index data structures, so it | 161 | * Free path accesses and alters only the index data structures, so it |
| 159 | * can be safely called from atomic context. When memory needs to be | 162 | * can be safely called from atomic context. When memory needs to be |
| @@ -366,7 +369,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
| 366 | * RETURNS: | 369 | * RETURNS: |
| 367 | * 0 if noop, 1 if successfully extended, -errno on failure. | 370 | * 0 if noop, 1 if successfully extended, -errno on failure. |
| 368 | */ | 371 | */ |
| 369 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | 372 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags) |
| 370 | { | 373 | { |
| 371 | int new_alloc; | 374 | int new_alloc; |
| 372 | int *new; | 375 | int *new; |
| @@ -376,7 +379,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | |||
| 376 | if (chunk->map_alloc >= chunk->map_used + 2) | 379 | if (chunk->map_alloc >= chunk->map_used + 2) |
| 377 | return 0; | 380 | return 0; |
| 378 | 381 | ||
| 379 | spin_unlock_irq(&pcpu_lock); | 382 | spin_unlock_irqrestore(&pcpu_lock, *flags); |
| 380 | 383 | ||
| 381 | new_alloc = PCPU_DFL_MAP_ALLOC; | 384 | new_alloc = PCPU_DFL_MAP_ALLOC; |
| 382 | while (new_alloc < chunk->map_used + 2) | 385 | while (new_alloc < chunk->map_used + 2) |
| @@ -384,7 +387,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | |||
| 384 | 387 | ||
| 385 | new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); | 388 | new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); |
| 386 | if (!new) { | 389 | if (!new) { |
| 387 | spin_lock_irq(&pcpu_lock); | 390 | spin_lock_irqsave(&pcpu_lock, *flags); |
| 388 | return -ENOMEM; | 391 | return -ENOMEM; |
| 389 | } | 392 | } |
| 390 | 393 | ||
| @@ -393,7 +396,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | |||
| 393 | * could have happened inbetween, so map_used couldn't have | 396 | * could have happened inbetween, so map_used couldn't have |
| 394 | * grown. | 397 | * grown. |
| 395 | */ | 398 | */ |
| 396 | spin_lock_irq(&pcpu_lock); | 399 | spin_lock_irqsave(&pcpu_lock, *flags); |
| 397 | BUG_ON(new_alloc < chunk->map_used + 2); | 400 | BUG_ON(new_alloc < chunk->map_used + 2); |
| 398 | 401 | ||
| 399 | size = chunk->map_alloc * sizeof(chunk->map[0]); | 402 | size = chunk->map_alloc * sizeof(chunk->map[0]); |
| @@ -1043,8 +1046,11 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
| 1043 | */ | 1046 | */ |
| 1044 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) | 1047 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) |
| 1045 | { | 1048 | { |
| 1049 | static int warn_limit = 10; | ||
| 1046 | struct pcpu_chunk *chunk; | 1050 | struct pcpu_chunk *chunk; |
| 1051 | const char *err; | ||
| 1047 | int slot, off; | 1052 | int slot, off; |
| 1053 | unsigned long flags; | ||
| 1048 | 1054 | ||
| 1049 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 1055 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
| 1050 | WARN(true, "illegal size (%zu) or align (%zu) for " | 1056 | WARN(true, "illegal size (%zu) or align (%zu) for " |
| @@ -1053,17 +1059,20 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
| 1053 | } | 1059 | } |
| 1054 | 1060 | ||
| 1055 | mutex_lock(&pcpu_alloc_mutex); | 1061 | mutex_lock(&pcpu_alloc_mutex); |
| 1056 | spin_lock_irq(&pcpu_lock); | 1062 | spin_lock_irqsave(&pcpu_lock, flags); |
| 1057 | 1063 | ||
| 1058 | /* serve reserved allocations from the reserved chunk if available */ | 1064 | /* serve reserved allocations from the reserved chunk if available */ |
| 1059 | if (reserved && pcpu_reserved_chunk) { | 1065 | if (reserved && pcpu_reserved_chunk) { |
| 1060 | chunk = pcpu_reserved_chunk; | 1066 | chunk = pcpu_reserved_chunk; |
| 1061 | if (size > chunk->contig_hint || | 1067 | if (size > chunk->contig_hint || |
| 1062 | pcpu_extend_area_map(chunk) < 0) | 1068 | pcpu_extend_area_map(chunk, &flags) < 0) { |
| 1069 | err = "failed to extend area map of reserved chunk"; | ||
| 1063 | goto fail_unlock; | 1070 | goto fail_unlock; |
| 1071 | } | ||
| 1064 | off = pcpu_alloc_area(chunk, size, align); | 1072 | off = pcpu_alloc_area(chunk, size, align); |
| 1065 | if (off >= 0) | 1073 | if (off >= 0) |
| 1066 | goto area_found; | 1074 | goto area_found; |
| 1075 | err = "alloc from reserved chunk failed"; | ||
| 1067 | goto fail_unlock; | 1076 | goto fail_unlock; |
| 1068 | } | 1077 | } |
| 1069 | 1078 | ||
| @@ -1074,12 +1083,13 @@ restart: | |||
| 1074 | if (size > chunk->contig_hint) | 1083 | if (size > chunk->contig_hint) |
| 1075 | continue; | 1084 | continue; |
| 1076 | 1085 | ||
| 1077 | switch (pcpu_extend_area_map(chunk)) { | 1086 | switch (pcpu_extend_area_map(chunk, &flags)) { |
| 1078 | case 0: | 1087 | case 0: |
| 1079 | break; | 1088 | break; |
| 1080 | case 1: | 1089 | case 1: |
| 1081 | goto restart; /* pcpu_lock dropped, restart */ | 1090 | goto restart; /* pcpu_lock dropped, restart */ |
| 1082 | default: | 1091 | default: |
| 1092 | err = "failed to extend area map"; | ||
| 1083 | goto fail_unlock; | 1093 | goto fail_unlock; |
| 1084 | } | 1094 | } |
| 1085 | 1095 | ||
| @@ -1090,23 +1100,26 @@ restart: | |||
| 1090 | } | 1100 | } |
| 1091 | 1101 | ||
| 1092 | /* hmmm... no space left, create a new chunk */ | 1102 | /* hmmm... no space left, create a new chunk */ |
| 1093 | spin_unlock_irq(&pcpu_lock); | 1103 | spin_unlock_irqrestore(&pcpu_lock, flags); |
| 1094 | 1104 | ||
| 1095 | chunk = alloc_pcpu_chunk(); | 1105 | chunk = alloc_pcpu_chunk(); |
| 1096 | if (!chunk) | 1106 | if (!chunk) { |
| 1107 | err = "failed to allocate new chunk"; | ||
| 1097 | goto fail_unlock_mutex; | 1108 | goto fail_unlock_mutex; |
| 1109 | } | ||
| 1098 | 1110 | ||
| 1099 | spin_lock_irq(&pcpu_lock); | 1111 | spin_lock_irqsave(&pcpu_lock, flags); |
| 1100 | pcpu_chunk_relocate(chunk, -1); | 1112 | pcpu_chunk_relocate(chunk, -1); |
| 1101 | goto restart; | 1113 | goto restart; |
| 1102 | 1114 | ||
| 1103 | area_found: | 1115 | area_found: |
| 1104 | spin_unlock_irq(&pcpu_lock); | 1116 | spin_unlock_irqrestore(&pcpu_lock, flags); |
| 1105 | 1117 | ||
| 1106 | /* populate, map and clear the area */ | 1118 | /* populate, map and clear the area */ |
| 1107 | if (pcpu_populate_chunk(chunk, off, size)) { | 1119 | if (pcpu_populate_chunk(chunk, off, size)) { |
| 1108 | spin_lock_irq(&pcpu_lock); | 1120 | spin_lock_irqsave(&pcpu_lock, flags); |
| 1109 | pcpu_free_area(chunk, off); | 1121 | pcpu_free_area(chunk, off); |
| 1122 | err = "failed to populate"; | ||
| 1110 | goto fail_unlock; | 1123 | goto fail_unlock; |
| 1111 | } | 1124 | } |
| 1112 | 1125 | ||
| @@ -1116,9 +1129,16 @@ area_found: | |||
| 1116 | return __addr_to_pcpu_ptr(chunk->base_addr + off); | 1129 | return __addr_to_pcpu_ptr(chunk->base_addr + off); |
| 1117 | 1130 | ||
| 1118 | fail_unlock: | 1131 | fail_unlock: |
| 1119 | spin_unlock_irq(&pcpu_lock); | 1132 | spin_unlock_irqrestore(&pcpu_lock, flags); |
| 1120 | fail_unlock_mutex: | 1133 | fail_unlock_mutex: |
| 1121 | mutex_unlock(&pcpu_alloc_mutex); | 1134 | mutex_unlock(&pcpu_alloc_mutex); |
| 1135 | if (warn_limit) { | ||
| 1136 | pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " | ||
| 1137 | "%s\n", size, align, err); | ||
| 1138 | dump_stack(); | ||
| 1139 | if (!--warn_limit) | ||
| 1140 | pr_info("PERCPU: limit reached, disable warning\n"); | ||
| 1141 | } | ||
| 1122 | return NULL; | 1142 | return NULL; |
| 1123 | } | 1143 | } |
| 1124 | 1144 | ||
| @@ -1347,6 +1367,10 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |||
| 1347 | struct pcpu_alloc_info *ai; | 1367 | struct pcpu_alloc_info *ai; |
| 1348 | unsigned int *cpu_map; | 1368 | unsigned int *cpu_map; |
| 1349 | 1369 | ||
| 1370 | /* this function may be called multiple times */ | ||
| 1371 | memset(group_map, 0, sizeof(group_map)); | ||
| 1371 | memset(group_cnt, 0, sizeof(group_cnt)); | ||
| 1373 | |||
| 1350 | /* | 1374 | /* |
| 1351 | * Determine min_unit_size, alloc_size and max_upa such that | 1375 | * Determine min_unit_size, alloc_size and max_upa such that |
| 1352 | * alloc_size is multiple of atom_size and is the smallest | 1376 | * alloc_size is multiple of atom_size and is the smallest |
| @@ -1574,6 +1598,7 @@ static void pcpu_dump_alloc_info(const char *lvl, | |||
| 1574 | int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | 1598 | int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
| 1575 | void *base_addr) | 1599 | void *base_addr) |
| 1576 | { | 1600 | { |
| 1601 | static char cpus_buf[4096] __initdata; | ||
| 1577 | static int smap[2], dmap[2]; | 1602 | static int smap[2], dmap[2]; |
| 1578 | size_t dyn_size = ai->dyn_size; | 1603 | size_t dyn_size = ai->dyn_size; |
| 1579 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; | 1604 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; |
| @@ -1585,17 +1610,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1585 | int *unit_map; | 1610 | int *unit_map; |
| 1586 | int group, unit, i; | 1611 | int group, unit, i; |
| 1587 | 1612 | ||
| 1613 | cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask); | ||
| 1614 | |||
| 1615 | #define PCPU_SETUP_BUG_ON(cond) do { \ | ||
| 1616 | if (unlikely(cond)) { \ | ||
| 1617 | pr_emerg("PERCPU: failed to initialize, %s", #cond); \ | ||
| 1618 | pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf); \ | ||
| 1619 | pcpu_dump_alloc_info(KERN_EMERG, ai); \ | ||
| 1620 | BUG(); \ | ||
| 1621 | } \ | ||
| 1622 | } while (0) | ||
| 1623 | |||
| 1588 | /* sanity checks */ | 1624 | /* sanity checks */ |
| 1589 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || | 1625 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
| 1590 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); | 1626 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); |
| 1591 | BUG_ON(ai->nr_groups <= 0); | 1627 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); |
| 1592 | BUG_ON(!ai->static_size); | 1628 | PCPU_SETUP_BUG_ON(!ai->static_size); |
| 1593 | BUG_ON(!base_addr); | 1629 | PCPU_SETUP_BUG_ON(!base_addr); |
| 1594 | BUG_ON(ai->unit_size < size_sum); | 1630 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
| 1595 | BUG_ON(ai->unit_size & ~PAGE_MASK); | 1631 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
| 1596 | BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); | 1632 | PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); |
| 1597 | |||
| 1598 | pcpu_dump_alloc_info(KERN_DEBUG, ai); | ||
| 1599 | 1633 | ||
| 1600 | /* process group information and build config tables accordingly */ | 1634 | /* process group information and build config tables accordingly */ |
| 1601 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); | 1635 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); |
| @@ -1604,7 +1638,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1604 | unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); | 1638 | unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); |
| 1605 | 1639 | ||
| 1606 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | 1640 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
| 1607 | unit_map[cpu] = NR_CPUS; | 1641 | unit_map[cpu] = UINT_MAX; |
| 1608 | pcpu_first_unit_cpu = NR_CPUS; | 1642 | pcpu_first_unit_cpu = NR_CPUS; |
| 1609 | 1643 | ||
| 1610 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | 1644 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
| @@ -1618,8 +1652,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1618 | if (cpu == NR_CPUS) | 1652 | if (cpu == NR_CPUS) |
| 1619 | continue; | 1653 | continue; |
| 1620 | 1654 | ||
| 1621 | BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); | 1655 | PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids); |
| 1622 | BUG_ON(unit_map[cpu] != NR_CPUS); | 1656 | PCPU_SETUP_BUG_ON(!cpu_possible(cpu)); |
| 1657 | PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX); | ||
| 1623 | 1658 | ||
| 1624 | unit_map[cpu] = unit + i; | 1659 | unit_map[cpu] = unit + i; |
| 1625 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; | 1660 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; |
| @@ -1632,7 +1667,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
| 1632 | pcpu_nr_units = unit; | 1667 | pcpu_nr_units = unit; |
| 1633 | 1668 | ||
| 1634 | for_each_possible_cpu(cpu) | 1669 | for_each_possible_cpu(cpu) |
| 1635 | BUG_ON(unit_map[cpu] == NR_CPUS); | 1670 | PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX); |
| 1671 | |||
| 1672 | /* we're done parsing the input, undefine BUG macro and dump config */ | ||
| 1673 | #undef PCPU_SETUP_BUG_ON | ||
| 1674 | pcpu_dump_alloc_info(KERN_INFO, ai); | ||
| 1636 | 1675 | ||
| 1637 | pcpu_nr_groups = ai->nr_groups; | 1676 | pcpu_nr_groups = ai->nr_groups; |
| 1638 | pcpu_group_offsets = group_offsets; | 1677 | pcpu_group_offsets = group_offsets; |
| @@ -1782,7 +1821,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, | |||
| 1782 | void *base = (void *)ULONG_MAX; | 1821 | void *base = (void *)ULONG_MAX; |
| 1783 | void **areas = NULL; | 1822 | void **areas = NULL; |
| 1784 | struct pcpu_alloc_info *ai; | 1823 | struct pcpu_alloc_info *ai; |
| 1785 | size_t size_sum, areas_size; | 1824 | size_t size_sum, areas_size, max_distance; |
| 1786 | int group, i, rc; | 1825 | int group, i, rc; |
| 1787 | 1826 | ||
| 1788 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, | 1827 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, |
| @@ -1832,8 +1871,25 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, | |||
| 1832 | } | 1871 | } |
| 1833 | 1872 | ||
| 1834 | /* base address is now known, determine group base offsets */ | 1873 | /* base address is now known, determine group base offsets */ |
| 1835 | for (group = 0; group < ai->nr_groups; group++) | 1874 | max_distance = 0; |
| 1875 | for (group = 0; group < ai->nr_groups; group++) { | ||
| 1836 | ai->groups[group].base_offset = areas[group] - base; | 1876 | ai->groups[group].base_offset = areas[group] - base; |
| 1877 | max_distance = max_t(size_t, max_distance, | ||
| 1878 | ai->groups[group].base_offset); | ||
| 1879 | } | ||
| 1880 | max_distance += ai->unit_size; | ||
| 1881 | |||
| 1882 | /* warn if maximum distance is further than 75% of vmalloc space */ | ||
| 1883 | if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { | ||
| 1884 | pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " | ||
| 1885 | "space 0x%lx\n", | ||
| 1886 | max_distance, VMALLOC_END - VMALLOC_START); | ||
| 1887 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | ||
| 1888 | /* and fail if we have fallback */ | ||
| 1889 | rc = -EINVAL; | ||
| 1890 | goto out_free; | ||
| 1891 | #endif | ||
| 1892 | } | ||
| 1837 | 1893 | ||
| 1838 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", | 1894 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", |
| 1839 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, | 1895 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
| @@ -242,8 +242,8 @@ vma_address(struct page *page, struct vm_area_struct *vma) | |||
| 242 | } | 242 | } |
| 243 | 243 | ||
| 244 | /* | 244 | /* |
| 245 | * At what user virtual address is page expected in vma? checking that the | 245 | * At what user virtual address is page expected in vma? |
| 246 | * page matches the vma: currently only used on anon pages, by unuse_vma; | 246 | * checking that the page matches the vma. |
| 247 | */ | 247 | */ |
| 248 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | 248 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) |
| 249 | { | 249 | { |
diff --git a/mm/shmem.c b/mm/shmem.c index 98631c26c200..356dd99566ec 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -218,7 +218,7 @@ static const struct file_operations shmem_file_operations; | |||
| 218 | static const struct inode_operations shmem_inode_operations; | 218 | static const struct inode_operations shmem_inode_operations; |
| 219 | static const struct inode_operations shmem_dir_inode_operations; | 219 | static const struct inode_operations shmem_dir_inode_operations; |
| 220 | static const struct inode_operations shmem_special_inode_operations; | 220 | static const struct inode_operations shmem_special_inode_operations; |
| 221 | static struct vm_operations_struct shmem_vm_ops; | 221 | static const struct vm_operations_struct shmem_vm_ops; |
| 222 | 222 | ||
| 223 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { | 223 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { |
| 224 | .ra_pages = 0, /* No readahead */ | 224 | .ra_pages = 0, /* No readahead */ |
| @@ -1046,8 +1046,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
| 1046 | * sync from ever calling shmem_writepage; but a stacking filesystem | 1046 | * sync from ever calling shmem_writepage; but a stacking filesystem |
| 1047 | * may use the ->writepage of its underlying filesystem, in which case | 1047 | * may use the ->writepage of its underlying filesystem, in which case |
| 1048 | * tmpfs should write out to swap only in response to memory pressure, | 1048 | * tmpfs should write out to swap only in response to memory pressure, |
| 1049 | * and not for pdflush or sync. However, in those cases, we do still | 1049 | * and not for the writeback threads or sync. However, in those cases, |
| 1050 | * want to check if there's a redundant swappage to be discarded. | 1050 | * we do still want to check if there's a redundant swappage to be |
| 1051 | * discarded. | ||
| 1051 | */ | 1052 | */ |
| 1052 | if (wbc->for_reclaim) | 1053 | if (wbc->for_reclaim) |
| 1053 | swap = get_swap_page(); | 1054 | swap = get_swap_page(); |
| @@ -2497,7 +2498,7 @@ static const struct super_operations shmem_ops = { | |||
| 2497 | .put_super = shmem_put_super, | 2498 | .put_super = shmem_put_super, |
| 2498 | }; | 2499 | }; |
| 2499 | 2500 | ||
| 2500 | static struct vm_operations_struct shmem_vm_ops = { | 2501 | static const struct vm_operations_struct shmem_vm_ops = { |
| 2501 | .fault = shmem_fault, | 2502 | .fault = shmem_fault, |
| 2502 | #ifdef CONFIG_NUMA | 2503 | #ifdef CONFIG_NUMA |
| 2503 | .set_policy = shmem_set_policy, | 2504 | .set_policy = shmem_set_policy, |
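The shmem change above only constifies the vm_operations_struct: a table of function pointers that never changes after initialization can be declared const and placed in read-only data. A sketch of the same idiom with an illustrative, hypothetical ops struct:

    #include <stdio.h>

    struct vm_ops_model {
            void (*fault)(void);
    };

    static void fault_handler_model(void)
    {
            puts("fault handled");
    }

    /* const: the compiler may put the table in .rodata, and any attempt to
     * overwrite a pointer in it becomes a compile-time error. */
    static const struct vm_ops_model shmem_vm_ops_model = {
            .fault = fault_handler_model,
    };

    int main(void)
    {
            shmem_vm_ops_model.fault();
            return 0;
    }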
diff --git a/mm/swapfile.c b/mm/swapfile.c index 4de7f02f820b..9c590eef7912 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
| @@ -1151,8 +1151,7 @@ static int try_to_unuse(unsigned int type) | |||
| 1151 | } else | 1151 | } else |
| 1152 | retval = unuse_mm(mm, entry, page); | 1152 | retval = unuse_mm(mm, entry, page); |
| 1153 | 1153 | ||
| 1154 | if (set_start_mm && | 1154 | if (set_start_mm && *swap_map < swcount) { |
| 1155 | swap_count(*swap_map) < swcount) { | ||
| 1156 | mmput(new_start_mm); | 1155 | mmput(new_start_mm); |
| 1157 | atomic_inc(&mm->mm_users); | 1156 | atomic_inc(&mm->mm_users); |
| 1158 | new_start_mm = mm; | 1157 | new_start_mm = mm; |
| @@ -1974,12 +1973,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
| 1974 | goto bad_swap; | 1973 | goto bad_swap; |
| 1975 | } | 1974 | } |
| 1976 | 1975 | ||
| 1977 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 1976 | if (p->bdev) { |
| 1978 | p->flags |= SWP_SOLIDSTATE; | 1977 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
| 1979 | p->cluster_next = 1 + (random32() % p->highest_bit); | 1978 | p->flags |= SWP_SOLIDSTATE; |
| 1979 | p->cluster_next = 1 + (random32() % p->highest_bit); | ||
| 1980 | } | ||
| 1981 | if (discard_swap(p) == 0) | ||
| 1982 | p->flags |= SWP_DISCARDABLE; | ||
| 1980 | } | 1983 | } |
| 1981 | if (discard_swap(p) == 0) | ||
| 1982 | p->flags |= SWP_DISCARDABLE; | ||
| 1983 | 1984 | ||
| 1984 | mutex_lock(&swapon_mutex); | 1985 | mutex_lock(&swapon_mutex); |
| 1985 | spin_lock(&swap_lock); | 1986 | spin_lock(&swap_lock); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 69511e663234..0f551a4a44cd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
| 15 | #include <linux/sched.h> | ||
| 15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 16 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 17 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
| @@ -25,10 +26,10 @@ | |||
| 25 | #include <linux/rcupdate.h> | 26 | #include <linux/rcupdate.h> |
| 26 | #include <linux/pfn.h> | 27 | #include <linux/pfn.h> |
| 27 | #include <linux/kmemleak.h> | 28 | #include <linux/kmemleak.h> |
| 28 | #include <linux/highmem.h> | ||
| 29 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
| 30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
| 31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
| 32 | #include <asm/shmparam.h> | ||
| 32 | 33 | ||
| 33 | 34 | ||
| 34 | /*** Page table manipulation functions ***/ | 35 | /*** Page table manipulation functions ***/ |
| @@ -1156,12 +1157,11 @@ static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, | |||
| 1156 | } | 1157 | } |
| 1157 | 1158 | ||
| 1158 | static struct vm_struct *__get_vm_area_node(unsigned long size, | 1159 | static struct vm_struct *__get_vm_area_node(unsigned long size, |
| 1159 | unsigned long flags, unsigned long start, unsigned long end, | 1160 | unsigned long align, unsigned long flags, unsigned long start, |
| 1160 | int node, gfp_t gfp_mask, void *caller) | 1161 | unsigned long end, int node, gfp_t gfp_mask, void *caller) |
| 1161 | { | 1162 | { |
| 1162 | static struct vmap_area *va; | 1163 | static struct vmap_area *va; |
| 1163 | struct vm_struct *area; | 1164 | struct vm_struct *area; |
| 1164 | unsigned long align = 1; | ||
| 1165 | 1165 | ||
| 1166 | BUG_ON(in_interrupt()); | 1166 | BUG_ON(in_interrupt()); |
| 1167 | if (flags & VM_IOREMAP) { | 1167 | if (flags & VM_IOREMAP) { |
| @@ -1201,7 +1201,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
| 1201 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 1201 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
| 1202 | unsigned long start, unsigned long end) | 1202 | unsigned long start, unsigned long end) |
| 1203 | { | 1203 | { |
| 1204 | return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, | 1204 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, |
| 1205 | __builtin_return_address(0)); | 1205 | __builtin_return_address(0)); |
| 1206 | } | 1206 | } |
| 1207 | EXPORT_SYMBOL_GPL(__get_vm_area); | 1207 | EXPORT_SYMBOL_GPL(__get_vm_area); |
| @@ -1210,7 +1210,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, | |||
| 1210 | unsigned long start, unsigned long end, | 1210 | unsigned long start, unsigned long end, |
| 1211 | void *caller) | 1211 | void *caller) |
| 1212 | { | 1212 | { |
| 1213 | return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, | 1213 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, |
| 1214 | caller); | 1214 | caller); |
| 1215 | } | 1215 | } |
| 1216 | 1216 | ||
| @@ -1225,22 +1225,22 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, | |||
| 1225 | */ | 1225 | */ |
| 1226 | struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) | 1226 | struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) |
| 1227 | { | 1227 | { |
| 1228 | return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, | 1228 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
| 1229 | -1, GFP_KERNEL, __builtin_return_address(0)); | 1229 | -1, GFP_KERNEL, __builtin_return_address(0)); |
| 1230 | } | 1230 | } |
| 1231 | 1231 | ||
| 1232 | struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, | 1232 | struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, |
| 1233 | void *caller) | 1233 | void *caller) |
| 1234 | { | 1234 | { |
| 1235 | return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, | 1235 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
| 1236 | -1, GFP_KERNEL, caller); | 1236 | -1, GFP_KERNEL, caller); |
| 1237 | } | 1237 | } |
| 1238 | 1238 | ||
| 1239 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, | 1239 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, |
| 1240 | int node, gfp_t gfp_mask) | 1240 | int node, gfp_t gfp_mask) |
| 1241 | { | 1241 | { |
| 1242 | return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node, | 1242 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
| 1243 | gfp_mask, __builtin_return_address(0)); | 1243 | node, gfp_mask, __builtin_return_address(0)); |
| 1244 | } | 1244 | } |
| 1245 | 1245 | ||
| 1246 | static struct vm_struct *find_vm_area(const void *addr) | 1246 | static struct vm_struct *find_vm_area(const void *addr) |
| @@ -1403,7 +1403,8 @@ void *vmap(struct page **pages, unsigned int count, | |||
| 1403 | } | 1403 | } |
| 1404 | EXPORT_SYMBOL(vmap); | 1404 | EXPORT_SYMBOL(vmap); |
| 1405 | 1405 | ||
| 1406 | static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | 1406 | static void *__vmalloc_node(unsigned long size, unsigned long align, |
| 1407 | gfp_t gfp_mask, pgprot_t prot, | ||
| 1407 | int node, void *caller); | 1408 | int node, void *caller); |
| 1408 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | 1409 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, |
| 1409 | pgprot_t prot, int node, void *caller) | 1410 | pgprot_t prot, int node, void *caller) |
| @@ -1417,7 +1418,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
| 1417 | area->nr_pages = nr_pages; | 1418 | area->nr_pages = nr_pages; |
| 1418 | /* Please note that the recursion is strictly bounded. */ | 1419 | /* Please note that the recursion is strictly bounded. */ |
| 1419 | if (array_size > PAGE_SIZE) { | 1420 | if (array_size > PAGE_SIZE) { |
| 1420 | pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO, | 1421 | pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO, |
| 1421 | PAGE_KERNEL, node, caller); | 1422 | PAGE_KERNEL, node, caller); |
| 1422 | area->flags |= VM_VPAGES; | 1423 | area->flags |= VM_VPAGES; |
| 1423 | } else { | 1424 | } else { |
| @@ -1476,6 +1477,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | |||
| 1476 | /** | 1477 | /** |
| 1477 | * __vmalloc_node - allocate virtually contiguous memory | 1478 | * __vmalloc_node - allocate virtually contiguous memory |
| 1478 | * @size: allocation size | 1479 | * @size: allocation size |
| 1480 | * @align: desired alignment | ||
| 1479 | * @gfp_mask: flags for the page level allocator | 1481 | * @gfp_mask: flags for the page level allocator |
| 1480 | * @prot: protection mask for the allocated pages | 1482 | * @prot: protection mask for the allocated pages |
| 1481 | * @node: node to use for allocation or -1 | 1483 | * @node: node to use for allocation or -1 |
| @@ -1485,8 +1487,9 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | |||
| 1485 | * allocator with @gfp_mask flags. Map them into contiguous | 1487 | * allocator with @gfp_mask flags. Map them into contiguous |
| 1486 | * kernel virtual space, using a pagetable protection of @prot. | 1488 | * kernel virtual space, using a pagetable protection of @prot. |
| 1487 | */ | 1489 | */ |
| 1488 | static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | 1490 | static void *__vmalloc_node(unsigned long size, unsigned long align, |
| 1489 | int node, void *caller) | 1491 | gfp_t gfp_mask, pgprot_t prot, |
| 1492 | int node, void *caller) | ||
| 1490 | { | 1493 | { |
| 1491 | struct vm_struct *area; | 1494 | struct vm_struct *area; |
| 1492 | void *addr; | 1495 | void *addr; |
| @@ -1496,8 +1499,8 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
| 1496 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) | 1499 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) |
| 1497 | return NULL; | 1500 | return NULL; |
| 1498 | 1501 | ||
| 1499 | area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, | 1502 | area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START, |
| 1500 | node, gfp_mask, caller); | 1503 | VMALLOC_END, node, gfp_mask, caller); |
| 1501 | 1504 | ||
| 1502 | if (!area) | 1505 | if (!area) |
| 1503 | return NULL; | 1506 | return NULL; |
| @@ -1516,7 +1519,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
| 1516 | 1519 | ||
| 1517 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | 1520 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) |
| 1518 | { | 1521 | { |
| 1519 | return __vmalloc_node(size, gfp_mask, prot, -1, | 1522 | return __vmalloc_node(size, 1, gfp_mask, prot, -1, |
| 1520 | __builtin_return_address(0)); | 1523 | __builtin_return_address(0)); |
| 1521 | } | 1524 | } |
| 1522 | EXPORT_SYMBOL(__vmalloc); | 1525 | EXPORT_SYMBOL(__vmalloc); |
| @@ -1532,7 +1535,7 @@ EXPORT_SYMBOL(__vmalloc); | |||
| 1532 | */ | 1535 | */ |
| 1533 | void *vmalloc(unsigned long size) | 1536 | void *vmalloc(unsigned long size) |
| 1534 | { | 1537 | { |
| 1535 | return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, | 1538 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, |
| 1536 | -1, __builtin_return_address(0)); | 1539 | -1, __builtin_return_address(0)); |
| 1537 | } | 1540 | } |
| 1538 | EXPORT_SYMBOL(vmalloc); | 1541 | EXPORT_SYMBOL(vmalloc); |
| @@ -1549,7 +1552,8 @@ void *vmalloc_user(unsigned long size) | |||
| 1549 | struct vm_struct *area; | 1552 | struct vm_struct *area; |
| 1550 | void *ret; | 1553 | void *ret; |
| 1551 | 1554 | ||
| 1552 | ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | 1555 | ret = __vmalloc_node(size, SHMLBA, |
| 1556 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | ||
| 1553 | PAGE_KERNEL, -1, __builtin_return_address(0)); | 1557 | PAGE_KERNEL, -1, __builtin_return_address(0)); |
| 1554 | if (ret) { | 1558 | if (ret) { |
| 1555 | area = find_vm_area(ret); | 1559 | area = find_vm_area(ret); |
| @@ -1572,7 +1576,7 @@ EXPORT_SYMBOL(vmalloc_user); | |||
| 1572 | */ | 1576 | */ |
| 1573 | void *vmalloc_node(unsigned long size, int node) | 1577 | void *vmalloc_node(unsigned long size, int node) |
| 1574 | { | 1578 | { |
| 1575 | return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, | 1579 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, |
| 1576 | node, __builtin_return_address(0)); | 1580 | node, __builtin_return_address(0)); |
| 1577 | } | 1581 | } |
| 1578 | EXPORT_SYMBOL(vmalloc_node); | 1582 | EXPORT_SYMBOL(vmalloc_node); |
| @@ -1595,7 +1599,7 @@ EXPORT_SYMBOL(vmalloc_node); | |||
| 1595 | 1599 | ||
| 1596 | void *vmalloc_exec(unsigned long size) | 1600 | void *vmalloc_exec(unsigned long size) |
| 1597 | { | 1601 | { |
| 1598 | return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, | 1602 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, |
| 1599 | -1, __builtin_return_address(0)); | 1603 | -1, __builtin_return_address(0)); |
| 1600 | } | 1604 | } |
| 1601 | 1605 | ||
| @@ -1616,7 +1620,7 @@ void *vmalloc_exec(unsigned long size) | |||
| 1616 | */ | 1620 | */ |
| 1617 | void *vmalloc_32(unsigned long size) | 1621 | void *vmalloc_32(unsigned long size) |
| 1618 | { | 1622 | { |
| 1619 | return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL, | 1623 | return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, |
| 1620 | -1, __builtin_return_address(0)); | 1624 | -1, __builtin_return_address(0)); |
| 1621 | } | 1625 | } |
| 1622 | EXPORT_SYMBOL(vmalloc_32); | 1626 | EXPORT_SYMBOL(vmalloc_32); |
| @@ -1633,7 +1637,7 @@ void *vmalloc_32_user(unsigned long size) | |||
| 1633 | struct vm_struct *area; | 1637 | struct vm_struct *area; |
| 1634 | void *ret; | 1638 | void *ret; |
| 1635 | 1639 | ||
| 1636 | ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, | 1640 | ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, |
| 1637 | -1, __builtin_return_address(0)); | 1641 | -1, __builtin_return_address(0)); |
| 1638 | if (ret) { | 1642 | if (ret) { |
| 1639 | area = find_vm_area(ret); | 1643 | area = find_vm_area(ret); |
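The vmalloc changes above thread an explicit alignment through __vmalloc_node(); most callers pass 1, while vmalloc_user() requests SHMLBA so the area can be mapped to userspace without cache aliasing issues on architectures that need it. The sketch below only shows what requesting a power-of-two alignment means numerically; align_up() is an illustrative helper, not the kernel's vmap area allocator.

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    static uintptr_t align_up(uintptr_t addr, uintptr_t align)
    {
            return (addr + align - 1) & ~(align - 1);   /* align must be a power of two */
    }

    int main(void)
    {
            uintptr_t base = 0x12345;

            printf("align 1:     0x%" PRIxPTR "\n", align_up(base, 1));
            printf("align 4096:  0x%" PRIxPTR "\n", align_up(base, 4096));
            printf("align 16384: 0x%" PRIxPTR "\n", align_up(base, 16384));
            return 0;
    }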
diff --git a/mm/vmscan.c b/mm/vmscan.c index 1219ceb8a9b2..777af57fd8c8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -544,6 +544,16 @@ redo: | |||
| 544 | */ | 544 | */ |
| 545 | lru = LRU_UNEVICTABLE; | 545 | lru = LRU_UNEVICTABLE; |
| 546 | add_page_to_unevictable_list(page); | 546 | add_page_to_unevictable_list(page); |
| 547 | /* | ||
| 548 | * When racing with an mlock clearing (page is | ||
| 549 | * unlocked), make sure that if the other thread does | ||
| 550 | * not observe our setting of PG_lru and fails | ||
| 551 | * isolation, we see PG_mlocked cleared below and move | ||
| 552 | * the page back to the evictable list. | ||
| 553 | * | ||
| 554 | * The other side is TestClearPageMlocked(). | ||
| 555 | */ | ||
| 556 | smp_mb(); | ||
| 547 | } | 557 | } |
| 548 | 558 | ||
| 549 | /* | 559 | /* |
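A userspace model, using C11 atomics, of the store/load ordering the new smp_mb() provides: each side publishes its own flag, issues a full barrier (the explicit fence here, the TestClearPageMlocked() read-modify-write on the other side), then reads the peer's flag, so at least one side is guaranteed to observe the other's update. The flags below only stand in for PG_lru and PG_mlocked; this is an illustration of the pattern, not kernel code.

    #include <stdatomic.h>
    #include <stdio.h>
    #include <pthread.h>

    static atomic_int page_on_lru;       /* stands in for PG_lru     */
    static atomic_int page_mlocked = 1;  /* stands in for PG_mlocked */

    static void *putback_side(void *arg)
    {
            (void)arg;
            atomic_store(&page_on_lru, 1);             /* add to unevictable list */
            atomic_thread_fence(memory_order_seq_cst); /* the smp_mb() in the hunk */
            if (!atomic_load(&page_mlocked))
                    puts("putback: mlock already cleared, rescue the page");
            return NULL;
    }

    static void *munlock_side(void *arg)
    {
            (void)arg;
            /* the exchange is a full barrier, like TestClearPageMlocked() */
            if (atomic_exchange(&page_mlocked, 0))
                    if (atomic_load(&page_on_lru))
                            puts("munlock: page visible on LRU, can isolate it");
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&a, NULL, putback_side, NULL);
            pthread_create(&b, NULL, munlock_side, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            return 0;
    }

Without the barrier, both sides could read the stale value of the other's flag and the page would be stranded on the unevictable list even though it is no longer mlocked.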
| @@ -1088,7 +1098,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
| 1088 | int lumpy_reclaim = 0; | 1098 | int lumpy_reclaim = 0; |
| 1089 | 1099 | ||
| 1090 | while (unlikely(too_many_isolated(zone, file, sc))) { | 1100 | while (unlikely(too_many_isolated(zone, file, sc))) { |
| 1091 | congestion_wait(WRITE, HZ/10); | 1101 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
| 1092 | 1102 | ||
| 1093 | /* We are about to die and free our memory. Return now. */ | 1103 | /* We are about to die and free our memory. Return now. */ |
| 1094 | if (fatal_signal_pending(current)) | 1104 | if (fatal_signal_pending(current)) |
| @@ -1356,7 +1366,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1356 | * IO, plus JVM can create lots of anon VM_EXEC pages, | 1366 | * IO, plus JVM can create lots of anon VM_EXEC pages, |
| 1357 | * so we ignore them here. | 1367 | * so we ignore them here. |
| 1358 | */ | 1368 | */ |
| 1359 | if ((vm_flags & VM_EXEC) && !PageAnon(page)) { | 1369 | if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { |
| 1360 | list_add(&page->lru, &l_active); | 1370 | list_add(&page->lru, &l_active); |
| 1361 | continue; | 1371 | continue; |
| 1362 | } | 1372 | } |
| @@ -1709,10 +1719,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist, | |||
| 1709 | * | 1719 | * |
| 1710 | * If the caller is !__GFP_FS then the probability of a failure is reasonably | 1720 | * If the caller is !__GFP_FS then the probability of a failure is reasonably |
| 1711 | * high - the zone may be full of dirty or under-writeback pages, which this | 1721 | * high - the zone may be full of dirty or under-writeback pages, which this |
| 1712 | * caller can't do much about. We kick pdflush and take explicit naps in the | 1722 | * caller can't do much about. We kick the writeback threads and take explicit |
| 1713 | * hope that some of these pages can be written. But if the allocating task | 1723 | * naps in the hope that some of these pages can be written. But if the |
| 1714 | * holds filesystem locks which prevent writeout this might not work, and the | 1724 | * allocating task holds filesystem locks which prevent writeout this might not |
| 1715 | * allocation attempt will fail. | 1725 | * work, and the allocation attempt will fail. |
| 1716 | * | 1726 | * |
| 1717 | * returns: 0, if no pages reclaimed | 1727 | * returns: 0, if no pages reclaimed |
| 1718 | * else, the number of pages reclaimed | 1728 | * else, the number of pages reclaimed |
