author    Linus Torvalds <torvalds@linux-foundation.org>	2014-08-07 00:14:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2014-08-07 00:14:42 -0400
commit    33caee39925b887a99a2400dc5c980097c3573f9
tree      8e68ad97e1fee88c4a3f31453041f8d139f2027e
parent    6456a0438b984186a0c9c8ecc9fe3d97b7ac3613
parent    f84223087402c45179be5e7060c5736c17a7b271
Merge branch 'akpm' (patchbomb from Andrew Morton)
Merge incoming from Andrew Morton:
 - Various misc things.
 - arch/sh updates.
 - Part of ocfs2.  Review is slow.
 - Slab updates.
 - Most of -mm.
 - printk updates.
 - lib/ updates.
 - checkpatch updates.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (226 commits)
  checkpatch: update $declaration_macros, add uninitialized_var
  checkpatch: warn on missing spaces in broken up quoted
  checkpatch: fix false positives for --strict "space after cast" test
  checkpatch: fix false positive MISSING_BREAK warnings with --file
  checkpatch: add test for native c90 types in unusual order
  checkpatch: add signed generic types
  checkpatch: add short int to c variable types
  checkpatch: add for_each tests to indentation and brace tests
  checkpatch: fix brace style misuses of else and while
  checkpatch: add --fix option for a couple OPEN_BRACE misuses
  checkpatch: use the correct indentation for which()
  checkpatch: add fix_insert_line and fix_delete_line helpers
  checkpatch: add ability to insert and delete lines to patch/file
  checkpatch: add an index variable for fixed lines
  checkpatch: warn on break after goto or return with same tab indentation
  checkpatch: emit a warning on file add/move/delete
  checkpatch: add test for commit id formatting style in commit log
  checkpatch: emit fewer kmalloc_array/kcalloc conversion warnings
  checkpatch: improve "no space after cast" test
  checkpatch: allow multiple const * types
  ...
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 2
-rw-r--r--  Documentation/kernel-parameters.txt | 8
-rw-r--r--  Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 53
-rw-r--r--  Makefile | 19
-rw-r--r--  arch/arm/mm/dma-mapping.c | 1
-rw-r--r--  arch/ia64/mm/init.c | 3
-rw-r--r--  arch/powerpc/kvm/Makefile | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 19
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.c | 240
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.h | 27
-rw-r--r--  arch/powerpc/mm/mem.c | 3
-rw-r--r--  arch/score/include/uapi/asm/ptrace.h | 11
-rw-r--r--  arch/sh/drivers/dma/Kconfig | 5
-rw-r--r--  arch/sh/include/asm/io_noioport.h | 11
-rw-r--r--  arch/sh/include/cpu-sh4/cpu/dma-register.h | 1
-rw-r--r--  arch/sh/include/cpu-sh4a/cpu/dma.h | 3
-rw-r--r--  arch/sh/kernel/cpu/sh4a/clock-sh7724.c | 4
-rw-r--r--  arch/sh/kernel/time.c | 4
-rw-r--r--  arch/sh/mm/asids-debugfs.c | 4
-rw-r--r--  arch/sh/mm/init.c | 5
-rw-r--r--  arch/tile/kernel/module.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 3
-rw-r--r--  arch/x86/mm/init_32.c | 3
-rw-r--r--  arch/x86/mm/init_64.c | 3
-rw-r--r--  drivers/ata/Kconfig | 1
-rw-r--r--  drivers/ata/libata-core.c | 72
-rw-r--r--  drivers/base/Kconfig | 10
-rw-r--r--  drivers/base/dma-contiguous.c | 220
-rw-r--r--  drivers/base/memory.c | 30
-rw-r--r--  drivers/base/node.c | 2
-rw-r--r--  drivers/block/zram/zram_drv.c | 71
-rw-r--r--  drivers/block/zram/zram_drv.h | 29
-rw-r--r--  drivers/firmware/memmap.c | 6
-rw-r--r--  drivers/gpu/drm/drm_hashtab.c | 2
-rw-r--r--  drivers/hwmon/asus_atk0110.c | 2
-rw-r--r--  drivers/lguest/core.c | 7
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2
-rw-r--r--  drivers/staging/android/binder.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/libcfs/hash.c | 4
-rw-r--r--  drivers/tty/sysrq.c | 2
-rw-r--r--  fs/fscache/main.c | 4
-rw-r--r--  fs/logfs/readwrite.c | 15
-rw-r--r--  fs/namespace.c | 2
-rw-r--r--  fs/notify/fanotify/fanotify.c | 11
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 14
-rw-r--r--  fs/notify/inode_mark.c | 2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 2
-rw-r--r--  fs/notify/inotify/inotify_user.c | 4
-rw-r--r--  fs/notify/notification.c | 37
-rw-r--r--  fs/notify/vfsmount_mark.c | 2
-rw-r--r--  fs/ntfs/file.c | 3
-rw-r--r--  fs/ocfs2/alloc.c | 15
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 5
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 4
-rw-r--r--  fs/ocfs2/move_extents.c | 2
-rw-r--r--  fs/ocfs2/refcounttree.c | 2
-rw-r--r--  fs/ocfs2/slot_map.c | 2
-rw-r--r--  fs/proc/meminfo.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 27
-rw-r--r--  fs/squashfs/file_direct.c | 2
-rw-r--r--  fs/squashfs/super.c | 5
-rw-r--r--  include/linux/bitmap.h | 62
-rw-r--r--  include/linux/byteorder/generic.h | 2
-rw-r--r--  include/linux/cma.h | 27
-rw-r--r--  include/linux/dma-contiguous.h | 11
-rw-r--r--  include/linux/fs.h | 2
-rw-r--r--  include/linux/fsnotify_backend.h | 14
-rw-r--r--  include/linux/gfp.h | 2
-rw-r--r--  include/linux/glob.h | 9
-rw-r--r--  include/linux/highmem.h | 2
-rw-r--r--  include/linux/huge_mm.h | 4
-rw-r--r--  include/linux/hugetlb.h | 1
-rw-r--r--  include/linux/kernel.h | 5
-rw-r--r--  include/linux/klist.h | 2
-rw-r--r--  include/linux/list.h | 14
-rw-r--r--  include/linux/memblock.h | 4
-rw-r--r--  include/linux/memory_hotplug.h | 10
-rw-r--r--  include/linux/mmdebug.h | 2
-rw-r--r--  include/linux/mmu_notifier.h | 6
-rw-r--r--  include/linux/mmzone.h | 219
-rw-r--r--  include/linux/nodemask.h | 11
-rw-r--r--  include/linux/oom.h | 4
-rw-r--r--  include/linux/page-flags.h | 21
-rw-r--r--  include/linux/pagemap.h | 3
-rw-r--r--  include/linux/printk.h | 2
-rw-r--r--  include/linux/rculist.h | 8
-rw-r--r--  include/linux/swap.h | 1
-rw-r--r--  include/linux/vmalloc.h | 2
-rw-r--r--  include/linux/zbud.h | 2
-rw-r--r--  include/linux/zlib.h | 118
-rw-r--r--  include/linux/zpool.h | 106
-rw-r--r--  include/trace/events/migrate.h | 1
-rw-r--r--  include/trace/events/pagemap.h | 16
-rw-r--r--  init/Kconfig | 46
-rw-r--r--  kernel/auditfilter.c | 4
-rw-r--r--  kernel/exit.c | 1
-rw-r--r--  kernel/printk/printk.c | 157
-rw-r--r--  kernel/smp.c | 2
-rw-r--r--  kernel/sysctl.c | 9
-rw-r--r--  kernel/watchdog.c | 10
-rw-r--r--  lib/Kconfig | 33
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/bitmap.c | 111
-rw-r--r--  lib/cmdline.c | 15
-rw-r--r--  lib/glob.c | 287
-rw-r--r--  lib/klist.c | 6
-rw-r--r--  lib/list_sort.c | 71
-rw-r--r--  lib/string_helpers.c | 15
-rw-r--r--  lib/test-kstrtox.c | 2
-rw-r--r--  lib/zlib_deflate/deflate.c | 143
-rw-r--r--  lib/zlib_inflate/inflate.c | 132
-rw-r--r--  mm/Kconfig | 54
-rw-r--r--  mm/Makefile | 2
-rw-r--r--  mm/cma.c | 335
-rw-r--r--  mm/filemap.c | 27
-rw-r--r--  mm/gup.c | 18
-rw-r--r--  mm/highmem.c | 86
-rw-r--r--  mm/huge_memory.c | 38
-rw-r--r--  mm/hugetlb.c | 129
-rw-r--r--  mm/hwpoison-inject.c | 3
-rw-r--r--  mm/internal.h | 2
-rw-r--r--  mm/madvise.c | 3
-rw-r--r--  mm/memcontrol.c | 416
-rw-r--r--  mm/memory-failure.c | 10
-rw-r--r--  mm/memory.c | 70
-rw-r--r--  mm/memory_hotplug.c | 45
-rw-r--r--  mm/mlock.c | 9
-rw-r--r--  mm/mmap.c | 5
-rw-r--r--  mm/mmu_notifier.c | 40
-rw-r--r--  mm/oom_kill.c | 34
-rw-r--r--  mm/page-writeback.c | 5
-rw-r--r--  mm/page_alloc.c | 159
-rw-r--r--  mm/readahead.c | 3
-rw-r--r--  mm/shmem.c | 39
-rw-r--r--  mm/slab.c | 514
-rw-r--r--  mm/slab.h | 24
-rw-r--r--  mm/slab_common.c | 101
-rw-r--r--  mm/slub.c | 221
-rw-r--r--  mm/swap.c | 18
-rw-r--r--  mm/util.c | 102
-rw-r--r--  mm/vmalloc.c | 30
-rw-r--r--  mm/vmscan.c | 274
-rw-r--r--  mm/vmstat.c | 9
-rw-r--r--  mm/zbud.c | 98
-rw-r--r--  mm/zpool.c | 364
-rw-r--r--  mm/zsmalloc.c | 86
-rw-r--r--  mm/zswap.c | 75
-rw-r--r--  net/batman-adv/fragmentation.c | 2
-rw-r--r--  net/bridge/br_multicast.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 2
-rw-r--r--  net/ipv6/addrlabel.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 4
-rwxr-xr-x  scripts/checkpatch.pl | 581
156 files changed, 3930 insertions, 2919 deletions
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
818 list_add_tail_rcu 818 list_add_tail_rcu
819 list_del_rcu 819 list_del_rcu
820 list_replace_rcu 820 list_replace_rcu
821 hlist_add_after_rcu 821 hlist_add_behind_rcu
822 hlist_add_before_rcu 822 hlist_add_before_rcu
823 hlist_add_head_rcu 823 hlist_add_head_rcu
824 hlist_del_rcu 824 hlist_del_rcu
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 883901b9ac4f..9344d833b7ea 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1716 7 (KERN_DEBUG) debug-level messages 1716 7 (KERN_DEBUG) debug-level messages
1717 1717
1718 log_buf_len=n[KMG] Sets the size of the printk ring buffer, 1718 log_buf_len=n[KMG] Sets the size of the printk ring buffer,
1719 in bytes. n must be a power of two. The default 1719 in bytes. n must be a power of two and greater
1720 size is set in the kernel config file. 1720 than the minimal size. The minimal size is defined
1721 by LOG_BUF_SHIFT kernel config parameter. There is
1722 also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
1723 that allows to increase the default size depending on
1724 the number of CPUs. See init/Kconfig for more details.
1721 1725
1722 logo.nologo [FB] Disables display of the built-in Linux logo. 1726 logo.nologo [FB] Disables display of the built-in Linux logo.
1723 This may be used to provide more screen space for 1727 This may be used to provide more screen space for
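For illustration only (not part of the patch): with the wording added above, a boot command line asking for a 4 MiB ring buffer would pass a power-of-two size using the documented K/M/G suffixes, e.g.

    log_buf_len=4M

and the value remains subject to the LOG_BUF_SHIFT minimum and the CPU-count-based default from CONFIG_LOG_CPU_MAX_BUF_SHIFT described in the new text.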
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 78c9a7b2b58f..8f961ef2b457 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP => 21;
47use constant HIGH_NR_SCANNED => 22; 47use constant HIGH_NR_SCANNED => 22;
48use constant HIGH_NR_TAKEN => 23; 48use constant HIGH_NR_TAKEN => 23;
49use constant HIGH_NR_RECLAIMED => 24; 49use constant HIGH_NR_RECLAIMED => 24;
50use constant HIGH_NR_FILE_SCANNED => 25;
51use constant HIGH_NR_ANON_SCANNED => 26;
52use constant HIGH_NR_FILE_RECLAIMED => 27;
53use constant HIGH_NR_ANON_RECLAIMED => 28;
50 54
51my %perprocesspid; 55my %perprocesspid;
52my %perprocess; 56my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
56 60
57my $total_wakeup_kswapd; 61my $total_wakeup_kswapd;
58my ($total_direct_reclaim, $total_direct_nr_scanned); 62my ($total_direct_reclaim, $total_direct_nr_scanned);
63my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
59my ($total_direct_latency, $total_kswapd_latency); 64my ($total_direct_latency, $total_kswapd_latency);
60my ($total_direct_nr_reclaimed); 65my ($total_direct_nr_reclaimed);
66my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
61my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async); 67my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
62my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async); 68my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
63my ($total_kswapd_nr_scanned, $total_kswapd_wake); 69my ($total_kswapd_nr_scanned, $total_kswapd_wake);
70my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
64my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async); 71my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
65my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async); 72my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
66my ($total_kswapd_nr_reclaimed); 73my ($total_kswapd_nr_reclaimed);
74my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
67 75
68# Catch sigint and exit on request 76# Catch sigint and exit on request
69my $sigint_report = 0; 77my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
374 } 382 }
375 my $isolate_mode = $1; 383 my $isolate_mode = $1;
376 my $nr_scanned = $4; 384 my $nr_scanned = $4;
385 my $file = $6;
377 386
378 # To closer match vmstat scanning statistics, only count isolate_both 387 # To closer match vmstat scanning statistics, only count isolate_both
379 # and isolate_inactive as scanning. isolate_active is rotation 388 # and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
382 # isolate_both == 3 391 # isolate_both == 3
383 if ($isolate_mode != 2) { 392 if ($isolate_mode != 2) {
384 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; 393 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
394 if ($file == 1) {
395 $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
396 } else {
397 $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
398 }
385 } 399 }
386 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { 400 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
387 $details = $6; 401 $details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
391 print " $regex_lru_shrink_inactive/o\n"; 405 print " $regex_lru_shrink_inactive/o\n";
392 next; 406 next;
393 } 407 }
408
394 my $nr_reclaimed = $4; 409 my $nr_reclaimed = $4;
410 my $flags = $6;
411 my $file = 0;
412 if ($flags =~ /RECLAIM_WB_FILE/) {
413 $file = 1;
414 }
395 $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed; 415 $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
416 if ($file) {
417 $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
418 } else {
419 $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
420 }
396 } elsif ($tracepoint eq "mm_vmscan_writepage") { 421 } elsif ($tracepoint eq "mm_vmscan_writepage") {
397 $details = $6; 422 $details = $6;
398 if ($details !~ /$regex_writepage/o) { 423 if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
493 $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}; 518 $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
494 $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 519 $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
495 $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 520 $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
521 $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
522 $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
496 $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 523 $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
524 $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
525 $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
497 $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 526 $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
498 $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 527 $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
499 $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 528 $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
513 $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}, 542 $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
514 $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}, 543 $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
515 $stats{$process_pid}->{HIGH_NR_SCANNED}, 544 $stats{$process_pid}->{HIGH_NR_SCANNED},
545 $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
546 $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
516 $stats{$process_pid}->{HIGH_NR_RECLAIMED}, 547 $stats{$process_pid}->{HIGH_NR_RECLAIMED},
548 $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
549 $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
517 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 550 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
518 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}, 551 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
519 $this_reclaim_delay / 1000); 552 $this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
552 585
553 $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}; 586 $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
554 $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 587 $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
588 $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
589 $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
555 $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 590 $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
591 $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
592 $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
556 $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 593 $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
557 $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 594 $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
558 $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 595 $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
563 $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}, 600 $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
564 $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}, 601 $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
565 $stats{$process_pid}->{HIGH_NR_SCANNED}, 602 $stats{$process_pid}->{HIGH_NR_SCANNED},
603 $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
604 $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
566 $stats{$process_pid}->{HIGH_NR_RECLAIMED}, 605 $stats{$process_pid}->{HIGH_NR_RECLAIMED},
606 $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
607 $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
567 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 608 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
568 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}); 609 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
569 610
@@ -594,7 +635,11 @@ sub dump_stats {
594 print "\nSummary\n"; 635 print "\nSummary\n";
595 print "Direct reclaims: $total_direct_reclaim\n"; 636 print "Direct reclaims: $total_direct_reclaim\n";
596 print "Direct reclaim pages scanned: $total_direct_nr_scanned\n"; 637 print "Direct reclaim pages scanned: $total_direct_nr_scanned\n";
638 print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n";
639 print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n";
597 print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n"; 640 print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n";
641 print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n";
642 print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n";
598 print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n"; 643 print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n";
599 print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n"; 644 print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n";
600 print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n"; 645 print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
604 print "\n"; 649 print "\n";
605 print "Kswapd wakeups: $total_kswapd_wake\n"; 650 print "Kswapd wakeups: $total_kswapd_wake\n";
606 print "Kswapd pages scanned: $total_kswapd_nr_scanned\n"; 651 print "Kswapd pages scanned: $total_kswapd_nr_scanned\n";
652 print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n";
653 print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n";
607 print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n"; 654 print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n";
655 print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n";
656 print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n";
608 print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n"; 657 print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n";
609 print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n"; 658 print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n";
610 print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n"; 659 print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
629 $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 678 $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
630 $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}; 679 $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
631 $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED}; 680 $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
681 $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
682 $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
632 $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED}; 683 $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
684 $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
685 $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
633 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 686 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
634 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 687 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
635 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 688 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
diff --git a/Makefile b/Makefile
index d0901b46b4bf..a897c50db515 100644
--- a/Makefile
+++ b/Makefile
@@ -621,6 +621,9 @@ else
621KBUILD_CFLAGS += -O2 621KBUILD_CFLAGS += -O2
622endif 622endif
623 623
624# Tell gcc to never replace conditional load with a non-conditional one
625KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
626
624ifdef CONFIG_READABLE_ASM 627ifdef CONFIG_READABLE_ASM
625# Disable optimizations that make assembler listings hard to read. 628# Disable optimizations that make assembler listings hard to read.
626# reorder blocks reorders the control in the function 629# reorder blocks reorders the control in the function
@@ -636,6 +639,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
636endif 639endif
637 640
638# Handle stack protector mode. 641# Handle stack protector mode.
642#
643# Since kbuild can potentially perform two passes (first with the old
644# .config values and then with updated .config values), we cannot error out
645# if a desired compiler option is unsupported. If we were to error, kbuild
646# could never get to the second pass and actually notice that we changed
647# the option to something that was supported.
648#
649# Additionally, we don't want to fallback and/or silently change which compiler
650# flags will be used, since that leads to producing kernels with different
651# security feature characteristics depending on the compiler used. ("But I
652# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
653#
654# The middle ground is to warn here so that the failed option is obvious, but
655# to let the build fail with bad compiler flags so that we can't produce a
656# kernel when there is a CONFIG and compiler mismatch.
657#
639ifdef CONFIG_CC_STACKPROTECTOR_REGULAR 658ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
640 stackp-flag := -fstack-protector 659 stackp-flag := -fstack-protector
641 ifeq ($(call cc-option, $(stackp-flag)),) 660 ifeq ($(call cc-option, $(stackp-flag)),)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1f88db06b133..7a996aaa061e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
26#include <linux/io.h> 26#include <linux/io.h>
27#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
28#include <linux/sizes.h> 28#include <linux/sizes.h>
29#include <linux/cma.h>
29 30
30#include <asm/memory.h> 31#include <asm/memory.h>
31#include <asm/highmem.h> 32#include <asm/highmem.h>
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 25c350264a41..892d43e32f3b 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -631,7 +631,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
631 631
632 pgdat = NODE_DATA(nid); 632 pgdat = NODE_DATA(nid);
633 633
634 zone = pgdat->node_zones + ZONE_NORMAL; 634 zone = pgdat->node_zones +
635 zone_for_memory(nid, start, size, ZONE_NORMAL);
635 ret = __add_pages(nid, zone, start_pfn, nr_pages); 636 ret = __add_pages(nid, zone, start_pfn, nr_pages);
636 637
637 if (ret) 638 if (ret)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6bf4d8..72905c30082e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
90 book3s_hv_rm_mmu.o \ 90 book3s_hv_rm_mmu.o \
91 book3s_hv_ras.o \ 91 book3s_hv_ras.o \
92 book3s_hv_builtin.o \ 92 book3s_hv_builtin.o \
93 book3s_hv_cma.o \
94 $(kvm-book3s_64-builtin-xics-objs-y) 93 $(kvm-book3s_64-builtin-xics-objs-y)
95endif 94endif
96 95
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68468d695f12..a01744fc3483 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
37#include <asm/ppc-opcode.h> 37#include <asm/ppc-opcode.h>
38#include <asm/cputable.h> 38#include <asm/cputable.h>
39 39
40#include "book3s_hv_cma.h"
41
42/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 40/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
43#define MAX_LPID_970 63 41#define MAX_LPID_970 63
44 42
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
64 } 62 }
65 63
66 kvm->arch.hpt_cma_alloc = 0; 64 kvm->arch.hpt_cma_alloc = 0;
67 VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
68 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); 65 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
69 if (page) { 66 if (page) {
70 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 67 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
68 memset((void *)hpt, 0, (1 << order));
71 kvm->arch.hpt_cma_alloc = 1; 69 kvm->arch.hpt_cma_alloc = 1;
72 } 70 }
73 71
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cde8a665205..6cf498a9bc98 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,12 +16,14 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/memblock.h> 17#include <linux/memblock.h>
18#include <linux/sizes.h> 18#include <linux/sizes.h>
19#include <linux/cma.h>
19 20
20#include <asm/cputable.h> 21#include <asm/cputable.h>
21#include <asm/kvm_ppc.h> 22#include <asm/kvm_ppc.h>
22#include <asm/kvm_book3s.h> 23#include <asm/kvm_book3s.h>
23 24
24#include "book3s_hv_cma.h" 25#define KVM_CMA_CHUNK_ORDER 18
26
25/* 27/*
26 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) 28 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
27 * should be power of 2. 29 * should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
43unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ 45unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
44EXPORT_SYMBOL_GPL(kvm_rma_pages); 46EXPORT_SYMBOL_GPL(kvm_rma_pages);
45 47
48static struct cma *kvm_cma;
49
46/* Work out RMLS (real mode limit selector) field value for a given RMA size. 50/* Work out RMLS (real mode limit selector) field value for a given RMA size.
47 Assumes POWER7 or PPC970. */ 51 Assumes POWER7 or PPC970. */
48static inline int lpcr_rmls(unsigned long rma_size) 52static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
97 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); 101 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
98 if (!ri) 102 if (!ri)
99 return NULL; 103 return NULL;
100 page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); 104 page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
101 if (!page) 105 if (!page)
102 goto err_out; 106 goto err_out;
103 atomic_set(&ri->use_count, 1); 107 atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
112void kvm_release_rma(struct kvm_rma_info *ri) 116void kvm_release_rma(struct kvm_rma_info *ri)
113{ 117{
114 if (atomic_dec_and_test(&ri->use_count)) { 118 if (atomic_dec_and_test(&ri->use_count)) {
115 kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); 119 cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
116 kfree(ri); 120 kfree(ri);
117 } 121 }
118} 122}
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
131{ 135{
132 unsigned long align_pages = HPT_ALIGN_PAGES; 136 unsigned long align_pages = HPT_ALIGN_PAGES;
133 137
138 VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
139
134 /* Old CPUs require HPT aligned on a multiple of its size */ 140 /* Old CPUs require HPT aligned on a multiple of its size */
135 if (!cpu_has_feature(CPU_FTR_ARCH_206)) 141 if (!cpu_has_feature(CPU_FTR_ARCH_206))
136 align_pages = nr_pages; 142 align_pages = nr_pages;
137 return kvm_alloc_cma(nr_pages, align_pages); 143 return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
138} 144}
139EXPORT_SYMBOL_GPL(kvm_alloc_hpt); 145EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
140 146
141void kvm_release_hpt(struct page *page, unsigned long nr_pages) 147void kvm_release_hpt(struct page *page, unsigned long nr_pages)
142{ 148{
143 kvm_release_cma(page, nr_pages); 149 cma_release(kvm_cma, page, nr_pages);
144} 150}
145EXPORT_SYMBOL_GPL(kvm_release_hpt); 151EXPORT_SYMBOL_GPL(kvm_release_hpt);
146 152
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
179 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; 185 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
180 186
181 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); 187 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
182 kvm_cma_declare_contiguous(selected_size, align_size); 188 cma_declare_contiguous(0, selected_size, 0, align_size,
189 KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
183 } 190 }
184} 191}
185 192
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644
index d9d3d8553d51..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
1/*
2 * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
3 * for DMA mapping framework
4 *
5 * Copyright IBM Corporation, 2013
6 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License or (at your optional) any later version of the license.
12 *
13 */
14#define pr_fmt(fmt) "kvm_cma: " fmt
15
16#ifdef CONFIG_CMA_DEBUG
17#ifndef DEBUG
18# define DEBUG
19#endif
20#endif
21
22#include <linux/memblock.h>
23#include <linux/mutex.h>
24#include <linux/sizes.h>
25#include <linux/slab.h>
26
27#include "book3s_hv_cma.h"
28
29struct kvm_cma {
30 unsigned long base_pfn;
31 unsigned long count;
32 unsigned long *bitmap;
33};
34
35static DEFINE_MUTEX(kvm_cma_mutex);
36static struct kvm_cma kvm_cma_area;
37
38/**
39 * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
40 * for kvm hash pagetable
41 * @size: Size of the reserved memory.
42 * @alignment: Alignment for the contiguous memory area
43 *
44 * This function reserves memory for kvm cma area. It should be
45 * called by arch code when early allocator (memblock or bootmem)
46 * is still activate.
47 */
48long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
49{
50 long base_pfn;
51 phys_addr_t addr;
52 struct kvm_cma *cma = &kvm_cma_area;
53
54 pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
55
56 if (!size)
57 return -EINVAL;
58 /*
59 * Sanitise input arguments.
60 * We should be pageblock aligned for CMA.
61 */
62 alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
63 size = ALIGN(size, alignment);
64 /*
65 * Reserve memory
66 * Use __memblock_alloc_base() since
67 * memblock_alloc_base() panic()s.
68 */
69 addr = __memblock_alloc_base(size, alignment, 0);
70 if (!addr) {
71 base_pfn = -ENOMEM;
72 goto err;
73 } else
74 base_pfn = PFN_DOWN(addr);
75
76 /*
77 * Each reserved area must be initialised later, when more kernel
78 * subsystems (like slab allocator) are available.
79 */
80 cma->base_pfn = base_pfn;
81 cma->count = size >> PAGE_SHIFT;
82 pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
83 return 0;
84err:
85 pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
86 return base_pfn;
87}
88
89/**
90 * kvm_alloc_cma() - allocate pages from contiguous area
91 * @nr_pages: Requested number of pages.
92 * @align_pages: Requested alignment in number of pages
93 *
94 * This function allocates memory buffer for hash pagetable.
95 */
96struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
97{
98 int ret;
99 struct page *page = NULL;
100 struct kvm_cma *cma = &kvm_cma_area;
101 unsigned long chunk_count, nr_chunk;
102 unsigned long mask, pfn, pageno, start = 0;
103
104
105 if (!cma || !cma->count)
106 return NULL;
107
108 pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
109 (void *)cma, nr_pages, align_pages);
110
111 if (!nr_pages)
112 return NULL;
113 /*
114 * align mask with chunk size. The bit tracks pages in chunk size
115 */
116 VM_BUG_ON(!is_power_of_2(align_pages));
117 mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
118 BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
119
120 chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
121 nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
122
123 mutex_lock(&kvm_cma_mutex);
124 for (;;) {
125 pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
126 start, nr_chunk, mask);
127 if (pageno >= chunk_count)
128 break;
129
130 pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
131 ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
132 if (ret == 0) {
133 bitmap_set(cma->bitmap, pageno, nr_chunk);
134 page = pfn_to_page(pfn);
135 memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
136 break;
137 } else if (ret != -EBUSY) {
138 break;
139 }
140 pr_debug("%s(): memory range at %p is busy, retrying\n",
141 __func__, pfn_to_page(pfn));
142 /* try again with a bit different memory target */
143 start = pageno + mask + 1;
144 }
145 mutex_unlock(&kvm_cma_mutex);
146 pr_debug("%s(): returned %p\n", __func__, page);
147 return page;
148}
149
150/**
151 * kvm_release_cma() - release allocated pages for hash pagetable
152 * @pages: Allocated pages.
153 * @nr_pages: Number of allocated pages.
154 *
155 * This function releases memory allocated by kvm_alloc_cma().
156 * It returns false when provided pages do not belong to contiguous area and
157 * true otherwise.
158 */
159bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
160{
161 unsigned long pfn;
162 unsigned long nr_chunk;
163 struct kvm_cma *cma = &kvm_cma_area;
164
165 if (!cma || !pages)
166 return false;
167
168 pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
169
170 pfn = page_to_pfn(pages);
171
172 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
173 return false;
174
175 VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
176 nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
177
178 mutex_lock(&kvm_cma_mutex);
179 bitmap_clear(cma->bitmap,
180 (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
181 nr_chunk);
182 free_contig_range(pfn, nr_pages);
183 mutex_unlock(&kvm_cma_mutex);
184
185 return true;
186}
187
188static int __init kvm_cma_activate_area(unsigned long base_pfn,
189 unsigned long count)
190{
191 unsigned long pfn = base_pfn;
192 unsigned i = count >> pageblock_order;
193 struct zone *zone;
194
195 WARN_ON_ONCE(!pfn_valid(pfn));
196 zone = page_zone(pfn_to_page(pfn));
197 do {
198 unsigned j;
199 base_pfn = pfn;
200 for (j = pageblock_nr_pages; j; --j, pfn++) {
201 WARN_ON_ONCE(!pfn_valid(pfn));
202 /*
203 * alloc_contig_range requires the pfn range
204 * specified to be in the same zone. Make this
205 * simple by forcing the entire CMA resv range
206 * to be in the same zone.
207 */
208 if (page_zone(pfn_to_page(pfn)) != zone)
209 return -EINVAL;
210 }
211 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
212 } while (--i);
213 return 0;
214}
215
216static int __init kvm_cma_init_reserved_areas(void)
217{
218 int bitmap_size, ret;
219 unsigned long chunk_count;
220 struct kvm_cma *cma = &kvm_cma_area;
221
222 pr_debug("%s()\n", __func__);
223 if (!cma->count)
224 return 0;
225 chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
226 bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
227 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
228 if (!cma->bitmap)
229 return -ENOMEM;
230
231 ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
232 if (ret)
233 goto error;
234 return 0;
235
236error:
237 kfree(cma->bitmap);
238 return ret;
239}
240core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644
index 655144f75fa5..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
3 * for DMA mapping framework
4 *
5 * Copyright IBM Corporation, 2013
6 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License or (at your optional) any later version of the license.
12 *
13 */
14
15#ifndef __POWERPC_KVM_CMA_ALLOC_H__
16#define __POWERPC_KVM_CMA_ALLOC_H__
17/*
18 * Both RMA and Hash page allocation will be multiple of 256K.
19 */
20#define KVM_CMA_CHUNK_ORDER 18
21
22extern struct page *kvm_alloc_cma(unsigned long nr_pages,
23 unsigned long align_pages);
24extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
25extern long kvm_cma_declare_contiguous(phys_addr_t size,
26 phys_addr_t alignment) __init;
27#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f5789e..e0f7a189c48e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
128 return -EINVAL; 128 return -EINVAL;
129 129
130 /* this should work for most non-highmem platforms */ 130 /* this should work for most non-highmem platforms */
131 zone = pgdata->node_zones; 131 zone = pgdata->node_zones +
132 zone_for_memory(nid, start, size, 0);
132 133
133 return __add_pages(nid, zone, start_pfn, nr_pages); 134 return __add_pages(nid, zone, start_pfn, nr_pages);
134} 135}
diff --git a/arch/score/include/uapi/asm/ptrace.h b/arch/score/include/uapi/asm/ptrace.h
index f59771a3f127..5c5e794058be 100644
--- a/arch/score/include/uapi/asm/ptrace.h
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -4,17 +4,6 @@
4#define PTRACE_GETREGS 12 4#define PTRACE_GETREGS 12
5#define PTRACE_SETREGS 13 5#define PTRACE_SETREGS 13
6 6
7#define PC 32
8#define CONDITION 33
9#define ECR 34
10#define EMA 35
11#define CEH 36
12#define CEL 37
13#define COUNTER 38
14#define LDCR 39
15#define STCR 40
16#define PSR 41
17
18#define SINGLESTEP16_INSN 0x7006 7#define SINGLESTEP16_INSN 0x7006
19#define SINGLESTEP32_INSN 0x840C8000 8#define SINGLESTEP32_INSN 0x840C8000
20#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */ 9#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index cfd5b90a8628..78bc97b1d027 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
12 default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \ 12 default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \
13 CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \ 13 CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
14 CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \ 14 CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \
15 CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \ 15 CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
16 CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ 16 CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760
17 CPU_SUBTYPE_SH7760
18 17
19config SH_DMA_API 18config SH_DMA_API
20 depends on SH_DMA 19 depends on SH_DMA
diff --git a/arch/sh/include/asm/io_noioport.h b/arch/sh/include/asm/io_noioport.h
index 4d48f1436a63..c727e6ddf69e 100644
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -34,6 +34,17 @@ static inline void outl(unsigned int x, unsigned long port)
34 BUG(); 34 BUG();
35} 35}
36 36
37static inline void __iomem *ioport_map(unsigned long port, unsigned int size)
38{
39 BUG();
40 return NULL;
41}
42
43static inline void ioport_unmap(void __iomem *addr)
44{
45 BUG();
46}
47
37#define inb_p(addr) inb(addr) 48#define inb_p(addr) inb(addr)
38#define inw_p(addr) inw(addr) 49#define inw_p(addr) inw(addr)
39#define inl_p(addr) inl(addr) 50#define inl_p(addr) inl(addr)
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
index 02788b6a03b7..9cd81e54056a 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
32#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */ 32#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
33#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \ 33#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
34 defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 34 defined(CONFIG_CPU_SUBTYPE_SH7763) || \
35 defined(CONFIG_CPU_SUBTYPE_SH7764) || \
36 defined(CONFIG_CPU_SUBTYPE_SH7780) || \ 35 defined(CONFIG_CPU_SUBTYPE_SH7780) || \
37 defined(CONFIG_CPU_SUBTYPE_SH7785) 36 defined(CONFIG_CPU_SUBTYPE_SH7785)
38#define CHCR_TS_LOW_MASK 0x00000018 37#define CHCR_TS_LOW_MASK 0x00000018
diff --git a/arch/sh/include/cpu-sh4a/cpu/dma.h b/arch/sh/include/cpu-sh4a/cpu/dma.h
index 89afb650ce25..8ceccceae844 100644
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
14#define DMTE4_IRQ evt2irq(0xb80) 14#define DMTE4_IRQ evt2irq(0xb80)
15#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/ 15#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/
16#define SH_DMAC_BASE0 0xFE008020 16#define SH_DMAC_BASE0 0xFE008020
17#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 17#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
18 defined(CONFIG_CPU_SUBTYPE_SH7764)
19#define DMTE0_IRQ evt2irq(0x640) 18#define DMTE0_IRQ evt2irq(0x640)
20#define DMTE4_IRQ evt2irq(0x780) 19#define DMTE4_IRQ evt2irq(0x780)
21#define DMAE0_IRQ evt2irq(0x6c0) 20#define DMAE0_IRQ evt2irq(0x6c0)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index f579dd528198..c187b9579c21 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
307 CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]), 307 CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
308 CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]), 308 CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
309 309
310 CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]), 310 CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
311 CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]), 311 CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
312 CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]), 312 CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
313 313
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
332 CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]), 332 CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
333 CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]), 333 CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
334 CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]), 334 CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
335 CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
336 CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
335 CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]), 337 CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
336 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]), 338 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
337 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]), 339 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 552c8fcf9416..d6d0a986c6e9 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
80 return -ENODEV; 80 return -ENODEV;
81 81
82 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); 82 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
83 if (IS_ERR(pdev))
84 return PTR_ERR(pdev);
85 83
86 return 0; 84 return PTR_ERR_OR_ZERO(pdev);
87} 85}
88module_init(rtc_generic_init); 86module_init(rtc_generic_init);
89 87
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index 74c03ecc4871..ecfc6b0c1da1 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
67 NULL, &asids_debugfs_fops); 67 NULL, &asids_debugfs_fops);
68 if (!asids_dentry) 68 if (!asids_dentry)
69 return -ENOMEM; 69 return -ENOMEM;
70 if (IS_ERR(asids_dentry))
71 return PTR_ERR(asids_dentry);
72 70
73 return 0; 71 return PTR_ERR_OR_ZERO(asids_dentry);
74} 72}
75module_init(asids_debugfs_init); 73module_init(asids_debugfs_init);
76 74
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2d089fe2cba9..2790b6a64157 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -495,8 +495,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
495 pgdat = NODE_DATA(nid); 495 pgdat = NODE_DATA(nid);
496 496
497 /* We only have ZONE_NORMAL, so this is easy.. */ 497 /* We only have ZONE_NORMAL, so this is easy.. */
498 ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, 498 ret = __add_pages(nid, pgdat->node_zones +
499 start_pfn, nr_pages); 499 zone_for_memory(nid, start, size, ZONE_NORMAL),
500 start_pfn, nr_pages);
500 if (unlikely(ret)) 501 if (unlikely(ret))
501 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 502 printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
502 503
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 4918d91bc3a6..d19b13e3a59f 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
58 area->nr_pages = npages; 58 area->nr_pages = npages;
59 area->pages = pages; 59 area->pages = pages;
60 60
61 if (map_vm_area(area, prot_rwx, &pages)) { 61 if (map_vm_area(area, prot_rwx, pages)) {
62 vunmap(area->addr); 62 vunmap(area->addr);
63 goto error; 63 goto error;
64 } 64 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..a24194681513 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
1218 /* 1218 /*
1219 * If for any reason at all we couldn't handle the fault, 1219 * If for any reason at all we couldn't handle the fault,
1220 * make sure we exit gracefully rather than endlessly redo 1220 * make sure we exit gracefully rather than endlessly redo
1221 * the fault: 1221 * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
1222 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
1222 */ 1223 */
1223 fault = handle_mm_fault(mm, vma, address, flags); 1224 fault = handle_mm_fault(mm, vma, address, flags);
1224 1225
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e39504878aec..7d05565ba781 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
825int arch_add_memory(int nid, u64 start, u64 size) 825int arch_add_memory(int nid, u64 start, u64 size)
826{ 826{
827 struct pglist_data *pgdata = NODE_DATA(nid); 827 struct pglist_data *pgdata = NODE_DATA(nid);
828 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; 828 struct zone *zone = pgdata->node_zones +
829 zone_for_memory(nid, start, size, ZONE_HIGHMEM);
829 unsigned long start_pfn = start >> PAGE_SHIFT; 830 unsigned long start_pfn = start >> PAGE_SHIFT;
830 unsigned long nr_pages = size >> PAGE_SHIFT; 831 unsigned long nr_pages = size >> PAGE_SHIFT;
831 832
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..5621c47d7a1a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -691,7 +691,8 @@ static void update_end_of_memory_vars(u64 start, u64 size)
691int arch_add_memory(int nid, u64 start, u64 size) 691int arch_add_memory(int nid, u64 start, u64 size)
692{ 692{
693 struct pglist_data *pgdat = NODE_DATA(nid); 693 struct pglist_data *pgdat = NODE_DATA(nid);
694 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 694 struct zone *zone = pgdat->node_zones +
695 zone_for_memory(nid, start, size, ZONE_NORMAL);
695 unsigned long start_pfn = start >> PAGE_SHIFT; 696 unsigned long start_pfn = start >> PAGE_SHIFT;
696 unsigned long nr_pages = size >> PAGE_SHIFT; 697 unsigned long nr_pages = size >> PAGE_SHIFT;
697 int ret; 698 int ret;
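Several architectures in this series (ia64, powerpc, sh and the x86 hunks above) get the same treatment: arch_add_memory() stops hard-coding ZONE_NORMAL/ZONE_HIGHMEM and instead asks the new zone_for_memory() helper which zone a hot-added range belongs to. A condensed sketch of the resulting shape, distilled from the hunks above rather than copied from any single architecture (error handling and arch-specific bookkeeping omitted):

    /* Condensed illustration of the post-series arch_add_memory() pattern. */
    int arch_add_memory(int nid, u64 start, u64 size)
    {
    	struct pglist_data *pgdat = NODE_DATA(nid);
    	/* zone_for_memory() chooses the target zone instead of the
    	 * architecture hard-coding ZONE_NORMAL here. */
    	struct zone *zone = pgdat->node_zones +
    			    zone_for_memory(nid, start, size, ZONE_NORMAL);
    	unsigned long start_pfn = start >> PAGE_SHIFT;
    	unsigned long nr_pages = size >> PAGE_SHIFT;

    	return __add_pages(nid, zone, start_pfn, nr_pages);
    }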
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index e65d400efd44..e1b92788c225 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
16 depends on BLOCK 16 depends on BLOCK
17 depends on !(M32R || M68K || S390) || BROKEN 17 depends on !(M32R || M68K || S390) || BROKEN
18 select SCSI 18 select SCSI
19 select GLOB
19 ---help--- 20 ---help---
20 If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or 21 If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
21 any other ATA device under Linux, say Y and make sure that you know 22 any other ATA device under Linux, say Y and make sure that you know
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 677c0c1b03bd..dbdc5d32343f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
59#include <linux/async.h> 59#include <linux/async.h>
60#include <linux/log2.h> 60#include <linux/log2.h>
61#include <linux/slab.h> 61#include <linux/slab.h>
62#include <linux/glob.h>
62#include <scsi/scsi.h> 63#include <scsi/scsi.h>
63#include <scsi/scsi_cmnd.h> 64#include <scsi/scsi_cmnd.h>
64#include <scsi/scsi_host.h> 65#include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
4250 { } 4251 { }
4251}; 4252};
4252 4253
4253/**
4254 * glob_match - match a text string against a glob-style pattern
4255 * @text: the string to be examined
4256 * @pattern: the glob-style pattern to be matched against
4257 *
4258 * Either/both of text and pattern can be empty strings.
4259 *
4260 * Match text against a glob-style pattern, with wildcards and simple sets:
4261 *
4262 * ? matches any single character.
4263 * * matches any run of characters.
4264 * [xyz] matches a single character from the set: x, y, or z.
4265 * [a-d] matches a single character from the range: a, b, c, or d.
4266 * [a-d0-9] matches a single character from either range.
4267 *
4268 * The special characters ?, [, -, or *, can be matched using a set, eg. [*]
4269 * Behaviour with malformed patterns is undefined, though generally reasonable.
4270 *
4271 * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx"
4272 *
4273 * This function uses one level of recursion per '*' in pattern.
4274 * Since it calls _nothing_ else, and has _no_ explicit local variables,
4275 * this will not cause stack problems for any reasonable use here.
4276 *
4277 * RETURNS:
4278 * 0 on match, 1 otherwise.
4279 */
4280static int glob_match (const char *text, const char *pattern)
4281{
4282 do {
4283 /* Match single character or a '?' wildcard */
4284 if (*text == *pattern || *pattern == '?') {
4285 if (!*pattern++)
4286 return 0; /* End of both strings: match */
4287 } else {
4288 /* Match single char against a '[' bracketed ']' pattern set */
4289 if (!*text || *pattern != '[')
4290 break; /* Not a pattern set */
4291 while (*++pattern && *pattern != ']' && *text != *pattern) {
4292 if (*pattern == '-' && *(pattern - 1) != '[')
4293 if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
4294 ++pattern;
4295 break;
4296 }
4297 }
4298 if (!*pattern || *pattern == ']')
4299 return 1; /* No match */
4300 while (*pattern && *pattern++ != ']');
4301 }
4302 } while (*++text && *pattern);
4303
4304 /* Match any run of chars against a '*' wildcard */
4305 if (*pattern == '*') {
4306 if (!*++pattern)
4307 return 0; /* Match: avoid recursion at end of pattern */
4308 /* Loop to handle additional pattern chars after the wildcard */
4309 while (*text) {
4310 if (glob_match(text, pattern) == 0)
4311 return 0; /* Remainder matched */
4312 ++text; /* Absorb (match) this char and try again */
4313 }
4314 }
4315 if (!*text && !*pattern)
4316 return 0; /* End of both strings: match */
4317 return 1; /* No match */
4318}
4319
4320static unsigned long ata_dev_blacklisted(const struct ata_device *dev) 4254static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
4321{ 4255{
4322 unsigned char model_num[ATA_ID_PROD_LEN + 1]; 4256 unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
4327 ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev)); 4261 ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
4328 4262
4329 while (ad->model_num) { 4263 while (ad->model_num) {
4330 if (!glob_match(model_num, ad->model_num)) { 4264 if (glob_match(model_num, ad->model_num)) {
4331 if (ad->model_rev == NULL) 4265 if (ad->model_rev == NULL)
4332 return ad->horkage; 4266 return ad->horkage;
4333 if (!glob_match(model_rev, ad->model_rev)) 4267 if (glob_match(model_rev, ad->model_rev))
4334 return ad->horkage; 4268 return ad->horkage;
4335 } 4269 }
4336 ad++; 4270 ad++;
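libata's private glob_match() (a 0-on-match helper) is deleted above in favour of the generic matcher provided by CONFIG_GLOB/lib/glob.c, which returns true on a match, hence the dropped negations in ata_dev_blacklisted(). A small sketch of using the shared helper, assuming lib/glob.c's glob_match(pattern, string) argument order; the pattern strings and function name are made up for illustration:

    #include <linux/glob.h>

    /* Hypothetical check against a few glob patterns; returns true when
     * model_num matches any of them. */
    static bool example_model_blacklisted(const char *model_num)
    {
    	static const char * const patterns[] = { "WDC*", "ST3[12]0*", NULL };
    	int i;

    	for (i = 0; patterns[i]; i++)
    		if (glob_match(patterns[i], model_num))
    			return true;
    	return false;
    }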
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 88500fed3c7a..4e7f0ff83ae7 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
289 289
290 If unsure, leave the default value "8". 290 If unsure, leave the default value "8".
291 291
292config CMA_AREAS
293 int "Maximum count of the CMA device-private areas"
294 default 7
295 help
296 CMA allows to create CMA areas for particular devices. This parameter
297 sets the maximum number of such device private CMA areas in the
298 system.
299
300 If unsure, leave the default value "7".
301
302endif 292endif
303 293
304endmenu 294endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6467c919c509..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,23 +24,9 @@
24 24
25#include <linux/memblock.h> 25#include <linux/memblock.h>
26#include <linux/err.h> 26#include <linux/err.h>
27#include <linux/mm.h>
28#include <linux/mutex.h>
29#include <linux/page-isolation.h>
30#include <linux/sizes.h> 27#include <linux/sizes.h>
31#include <linux/slab.h>
32#include <linux/swap.h>
33#include <linux/mm_types.h>
34#include <linux/dma-contiguous.h> 28#include <linux/dma-contiguous.h>
35 29#include <linux/cma.h>
36struct cma {
37 unsigned long base_pfn;
38 unsigned long count;
39 unsigned long *bitmap;
40 struct mutex lock;
41};
42
43struct cma *dma_contiguous_default_area;
44 30
45#ifdef CONFIG_CMA_SIZE_MBYTES 31#ifdef CONFIG_CMA_SIZE_MBYTES
46#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES 32#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
48#define CMA_SIZE_MBYTES 0 34#define CMA_SIZE_MBYTES 0
49#endif 35#endif
50 36
37struct cma *dma_contiguous_default_area;
38
51/* 39/*
52 * Default global CMA area size can be defined in kernel's .config. 40 * Default global CMA area size can be defined in kernel's .config.
53 * This is useful mainly for distro maintainers to create a kernel 41 * This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
154 } 142 }
155} 143}
156 144
157static DEFINE_MUTEX(cma_mutex);
158
159static int __init cma_activate_area(struct cma *cma)
160{
161 int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
162 unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
163 unsigned i = cma->count >> pageblock_order;
164 struct zone *zone;
165
166 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
167
168 if (!cma->bitmap)
169 return -ENOMEM;
170
171 WARN_ON_ONCE(!pfn_valid(pfn));
172 zone = page_zone(pfn_to_page(pfn));
173
174 do {
175 unsigned j;
176 base_pfn = pfn;
177 for (j = pageblock_nr_pages; j; --j, pfn++) {
178 WARN_ON_ONCE(!pfn_valid(pfn));
179 /*
180 * alloc_contig_range requires the pfn range
181 * specified to be in the same zone. Make this
182 * simple by forcing the entire CMA resv range
183 * to be in the same zone.
184 */
185 if (page_zone(pfn_to_page(pfn)) != zone)
186 goto err;
187 }
188 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
189 } while (--i);
190
191 mutex_init(&cma->lock);
192 return 0;
193
194err:
195 kfree(cma->bitmap);
196 return -EINVAL;
197}
198
199static struct cma cma_areas[MAX_CMA_AREAS];
200static unsigned cma_area_count;
201
202static int __init cma_init_reserved_areas(void)
203{
204 int i;
205
206 for (i = 0; i < cma_area_count; i++) {
207 int ret = cma_activate_area(&cma_areas[i]);
208 if (ret)
209 return ret;
210 }
211
212 return 0;
213}
214core_initcall(cma_init_reserved_areas);
215
216/** 145/**
217 * dma_contiguous_reserve_area() - reserve custom contiguous area 146 * dma_contiguous_reserve_area() - reserve custom contiguous area
218 * @size: Size of the reserved area (in bytes), 147 * @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
234 phys_addr_t limit, struct cma **res_cma, 163 phys_addr_t limit, struct cma **res_cma,
235 bool fixed) 164 bool fixed)
236{ 165{
237 struct cma *cma = &cma_areas[cma_area_count]; 166 int ret;
238 phys_addr_t alignment;
239 int ret = 0;
240
241 pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
242 (unsigned long)size, (unsigned long)base,
243 (unsigned long)limit);
244
245 /* Sanity checks */
246 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
247 pr_err("Not enough slots for CMA reserved regions!\n");
248 return -ENOSPC;
249 }
250
251 if (!size)
252 return -EINVAL;
253
254 /* Sanitise input arguments */
255 alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
256 base = ALIGN(base, alignment);
257 size = ALIGN(size, alignment);
258 limit &= ~(alignment - 1);
259
260 /* Reserve memory */
261 if (base && fixed) {
262 if (memblock_is_region_reserved(base, size) ||
263 memblock_reserve(base, size) < 0) {
264 ret = -EBUSY;
265 goto err;
266 }
267 } else {
268 phys_addr_t addr = memblock_alloc_range(size, alignment, base,
269 limit);
270 if (!addr) {
271 ret = -ENOMEM;
272 goto err;
273 } else {
274 base = addr;
275 }
276 }
277
278 /*
279 * Each reserved area must be initialised later, when more kernel
280 * subsystems (like slab allocator) are available.
281 */
282 cma->base_pfn = PFN_DOWN(base);
283 cma->count = size >> PAGE_SHIFT;
284 *res_cma = cma;
285 cma_area_count++;
286 167
287 pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, 168 ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
288 (unsigned long)base); 169 if (ret)
170 return ret;
289 171
290 /* Architecture specific contiguous memory fixup. */ 172 /* Architecture specific contiguous memory fixup. */
291 dma_contiguous_early_fixup(base, size); 173 dma_contiguous_early_fixup(cma_get_base(*res_cma),
292 return 0; 174 cma_get_size(*res_cma));
293err:
294 pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
295 return ret;
296}
297 175
298static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) 176 return 0;
299{
300 mutex_lock(&cma->lock);
301 bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
302 mutex_unlock(&cma->lock);
303} 177}
304 178
305/** 179/**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
316struct page *dma_alloc_from_contiguous(struct device *dev, int count, 190struct page *dma_alloc_from_contiguous(struct device *dev, int count,
317 unsigned int align) 191 unsigned int align)
318{ 192{
319 unsigned long mask, pfn, pageno, start = 0;
320 struct cma *cma = dev_get_cma_area(dev);
321 struct page *page = NULL;
322 int ret;
323
324 if (!cma || !cma->count)
325 return NULL;
326
327 if (align > CONFIG_CMA_ALIGNMENT) 193 if (align > CONFIG_CMA_ALIGNMENT)
328 align = CONFIG_CMA_ALIGNMENT; 194 align = CONFIG_CMA_ALIGNMENT;
329 195
330 pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, 196 return cma_alloc(dev_get_cma_area(dev), count, align);
331 count, align);
332
333 if (!count)
334 return NULL;
335
336 mask = (1 << align) - 1;
337
338
339 for (;;) {
340 mutex_lock(&cma->lock);
341 pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
342 start, count, mask);
343 if (pageno >= cma->count) {
344 mutex_unlock(&cma->lock);
345 break;
346 }
347 bitmap_set(cma->bitmap, pageno, count);
348 /*
349 * It's safe to drop the lock here. We've marked this region for
350 * our exclusive use. If the migration fails we will take the
351 * lock again and unmark it.
352 */
353 mutex_unlock(&cma->lock);
354
355 pfn = cma->base_pfn + pageno;
356 mutex_lock(&cma_mutex);
357 ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
358 mutex_unlock(&cma_mutex);
359 if (ret == 0) {
360 page = pfn_to_page(pfn);
361 break;
362 } else if (ret != -EBUSY) {
363 clear_cma_bitmap(cma, pfn, count);
364 break;
365 }
366 clear_cma_bitmap(cma, pfn, count);
367 pr_debug("%s(): memory range at %p is busy, retrying\n",
368 __func__, pfn_to_page(pfn));
369 /* try again with a bit different memory target */
370 start = pageno + mask + 1;
371 }
372
373 pr_debug("%s(): returned %p\n", __func__, page);
374 return page;
375} 197}
376 198
377/** 199/**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
387bool dma_release_from_contiguous(struct device *dev, struct page *pages, 209bool dma_release_from_contiguous(struct device *dev, struct page *pages,
388 int count) 210 int count)
389{ 211{
390 struct cma *cma = dev_get_cma_area(dev); 212 return cma_release(dev_get_cma_area(dev), pages, count);
391 unsigned long pfn;
392
393 if (!cma || !pages)
394 return false;
395
396 pr_debug("%s(page %p)\n", __func__, (void *)pages);
397
398 pfn = page_to_pfn(pages);
399
400 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
401 return false;
402
403 VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
404
405 free_contig_range(pfn, count);
406 clear_cma_bitmap(cma, pfn, count);
407
408 return true;
409} 213}
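
The dma-contiguous.c rework above deletes the driver-private bitmap, mutex and alloc_contig_range() retry loop and forwards to the generic CMA layer (cma_declare_contiguous(), cma_alloc(), cma_release()), leaving dma_alloc_from_contiguous() and dma_release_from_contiguous() as thin wrappers. A rough userspace model of the bookkeeping that moved behind cma_alloc()/cma_release() (assumption: single-threaded, one flag per page, no page migration):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define AREA_PAGES 64

static bool bitmap[AREA_PAGES];	/* one in-use flag per page in the area */

/* Find a run of @count free pages, mark it used, return its offset. */
static long toy_cma_alloc(size_t count)
{
	for (size_t start = 0; start + count <= AREA_PAGES; start++) {
		size_t n;

		for (n = 0; n < count && !bitmap[start + n]; n++)
			;
		if (n == count) {		/* found a free run */
			for (n = 0; n < count; n++)
				bitmap[start + n] = true;
			return (long)start;
		}
		start += n;			/* skip past the busy page */
	}
	return -1;
}

static void toy_cma_release(long pageno, size_t count)
{
	for (size_t n = 0; n < count; n++)
		bitmap[pageno + n] = false;
}

int main(void)
{
	long a = toy_cma_alloc(8);
	long b = toy_cma_alloc(16);

	printf("a=%ld b=%ld\n", a, b);		/* expect 0 and 8 */
	toy_cma_release(a, 8);
	printf("again=%ld\n", toy_cma_alloc(4));	/* reuses the freed run: 0 */
	return 0;
}

The real code additionally serializes allocators, migrates any movable pages currently in the chosen range, and rolls the bitmap back and retries at a different offset when that fails.
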
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 89f752dd8465..a2e13e250bba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
284 * attribute and need to set the online_type. 284 * attribute and need to set the online_type.
285 */ 285 */
286 if (mem->online_type < 0) 286 if (mem->online_type < 0)
287 mem->online_type = ONLINE_KEEP; 287 mem->online_type = MMOP_ONLINE_KEEP;
288 288
289 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 289 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
290 290
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
315 if (ret) 315 if (ret)
316 return ret; 316 return ret;
317 317
318 if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) 318 if (sysfs_streq(buf, "online_kernel"))
319 online_type = ONLINE_KERNEL; 319 online_type = MMOP_ONLINE_KERNEL;
320 else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) 320 else if (sysfs_streq(buf, "online_movable"))
321 online_type = ONLINE_MOVABLE; 321 online_type = MMOP_ONLINE_MOVABLE;
322 else if (!strncmp(buf, "online", min_t(int, count, 6))) 322 else if (sysfs_streq(buf, "online"))
323 online_type = ONLINE_KEEP; 323 online_type = MMOP_ONLINE_KEEP;
324 else if (!strncmp(buf, "offline", min_t(int, count, 7))) 324 else if (sysfs_streq(buf, "offline"))
325 online_type = -1; 325 online_type = MMOP_OFFLINE;
326 else { 326 else {
327 ret = -EINVAL; 327 ret = -EINVAL;
328 goto err; 328 goto err;
329 } 329 }
330 330
331 switch (online_type) { 331 switch (online_type) {
332 case ONLINE_KERNEL: 332 case MMOP_ONLINE_KERNEL:
333 case ONLINE_MOVABLE: 333 case MMOP_ONLINE_MOVABLE:
334 case ONLINE_KEEP: 334 case MMOP_ONLINE_KEEP:
335 /* 335 /*
336 * mem->online_type is not protected so there can be a 336 * mem->online_type is not protected so there can be a
337 * race here. However, when racing online, the first 337 * race here. However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
342 mem->online_type = online_type; 342 mem->online_type = online_type;
343 ret = device_online(&mem->dev); 343 ret = device_online(&mem->dev);
344 break; 344 break;
345 case -1: 345 case MMOP_OFFLINE:
346 ret = device_offline(&mem->dev); 346 ret = device_offline(&mem->dev);
347 break; 347 break;
348 default: 348 default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
406 int i, ret; 406 int i, ret;
407 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 407 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
408 408
409 phys_addr = simple_strtoull(buf, NULL, 0); 409 ret = kstrtoull(buf, 0, &phys_addr);
410 if (ret)
411 return ret;
410 412
411 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 413 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
412 return -EINVAL; 414 return -EINVAL;
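
Two cleanups in the memory.c hunk: the state strings are now compared with sysfs_streq(), which ignores the single trailing newline a sysfs write usually carries and otherwise requires an exact match (the old length-limited strncmp() would also accept strings that merely start with "online"), and the probed address is parsed with kstrtoull(), whose return value is actually checked. A userspace re-implementation of the comparison semantics, offered as a sketch of the documented behaviour rather than the kernel source:

#include <stdbool.h>
#include <stdio.h>

/* Exact string compare that also accepts one trailing '\n' on either side. */
static bool streq_newline(const char *s1, const char *s2)
{
	while (*s1 && *s1 == *s2) {
		s1++;
		s2++;
	}
	if (*s1 == *s2)
		return true;			/* both ended together */
	if (!*s1 && *s2 == '\n' && !s2[1])
		return true;			/* s2 has a trailing newline */
	if (*s1 == '\n' && !s1[1] && !*s2)
		return true;			/* s1 has a trailing newline */
	return false;
}

int main(void)
{
	printf("%d\n", streq_newline("online", "online\n"));	/* 1 */
	/* 0; the old strncmp(buf, "online", 6) test would have matched this. */
	printf("%d\n", streq_newline("online", "online_foo"));
	return 0;
}
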
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8f7ed9933a7c..c6d3ae05f1ca 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
126 nid, K(node_page_state(nid, NR_FILE_PAGES)), 126 nid, K(node_page_state(nid, NR_FILE_PAGES)),
127 nid, K(node_page_state(nid, NR_FILE_MAPPED)), 127 nid, K(node_page_state(nid, NR_FILE_MAPPED)),
128 nid, K(node_page_state(nid, NR_ANON_PAGES)), 128 nid, K(node_page_state(nid, NR_ANON_PAGES)),
129 nid, K(node_page_state(nid, NR_SHMEM)), 129 nid, K(i.sharedram),
130 nid, node_page_state(nid, NR_KERNEL_STACK) * 130 nid, node_page_state(nid, NR_KERNEL_STACK) *
131 THREAD_SIZE / 1024, 131 THREAD_SIZE / 1024,
132 nid, K(node_page_state(nid, NR_PAGETABLE)), 132 nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 36e54be402df..dfa4024c448a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
183static int zram_test_flag(struct zram_meta *meta, u32 index, 183static int zram_test_flag(struct zram_meta *meta, u32 index,
184 enum zram_pageflags flag) 184 enum zram_pageflags flag)
185{ 185{
186 return meta->table[index].flags & BIT(flag); 186 return meta->table[index].value & BIT(flag);
187} 187}
188 188
189static void zram_set_flag(struct zram_meta *meta, u32 index, 189static void zram_set_flag(struct zram_meta *meta, u32 index,
190 enum zram_pageflags flag) 190 enum zram_pageflags flag)
191{ 191{
192 meta->table[index].flags |= BIT(flag); 192 meta->table[index].value |= BIT(flag);
193} 193}
194 194
195static void zram_clear_flag(struct zram_meta *meta, u32 index, 195static void zram_clear_flag(struct zram_meta *meta, u32 index,
196 enum zram_pageflags flag) 196 enum zram_pageflags flag)
197{ 197{
198 meta->table[index].flags &= ~BIT(flag); 198 meta->table[index].value &= ~BIT(flag);
199}
200
201static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
202{
203 return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
204}
205
206static void zram_set_obj_size(struct zram_meta *meta,
207 u32 index, size_t size)
208{
209 unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
210
211 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
199} 212}
200 213
201static inline int is_partial_io(struct bio_vec *bvec) 214static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
255 goto free_table; 268 goto free_table;
256 } 269 }
257 270
258 rwlock_init(&meta->tb_lock);
259 return meta; 271 return meta;
260 272
261free_table: 273free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
304 flush_dcache_page(page); 316 flush_dcache_page(page);
305} 317}
306 318
307/* NOTE: caller should hold meta->tb_lock with write-side */ 319
320/*
 321 * To protect concurrent access to the same index entry, the
 322 * caller should hold this table index entry's bit_spinlock for
 323 * as long as the entry is being accessed.

324 */
308static void zram_free_page(struct zram *zram, size_t index) 325static void zram_free_page(struct zram *zram, size_t index)
309{ 326{
310 struct zram_meta *meta = zram->meta; 327 struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
324 341
325 zs_free(meta->mem_pool, handle); 342 zs_free(meta->mem_pool, handle);
326 343
327 atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); 344 atomic64_sub(zram_get_obj_size(meta, index),
345 &zram->stats.compr_data_size);
328 atomic64_dec(&zram->stats.pages_stored); 346 atomic64_dec(&zram->stats.pages_stored);
329 347
330 meta->table[index].handle = 0; 348 meta->table[index].handle = 0;
331 meta->table[index].size = 0; 349 zram_set_obj_size(meta, index, 0);
332} 350}
333 351
334static int zram_decompress_page(struct zram *zram, char *mem, u32 index) 352static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
337 unsigned char *cmem; 355 unsigned char *cmem;
338 struct zram_meta *meta = zram->meta; 356 struct zram_meta *meta = zram->meta;
339 unsigned long handle; 357 unsigned long handle;
340 u16 size; 358 size_t size;
341 359
342 read_lock(&meta->tb_lock); 360 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
343 handle = meta->table[index].handle; 361 handle = meta->table[index].handle;
344 size = meta->table[index].size; 362 size = zram_get_obj_size(meta, index);
345 363
346 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 364 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
347 read_unlock(&meta->tb_lock); 365 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
348 clear_page(mem); 366 clear_page(mem);
349 return 0; 367 return 0;
350 } 368 }
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
355 else 373 else
356 ret = zcomp_decompress(zram->comp, cmem, size, mem); 374 ret = zcomp_decompress(zram->comp, cmem, size, mem);
357 zs_unmap_object(meta->mem_pool, handle); 375 zs_unmap_object(meta->mem_pool, handle);
358 read_unlock(&meta->tb_lock); 376 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
359 377
360 /* Should NEVER happen. Return bio error if it does. */ 378 /* Should NEVER happen. Return bio error if it does. */
361 if (unlikely(ret)) { 379 if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
376 struct zram_meta *meta = zram->meta; 394 struct zram_meta *meta = zram->meta;
377 page = bvec->bv_page; 395 page = bvec->bv_page;
378 396
379 read_lock(&meta->tb_lock); 397 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
380 if (unlikely(!meta->table[index].handle) || 398 if (unlikely(!meta->table[index].handle) ||
381 zram_test_flag(meta, index, ZRAM_ZERO)) { 399 zram_test_flag(meta, index, ZRAM_ZERO)) {
382 read_unlock(&meta->tb_lock); 400 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
383 handle_zero_page(bvec); 401 handle_zero_page(bvec);
384 return 0; 402 return 0;
385 } 403 }
386 read_unlock(&meta->tb_lock); 404 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
387 405
388 if (is_partial_io(bvec)) 406 if (is_partial_io(bvec))
389 /* Use a temporary buffer to decompress the page */ 407 /* Use a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
461 if (page_zero_filled(uncmem)) { 479 if (page_zero_filled(uncmem)) {
462 kunmap_atomic(user_mem); 480 kunmap_atomic(user_mem);
463 /* Free memory associated with this sector now. */ 481 /* Free memory associated with this sector now. */
464 write_lock(&zram->meta->tb_lock); 482 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
465 zram_free_page(zram, index); 483 zram_free_page(zram, index);
466 zram_set_flag(meta, index, ZRAM_ZERO); 484 zram_set_flag(meta, index, ZRAM_ZERO);
467 write_unlock(&zram->meta->tb_lock); 485 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
468 486
469 atomic64_inc(&zram->stats.zero_pages); 487 atomic64_inc(&zram->stats.zero_pages);
470 ret = 0; 488 ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
514 * Free memory associated with this sector 532 * Free memory associated with this sector
515 * before overwriting unused sectors. 533 * before overwriting unused sectors.
516 */ 534 */
517 write_lock(&zram->meta->tb_lock); 535 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
518 zram_free_page(zram, index); 536 zram_free_page(zram, index);
519 537
520 meta->table[index].handle = handle; 538 meta->table[index].handle = handle;
521 meta->table[index].size = clen; 539 zram_set_obj_size(meta, index, clen);
522 write_unlock(&zram->meta->tb_lock); 540 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
523 541
524 /* Update stats */ 542 /* Update stats */
525 atomic64_add(clen, &zram->stats.compr_data_size); 543 atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
560 int offset, struct bio *bio) 578 int offset, struct bio *bio)
561{ 579{
562 size_t n = bio->bi_iter.bi_size; 580 size_t n = bio->bi_iter.bi_size;
581 struct zram_meta *meta = zram->meta;
563 582
564 /* 583 /*
565 * zram manages data in physical block size units. Because logical block 584 * zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
580 } 599 }
581 600
582 while (n >= PAGE_SIZE) { 601 while (n >= PAGE_SIZE) {
583 /* 602 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
584 * Discard request can be large so the lock hold times could be
585 * lengthy. So take the lock once per page.
586 */
587 write_lock(&zram->meta->tb_lock);
588 zram_free_page(zram, index); 603 zram_free_page(zram, index);
589 write_unlock(&zram->meta->tb_lock); 604 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
590 index++; 605 index++;
591 n -= PAGE_SIZE; 606 n -= PAGE_SIZE;
592 } 607 }
@@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
821 zram = bdev->bd_disk->private_data; 836 zram = bdev->bd_disk->private_data;
822 meta = zram->meta; 837 meta = zram->meta;
823 838
824 write_lock(&meta->tb_lock); 839 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
825 zram_free_page(zram, index); 840 zram_free_page(zram, index);
826 write_unlock(&meta->tb_lock); 841 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
827 atomic64_inc(&zram->stats.notify_free); 842 atomic64_inc(&zram->stats.notify_free);
828} 843}
829 844
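
The zram_drv.c changes drop the global meta->tb_lock rwlock and protect each table entry with bit_spin_lock() on the ZRAM_ACCESS bit of that entry's value word, so accesses to different indexes no longer contend on one lock. A userspace model of the idea using C11 atomics in place of the kernel's bit_spin_lock()/bit_spin_unlock() (assumption: a single word, lock bit 26 as in the zram_drv.h hunk below):

#include <stdatomic.h>
#include <stdio.h>

#define ZRAM_ACCESS_BIT 26UL	/* matches ZRAM_ACCESS in the header hunk */

static _Atomic unsigned long entry_value;

static void entry_lock(void)
{
	/* Spin until we are the ones who set the ZRAM_ACCESS bit. */
	while (atomic_fetch_or(&entry_value, 1UL << ZRAM_ACCESS_BIT) &
	       (1UL << ZRAM_ACCESS_BIT))
		;
}

static void entry_unlock(void)
{
	atomic_fetch_and(&entry_value, ~(1UL << ZRAM_ACCESS_BIT));
}

int main(void)
{
	entry_lock();
	entry_value |= 42;	/* touch the entry under its per-entry lock */
	entry_unlock();
	printf("value=%lu\n",
	       atomic_load(&entry_value) & ~(1UL << ZRAM_ACCESS_BIT));
	return 0;
}

Because the lock bit lives above ZRAM_FLAG_SHIFT, it coexists with the size bits and the other page flags in the same word, so no separate lock field is needed.
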
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7f21c145e317..5b0afde729cd 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
43/*-- End of configurable params */ 43/*-- End of configurable params */
44 44
45#define SECTOR_SHIFT 9 45#define SECTOR_SHIFT 9
46#define SECTOR_SIZE (1 << SECTOR_SHIFT)
47#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 46#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
48#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) 47#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
49#define ZRAM_LOGICAL_BLOCK_SHIFT 12 48#define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
51#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ 50#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
52 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) 51 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
53 52
54/* Flags for zram pages (table[page_no].flags) */ 53
54/*
55 * The lower ZRAM_FLAG_SHIFT bits of table.value is for
56 * object size (excluding header), the higher bits is for
57 * zram_pageflags.
58 *
59 * zram is mainly used for memory efficiency so we want to keep memory
60 * footprint small so we can squeeze size and flags into a field.
61 * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header),
62 * the higher bits is for zram_pageflags.
63 */
64#define ZRAM_FLAG_SHIFT 24
65
66/* Flags for zram pages (table[page_no].value) */
55enum zram_pageflags { 67enum zram_pageflags {
56 /* Page consists entirely of zeros */ 68 /* Page consists entirely of zeros */
57 ZRAM_ZERO, 69 ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
 70 ZRAM_ACCESS, /* page is being accessed */
58 71
59 __NR_ZRAM_PAGEFLAGS, 72 __NR_ZRAM_PAGEFLAGS,
60}; 73};
@@ -62,11 +75,10 @@ enum zram_pageflags {
62/*-- Data structures */ 75/*-- Data structures */
63 76
64/* Allocated for each disk page */ 77/* Allocated for each disk page */
65struct table { 78struct zram_table_entry {
66 unsigned long handle; 79 unsigned long handle;
67 u16 size; /* object size (excluding header) */ 80 unsigned long value;
68 u8 flags; 81};
69} __aligned(4);
70 82
71struct zram_stats { 83struct zram_stats {
72 atomic64_t compr_data_size; /* compressed size of pages stored */ 84 atomic64_t compr_data_size; /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
81}; 93};
82 94
83struct zram_meta { 95struct zram_meta {
84 rwlock_t tb_lock; /* protect table */ 96 struct zram_table_entry *table;
85 struct table *table;
86 struct zs_pool *mem_pool; 97 struct zs_pool *mem_pool;
87}; 98};
88 99
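
The header hunk replaces the separate u16 size and u8 flags members with a single unsigned long value: the low ZRAM_FLAG_SHIFT bits carry the compressed object size and the bits above it carry the zram_pageflags (including the ZRAM_ACCESS lock bit used above). A small userspace sketch of the packing arithmetic, mirroring zram_set_obj_size()/zram_get_obj_size() from the zram_drv.c hunk (assumption: 64-bit unsigned long, no locking):

#include <stdio.h>

#define ZRAM_FLAG_SHIFT	24
#define ZRAM_ZERO	(ZRAM_FLAG_SHIFT + 1)	/* same layout as the hunk above */

static unsigned long value;	/* stands in for zram_table_entry.value */

/* Mirror of zram_set_obj_size(): keep the flag bits, replace the size. */
static void set_obj_size(unsigned long size)
{
	unsigned long flags = value >> ZRAM_FLAG_SHIFT;

	value = (flags << ZRAM_FLAG_SHIFT) | size;
}

/* Mirror of zram_get_obj_size(): mask off everything above the size. */
static unsigned long get_obj_size(void)
{
	return value & ((1UL << ZRAM_FLAG_SHIFT) - 1);
}

int main(void)
{
	value |= 1UL << ZRAM_ZERO;	/* set a flag first ... */
	set_obj_size(3172);		/* ... then store a size */

	/* Both survive side by side: prints "size=3172 zero=1". */
	printf("size=%lu zero=%lu\n", get_obj_size(),
	       (value >> ZRAM_ZERO) & 1UL);
	return 0;
}
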
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 17cf96c45f2b..79f18e6d9c4f 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
286{ 286{
287 struct firmware_map_entry *entry; 287 struct firmware_map_entry *entry;
288 288
289 entry = firmware_map_find_entry_bootmem(start, end, type); 289 entry = firmware_map_find_entry(start, end - 1, type);
290 if (entry)
291 return 0;
292
293 entry = firmware_map_find_entry_bootmem(start, end - 1, type);
290 if (!entry) { 294 if (!entry) {
291 entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); 295 entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
292 if (!entry) 296 if (!entry)
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
125 parent = &entry->head; 125 parent = &entry->head;
126 } 126 }
127 if (parent) { 127 if (parent) {
128 hlist_add_after_rcu(parent, &item->head); 128 hlist_add_behind_rcu(&item->head, parent);
129 } else { 129 } else {
130 hlist_add_head_rcu(&item->head, h_list); 130 hlist_add_head_rcu(&item->head, h_list);
131 } 131 }
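
This is the first of several call-site conversions in this series from hlist_add_after() to hlist_add_behind() (see also the i40e, ixgbe, lustre, fs/namespace and fsnotify hunks below): the operation is the same, but the argument order is swapped so the node being inserted comes first, matching list_add(). A deliberately simplified sketch of the convention (assumption: toy singly linked list, not the kernel's hlist):

#include <stdio.h>

struct node {
	int val;
	struct node *next;
};

/* Insert @new right after @prev, mirroring hlist_add_behind(new, prev). */
static void node_add_behind(struct node *new, struct node *prev)
{
	new->next = prev->next;
	prev->next = new;
}

int main(void)
{
	struct node a = { .val = 1, .next = NULL };
	struct node b = { .val = 2, .next = NULL };

	node_add_behind(&b, &a);	/* the old API read: add_after(&a, &b) */
	for (struct node *n = &a; n; n = n->next)
		printf("%d ", n->val);	/* 1 2 */
	printf("\n");
	return 0;
}

In the converted calls above, the first argument is always the new entry and the second the existing anchor it is placed behind.
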
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index ae208f612198..cccef87963e0 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
688DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, 688DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
689 atk_debugfs_gitm_get, 689 atk_debugfs_gitm_get,
690 NULL, 690 NULL,
691 "0x%08llx\n") 691 "0x%08llx\n");
692 692
693static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) 693static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
694{ 694{
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0bf1e4edf04d..6590558d1d31 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
42static __init int map_switcher(void) 42static __init int map_switcher(void)
43{ 43{
44 int i, err; 44 int i, err;
45 struct page **pagep;
46 45
47 /* 46 /*
48 * Map the Switcher in to high memory. 47 * Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
110 * This code actually sets up the pages we've allocated to appear at 109 * This code actually sets up the pages we've allocated to appear at
111 * switcher_addr. map_vm_area() takes the vma we allocated above, the 110 * switcher_addr. map_vm_area() takes the vma we allocated above, the
112 * kind of pages we're mapping (kernel pages), and a pointer to our 111 * kind of pages we're mapping (kernel pages), and a pointer to our
113 * array of struct pages. It increments that pointer, but we don't 112 * array of struct pages.
114 * care.
115 */ 113 */
116 pagep = lg_switcher_pages; 114 err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
117 err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
118 if (err) { 115 if (err) {
119 printk("lguest: map_vm_area failed: %i\n", err); 116 printk("lguest: map_vm_area failed: %i\n", err);
120 goto free_vma; 117 goto free_vma;
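
The lguest hunk (and the binder hunk further down) can drop their throwaway pagep/page_array_ptr variables because map_vm_area() apparently no longer takes a struct page *** cursor that it advances, just the page array itself. A generic illustration of the two calling conventions with toy functions (assumption: these are not the kernel APIs):

#include <stdio.h>

static void consume_old(int **cursor, int n)	/* advances *cursor */
{
	for (int i = 0; i < n; i++)
		printf("%d ", *(*cursor)++);
}

static void consume_new(int *array, int n)	/* caller keeps its pointer */
{
	for (int i = 0; i < n; i++)
		printf("%d ", array[i]);
}

int main(void)
{
	int pages[4] = { 1, 2, 3, 4 };
	int *cursor = pages;	/* the old style needed this throwaway copy */

	consume_old(&cursor, 4);
	printf("\n");
	consume_new(pages, 4);	/* new style: pass the array directly */
	printf("\n");
	return 0;
}
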
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 681a9e81ff51..e8ba7470700a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
1948 1948
1949 /* add filter to the list */ 1949 /* add filter to the list */
1950 if (parent) 1950 if (parent)
1951 hlist_add_after(&parent->fdir_node, &input->fdir_node); 1951 hlist_add_behind(&input->fdir_node, &parent->fdir_node);
1952 else 1952 else
1953 hlist_add_head(&input->fdir_node, 1953 hlist_add_head(&input->fdir_node,
1954 &pf->fdir_filter_list); 1954 &pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 94a1c07efeb0..e4100b5737b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
2517 2517
2518 /* add filter to the list */ 2518 /* add filter to the list */
2519 if (parent) 2519 if (parent)
2520 hlist_add_after(&parent->fdir_node, &input->fdir_node); 2520 hlist_add_behind(&input->fdir_node, &parent->fdir_node);
2521 else 2521 else
2522 hlist_add_head(&input->fdir_node, 2522 hlist_add_head(&input->fdir_node,
2523 &adapter->fdir_filter_list); 2523 &adapter->fdir_filter_list);
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 02b0379ae550..4f34dc0095b5 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
585 585
586 for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { 586 for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
587 int ret; 587 int ret;
588 struct page **page_array_ptr;
589 588
590 page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; 589 page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
591 590
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
598 } 597 }
599 tmp_area.addr = page_addr; 598 tmp_area.addr = page_addr;
600 tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; 599 tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
601 page_array_ptr = page; 600 ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
602 ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
603 if (ret) { 601 if (ret) {
604 pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", 602 pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
605 proc->pid, page_addr); 603 proc->pid, page_addr);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 5dde79418297..8ef1deb59d4a 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
351 cfs_hash_dhead_t, dh_head); 351 cfs_hash_dhead_t, dh_head);
352 352
353 if (dh->dh_tail != NULL) /* not empty */ 353 if (dh->dh_tail != NULL) /* not empty */
354 hlist_add_after(dh->dh_tail, hnode); 354 hlist_add_behind(hnode, dh->dh_tail);
355 else /* empty list */ 355 else /* empty list */
356 hlist_add_head(hnode, &dh->dh_head); 356 hlist_add_head(hnode, &dh->dh_head);
357 dh->dh_tail = hnode; 357 dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
406 cfs_hash_dhead_dep_t, dd_head); 406 cfs_hash_dhead_dep_t, dd_head);
407 407
408 if (dh->dd_tail != NULL) /* not empty */ 408 if (dh->dd_tail != NULL) /* not empty */
409 hlist_add_after(dh->dd_tail, hnode); 409 hlist_add_behind(hnode, dh->dd_tail);
410 else /* empty list */ 410 else /* empty list */
411 hlist_add_head(hnode, &dh->dd_head); 411 hlist_add_head(hnode, &dh->dd_head);
412 dh->dd_tail = hnode; 412 dh->dd_tail = hnode;
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 454b65898e2c..42bad18c66c9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
355 355
356static void moom_callback(struct work_struct *ignored) 356static void moom_callback(struct work_struct *ignored)
357{ 357{
358 out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, 358 out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
359 0, NULL, true); 359 0, NULL, true);
360} 360}
361 361
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index a31b83c5cbd9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
67 return ret; 67 return ret;
68} 68}
69 69
70struct ctl_table fscache_sysctls[] = { 70static struct ctl_table fscache_sysctls[] = {
71 { 71 {
72 .procname = "object_max_active", 72 .procname = "object_max_active",
73 .data = &fscache_object_max_active, 73 .data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
87 {} 87 {}
88}; 88};
89 89
90struct ctl_table fscache_sysctls_root[] = { 90static struct ctl_table fscache_sysctls_root[] = {
91 { 91 {
92 .procname = "fscache", 92 .procname = "fscache",
93 .mode = 0555, 93 .mode = 0555,
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
1019/** 1019/**
1020 * logfs_is_valid_block - check whether this block is still valid 1020 * logfs_is_valid_block - check whether this block is still valid
1021 * 1021 *
1022 * @sb - superblock 1022 * @sb: superblock
1023 * @ofs - block physical offset 1023 * @ofs: block physical offset
1024 * @ino - block inode number 1024 * @ino: block inode number
1025 * @bix - block index 1025 * @bix: block index
1026 * @level - block level 1026 * @gc_level: block level
1027 * 1027 *
1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will 1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
1029 * become invalid once the journal is written. 1029 * become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
2226 * 2226 *
2227 * @inode: parent inode (ifile or directory) 2227 * @inode: parent inode (ifile or directory)
2228 * @buf: object to write (inode or dentry) 2228 * @buf: object to write (inode or dentry)
2229 * @n: object size 2229 * @count: object size
2230 * @_pos: object number (file position in blocks/objects) 2230 * @bix: block index
2231 * @flags: write flags 2231 * @flags: write flags
2232 * @lock: 0 if write lock is already taken, 1 otherwise
2233 * @shadow_tree: shadow below this inode 2232 * @shadow_tree: shadow below this inode
2234 * 2233 *
2235 * FIXME: All caller of this put a 200-300 byte variable on the stack, 2234 * FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..2a1447c946e7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
798 list_splice(&head, n->list.prev); 798 list_splice(&head, n->list.prev);
799 799
800 if (shadows) 800 if (shadows)
801 hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); 801 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
802 else 802 else
803 hlist_add_head_rcu(&mnt->mnt_hash, 803 hlist_add_head_rcu(&mnt->mnt_hash,
804 m_hash(&parent->mnt, mnt->mnt_mountpoint)); 804 m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ee9cb3795c2b..30d3addfad75 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
70 wait_event(group->fanotify_data.access_waitq, event->response || 70 wait_event(group->fanotify_data.access_waitq, event->response ||
71 atomic_read(&group->fanotify_data.bypass_perm)); 71 atomic_read(&group->fanotify_data.bypass_perm));
72 72
73 if (!event->response) /* bypass_perm set */ 73 if (!event->response) { /* bypass_perm set */
74 /*
75 * Event was canceled because group is being destroyed. Remove
76 * it from group's event list because we are responsible for
77 * freeing the permission event.
78 */
79 fsnotify_remove_event(group, &event->fae.fse);
74 return 0; 80 return 0;
81 }
75 82
76 /* userspace responded, convert to something usable */ 83 /* userspace responded, convert to something usable */
77 switch (event->response) { 84 switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
210 return -ENOMEM; 217 return -ENOMEM;
211 218
212 fsn_event = &event->fse; 219 fsn_event = &event->fse;
213 ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); 220 ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
214 if (ret) { 221 if (ret) {
215 /* Permission events shouldn't be merged */ 222 /* Permission events shouldn't be merged */
216 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); 223 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3fdc8a3e1134..b13992a41bd9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
66 66
67 /* held the notification_mutex the whole time, so this is the 67 /* held the notification_mutex the whole time, so this is the
68 * same event we peeked above */ 68 * same event we peeked above */
69 return fsnotify_remove_notify_event(group); 69 return fsnotify_remove_first_event(group);
70} 70}
71 71
72static int create_fd(struct fsnotify_group *group, 72static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
360 struct fanotify_perm_event_info *event, *next; 360 struct fanotify_perm_event_info *event, *next;
361 361
362 /*
 363 * New events may still be arriving in the notification queue, but
 364 * since userspace can no longer use the fanotify fd, no event can
 365 * enter or leave access_list from now on.
366 */
362 spin_lock(&group->fanotify_data.access_lock); 367 spin_lock(&group->fanotify_data.access_lock);
363 368
364 atomic_inc(&group->fanotify_data.bypass_perm); 369 atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
373 } 378 }
374 spin_unlock(&group->fanotify_data.access_lock); 379 spin_unlock(&group->fanotify_data.access_lock);
375 380
381 /*
 382 * Since bypass_perm is set, newly queued events will not wait for
 383 * an access response. Wake up the ones already sleeping now.
 384 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
 385 * processes sleeping in fanotify_handle_event() for an access
 386 * response, and thus also for all permission events to be freed.
387 */
376 wake_up(&group->fanotify_data.access_waitq); 388 wake_up(&group->fanotify_data.access_waitq);
377#endif 389#endif
378 390
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
232 232
233 BUG_ON(last == NULL); 233 BUG_ON(last == NULL);
234 /* mark should be the last entry. last is the current last entry */ 234 /* mark should be the last entry. last is the current last entry */
235 hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); 235 hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
236out: 236out:
237 fsnotify_recalc_inode_mask_locked(inode); 237 fsnotify_recalc_inode_mask_locked(inode);
238 spin_unlock(&inode->i_lock); 238 spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 43ab1e1a07a2..0f88bc0b4e6c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
108 if (len) 108 if (len)
109 strcpy(event->name, file_name); 109 strcpy(event->name, file_name);
110 110
111 ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); 111 ret = fsnotify_add_event(group, fsn_event, inotify_merge);
112 if (ret) { 112 if (ret) {
113 /* Our event wasn't used in the end. Free it. */ 113 /* Our event wasn't used in the end. Free it. */
114 fsnotify_destroy_event(group, fsn_event); 114 fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cc423a30a0c8..daf76652fe58 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
149 if (fsnotify_notify_queue_is_empty(group)) 149 if (fsnotify_notify_queue_is_empty(group))
150 return NULL; 150 return NULL;
151 151
152 event = fsnotify_peek_notify_event(group); 152 event = fsnotify_peek_first_event(group);
153 153
154 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 154 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
155 155
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
159 159
160 /* held the notification_mutex the whole time, so this is the 160 /* held the notification_mutex the whole time, so this is the
161 * same event we peeked above */ 161 * same event we peeked above */
162 fsnotify_remove_notify_event(group); 162 fsnotify_remove_first_event(group);
163 163
164 return event; 164 return event;
165} 165}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1e58402171a5..a95d8e037aeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
73 /* Overflow events are per-group and we don't want to free them */ 73 /* Overflow events are per-group and we don't want to free them */
74 if (!event || event->mask == FS_Q_OVERFLOW) 74 if (!event || event->mask == FS_Q_OVERFLOW)
75 return; 75 return;
76 76 /* If the event is still queued, we have a problem... */
77 WARN_ON(!list_empty(&event->list));
77 group->ops->free_event(event); 78 group->ops->free_event(event);
78} 79}
79 80
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
83 * added to the queue, 1 if the event was merged with some other queued event, 84 * added to the queue, 1 if the event was merged with some other queued event,
84 * 2 if the queue of events has overflown. 85 * 2 if the queue of events has overflown.
85 */ 86 */
86int fsnotify_add_notify_event(struct fsnotify_group *group, 87int fsnotify_add_event(struct fsnotify_group *group,
87 struct fsnotify_event *event, 88 struct fsnotify_event *event,
88 int (*merge)(struct list_head *, 89 int (*merge)(struct list_head *,
89 struct fsnotify_event *)) 90 struct fsnotify_event *))
90{ 91{
91 int ret = 0; 92 int ret = 0;
92 struct list_head *list = &group->notification_list; 93 struct list_head *list = &group->notification_list;
@@ -125,10 +126,25 @@ queue:
125} 126}
126 127
127/* 128/*
129 * Remove @event from group's notification queue. It is the responsibility of
130 * the caller to destroy the event.
131 */
132void fsnotify_remove_event(struct fsnotify_group *group,
133 struct fsnotify_event *event)
134{
135 mutex_lock(&group->notification_mutex);
136 if (!list_empty(&event->list)) {
137 list_del_init(&event->list);
138 group->q_len--;
139 }
140 mutex_unlock(&group->notification_mutex);
141}
142
143/*
128 * Remove and return the first event from the notification list. It is the 144 * Remove and return the first event from the notification list. It is the
129 * responsibility of the caller to destroy the obtained event 145 * responsibility of the caller to destroy the obtained event
130 */ 146 */
131struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) 147struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
132{ 148{
133 struct fsnotify_event *event; 149 struct fsnotify_event *event;
134 150
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
140 struct fsnotify_event, list); 156 struct fsnotify_event, list);
141 /* 157 /*
142 * We need to init list head for the case of overflow event so that 158 * We need to init list head for the case of overflow event so that
143 * check in fsnotify_add_notify_events() works 159 * check in fsnotify_add_event() works
144 */ 160 */
145 list_del_init(&event->list); 161 list_del_init(&event->list);
146 group->q_len--; 162 group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
149} 165}
150 166
151/* 167/*
152 * This will not remove the event, that must be done with fsnotify_remove_notify_event() 168 * This will not remove the event, that must be done with
169 * fsnotify_remove_first_event()
153 */ 170 */
154struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) 171struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
155{ 172{
156 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 173 BUG_ON(!mutex_is_locked(&group->notification_mutex));
157 174
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
169 186
170 mutex_lock(&group->notification_mutex); 187 mutex_lock(&group->notification_mutex);
171 while (!fsnotify_notify_queue_is_empty(group)) { 188 while (!fsnotify_notify_queue_is_empty(group)) {
172 event = fsnotify_remove_notify_event(group); 189 event = fsnotify_remove_first_event(group);
173 fsnotify_destroy_event(group, event); 190 fsnotify_destroy_event(group, event);
174 } 191 }
175 mutex_unlock(&group->notification_mutex); 192 mutex_unlock(&group->notification_mutex);
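
fsnotify_remove_event(), added above for the fanotify shutdown path, unlinks a specific event from the group's queue only if it is still queued; this works because events are always removed with list_del_init(), so list_empty() on the event's own list node answers "is it still queued?" and the helper is safe to call even after the event has already been dequeued. A userspace sketch of that idiom (assumption: minimal hand-rolled list, not the kernel's list.h):

#include <stdbool.h>
#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h->prev = h;
}

static bool list_empty(const struct list_head *h)
{
	return h->next == h;
}

static void list_add_tail(struct list_head *n, struct list_head *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

struct event {
	struct list_head list;
	int id;
};

/* Mirror of fsnotify_remove_event(): only unlink if still queued. */
static void remove_event(struct event *ev, int *q_len)
{
	if (!list_empty(&ev->list)) {
		list_del_init(&ev->list);
		(*q_len)--;
	}
}

int main(void)
{
	struct list_head queue;
	struct event ev = { .id = 1 };
	int q_len = 0;

	INIT_LIST_HEAD(&queue);
	INIT_LIST_HEAD(&ev.list);
	list_add_tail(&ev.list, &queue);
	q_len++;

	remove_event(&ev, &q_len);
	remove_event(&ev, &q_len);	/* second call is a harmless no-op */
	printf("q_len=%d queued=%d\n", q_len, !list_empty(&ev.list));	/* 0 0 */
	return 0;
}
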
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
191 191
192 BUG_ON(last == NULL); 192 BUG_ON(last == NULL);
193 /* mark should be the last entry. last is the current last entry */ 193 /* mark should be the last entry. last is the current last entry */
194 hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); 194 hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
195out: 195out:
196 fsnotify_recalc_vfsmount_mask_locked(mnt); 196 fsnotify_recalc_vfsmount_mask_locked(mnt);
197 spin_unlock(&mnt->mnt_root->d_lock); 197 spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
75 * @ni: ntfs inode of the attribute to extend 75 * @ni: ntfs inode of the attribute to extend
76 * @new_init_size: requested new initialized size in bytes 76 * @new_init_size: requested new initialized size in bytes
77 * @cached_page: store any allocated but unused page here
78 * @lru_pvec: lru-buffering pagevec of the caller
79 * 77 *
80 * Extend the initialized size of an attribute described by the ntfs inode @ni 78 * Extend the initialized size of an attribute described by the ntfs inode @ni
81 * to @new_init_size bytes. This involves zeroing any non-sparse space between 79 * to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
395 * @nr_pages: number of page cache pages to obtain 393 * @nr_pages: number of page cache pages to obtain
396 * @pages: array of pages in which to return the obtained page cache pages 394 * @pages: array of pages in which to return the obtained page cache pages
397 * @cached_page: allocated but as yet unused page 395 * @cached_page: allocated but as yet unused page
398 * @lru_pvec: lru-buffering pagevec of caller
399 * 396 *
400 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 397 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
401 * starting at index @index. 398 * starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
4961 4961
4962 el = path_leaf_el(path); 4962 el = path_leaf_el(path);
4963 split_index = ocfs2_search_extent_list(el, cpos); 4963 split_index = ocfs2_search_extent_list(el, cpos);
4964 if (split_index == -1) {
4965 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
4966 "Owner %llu has an extent at cpos %u "
4967 "which can no longer be found.\n",
4968 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4969 cpos);
4970 ret = -EROFS;
4971 goto out;
4972 }
4964 goto leftright; 4973 goto leftright;
4965 } 4974 }
4966out: 4975out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
5135 el = path_leaf_el(left_path); 5144 el = path_leaf_el(left_path);
5136 5145
5137 index = ocfs2_search_extent_list(el, cpos); 5146 index = ocfs2_search_extent_list(el, cpos);
5138 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5147 if (index == -1) {
5139 ocfs2_error(sb, 5148 ocfs2_error(sb,
5140 "Owner %llu has an extent at cpos %u which can no " 5149 "Owner %llu has an extent at cpos %u which can no "
5141 "longer be found.\n", 5150 "longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
5491 5500
5492 el = path_leaf_el(path); 5501 el = path_leaf_el(path);
5493 index = ocfs2_search_extent_list(el, cpos); 5502 index = ocfs2_search_extent_list(el, cpos);
5494 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5503 if (index == -1) {
5495 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5504 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5496 "Owner %llu has an extent at cpos %u which can no " 5505 "Owner %llu has an extent at cpos %u which can no "
5497 "longer be found.\n", 5506 "longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
5557 5566
5558 el = path_leaf_el(path); 5567 el = path_leaf_el(path);
5559 index = ocfs2_search_extent_list(el, cpos); 5568 index = ocfs2_search_extent_list(el, cpos);
5560 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5569 if (index == -1) {
5561 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5570 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5562 "Owner %llu: split at cpos %u lost record.", 5571 "Owner %llu: split at cpos %u lost record.",
5563 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 5572 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1923 goto bail; 1923 goto bail;
1924 } 1924 }
1925 1925
1926 if (total_backoff > 1926 if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
1927 msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
1928 status = -ERESTARTSYS; 1927 status = -ERESTARTSYS;
1929 mlog(ML_NOTICE, "Timed out joining dlm domain " 1928 mlog(ML_NOTICE, "Timed out joining dlm domain "
1930 "%s after %u msecs\n", dlm->name, 1929 "%s after %u msecs\n", dlm->name,
1931 jiffies_to_msecs(total_backoff)); 1930 total_backoff);
1932 goto bail; 1931 goto bail;
1933 } 1932 }
1934 1933
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 82abf0cc9a12..3ec906ef5d9a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2405,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2405 if (res->state & DLM_LOCK_RES_MIGRATING) 2405 if (res->state & DLM_LOCK_RES_MIGRATING)
2406 return 0; 2406 return 0;
2407 2407
 2408 /* delay migration when the lockres is in RECOVERING state */
2409 if (res->state & DLM_LOCK_RES_RECOVERING)
2410 return 0;
2411
2408 if (res->owner != dlm->node_num) 2412 if (res->owner != dlm->node_num)
2409 return 0; 2413 return 0;
2410 2414
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
98 el = path_leaf_el(path); 98 el = path_leaf_el(path);
99 99
100 index = ocfs2_search_extent_list(el, cpos); 100 index = ocfs2_search_extent_list(el, cpos);
101 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 101 if (index == -1) {
102 ocfs2_error(inode->i_sb, 102 ocfs2_error(inode->i_sb,
103 "Inode %llu has an extent at cpos %u which can no " 103 "Inode %llu has an extent at cpos %u which can no "
104 "longer be found.\n", 104 "longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
3109 el = path_leaf_el(path); 3109 el = path_leaf_el(path);
3110 3110
3111 index = ocfs2_search_extent_list(el, cpos); 3111 index = ocfs2_search_extent_list(el, cpos);
3112 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 3112 if (index == -1) {
3113 ocfs2_error(sb, 3113 ocfs2_error(sb,
3114 "Inode %llu has an extent at cpos %u which can no " 3114 "Inode %llu has an extent at cpos %u which can no "
3115 "longer be found.\n", 3115 "longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
382 382
383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks); 383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
384 384
385 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, 385 si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
386 GFP_KERNEL); 386 GFP_KERNEL);
387 if (!si->si_bh) { 387 if (!si->si_bh) {
388 status = -ENOMEM; 388 status = -ENOMEM;
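
The ocfs2 slot_map hunk (like the squashfs kmalloc_array() conversion further down) replaces an open-coded sizeof(x) * n allocation with kcalloc(), whose advantage is that the count-times-size multiplication is checked for overflow instead of silently allocating a short buffer. A userspace sketch of that check (assumption: simplified, no GFP flags):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Overflow-checked array allocation, roughly what kcalloc() adds. */
static void *alloc_array(size_t n, size_t size)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return calloc(n, size);		/* calloc() also zeroes, like kcalloc() */
}

int main(void)
{
	void *ok = alloc_array(16, sizeof(void *));
	void *bad = alloc_array(SIZE_MAX / 2, 4);

	printf("ok=%p bad=%p\n", ok, bad);	/* bad is NULL */
	free(ok);
	return 0;
}
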
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
168 K(global_page_state(NR_WRITEBACK)), 168 K(global_page_state(NR_WRITEBACK)),
169 K(global_page_state(NR_ANON_PAGES)), 169 K(global_page_state(NR_ANON_PAGES)),
170 K(global_page_state(NR_FILE_MAPPED)), 170 K(global_page_state(NR_FILE_MAPPED)),
171 K(global_page_state(NR_SHMEM)), 171 K(i.sharedram),
172 K(global_page_state(NR_SLAB_RECLAIMABLE) + 172 K(global_page_state(NR_SLAB_RECLAIMABLE) +
173 global_page_state(NR_SLAB_UNRECLAIMABLE)), 173 global_page_state(NR_SLAB_UNRECLAIMABLE)),
174 K(global_page_state(NR_SLAB_RECLAIMABLE)), 174 K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee92c96..dfc791c42d64 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
925 struct mm_walk *walk) 925 struct mm_walk *walk)
926{ 926{
927 struct pagemapread *pm = walk->private; 927 struct pagemapread *pm = walk->private;
928 unsigned long addr; 928 unsigned long addr = start;
929 int err = 0; 929 int err = 0;
930 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
931 930
932 for (addr = start; addr < end; addr += PAGE_SIZE) { 931 while (addr < end) {
933 err = add_to_pagemap(addr, &pme, pm); 932 struct vm_area_struct *vma = find_vma(walk->mm, addr);
934 if (err) 933 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
935 break; 934 unsigned long vm_end;
935
936 if (!vma) {
937 vm_end = end;
938 } else {
939 vm_end = min(end, vma->vm_end);
940 if (vma->vm_flags & VM_SOFTDIRTY)
941 pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
942 }
943
944 for (; addr < vm_end; addr += PAGE_SIZE) {
945 err = add_to_pagemap(addr, &pme, pm);
946 if (err)
947 goto out;
948 }
936 } 949 }
950
951out:
937 return err; 952 return err;
938} 953}
939 954
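
The pagemap_pte_hole() rework above stops emitting one fixed "not present" entry per page and instead walks the hole in chunks bounded by the next VMA end, tagging each chunk with that VMA's soft-dirty flag. A userspace model of the chunked walk (assumption: VMAs reduced to a sorted interval table and the pagemap entry reduced to a soft-dirty bool):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 0x1000UL

struct vma {
	unsigned long vm_start, vm_end;	/* [vm_start, vm_end) */
	bool soft_dirty;
};

static const struct vma vmas[] = {
	{ 0x10000, 0x13000, true },
	{ 0x20000, 0x22000, false },
};

/* First VMA whose end lies above addr, like the kernel's find_vma(). */
static const struct vma *find_vma(unsigned long addr)
{
	for (size_t i = 0; i < sizeof(vmas) / sizeof(vmas[0]); i++)
		if (addr < vmas[i].vm_end)
			return &vmas[i];
	return NULL;
}

int main(void)
{
	unsigned long addr = 0x11000, end = 0x23000;

	while (addr < end) {
		const struct vma *vma = find_vma(addr);
		unsigned long vm_end = end;
		bool soft_dirty = false;

		if (vma) {
			vm_end = vma->vm_end < end ? vma->vm_end : end;
			soft_dirty = vma->soft_dirty;
		}
		for (; addr < vm_end; addr += PAGE_SIZE)
			printf("%#lx: not present, soft_dirty=%d\n",
			       addr, soft_dirty);
	}
	return 0;
}
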
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
44 44
45 pages = end_index - start_index + 1; 45 pages = end_index - start_index + 1;
46 46
47 page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); 47 page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
48 if (page == NULL) 48 if (page == NULL)
49 return res; 49 return res;
50 50
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
27 * the filesystem. 27 * the filesystem.
28 */ 28 */
29 29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
30#include <linux/fs.h> 32#include <linux/fs.h>
31#include <linux/vfs.h> 33#include <linux/vfs.h>
32#include <linux/slab.h> 34#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
448 return err; 450 return err;
449 } 451 }
450 452
451 printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " 453 pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
452 "Phillip Lougher\n");
453 454
454 return 0; 455 return 0;
455} 456}
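
The squashfs cleanup leans on the pr_fmt() convention: a file that defines pr_fmt() before the printk headers are pulled in gets that prefix prepended to every pr_*() call, so the literal "squashfs: " string can go away. A hedged sketch of the pattern in a hypothetical module (demo_init is illustrative):

    /* Must appear before any include that drags in <linux/printk.h>. */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    #include <linux/module.h>
    #include <linux/printk.h>

    static int __init demo_init(void)
    {
            pr_info("version 1.0 loaded\n");  /* prints "<modname>: version 1.0 loaded" */
            return 0;
    }
    module_init(demo_init);
    MODULE_LICENSE("GPL");
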
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7ad634501e48..e1c8d080c427 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -88,32 +88,32 @@
88 * lib/bitmap.c provides these functions: 88 * lib/bitmap.c provides these functions:
89 */ 89 */
90 90
91extern int __bitmap_empty(const unsigned long *bitmap, int bits); 91extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
92extern int __bitmap_full(const unsigned long *bitmap, int bits); 92extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
93extern int __bitmap_equal(const unsigned long *bitmap1, 93extern int __bitmap_equal(const unsigned long *bitmap1,
94 const unsigned long *bitmap2, int bits); 94 const unsigned long *bitmap2, unsigned int nbits);
95extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, 95extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
96 int bits); 96 unsigned int nbits);
97extern void __bitmap_shift_right(unsigned long *dst, 97extern void __bitmap_shift_right(unsigned long *dst,
98 const unsigned long *src, int shift, int bits); 98 const unsigned long *src, int shift, int bits);
99extern void __bitmap_shift_left(unsigned long *dst, 99extern void __bitmap_shift_left(unsigned long *dst,
100 const unsigned long *src, int shift, int bits); 100 const unsigned long *src, int shift, int bits);
101extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 101extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
102 const unsigned long *bitmap2, int bits); 102 const unsigned long *bitmap2, unsigned int nbits);
103extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 103extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
104 const unsigned long *bitmap2, int bits); 104 const unsigned long *bitmap2, unsigned int nbits);
105extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 105extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
106 const unsigned long *bitmap2, int bits); 106 const unsigned long *bitmap2, unsigned int nbits);
107extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 107extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
108 const unsigned long *bitmap2, int bits); 108 const unsigned long *bitmap2, unsigned int nbits);
109extern int __bitmap_intersects(const unsigned long *bitmap1, 109extern int __bitmap_intersects(const unsigned long *bitmap1,
110 const unsigned long *bitmap2, int bits); 110 const unsigned long *bitmap2, unsigned int nbits);
111extern int __bitmap_subset(const unsigned long *bitmap1, 111extern int __bitmap_subset(const unsigned long *bitmap1,
112 const unsigned long *bitmap2, int bits); 112 const unsigned long *bitmap2, unsigned int nbits);
113extern int __bitmap_weight(const unsigned long *bitmap, int bits); 113extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
114 114
115extern void bitmap_set(unsigned long *map, int i, int len); 115extern void bitmap_set(unsigned long *map, unsigned int start, int len);
116extern void bitmap_clear(unsigned long *map, int start, int nr); 116extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
117extern unsigned long bitmap_find_next_zero_area(unsigned long *map, 117extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
118 unsigned long size, 118 unsigned long size,
119 unsigned long start, 119 unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
140 const unsigned long *relmap, int bits); 140 const unsigned long *relmap, int bits);
141extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, 141extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
142 int sz, int bits); 142 int sz, int bits);
143extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); 143extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
144extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); 144extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
145extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); 145extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); 146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); 147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
148 148
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
188} 188}
189 189
190static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, 190static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
191 const unsigned long *src2, int nbits) 191 const unsigned long *src2, unsigned int nbits)
192{ 192{
193 if (small_const_nbits(nbits)) 193 if (small_const_nbits(nbits))
194 return (*dst = *src1 & *src2) != 0; 194 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
195 return __bitmap_and(dst, src1, src2, nbits); 195 return __bitmap_and(dst, src1, src2, nbits);
196} 196}
197 197
198static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, 198static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
199 const unsigned long *src2, int nbits) 199 const unsigned long *src2, unsigned int nbits)
200{ 200{
201 if (small_const_nbits(nbits)) 201 if (small_const_nbits(nbits))
202 *dst = *src1 | *src2; 202 *dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
205} 205}
206 206
207static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, 207static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
208 const unsigned long *src2, int nbits) 208 const unsigned long *src2, unsigned int nbits)
209{ 209{
210 if (small_const_nbits(nbits)) 210 if (small_const_nbits(nbits))
211 *dst = *src1 ^ *src2; 211 *dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
214} 214}
215 215
216static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, 216static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
217 const unsigned long *src2, int nbits) 217 const unsigned long *src2, unsigned int nbits)
218{ 218{
219 if (small_const_nbits(nbits)) 219 if (small_const_nbits(nbits))
220 return (*dst = *src1 & ~(*src2)) != 0; 220 return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
221 return __bitmap_andnot(dst, src1, src2, nbits); 221 return __bitmap_andnot(dst, src1, src2, nbits);
222} 222}
223 223
224static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, 224static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
225 int nbits) 225 unsigned int nbits)
226{ 226{
227 if (small_const_nbits(nbits)) 227 if (small_const_nbits(nbits))
228 *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); 228 *dst = ~(*src);
229 else 229 else
230 __bitmap_complement(dst, src, nbits); 230 __bitmap_complement(dst, src, nbits);
231} 231}
232 232
233static inline int bitmap_equal(const unsigned long *src1, 233static inline int bitmap_equal(const unsigned long *src1,
234 const unsigned long *src2, int nbits) 234 const unsigned long *src2, unsigned int nbits)
235{ 235{
236 if (small_const_nbits(nbits)) 236 if (small_const_nbits(nbits))
237 return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); 237 return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
240} 240}
241 241
242static inline int bitmap_intersects(const unsigned long *src1, 242static inline int bitmap_intersects(const unsigned long *src1,
243 const unsigned long *src2, int nbits) 243 const unsigned long *src2, unsigned int nbits)
244{ 244{
245 if (small_const_nbits(nbits)) 245 if (small_const_nbits(nbits))
246 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; 246 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
249} 249}
250 250
251static inline int bitmap_subset(const unsigned long *src1, 251static inline int bitmap_subset(const unsigned long *src1,
252 const unsigned long *src2, int nbits) 252 const unsigned long *src2, unsigned int nbits)
253{ 253{
254 if (small_const_nbits(nbits)) 254 if (small_const_nbits(nbits))
255 return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); 255 return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
257 return __bitmap_subset(src1, src2, nbits); 257 return __bitmap_subset(src1, src2, nbits);
258} 258}
259 259
260static inline int bitmap_empty(const unsigned long *src, int nbits) 260static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
261{ 261{
262 if (small_const_nbits(nbits)) 262 if (small_const_nbits(nbits))
263 return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); 263 return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
265 return __bitmap_empty(src, nbits); 265 return __bitmap_empty(src, nbits);
266} 266}
267 267
268static inline int bitmap_full(const unsigned long *src, int nbits) 268static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
269{ 269{
270 if (small_const_nbits(nbits)) 270 if (small_const_nbits(nbits))
271 return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); 271 return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
273 return __bitmap_full(src, nbits); 273 return __bitmap_full(src, nbits);
274} 274}
275 275
276static inline int bitmap_weight(const unsigned long *src, int nbits) 276static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
277{ 277{
278 if (small_const_nbits(nbits)) 278 if (small_const_nbits(nbits))
279 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); 279 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
284 const unsigned long *src, int n, int nbits) 284 const unsigned long *src, int n, int nbits)
285{ 285{
286 if (small_const_nbits(nbits)) 286 if (small_const_nbits(nbits))
287 *dst = *src >> n; 287 *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
288 else 288 else
289 __bitmap_shift_right(dst, src, n, nbits); 289 __bitmap_shift_right(dst, src, n, nbits);
290} 290}
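
The bitmap prototypes keep their semantics; the bit-count parameter simply becomes unsigned, and several small_const_nbits() fast paths now mask the last word so callers cannot observe bits beyond nbits. A short usage sketch with a compile-time-constant size, so the single-word inline paths shown above are the ones taken (count_common_bits is illustrative):

    #include <linux/bitmap.h>
    #include <linux/types.h>

    static int count_common_bits(void)
    {
            DECLARE_BITMAP(a, 16);
            DECLARE_BITMAP(b, 16);
            DECLARE_BITMAP(both, 16);

            bitmap_zero(a, 16);
            bitmap_zero(b, 16);
            bitmap_set(a, 0, 8);            /* bits 0..7  */
            bitmap_set(b, 4, 8);            /* bits 4..11 */

            bitmap_and(both, a, b, 16);     /* inline, masked single-word path */
            return bitmap_weight(both, 16); /* 4 */
    }
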
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 0846e6b931ce..89f67c1c3160 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -2,7 +2,7 @@
2#define _LINUX_BYTEORDER_GENERIC_H 2#define _LINUX_BYTEORDER_GENERIC_H
3 3
4/* 4/*
5 * linux/byteorder_generic.h 5 * linux/byteorder/generic.h
6 * Generic Byte-reordering support 6 * Generic Byte-reordering support
7 * 7 *
8 * The "... p" macros, like le64_to_cpup, can be used with pointers 8 * The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..371b93042520
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
1#ifndef __CMA_H__
2#define __CMA_H__
3
4/*
5 * There is always at least global CMA area and a few optional
6 * areas configured in kernel .config.
7 */
8#ifdef CONFIG_CMA_AREAS
9#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
10
11#else
12#define MAX_CMA_AREAS (0)
13
14#endif
15
16struct cma;
17
18extern phys_addr_t cma_get_base(struct cma *cma);
19extern unsigned long cma_get_size(struct cma *cma);
20
21extern int __init cma_declare_contiguous(phys_addr_t size,
22 phys_addr_t base, phys_addr_t limit,
23 phys_addr_t alignment, unsigned int order_per_bit,
24 bool fixed, struct cma **res_cma);
25extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
26extern bool cma_release(struct cma *cma, struct page *pages, int count);
27#endif
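
This header is the whole public CMA interface at this point in the series: reserve a region while memblock is still live, then hand out physically contiguous pages from it at runtime. A rough sketch using the signatures exactly as declared above (the size, the demo_* names, and the calling context are assumptions; later kernels reorder these parameters):

    #include <linux/cma.h>
    #include <linux/init.h>
    #include <linux/mm.h>
    #include <linux/sizes.h>

    static struct cma *demo_cma;

    /* Early boot: carve out 16 MiB anywhere below the default limit. */
    static int __init demo_cma_reserve(void)
    {
            return cma_declare_contiguous(SZ_16M, 0, 0, 0, 0, false, &demo_cma);
    }

    /* Runtime: grab and immediately release four contiguous pages. */
    static bool demo_cma_roundtrip(void)
    {
            struct page *pages = cma_alloc(demo_cma, 4, 0);

            return pages ? cma_release(demo_cma, pages, 4) : false;
    }
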
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
53 53
54#ifdef __KERNEL__ 54#ifdef __KERNEL__
55 55
56#include <linux/device.h>
57
56struct cma; 58struct cma;
57struct page; 59struct page;
58struct device;
59 60
60#ifdef CONFIG_DMA_CMA 61#ifdef CONFIG_DMA_CMA
61 62
62/*
63 * There is always at least global CMA area and a few optional device
64 * private areas configured in kernel .config.
65 */
66#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
67
68extern struct cma *dma_contiguous_default_area; 63extern struct cma *dma_contiguous_default_area;
69 64
70static inline struct cma *dev_get_cma_area(struct device *dev) 65static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
123 118
124#else 119#else
125 120
126#define MAX_CMA_AREAS (0)
127
128static inline struct cma *dev_get_cma_area(struct device *dev) 121static inline struct cma *dev_get_cma_area(struct device *dev)
129{ 122{
130 return NULL; 123 return NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2daccaf4b547..1ab6c6913040 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \
2688 .read = simple_attr_read, \ 2688 .read = simple_attr_read, \
2689 .write = simple_attr_write, \ 2689 .write = simple_attr_write, \
2690 .llseek = generic_file_llseek, \ 2690 .llseek = generic_file_llseek, \
2691}; 2691}
2692 2692
2693static inline __printf(1, 2) 2693static inline __printf(1, 2)
2694void __simple_attr_check_format(const char *fmt, ...) 2694void __simple_attr_check_format(const char *fmt, ...)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index fc7718c6bd3e..ca060d7c4fa6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -322,16 +322,18 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
322extern void fsnotify_destroy_event(struct fsnotify_group *group, 322extern void fsnotify_destroy_event(struct fsnotify_group *group,
323 struct fsnotify_event *event); 323 struct fsnotify_event *event);
324/* attach the event to the group notification queue */ 324/* attach the event to the group notification queue */
325extern int fsnotify_add_notify_event(struct fsnotify_group *group, 325extern int fsnotify_add_event(struct fsnotify_group *group,
326 struct fsnotify_event *event, 326 struct fsnotify_event *event,
327 int (*merge)(struct list_head *, 327 int (*merge)(struct list_head *,
328 struct fsnotify_event *)); 328 struct fsnotify_event *));
329/* Remove passed event from groups notification queue */
330extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
329/* true if the group notification queue is empty */ 331/* true if the group notification queue is empty */
330extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); 332extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
331/* return, but do not dequeue the first event on the notification queue */ 333/* return, but do not dequeue the first event on the notification queue */
332extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); 334extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
333/* return AND dequeue the first event on the notification queue */ 335/* return AND dequeue the first event on the notification queue */
334extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); 336extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
335 337
336/* functions used to manipulate the marks attached to inodes */ 338/* functions used to manipulate the marks attached to inodes */
337 339
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
360void *alloc_pages_exact(size_t size, gfp_t gfp_mask); 360void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
361void free_pages_exact(void *virt, size_t size); 361void free_pages_exact(void *virt, size_t size);
362/* This is different from alloc_pages_exact_node !!! */ 362/* This is different from alloc_pages_exact_node !!! */
363void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); 363void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
364 364
365#define __get_free_page(gfp_mask) \ 365#define __get_free_page(gfp_mask) \
366 __get_free_pages((gfp_mask), 0) 366 __get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..861d8347d08e
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,9 @@
1#ifndef _LINUX_GLOB_H
2#define _LINUX_GLOB_H
3
4#include <linux/types.h> /* For bool */
5#include <linux/compiler.h> /* For __pure */
6
7bool __pure glob_match(char const *pat, char const *str);
8
9#endif /* _LINUX_GLOB_H */
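
glob_match() gives kernel code shell-style wildcard matching (*, ? and [] ranges) over NUL-terminated strings, and __pure makes it cheap to call repeatedly in filter paths. A tiny sketch assuming only what the header declares (is_trace_file is illustrative):

    #include <linux/glob.h>

    static bool is_trace_file(const char *name)
    {
            /* true for "trace-0001.dat", false for "trace.txt" */
            return glob_match("trace-*.dat", name);
    }
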
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7fb31da45d03..9286a46b7d69 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void)
93 93
94#ifdef CONFIG_DEBUG_HIGHMEM 94#ifdef CONFIG_DEBUG_HIGHMEM
95 WARN_ON_ONCE(in_irq() && !irqs_disabled()); 95 WARN_ON_ONCE(in_irq() && !irqs_disabled());
96 BUG_ON(idx > KM_TYPE_NR); 96 BUG_ON(idx >= KM_TYPE_NR);
97#endif 97#endif
98 return idx; 98 return idx;
99} 99}
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b826239bdce0..63579cb8d3dc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
93#endif /* CONFIG_DEBUG_VM */ 93#endif /* CONFIG_DEBUG_VM */
94 94
95extern unsigned long transparent_hugepage_flags; 95extern unsigned long transparent_hugepage_flags;
96extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
97 pmd_t *dst_pmd, pmd_t *src_pmd,
98 struct vm_area_struct *vma,
99 unsigned long addr, unsigned long end);
100extern int split_huge_page_to_list(struct page *page, struct list_head *list); 96extern int split_huge_page_to_list(struct page *page, struct list_head *list);
101static inline int split_huge_page(struct page *page) 97static inline int split_huge_page(struct page *page)
102{ 98{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a23c096b3080..6e6d338641fe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
87#endif 87#endif
88 88
89extern unsigned long hugepages_treat_as_movable; 89extern unsigned long hugepages_treat_as_movable;
90extern const unsigned long hugetlb_zero, hugetlb_infinity;
91extern int sysctl_hugetlb_shm_group; 90extern int sysctl_hugetlb_shm_group;
92extern struct list_head huge_boot_pages; 91extern struct list_head huge_boot_pages;
93 92
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..3dc22abbc68a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
493 return buf; 493 return buf;
494} 494}
495 495
496static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
497{
498 return hex_byte_pack(buf, byte);
499}
500
501extern int hex_to_bin(char ch); 496extern int hex_to_bin(char ch);
502extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); 497extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
503 498
diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
44 44
45extern void klist_add_tail(struct klist_node *n, struct klist *k); 45extern void klist_add_tail(struct klist_node *n, struct klist *k);
46extern void klist_add_head(struct klist_node *n, struct klist *k); 46extern void klist_add_head(struct klist_node *n, struct klist *k);
47extern void klist_add_after(struct klist_node *n, struct klist_node *pos); 47extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
48extern void klist_add_before(struct klist_node *n, struct klist_node *pos); 48extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
49 49
50extern void klist_del(struct klist_node *n); 50extern void klist_del(struct klist_node *n);
diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
654 *(n->pprev) = n; 654 *(n->pprev) = n;
655} 655}
656 656
657static inline void hlist_add_after(struct hlist_node *n, 657static inline void hlist_add_behind(struct hlist_node *n,
658 struct hlist_node *next) 658 struct hlist_node *prev)
659{ 659{
660 next->next = n->next; 660 n->next = prev->next;
661 n->next = next; 661 prev->next = n;
662 next->pprev = &n->next; 662 n->pprev = &prev->next;
663 663
664 if(next->next) 664 if (n->next)
665 next->next->pprev = &next->next; 665 n->next->pprev = &n->next;
666} 666}
667 667
668/* after that we'll appear to be on some hlist and hlist_del will work */ 668/* after that we'll appear to be on some hlist and hlist_del will work */
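
Besides the rename, hlist_add_behind() swaps the argument order so the node being inserted comes first, mirroring hlist_add_before(). A sketch of inserting after a known cursor node; struct demo_item and both function names are illustrative:

    #include <linux/list.h>

    struct demo_item {
            int value;
            struct hlist_node node;
    };

    /* Link @new_item directly after @cursor, which is already on an hlist. */
    static void demo_insert_behind(struct demo_item *new_item,
                                   struct demo_item *cursor)
    {
            hlist_add_behind(&new_item->node, &cursor->node);
    }
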
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
249/* 249/*
250 * Set the allocation direction to bottom-up or top-down. 250 * Set the allocation direction to bottom-up or top-down.
251 */ 251 */
252static inline void memblock_set_bottom_up(bool enable) 252static inline void __init memblock_set_bottom_up(bool enable)
253{ 253{
254 memblock.bottom_up = enable; 254 memblock.bottom_up = enable;
255} 255}
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
264 return memblock.bottom_up; 264 return memblock.bottom_up;
265} 265}
266#else 266#else
267static inline void memblock_set_bottom_up(bool enable) {} 267static inline void __init memblock_set_bottom_up(bool enable) {}
268static inline bool memblock_bottom_up(void) { return false; } 268static inline bool memblock_bottom_up(void) { return false; }
269#endif 269#endif
270 270
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..d9524c49d767 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
26 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, 26 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
27}; 27};
28 28
29/* Types for control the zone type of onlined memory */ 29/* Types for control the zone type of onlined and offlined memory */
30enum { 30enum {
31 ONLINE_KEEP, 31 MMOP_OFFLINE = -1,
32 ONLINE_KERNEL, 32 MMOP_ONLINE_KEEP,
33 ONLINE_MOVABLE, 33 MMOP_ONLINE_KERNEL,
34 MMOP_ONLINE_MOVABLE,
34}; 35};
35 36
36/* 37/*
@@ -258,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
258extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, 259extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
259 void *arg, int (*func)(struct memory_block *, void *)); 260 void *arg, int (*func)(struct memory_block *, void *));
260extern int add_memory(int nid, u64 start, u64 size); 261extern int add_memory(int nid, u64 start, u64 size);
262extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
261extern int arch_add_memory(int nid, u64 start, u64 size); 263extern int arch_add_memory(int nid, u64 start, u64 size);
262extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); 264extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
263extern bool is_memblock_offlined(struct memory_block *mem); 265extern bool is_memblock_offlined(struct memory_block *mem);
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index edd82a105220..2f348d02f640 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
20 } while (0) 20 } while (0)
21#define VM_WARN_ON(cond) WARN_ON(cond) 21#define VM_WARN_ON(cond) WARN_ON(cond)
22#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) 22#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
23#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
23#else 24#else
24#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) 25#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
25#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) 26#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
26#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) 27#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
27#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) 28#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
29#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
28#endif 30#endif
29 31
30#ifdef CONFIG_DEBUG_VIRTUAL 32#ifdef CONFIG_DEBUG_VIRTUAL
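
VM_WARN_ONCE() fills the gap next to VM_WARN_ON_ONCE(): with CONFIG_DEBUG_VM it behaves like WARN_ONCE() and prints the supplied message once, without it the condition is only type-checked at build time, so it must stay free of side effects. A one-line usage sketch (the specific check is illustrative, not from the patch):

    #include <linux/mm.h>
    #include <linux/mmdebug.h>

    static void demo_check_mapcount(struct page *page)
    {
            VM_WARN_ONCE(page_mapcount(page) < 0,
                         "negative mapcount on page %p\n", page);
    }
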
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index deca87452528..27288692241e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
170 struct mm_struct *mm); 170 struct mm_struct *mm);
171extern void mmu_notifier_unregister(struct mmu_notifier *mn, 171extern void mmu_notifier_unregister(struct mmu_notifier *mn,
172 struct mm_struct *mm); 172 struct mm_struct *mm);
173extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
174 struct mm_struct *mm);
173extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); 175extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
174extern void __mmu_notifier_release(struct mm_struct *mm); 176extern void __mmu_notifier_release(struct mm_struct *mm);
175extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 177extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
288 set_pte_at(___mm, ___address, __ptep, ___pte); \ 290 set_pte_at(___mm, ___address, __ptep, ___pte); \
289}) 291})
290 292
293extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
294 void (*func)(struct rcu_head *rcu));
295extern void mmu_notifier_synchronize(void);
296
291#else /* CONFIG_MMU_NOTIFIER */ 297#else /* CONFIG_MMU_NOTIFIER */
292 298
293static inline void mmu_notifier_release(struct mm_struct *mm) 299static inline void mmu_notifier_release(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
143 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ 143 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
144 NR_DIRTIED, /* page dirtyings since bootup */ 144 NR_DIRTIED, /* page dirtyings since bootup */
145 NR_WRITTEN, /* page writings since bootup */ 145 NR_WRITTEN, /* page writings since bootup */
146 NR_PAGES_SCANNED, /* pages scanned since last reclaim */
146#ifdef CONFIG_NUMA 147#ifdef CONFIG_NUMA
147 NUMA_HIT, /* allocated in intended node */ 148 NUMA_HIT, /* allocated in intended node */
148 NUMA_MISS, /* allocated in non intended node */ 149 NUMA_MISS, /* allocated in non intended node */
@@ -324,19 +325,12 @@ enum zone_type {
324#ifndef __GENERATING_BOUNDS_H 325#ifndef __GENERATING_BOUNDS_H
325 326
326struct zone { 327struct zone {
327 /* Fields commonly accessed by the page allocator */ 328 /* Read-mostly fields */
328 329
329 /* zone watermarks, access with *_wmark_pages(zone) macros */ 330 /* zone watermarks, access with *_wmark_pages(zone) macros */
330 unsigned long watermark[NR_WMARK]; 331 unsigned long watermark[NR_WMARK];
331 332
332 /* 333 /*
333 * When free pages are below this point, additional steps are taken
334 * when reading the number of free pages to avoid per-cpu counter
335 * drift allowing watermarks to be breached
336 */
337 unsigned long percpu_drift_mark;
338
339 /*
340 * We don't know if the memory that we're going to allocate will be freeable 334 * We don't know if the memory that we're going to allocate will be freeable
341 * or/and it will be released eventually, so to avoid totally wasting several 335 * or/and it will be released eventually, so to avoid totally wasting several
342 * GB of ram we must reserve some of the lower zone memory (otherwise we risk 336 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
@@ -344,41 +338,26 @@ struct zone {
344 * on the higher zones). This array is recalculated at runtime if the 338 * on the higher zones). This array is recalculated at runtime if the
345 * sysctl_lowmem_reserve_ratio sysctl changes. 339 * sysctl_lowmem_reserve_ratio sysctl changes.
346 */ 340 */
347 unsigned long lowmem_reserve[MAX_NR_ZONES]; 341 long lowmem_reserve[MAX_NR_ZONES];
348
349 /*
350 * This is a per-zone reserve of pages that should not be
351 * considered dirtyable memory.
352 */
353 unsigned long dirty_balance_reserve;
354 342
355#ifdef CONFIG_NUMA 343#ifdef CONFIG_NUMA
356 int node; 344 int node;
345#endif
346
357 /* 347 /*
358 * zone reclaim becomes active if more unmapped pages exist. 348 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
349 * this zone's LRU. Maintained by the pageout code.
359 */ 350 */
360 unsigned long min_unmapped_pages; 351 unsigned int inactive_ratio;
361 unsigned long min_slab_pages; 352
362#endif 353 struct pglist_data *zone_pgdat;
363 struct per_cpu_pageset __percpu *pageset; 354 struct per_cpu_pageset __percpu *pageset;
355
364 /* 356 /*
365 * free areas of different sizes 357 * This is a per-zone reserve of pages that should not be
358 * considered dirtyable memory.
366 */ 359 */
367 spinlock_t lock; 360 unsigned long dirty_balance_reserve;
368#if defined CONFIG_COMPACTION || defined CONFIG_CMA
369 /* Set to true when the PG_migrate_skip bits should be cleared */
370 bool compact_blockskip_flush;
371
372 /* pfn where compaction free scanner should start */
373 unsigned long compact_cached_free_pfn;
374 /* pfn where async and sync compaction migration scanner should start */
375 unsigned long compact_cached_migrate_pfn[2];
376#endif
377#ifdef CONFIG_MEMORY_HOTPLUG
378 /* see spanned/present_pages for more description */
379 seqlock_t span_seqlock;
380#endif
381 struct free_area free_area[MAX_ORDER];
382 361
383#ifndef CONFIG_SPARSEMEM 362#ifndef CONFIG_SPARSEMEM
384 /* 363 /*
@@ -388,74 +367,14 @@ struct zone {
388 unsigned long *pageblock_flags; 367 unsigned long *pageblock_flags;
389#endif /* CONFIG_SPARSEMEM */ 368#endif /* CONFIG_SPARSEMEM */
390 369
391#ifdef CONFIG_COMPACTION 370#ifdef CONFIG_NUMA
392 /*
393 * On compaction failure, 1<<compact_defer_shift compactions
394 * are skipped before trying again. The number attempted since
395 * last failure is tracked with compact_considered.
396 */
397 unsigned int compact_considered;
398 unsigned int compact_defer_shift;
399 int compact_order_failed;
400#endif
401
402 ZONE_PADDING(_pad1_)
403
404 /* Fields commonly accessed by the page reclaim scanner */
405 spinlock_t lru_lock;
406 struct lruvec lruvec;
407
408 /* Evictions & activations on the inactive file list */
409 atomic_long_t inactive_age;
410
411 unsigned long pages_scanned; /* since last reclaim */
412 unsigned long flags; /* zone flags, see below */
413
414 /* Zone statistics */
415 atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
416
417 /*
418 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
419 * this zone's LRU. Maintained by the pageout code.
420 */
421 unsigned int inactive_ratio;
422
423
424 ZONE_PADDING(_pad2_)
425 /* Rarely used or read-mostly fields */
426
427 /* 371 /*
428 * wait_table -- the array holding the hash table 372 * zone reclaim becomes active if more unmapped pages exist.
429 * wait_table_hash_nr_entries -- the size of the hash table array
430 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
431 *
432 * The purpose of all these is to keep track of the people
433 * waiting for a page to become available and make them
434 * runnable again when possible. The trouble is that this
435 * consumes a lot of space, especially when so few things
436 * wait on pages at a given time. So instead of using
437 * per-page waitqueues, we use a waitqueue hash table.
438 *
439 * The bucket discipline is to sleep on the same queue when
440 * colliding and wake all in that wait queue when removing.
441 * When something wakes, it must check to be sure its page is
442 * truly available, a la thundering herd. The cost of a
443 * collision is great, but given the expected load of the
444 * table, they should be so rare as to be outweighed by the
445 * benefits from the saved space.
446 *
447 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
448 * primary users of these fields, and in mm/page_alloc.c
449 * free_area_init_core() performs the initialization of them.
450 */ 373 */
451 wait_queue_head_t * wait_table; 374 unsigned long min_unmapped_pages;
452 unsigned long wait_table_hash_nr_entries; 375 unsigned long min_slab_pages;
453 unsigned long wait_table_bits; 376#endif /* CONFIG_NUMA */
454 377
455 /*
456 * Discontig memory support fields.
457 */
458 struct pglist_data *zone_pgdat;
459 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 378 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
460 unsigned long zone_start_pfn; 379 unsigned long zone_start_pfn;
461 380
@@ -500,9 +419,11 @@ struct zone {
500 * adjust_managed_page_count() should be used instead of directly 419 * adjust_managed_page_count() should be used instead of directly
501 * touching zone->managed_pages and totalram_pages. 420 * touching zone->managed_pages and totalram_pages.
502 */ 421 */
422 unsigned long managed_pages;
503 unsigned long spanned_pages; 423 unsigned long spanned_pages;
504 unsigned long present_pages; 424 unsigned long present_pages;
505 unsigned long managed_pages; 425
426 const char *name;
506 427
507 /* 428 /*
508 * Number of MIGRATE_RESEVE page block. To maintain for just 429 * Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
510 */ 431 */
511 int nr_migrate_reserve_block; 432 int nr_migrate_reserve_block;
512 433
434#ifdef CONFIG_MEMORY_HOTPLUG
435 /* see spanned/present_pages for more description */
436 seqlock_t span_seqlock;
437#endif
438
513 /* 439 /*
514 * rarely used fields: 440 * wait_table -- the array holding the hash table
441 * wait_table_hash_nr_entries -- the size of the hash table array
442 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
443 *
444 * The purpose of all these is to keep track of the people
445 * waiting for a page to become available and make them
446 * runnable again when possible. The trouble is that this
447 * consumes a lot of space, especially when so few things
448 * wait on pages at a given time. So instead of using
449 * per-page waitqueues, we use a waitqueue hash table.
450 *
451 * The bucket discipline is to sleep on the same queue when
452 * colliding and wake all in that wait queue when removing.
453 * When something wakes, it must check to be sure its page is
454 * truly available, a la thundering herd. The cost of a
455 * collision is great, but given the expected load of the
456 * table, they should be so rare as to be outweighed by the
457 * benefits from the saved space.
458 *
459 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
460 * primary users of these fields, and in mm/page_alloc.c
461 * free_area_init_core() performs the initialization of them.
515 */ 462 */
516 const char *name; 463 wait_queue_head_t *wait_table;
464 unsigned long wait_table_hash_nr_entries;
465 unsigned long wait_table_bits;
466
467 ZONE_PADDING(_pad1_)
468
469 /* Write-intensive fields used from the page allocator */
470 spinlock_t lock;
471
472 /* free areas of different sizes */
473 struct free_area free_area[MAX_ORDER];
474
475 /* zone flags, see below */
476 unsigned long flags;
477
478 ZONE_PADDING(_pad2_)
479
480 /* Write-intensive fields used by page reclaim */
481
482 /* Fields commonly accessed by the page reclaim scanner */
483 spinlock_t lru_lock;
484 struct lruvec lruvec;
485
486 /* Evictions & activations on the inactive file list */
487 atomic_long_t inactive_age;
488
489 /*
490 * When free pages are below this point, additional steps are taken
491 * when reading the number of free pages to avoid per-cpu counter
492 * drift allowing watermarks to be breached
493 */
494 unsigned long percpu_drift_mark;
495
496#if defined CONFIG_COMPACTION || defined CONFIG_CMA
497 /* pfn where compaction free scanner should start */
498 unsigned long compact_cached_free_pfn;
499 /* pfn where async and sync compaction migration scanner should start */
500 unsigned long compact_cached_migrate_pfn[2];
501#endif
502
503#ifdef CONFIG_COMPACTION
504 /*
505 * On compaction failure, 1<<compact_defer_shift compactions
506 * are skipped before trying again. The number attempted since
507 * last failure is tracked with compact_considered.
508 */
509 unsigned int compact_considered;
510 unsigned int compact_defer_shift;
511 int compact_order_failed;
512#endif
513
514#if defined CONFIG_COMPACTION || defined CONFIG_CMA
515 /* Set to true when the PG_migrate_skip bits should be cleared */
516 bool compact_blockskip_flush;
517#endif
518
519 ZONE_PADDING(_pad3_)
520 /* Zone statistics */
521 atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
517} ____cacheline_internodealigned_in_smp; 522} ____cacheline_internodealigned_in_smp;
518 523
519typedef enum { 524typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
529 ZONE_WRITEBACK, /* reclaim scanning has recently found 534 ZONE_WRITEBACK, /* reclaim scanning has recently found
530 * many pages under writeback 535 * many pages under writeback
531 */ 536 */
537 ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
532} zone_flags_t; 538} zone_flags_t;
533 539
534static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) 540static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
566 return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); 572 return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
567} 573}
568 574
575static inline int zone_is_fair_depleted(const struct zone *zone)
576{
577 return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
578}
579
569static inline int zone_is_oom_locked(const struct zone *zone) 580static inline int zone_is_oom_locked(const struct zone *zone)
570{ 581{
571 return test_bit(ZONE_OOM_LOCKED, &zone->flags); 582 return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
872{ 883{
873#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) 884#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
874 return movable_zone == ZONE_HIGHMEM; 885 return movable_zone == ZONE_HIGHMEM;
886#elif defined(CONFIG_HIGHMEM)
887 return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
875#else 888#else
876 return 0; 889 return 0;
877#endif 890#endif
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 58b9a02c38d2..83a6aeda899d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
430 for_each_node_mask((__node), node_states[__state]) 430 for_each_node_mask((__node), node_states[__state])
431 431
432#define first_online_node first_node(node_states[N_ONLINE]) 432#define first_online_node first_node(node_states[N_ONLINE])
433#define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) 433#define first_memory_node first_node(node_states[N_MEMORY])
434static inline int next_online_node(int nid)
435{
436 return next_node(nid, node_states[N_ONLINE]);
437}
438static inline int next_memory_node(int nid)
439{
440 return next_node(nid, node_states[N_MEMORY]);
441}
434 442
435extern int nr_node_ids; 443extern int nr_node_ids;
436extern int nr_online_nodes; 444extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
471 for ( (node) = 0; (node) == 0; (node) = 1) 479 for ( (node) = 0; (node) == 0; (node) = 1)
472 480
473#define first_online_node 0 481#define first_online_node 0
482#define first_memory_node 0
474#define next_online_node(nid) (MAX_NUMNODES) 483#define next_online_node(nid) (MAX_NUMNODES)
475#define nr_node_ids 1 484#define nr_node_ids 1
476#define nr_online_nodes 1 485#define nr_online_nodes 1
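
first_memory_node and next_memory_node() give N_MEMORY the same open-coded iteration helpers that N_ONLINE already had. A sketch of walking every node that actually has memory, assuming a NUMA=y build where both helpers shown above are defined (demo_walk_memory_nodes is illustrative):

    #include <linux/nodemask.h>
    #include <linux/printk.h>

    static void demo_walk_memory_nodes(void)
    {
            int nid;

            for (nid = first_memory_node; nid < MAX_NUMNODES;
                 nid = next_memory_node(nid))
                    pr_info("node %d has memory\n", nid);
    }
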
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4cd62677feb9..647395a1a550 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
55 struct mem_cgroup *memcg, nodemask_t *nodemask, 55 struct mem_cgroup *memcg, nodemask_t *nodemask,
56 const char *message); 56 const char *message);
57 57
58extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 58extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
59extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 59extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
60 60
61extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 61extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
62 int order, const nodemask_t *nodemask); 62 int order, const nodemask_t *nodemask);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \
171#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ 171#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
172 __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) 172 __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
173 173
174#define PAGEFLAG_FALSE(uname) \
175static inline int Page##uname(const struct page *page) \
176 { return 0; }
177
178#define TESTSCFLAG(uname, lname) \ 174#define TESTSCFLAG(uname, lname) \
179 TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) 175 TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
180 176
177#define TESTPAGEFLAG_FALSE(uname) \
178static inline int Page##uname(const struct page *page) { return 0; }
179
181#define SETPAGEFLAG_NOOP(uname) \ 180#define SETPAGEFLAG_NOOP(uname) \
182static inline void SetPage##uname(struct page *page) { } 181static inline void SetPage##uname(struct page *page) { }
183 182
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { }
187#define __CLEARPAGEFLAG_NOOP(uname) \ 186#define __CLEARPAGEFLAG_NOOP(uname) \
188static inline void __ClearPage##uname(struct page *page) { } 187static inline void __ClearPage##uname(struct page *page) { }
189 188
189#define TESTSETFLAG_FALSE(uname) \
190static inline int TestSetPage##uname(struct page *page) { return 0; }
191
190#define TESTCLEARFLAG_FALSE(uname) \ 192#define TESTCLEARFLAG_FALSE(uname) \
191static inline int TestClearPage##uname(struct page *page) { return 0; } 193static inline int TestClearPage##uname(struct page *page) { return 0; }
192 194
193#define __TESTCLEARFLAG_FALSE(uname) \ 195#define __TESTCLEARFLAG_FALSE(uname) \
194static inline int __TestClearPage##uname(struct page *page) { return 0; } 196static inline int __TestClearPage##uname(struct page *page) { return 0; }
195 197
198#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
199 SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
200
201#define TESTSCFLAG_FALSE(uname) \
202 TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
203
196struct page; /* forward declaration */ 204struct page; /* forward declaration */
197 205
198TESTPAGEFLAG(Locked, locked) 206TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
248PAGEFLAG(SwapCache, swapcache) 256PAGEFLAG(SwapCache, swapcache)
249#else 257#else
250PAGEFLAG_FALSE(SwapCache) 258PAGEFLAG_FALSE(SwapCache)
251 SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
252#endif 259#endif
253 260
254PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) 261PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
258PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) 265PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
259 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) 266 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
260#else 267#else
261PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) 268PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
262 TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) 269 TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
263#endif 270#endif
264 271
265#ifdef CONFIG_ARCH_USES_PG_UNCACHED 272#ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e1474ae18c88..3df8c7db7a4e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
484/* 484/*
485 * lock_page_or_retry - Lock the page, unless this would block and the 485 * lock_page_or_retry - Lock the page, unless this would block and the
486 * caller indicated that it can handle a retry. 486 * caller indicated that it can handle a retry.
487 *
488 * Return value and mmap_sem implications depend on flags; see
489 * __lock_page_or_retry().
487 */ 490 */
488static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, 491static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
489 unsigned int flags) 492 unsigned int flags)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 319ff7e53efb..0990997a5304 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
31} 31}
32 32
33/* printk's without a loglevel use this.. */ 33/* printk's without a loglevel use this.. */
34#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL 34#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
35 35
36/* We show everything that is MORE important than this.. */ 36/* We show everything that is MORE important than this.. */
37#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ 37#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
432} 432}
433 433
434/** 434/**
435 * hlist_add_after_rcu 435 * hlist_add_behind_rcu
436 * @prev: the existing element to add the new element after.
437 * @n: the new element to add to the hash list. 436 * @n: the new element to add to the hash list.
437 * @prev: the existing element to add the new element after.
438 * 438 *
439 * Description: 439 * Description:
440 * Adds the specified element to the specified hlist 440 * Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
449 * hlist_for_each_entry_rcu(), used to prevent memory-consistency 449 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
450 * problems on Alpha CPUs. 450 * problems on Alpha CPUs.
451 */ 451 */
452static inline void hlist_add_after_rcu(struct hlist_node *prev, 452static inline void hlist_add_behind_rcu(struct hlist_node *n,
453 struct hlist_node *n) 453 struct hlist_node *prev)
454{ 454{
455 n->next = prev->next; 455 n->next = prev->next;
456 n->pprev = &prev->next; 456 n->pprev = &prev->next;
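
As with the plain list helper, hlist_add_behind_rcu() now takes the new node first; readers walking the list under rcu_read_lock() see either the old or the new chain, never a half-linked one. A writer-side sketch (struct demo_entry and the function name are illustrative; serialization of concurrent writers is assumed to be handled elsewhere):

    #include <linux/rculist.h>

    struct demo_entry {
            int key;
            struct hlist_node node;
    };

    /* Writer: link @new_entry right after @prev_entry on an RCU-protected hlist. */
    static void demo_rcu_insert_behind(struct demo_entry *new_entry,
                                       struct demo_entry *prev_entry)
    {
            hlist_add_behind_rcu(&new_entry->node, &prev_entry->node);
    }
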
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..1eb64043c076 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
311 struct lruvec *lruvec, struct list_head *head); 311 struct lruvec *lruvec, struct list_head *head);
312extern void activate_page(struct page *); 312extern void activate_page(struct page *);
313extern void mark_page_accessed(struct page *); 313extern void mark_page_accessed(struct page *);
314extern void init_page_accessed(struct page *page);
315extern void lru_add_drain(void); 314extern void lru_add_drain(void);
316extern void lru_add_drain_cpu(int cpu); 315extern void lru_add_drain_cpu(int cpu);
317extern void lru_add_drain_all(void); 316extern void lru_add_drain_all(void);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b8a89189a29..b87696fdf06a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
113extern struct vm_struct *find_vm_area(const void *addr); 113extern struct vm_struct *find_vm_area(const void *addr);
114 114
115extern int map_vm_area(struct vm_struct *area, pgprot_t prot, 115extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
116 struct page ***pages); 116 struct page **pages);
117#ifdef CONFIG_MMU 117#ifdef CONFIG_MMU
118extern int map_kernel_range_noflush(unsigned long start, unsigned long size, 118extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
119 pgprot_t prot, struct page **pages); 119 pgprot_t prot, struct page **pages);
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..f9d41a6e361f 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
11 11
12struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); 12struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
13void zbud_destroy_pool(struct zbud_pool *pool); 13void zbud_destroy_pool(struct zbud_pool *pool);
14int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 14int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
15 unsigned long *handle); 15 unsigned long *handle);
16void zbud_free(struct zbud_pool *pool, unsigned long handle); 16void zbud_free(struct zbud_pool *pool, unsigned long handle);
17int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); 17int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..197abb2a54c5 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
493 method). msg is set to null if there is no error message. deflateInit2 does 493 method). msg is set to null if there is no error message. deflateInit2 does
494 not perform any compression: this will be done by deflate(). 494 not perform any compression: this will be done by deflate().
495*/ 495*/
496
497#if 0
498extern int zlib_deflateSetDictionary (z_streamp strm,
499 const Byte *dictionary,
500 uInt dictLength);
501#endif
502/*
503 Initializes the compression dictionary from the given byte sequence
504 without producing any compressed output. This function must be called
505 immediately after deflateInit, deflateInit2 or deflateReset, before any
506 call of deflate. The compressor and decompressor must use exactly the same
507 dictionary (see inflateSetDictionary).
508
509 The dictionary should consist of strings (byte sequences) that are likely
510 to be encountered later in the data to be compressed, with the most commonly
511 used strings preferably put towards the end of the dictionary. Using a
512 dictionary is most useful when the data to be compressed is short and can be
513 predicted with good accuracy; the data can then be compressed better than
514 with the default empty dictionary.
515
516 Depending on the size of the compression data structures selected by
517 deflateInit or deflateInit2, a part of the dictionary may in effect be
518 discarded, for example if the dictionary is larger than the window size in
519 deflate or deflate2. Thus the strings most likely to be useful should be
520 put at the end of the dictionary, not at the front.
521
522 Upon return of this function, strm->adler is set to the Adler32 value
523 of the dictionary; the decompressor may later use this value to determine
524 which dictionary has been used by the compressor. (The Adler32 value
525 applies to the whole dictionary even if only a subset of the dictionary is
526 actually used by the compressor.)
527
528 deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
529 parameter is invalid (such as NULL dictionary) or the stream state is
530 inconsistent (for example if deflate has already been called for this stream
531 or if the compression method is bsort). deflateSetDictionary does not
532 perform any compression: this will be done by deflate().
533*/
534
535#if 0
536extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
537#endif
538
539/*
540 Sets the destination stream as a complete copy of the source stream.
541
542 This function can be useful when several compression strategies will be
543 tried, for example when there are several ways of pre-processing the input
544 data with a filter. The streams that will be discarded should then be freed
545 by calling deflateEnd. Note that deflateCopy duplicates the internal
546 compression state which can be quite large, so this strategy is slow and
547 can consume lots of memory.
548
549 deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
550 enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
551 (such as zalloc being NULL). msg is left unchanged in both source and
552 destination.
553*/
554 496
555extern int zlib_deflateReset (z_streamp strm); 497extern int zlib_deflateReset (z_streamp strm);
556/* 498/*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
568 return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; 510 return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
569} 511}
570 512
571#if 0
572extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
573#endif
574/*
575 Dynamically update the compression level and compression strategy. The
576 interpretation of level and strategy is as in deflateInit2. This can be
577 used to switch between compression and straight copy of the input data, or
578 to switch to a different kind of input data requiring a different
579 strategy. If the compression level is changed, the input available so far
580 is compressed with the old level (and may be flushed); the new level will
581 take effect only at the next call of deflate().
582
583 Before the call of deflateParams, the stream state must be set as for
584 a call of deflate(), since the currently available input may have to
585 be compressed and flushed. In particular, strm->avail_out must be non-zero.
586
587 deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
588 stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
589 if strm->avail_out was zero.
590*/
591
592/* 513/*
593extern int inflateInit2 (z_streamp strm, int windowBits); 514extern int inflateInit2 (z_streamp strm, int windowBits);
594 515
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits);
631 and avail_out are unchanged.) 552 and avail_out are unchanged.)
632*/ 553*/
633 554
634extern int zlib_inflateSetDictionary (z_streamp strm,
635 const Byte *dictionary,
636 uInt dictLength);
637/*
638 Initializes the decompression dictionary from the given uncompressed byte
639 sequence. This function must be called immediately after a call of inflate,
640 if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
641 can be determined from the adler32 value returned by that call of inflate.
642 The compressor and decompressor must use exactly the same dictionary (see
643 deflateSetDictionary). For raw inflate, this function can be called
644 immediately after inflateInit2() or inflateReset() and before any call of
645 inflate() to set the dictionary. The application must ensure that the
646 dictionary that was used for compression is provided.
647
648 inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
649 parameter is invalid (such as NULL dictionary) or the stream state is
650 inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
651 expected one (incorrect adler32 value). inflateSetDictionary does not
652 perform any decompression: this will be done by subsequent calls of
653 inflate().
654*/
655
656#if 0
657extern int zlib_inflateSync (z_streamp strm);
658#endif
659/*
660 Skips invalid compressed data until a full flush point (see above the
661 description of deflate with Z_FULL_FLUSH) can be found, or until all
662 available input is skipped. No output is provided.
663
664 inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
665 if no more input was provided, Z_DATA_ERROR if no flush point has been found,
666 or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
667 case, the application may save the current value of total_in which
668 indicates where valid compressed data was found. In the error case, the
669 application may repeatedly call inflateSync, providing more input each time,
670 until success or end of the input data.
671*/
672
673extern int zlib_inflateReset (z_streamp strm); 555extern int zlib_inflateReset (z_streamp strm);
674/* 556/*
675 This function is equivalent to inflateEnd followed by inflateInit, 557 This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..f14bd75f08b3
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,106 @@
1/*
2 * zpool memory storage api
3 *
4 * Copyright (C) 2014 Dan Streetman
5 *
6 * This is a common frontend for the zbud and zsmalloc memory
7 * storage pool implementations. Typically, this is used to
8 * store compressed memory.
9 */
10
11#ifndef _ZPOOL_H_
12#define _ZPOOL_H_
13
14struct zpool;
15
16struct zpool_ops {
17 int (*evict)(struct zpool *pool, unsigned long handle);
18};
19
20/*
21 * Control how a handle is mapped. It will be ignored if the
22 * implementation does not support it. Its use is optional.
23 * Note that this does not refer to memory protection, it
24 * refers to how the memory will be copied in/out if copying
25 * is necessary during mapping; read-write is the safest as
26 * it copies the existing memory in on map, and copies the
27 * changed memory back out on unmap. Write-only does not copy
28 * in the memory and should only be used for initialization.
29 * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
30 */
31enum zpool_mapmode {
32 ZPOOL_MM_RW, /* normal read-write mapping */
33 ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
34 ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
35
36 ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
37};
38
39struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
40
41char *zpool_get_type(struct zpool *pool);
42
43void zpool_destroy_pool(struct zpool *pool);
44
45int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
46 unsigned long *handle);
47
48void zpool_free(struct zpool *pool, unsigned long handle);
49
50int zpool_shrink(struct zpool *pool, unsigned int pages,
51 unsigned int *reclaimed);
52
53void *zpool_map_handle(struct zpool *pool, unsigned long handle,
54 enum zpool_mapmode mm);
55
56void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
57
58u64 zpool_get_total_size(struct zpool *pool);
59
60
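A minimal usage sketch of the allocator-facing API above (not part of this patch). It assumes the "zbud" backend is built in and that a NULL zpool_ops is acceptable when eviction is not needed; error handling is trimmed to the essentials.

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include <linux/zpool.h>

    /* Store one buffer in a freshly created pool, then tear everything down. */
    static int zpool_store_example(const void *src, size_t len)
    {
    	struct zpool *pool;
    	unsigned long handle;
    	void *dst;
    	int ret;

    	pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
    	if (!pool)
    		return -ENOMEM;

    	ret = zpool_malloc(pool, len, GFP_KERNEL, &handle);
    	if (ret)
    		goto out;

    	/* write-only mapping: no copy-in, we overwrite the whole object */
    	dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
    	memcpy(dst, src, len);
    	zpool_unmap_handle(pool, handle);

    	zpool_free(pool, handle);
    out:
    	zpool_destroy_pool(pool);
    	return ret;
    }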
61/**
62 * struct zpool_driver - driver implementation for zpool
63 * @type: name of the driver.
64 * @list: entry in the list of zpool drivers.
65 * @create: create a new pool.
66 * @destroy: destroy a pool.
67 * @malloc: allocate mem from a pool.
68 * @free: free mem from a pool.
69 * @shrink: shrink the pool.
70 * @map: map a handle.
71 * @unmap: unmap a handle.
72 * @total_size: get total size of a pool.
73 *
74 * This is created by a zpool implementation and registered
75 * with zpool.
76 */
77struct zpool_driver {
78 char *type;
79 struct module *owner;
80 atomic_t refcount;
81 struct list_head list;
82
83 void *(*create)(gfp_t gfp, struct zpool_ops *ops);
84 void (*destroy)(void *pool);
85
86 int (*malloc)(void *pool, size_t size, gfp_t gfp,
87 unsigned long *handle);
88 void (*free)(void *pool, unsigned long handle);
89
90 int (*shrink)(void *pool, unsigned int pages,
91 unsigned int *reclaimed);
92
93 void *(*map)(void *pool, unsigned long handle,
94 enum zpool_mapmode mm);
95 void (*unmap)(void *pool, unsigned long handle);
96
97 u64 (*total_size)(void *pool);
98};
99
100void zpool_register_driver(struct zpool_driver *driver);
101
102int zpool_unregister_driver(struct zpool_driver *driver);
103
104int zpool_evict(void *pool, unsigned long handle);
105
106#endif
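Conversely, a backend publishes itself by filling in struct zpool_driver and calling zpool_register_driver(). The skeleton below is a hypothetical toy (not from this patch) that merely wraps kmalloc() to show the registration flow; real backends such as zbud or zsmalloc implement proper pool management.

    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/zpool.h>

    static void *toy_create(gfp_t gfp, struct zpool_ops *ops)
    {
    	return (void *)1;	/* no per-pool state in this toy */
    }

    static void toy_destroy(void *pool) { }

    static int toy_malloc(void *pool, size_t size, gfp_t gfp,
    		      unsigned long *handle)
    {
    	void *obj = kmalloc(size, gfp);

    	if (!obj)
    		return -ENOMEM;
    	*handle = (unsigned long)obj;
    	return 0;
    }

    static void toy_free(void *pool, unsigned long handle)
    {
    	kfree((void *)handle);
    }

    static int toy_shrink(void *pool, unsigned int pages,
    		      unsigned int *reclaimed)
    {
    	return -EINVAL;		/* no eviction support */
    }

    static void *toy_map(void *pool, unsigned long handle,
    		     enum zpool_mapmode mm)
    {
    	return (void *)handle;	/* objects are directly addressable */
    }

    static void toy_unmap(void *pool, unsigned long handle) { }

    static u64 toy_total_size(void *pool)
    {
    	return 0;
    }

    static struct zpool_driver toy_zpool_driver = {
    	.type		= "toy",
    	.owner		= THIS_MODULE,
    	.create		= toy_create,
    	.destroy	= toy_destroy,
    	.malloc		= toy_malloc,
    	.free		= toy_free,
    	.shrink		= toy_shrink,
    	.map		= toy_map,
    	.unmap		= toy_unmap,
    	.total_size	= toy_total_size,
    };

    static int __init toy_zpool_init(void)
    {
    	zpool_register_driver(&toy_zpool_driver);
    	return 0;
    }
    module_init(toy_zpool_init);

    static void __exit toy_zpool_exit(void)
    {
    	zpool_unregister_driver(&toy_zpool_driver);
    }
    module_exit(toy_zpool_exit);

    MODULE_LICENSE("GPL");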
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 4e4f2f8b1ac2..dd2b5467d905 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -17,6 +17,7 @@
17 {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \ 17 {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
18 {MR_SYSCALL, "syscall_or_cpuset"}, \ 18 {MR_SYSCALL, "syscall_or_cpuset"}, \
19 {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \ 19 {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
20 {MR_NUMA_MISPLACED, "numa_misplaced"}, \
20 {MR_CMA, "cma"} 21 {MR_CMA, "cma"}
21 22
22TRACE_EVENT(mm_migrate_pages, 23TRACE_EVENT(mm_migrate_pages,
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1c9fabde69e4..ce0803b8d05f 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
28 28
29 TP_PROTO( 29 TP_PROTO(
30 struct page *page, 30 struct page *page,
31 unsigned long pfn, 31 int lru
32 int lru,
33 unsigned long flags
34 ), 32 ),
35 33
36 TP_ARGS(page, pfn, lru, flags), 34 TP_ARGS(page, lru),
37 35
38 TP_STRUCT__entry( 36 TP_STRUCT__entry(
39 __field(struct page *, page ) 37 __field(struct page *, page )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
44 42
45 TP_fast_assign( 43 TP_fast_assign(
46 __entry->page = page; 44 __entry->page = page;
47 __entry->pfn = pfn; 45 __entry->pfn = page_to_pfn(page);
48 __entry->lru = lru; 46 __entry->lru = lru;
49 __entry->flags = flags; 47 __entry->flags = trace_pagemap_flags(page);
50 ), 48 ),
51 49
52 /* Flag format is based on page-types.c formatting for pagemap */ 50 /* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
64 62
65TRACE_EVENT(mm_lru_activate, 63TRACE_EVENT(mm_lru_activate,
66 64
67 TP_PROTO(struct page *page, unsigned long pfn), 65 TP_PROTO(struct page *page),
68 66
69 TP_ARGS(page, pfn), 67 TP_ARGS(page),
70 68
71 TP_STRUCT__entry( 69 TP_STRUCT__entry(
72 __field(struct page *, page ) 70 __field(struct page *, page )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
75 73
76 TP_fast_assign( 74 TP_fast_assign(
77 __entry->page = page; 75 __entry->page = page;
78 __entry->pfn = pfn; 76 __entry->pfn = page_to_pfn(page);
79 ), 77 ),
80 78
81 /* Flag format is based on page-types.c formatting for pagemap */ 79 /* Flag format is based on page-types.c formatting for pagemap */
diff --git a/init/Kconfig b/init/Kconfig
index 41066e49e880..a291b7ef4738 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
807 range 12 21 807 range 12 21
808 default 17 808 default 17
809 help 809 help
810 Select kernel log buffer size as a power of 2. 810 Select the minimal kernel log buffer size as a power of 2.
811 The final size is affected by the LOG_CPU_MAX_BUF_SHIFT config
812 parameter; see below. Any higher size might also be forced
813 by the "log_buf_len" boot parameter.
814
811 Examples: 815 Examples:
812 17 => 128 KB 816 17 => 128 KB
813 16 => 64 KB 817 16 => 64 KB
814 15 => 32 KB 818 15 => 32 KB
815 14 => 16 KB 819 14 => 16 KB
816 13 => 8 KB 820 13 => 8 KB
817 12 => 4 KB 821 12 => 4 KB
818 822
823config LOG_CPU_MAX_BUF_SHIFT
824 int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
825 range 0 21
826 default 12 if !BASE_SMALL
827 default 0 if BASE_SMALL
828 help
829 This option allows increasing the default ring buffer size
830 according to the number of CPUs. The value defines the contribution
831 of each CPU as a power of 2. The used space is typically only a few
832 lines; however, it might be much more when problems are reported,
833 e.g. backtraces.
834
835 The increased size means that a new buffer has to be allocated and
836 the original static one is unused. It makes sense only on systems
837 with many CPUs. Therefore this value is used only when the sum of
838 contributions is greater than half of the default kernel ring
839 buffer as defined by LOG_BUF_SHIFT. The default values are set
840 so that more than 64 CPUs are needed to trigger the allocation.
841
842 This option is also ignored when the "log_buf_len" kernel parameter is
843 used, as it forces an exact (power of two) size of the ring buffer.
844
845 The number of possible CPUs is used for this computation, ignoring
846 hotplugging, making the computation optimal for the worst case
847 scenario while allowing a simple algorithm to be used from bootup.
848
849 Example shift values and their meaning:
850 17 => 128 KB for each CPU
851 16 => 64 KB for each CPU
852 15 => 32 KB for each CPU
853 14 => 16 KB for each CPU
854 13 => 8 KB for each CPU
855 12 => 4 KB for each CPU
856
819# 857#
820# Architectures with an unreliable sched_clock() should select this: 858# Architectures with an unreliable sched_clock() should select this:
821# 859#
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8e9bc9c3dbb7..c447cd9848d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
106 if (unlikely(!entry)) 106 if (unlikely(!entry))
107 return NULL; 107 return NULL;
108 108
109 fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); 109 fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
110 if (unlikely(!fields)) { 110 if (unlikely(!fields)) {
111 kfree(entry); 111 kfree(entry);
112 return NULL; 112 return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
160 160
161int __init audit_register_class(int class, unsigned *list) 161int __init audit_register_class(int class, unsigned *list)
162{ 162{
163 __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL); 163 __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
164 if (!p) 164 if (!p)
165 return -ENOMEM; 165 return -ENOMEM;
166 while (*list != ~0U) { 166 while (*list != ~0U) {
diff --git a/kernel/exit.c b/kernel/exit.c
index e5c4668f1799..88c6b3e42583 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -455,6 +455,7 @@ static void exit_mm(struct task_struct * tsk)
455 task_unlock(tsk); 455 task_unlock(tsk);
456 mm_update_next_owner(mm); 456 mm_update_next_owner(mm);
457 mmput(mm); 457 mmput(mm);
458 clear_thread_flag(TIF_MEMDIE);
458} 459}
459 460
460/* 461/*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 13e839dbca07..de1a6bb6861d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -45,6 +45,7 @@
45#include <linux/poll.h> 45#include <linux/poll.h>
46#include <linux/irq_work.h> 46#include <linux/irq_work.h>
47#include <linux/utsname.h> 47#include <linux/utsname.h>
48#include <linux/ctype.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50 51
@@ -56,7 +57,7 @@
56 57
57int console_printk[4] = { 58int console_printk[4] = {
58 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ 59 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
59 DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ 60 MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */
60 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ 61 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */
61 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ 62 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
62}; 63};
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
113 * This is used for debugging the mess that is the VT code by 114 * This is used for debugging the mess that is the VT code by
114 * keeping track if we have the console semaphore held. It's 115 * keeping track if we have the console semaphore held. It's
115 * definitely not the perfect debug tool (we don't know if _WE_ 116 * definitely not the perfect debug tool (we don't know if _WE_
116 * hold it are racing, but it helps tracking those weird code 117 * hold it and are racing, but it helps tracking those weird code
117 * path in the console code where we end up in places I want 118 * paths in the console code where we end up in places I want
118 * locked without the console semaphore held 119 * locked without the console semaphore held).
119 */ 120 */
120static int console_locked, console_suspended; 121static int console_locked, console_suspended;
121 122
@@ -146,8 +147,8 @@ static int console_may_schedule;
146 * the overall length of the record. 147 * the overall length of the record.
147 * 148 *
148 * The heads to the first and last entry in the buffer, as well as the 149 * The heads to the first and last entry in the buffer, as well as the
149 * sequence numbers of these both entries are maintained when messages 150 * sequence numbers of these entries are maintained when messages are
150 * are stored.. 151 * stored.
151 * 152 *
152 * If the heads indicate available messages, the length in the header 153 * If the heads indicate available messages, the length in the header
153 tells the start of the next message. A length == 0 for the next message 154 tells the start of the next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
257static u32 clear_idx; 258static u32 clear_idx;
258 259
259#define PREFIX_MAX 32 260#define PREFIX_MAX 32
260#define LOG_LINE_MAX 1024 - PREFIX_MAX 261#define LOG_LINE_MAX (1024 - PREFIX_MAX)
261 262
262/* record buffer */ 263/* record buffer */
263#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 264#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
266#define LOG_ALIGN __alignof__(struct printk_log) 267#define LOG_ALIGN __alignof__(struct printk_log)
267#endif 268#endif
268#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 269#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
270#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
269static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 271static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
270static char *log_buf = __log_buf; 272static char *log_buf = __log_buf;
271static u32 log_buf_len = __LOG_BUF_LEN; 273static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
344 while (log_first_seq < log_next_seq) { 346 while (log_first_seq < log_next_seq) {
345 if (logbuf_has_space(msg_size, false)) 347 if (logbuf_has_space(msg_size, false))
346 return 0; 348 return 0;
347 /* drop old messages until we have enough continuous space */ 349 /* drop old messages until we have enough contiguous space */
348 log_first_idx = log_next(log_first_idx); 350 log_first_idx = log_next(log_first_idx);
349 log_first_seq++; 351 log_first_seq++;
350 } 352 }
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
453 return msg->text_len; 455 return msg->text_len;
454} 456}
455 457
456#ifdef CONFIG_SECURITY_DMESG_RESTRICT 458int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
457int dmesg_restrict = 1;
458#else
459int dmesg_restrict;
460#endif
461 459
462static int syslog_action_restricted(int type) 460static int syslog_action_restricted(int type)
463{ 461{
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
828/* requested log_buf_len from kernel cmdline */ 826/* requested log_buf_len from kernel cmdline */
829static unsigned long __initdata new_log_buf_len; 827static unsigned long __initdata new_log_buf_len;
830 828
831/* save requested log_buf_len since it's too early to process it */ 829/* we practice scaling the ring buffer by powers of 2 */
832static int __init log_buf_len_setup(char *str) 830static void __init log_buf_len_update(unsigned size)
833{ 831{
834 unsigned size = memparse(str, &str);
835
836 if (size) 832 if (size)
837 size = roundup_pow_of_two(size); 833 size = roundup_pow_of_two(size);
838 if (size > log_buf_len) 834 if (size > log_buf_len)
839 new_log_buf_len = size; 835 new_log_buf_len = size;
836}
837
838/* save requested log_buf_len since it's too early to process it */
839static int __init log_buf_len_setup(char *str)
840{
841 unsigned size = memparse(str, &str);
842
843 log_buf_len_update(size);
840 844
841 return 0; 845 return 0;
842} 846}
843early_param("log_buf_len", log_buf_len_setup); 847early_param("log_buf_len", log_buf_len_setup);
844 848
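For example, booting with "log_buf_len=5M" is parsed by memparse() and rounded up to the next power of two (8 MB) by log_buf_len_update(); the request only takes effect if it exceeds the current buffer size.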
849static void __init log_buf_add_cpu(void)
850{
851 unsigned int cpu_extra;
852
853 /*
854 * archs should set up cpu_possible_bits properly with
855 * set_cpu_possible() after setup_arch() but just in
856 * case lets ensure this is valid.
857 */
858 if (num_possible_cpus() == 1)
859 return;
860
861 cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
862
863 /* by default this will only continue for large systems (> 64 CPUs) */
864 if (cpu_extra <= __LOG_BUF_LEN / 2)
865 return;
866
867 pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
868 __LOG_CPU_MAX_BUF_LEN);
869 pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
870 cpu_extra);
871 pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
872
873 log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
874}
875
845void __init setup_log_buf(int early) 876void __init setup_log_buf(int early)
846{ 877{
847 unsigned long flags; 878 unsigned long flags;
848 char *new_log_buf; 879 char *new_log_buf;
849 int free; 880 int free;
850 881
882 if (log_buf != __log_buf)
883 return;
884
885 if (!early && !new_log_buf_len)
886 log_buf_add_cpu();
887
851 if (!new_log_buf_len) 888 if (!new_log_buf_len)
852 return; 889 return;
853 890
854 if (early) { 891 if (early) {
855 new_log_buf = 892 new_log_buf =
856 memblock_virt_alloc(new_log_buf_len, PAGE_SIZE); 893 memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
857 } else { 894 } else {
858 new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0); 895 new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
896 LOG_ALIGN);
859 } 897 }
860 898
861 if (unlikely(!new_log_buf)) { 899 if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
872 memcpy(log_buf, __log_buf, __LOG_BUF_LEN); 910 memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
873 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 911 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
874 912
875 pr_info("log_buf_len: %d\n", log_buf_len); 913 pr_info("log_buf_len: %d bytes\n", log_buf_len);
876 pr_info("early log buf free: %d(%d%%)\n", 914 pr_info("early log buf free: %d(%d%%)\n",
877 free, (free * 100) / __LOG_BUF_LEN); 915 free, (free * 100) / __LOG_BUF_LEN);
878} 916}
@@ -881,7 +919,7 @@ static bool __read_mostly ignore_loglevel;
881 919
882static int __init ignore_loglevel_setup(char *str) 920static int __init ignore_loglevel_setup(char *str)
883{ 921{
884 ignore_loglevel = 1; 922 ignore_loglevel = true;
885 pr_info("debug: ignoring loglevel setting.\n"); 923 pr_info("debug: ignoring loglevel setting.\n");
886 924
887 return 0; 925 return 0;
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
947} 985}
948#endif 986#endif
949 987
950#if defined(CONFIG_PRINTK_TIME) 988static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
951static bool printk_time = 1;
952#else
953static bool printk_time;
954#endif
955module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 989module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
956 990
957static size_t print_time(u64 ts, char *buf) 991static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1310 * for pending data, not the size; return the count of 1344 * for pending data, not the size; return the count of
1311 * records, not the length. 1345 * records, not the length.
1312 */ 1346 */
1313 error = log_next_idx - syslog_idx; 1347 error = log_next_seq - syslog_seq;
1314 } else { 1348 } else {
1315 u64 seq = syslog_seq; 1349 u64 seq = syslog_seq;
1316 u32 idx = syslog_idx; 1350 u32 idx = syslog_idx;
@@ -1416,10 +1450,9 @@ static int have_callable_console(void)
1416/* 1450/*
1417 * Can we actually use the console at this time on this cpu? 1451 * Can we actually use the console at this time on this cpu?
1418 * 1452 *
1419 * Console drivers may assume that per-cpu resources have 1453 * Console drivers may assume that per-cpu resources have been allocated. So
1420 * been allocated. So unless they're explicitly marked as 1454 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
1421 * being able to cope (CON_ANYTIME) don't call them until 1455 * call them until this CPU is officially up.
1422 * this CPU is officially up.
1423 */ 1456 */
1424static inline int can_use_console(unsigned int cpu) 1457static inline int can_use_console(unsigned int cpu)
1425{ 1458{
@@ -1432,8 +1465,10 @@ static inline int can_use_console(unsigned int cpu)
1432 * console_lock held, and 'console_locked' set) if it 1465 * console_lock held, and 'console_locked' set) if it
1433 * is successful, false otherwise. 1466 * is successful, false otherwise.
1434 */ 1467 */
1435static int console_trylock_for_printk(unsigned int cpu) 1468static int console_trylock_for_printk(void)
1436{ 1469{
1470 unsigned int cpu = smp_processor_id();
1471
1437 if (!console_trylock()) 1472 if (!console_trylock())
1438 return 0; 1473 return 0;
1439 /* 1474 /*
@@ -1476,7 +1511,7 @@ static struct cont {
1476 struct task_struct *owner; /* task of first print*/ 1511 struct task_struct *owner; /* task of first print*/
1477 u64 ts_nsec; /* time of first print */ 1512 u64 ts_nsec; /* time of first print */
1478 u8 level; /* log level of first message */ 1513 u8 level; /* log level of first message */
1479 u8 facility; /* log level of first message */ 1514 u8 facility; /* log facility of first message */
1480 enum log_flags flags; /* prefix, newline flags */ 1515 enum log_flags flags; /* prefix, newline flags */
1481 bool flushed:1; /* buffer sealed and committed */ 1516 bool flushed:1; /* buffer sealed and committed */
1482} cont; 1517} cont;
@@ -1608,7 +1643,8 @@ asmlinkage int vprintk_emit(int facility, int level,
1608 */ 1643 */
1609 if (!oops_in_progress && !lockdep_recursing(current)) { 1644 if (!oops_in_progress && !lockdep_recursing(current)) {
1610 recursion_bug = 1; 1645 recursion_bug = 1;
1611 goto out_restore_irqs; 1646 local_irq_restore(flags);
1647 return 0;
1612 } 1648 }
1613 zap_locks(); 1649 zap_locks();
1614 } 1650 }
@@ -1716,21 +1752,30 @@ asmlinkage int vprintk_emit(int facility, int level,
1716 1752
1717 logbuf_cpu = UINT_MAX; 1753 logbuf_cpu = UINT_MAX;
1718 raw_spin_unlock(&logbuf_lock); 1754 raw_spin_unlock(&logbuf_lock);
1755 lockdep_on();
1756 local_irq_restore(flags);
1719 1757
1720 /* If called from the scheduler, we can not call up(). */ 1758 /* If called from the scheduler, we can not call up(). */
1721 if (!in_sched) { 1759 if (!in_sched) {
1760 lockdep_off();
1761 /*
1762 * Disable preemption to avoid being preempted while holding
1763 * console_sem which would prevent anyone from printing to
1764 * console
1765 */
1766 preempt_disable();
1767
1722 /* 1768 /*
1723 * Try to acquire and then immediately release the console 1769 * Try to acquire and then immediately release the console
1724 * semaphore. The release will print out buffers and wake up 1770 * semaphore. The release will print out buffers and wake up
1725 * /dev/kmsg and syslog() users. 1771 * /dev/kmsg and syslog() users.
1726 */ 1772 */
1727 if (console_trylock_for_printk(this_cpu)) 1773 if (console_trylock_for_printk())
1728 console_unlock(); 1774 console_unlock();
1775 preempt_enable();
1776 lockdep_on();
1729 } 1777 }
1730 1778
1731 lockdep_on();
1732out_restore_irqs:
1733 local_irq_restore(flags);
1734 return printed_len; 1779 return printed_len;
1735} 1780}
1736EXPORT_SYMBOL(vprintk_emit); 1781EXPORT_SYMBOL(vprintk_emit);
@@ -1802,7 +1847,7 @@ EXPORT_SYMBOL(printk);
1802 1847
1803#define LOG_LINE_MAX 0 1848#define LOG_LINE_MAX 0
1804#define PREFIX_MAX 0 1849#define PREFIX_MAX 0
1805#define LOG_LINE_MAX 0 1850
1806static u64 syslog_seq; 1851static u64 syslog_seq;
1807static u32 syslog_idx; 1852static u32 syslog_idx;
1808static u64 console_seq; 1853static u64 console_seq;
@@ -1881,11 +1926,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
1881 return 0; 1926 return 0;
1882} 1927}
1883/* 1928/*
1884 * Set up a list of consoles. Called from init/main.c 1929 * Set up a console. Called via do_early_param() in init/main.c
1930 * for each "console=" parameter in the boot command line.
1885 */ 1931 */
1886static int __init console_setup(char *str) 1932static int __init console_setup(char *str)
1887{ 1933{
1888 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ 1934 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
1889 char *s, *options, *brl_options = NULL; 1935 char *s, *options, *brl_options = NULL;
1890 int idx; 1936 int idx;
1891 1937
@@ -1902,7 +1948,8 @@ static int __init console_setup(char *str)
1902 strncpy(buf, str, sizeof(buf) - 1); 1948 strncpy(buf, str, sizeof(buf) - 1);
1903 } 1949 }
1904 buf[sizeof(buf) - 1] = 0; 1950 buf[sizeof(buf) - 1] = 0;
1905 if ((options = strchr(str, ',')) != NULL) 1951 options = strchr(str, ',');
1952 if (options)
1906 *(options++) = 0; 1953 *(options++) = 0;
1907#ifdef __sparc__ 1954#ifdef __sparc__
1908 if (!strcmp(str, "ttya")) 1955 if (!strcmp(str, "ttya"))
@@ -1911,7 +1958,7 @@ static int __init console_setup(char *str)
1911 strcpy(buf, "ttyS1"); 1958 strcpy(buf, "ttyS1");
1912#endif 1959#endif
1913 for (s = buf; *s; s++) 1960 for (s = buf; *s; s++)
1914 if ((*s >= '0' && *s <= '9') || *s == ',') 1961 if (isdigit(*s) || *s == ',')
1915 break; 1962 break;
1916 idx = simple_strtoul(s, NULL, 10); 1963 idx = simple_strtoul(s, NULL, 10);
1917 *s = 0; 1964 *s = 0;
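For example, "console=ttyS0,115200n8" is split at the comma so that options points to "115200n8"; the scan then stops at the first digit, simple_strtoul() yields idx 0, and truncating at that digit leaves the name "ttyS".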
@@ -1950,7 +1997,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
1950 i++, c++) 1997 i++, c++)
1951 if (strcmp(c->name, name) == 0 && c->index == idx) { 1998 if (strcmp(c->name, name) == 0 && c->index == idx) {
1952 strlcpy(c->name, name_new, sizeof(c->name)); 1999 strlcpy(c->name, name_new, sizeof(c->name));
1953 c->name[sizeof(c->name) - 1] = 0;
1954 c->options = options; 2000 c->options = options;
1955 c->index = idx_new; 2001 c->index = idx_new;
1956 return i; 2002 return i;
@@ -1959,12 +2005,12 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
1959 return -1; 2005 return -1;
1960} 2006}
1961 2007
1962bool console_suspend_enabled = 1; 2008bool console_suspend_enabled = true;
1963EXPORT_SYMBOL(console_suspend_enabled); 2009EXPORT_SYMBOL(console_suspend_enabled);
1964 2010
1965static int __init console_suspend_disable(char *str) 2011static int __init console_suspend_disable(char *str)
1966{ 2012{
1967 console_suspend_enabled = 0; 2013 console_suspend_enabled = false;
1968 return 1; 2014 return 1;
1969} 2015}
1970__setup("no_console_suspend", console_suspend_disable); 2016__setup("no_console_suspend", console_suspend_disable);
@@ -2045,8 +2091,8 @@ EXPORT_SYMBOL(console_lock);
2045/** 2091/**
2046 * console_trylock - try to lock the console system for exclusive use. 2092 * console_trylock - try to lock the console system for exclusive use.
2047 * 2093 *
2048 * Tried to acquire a lock which guarantees that the caller has 2094 * Try to acquire a lock which guarantees that the caller has exclusive
2049 * exclusive access to the console system and the console_drivers list. 2095 * access to the console system and the console_drivers list.
2050 * 2096 *
2051 * returns 1 on success, and 0 on failure to acquire the lock. 2097 * returns 1 on success, and 0 on failure to acquire the lock.
2052 */ 2098 */
@@ -2618,14 +2664,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
2618bool printk_timed_ratelimit(unsigned long *caller_jiffies, 2664bool printk_timed_ratelimit(unsigned long *caller_jiffies,
2619 unsigned int interval_msecs) 2665 unsigned int interval_msecs)
2620{ 2666{
2621 if (*caller_jiffies == 0 2667 unsigned long elapsed = jiffies - *caller_jiffies;
2622 || !time_in_range(jiffies, *caller_jiffies, 2668
2623 *caller_jiffies 2669 if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
2624 + msecs_to_jiffies(interval_msecs))) { 2670 return false;
2625 *caller_jiffies = jiffies; 2671
2626 return true; 2672 *caller_jiffies = jiffies;
2627 } 2673 return true;
2628 return false;
2629} 2674}
2630EXPORT_SYMBOL(printk_timed_ratelimit); 2675EXPORT_SYMBOL(printk_timed_ratelimit);
2631 2676
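A brief usage sketch (hypothetical caller, not part of this patch) of the simplified helper: the zero-initialised timestamp makes the first call succeed, and later calls succeed at most once per interval.

    #include <linux/printk.h>

    /* Warn about a congested queue at most once every five seconds. */
    static void report_congestion(void)
    {
    	static unsigned long last_warned;	/* 0: first call always prints */

    	if (printk_timed_ratelimit(&last_warned, 5000))
    		pr_warn("queue still congested\n");
    }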
diff --git a/kernel/smp.c b/kernel/smp.c
index 487653b5844f..aff8aa14f547 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -670,7 +670,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
670 if (cond_func(cpu, info)) { 670 if (cond_func(cpu, info)) {
671 ret = smp_call_function_single(cpu, func, 671 ret = smp_call_function_single(cpu, func,
672 info, wait); 672 info, wait);
673 WARN_ON_ONCE(!ret); 673 WARN_ON_ONCE(ret);
674 } 674 }
675 preempt_enable(); 675 preempt_enable();
676 } 676 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75b22e22a72c..75875a741b5e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
1240 .maxlen = sizeof(unsigned long), 1240 .maxlen = sizeof(unsigned long),
1241 .mode = 0644, 1241 .mode = 0644,
1242 .proc_handler = hugetlb_sysctl_handler, 1242 .proc_handler = hugetlb_sysctl_handler,
1243 .extra1 = (void *)&hugetlb_zero, 1243 .extra1 = &zero,
1244 .extra2 = (void *)&hugetlb_infinity,
1245 }, 1244 },
1246#ifdef CONFIG_NUMA 1245#ifdef CONFIG_NUMA
1247 { 1246 {
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
1250 .maxlen = sizeof(unsigned long), 1249 .maxlen = sizeof(unsigned long),
1251 .mode = 0644, 1250 .mode = 0644,
1252 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 1251 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1253 .extra1 = (void *)&hugetlb_zero, 1252 .extra1 = &zero,
1254 .extra2 = (void *)&hugetlb_infinity,
1255 }, 1253 },
1256#endif 1254#endif
1257 { 1255 {
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
1274 .maxlen = sizeof(unsigned long), 1272 .maxlen = sizeof(unsigned long),
1275 .mode = 0644, 1273 .mode = 0644,
1276 .proc_handler = hugetlb_overcommit_handler, 1274 .proc_handler = hugetlb_overcommit_handler,
1277 .extra1 = (void *)&hugetlb_zero, 1275 .extra1 = &zero,
1278 .extra2 = (void *)&hugetlb_infinity,
1279 }, 1276 },
1280#endif 1277#endif
1281 { 1278 {
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b040..51b29e9d2ba6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
260 return; 260 return;
261 261
262 if (hardlockup_panic) 262 if (hardlockup_panic)
263 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); 263 panic("Watchdog detected hard LOCKUP on cpu %d",
264 this_cpu);
264 else 265 else
265 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 266 WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
267 this_cpu);
266 268
267 __this_cpu_write(hard_watchdog_warn, true); 269 __this_cpu_write(hard_watchdog_warn, true);
268 return; 270 return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
345 } 347 }
346 } 348 }
347 349
348 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 350 pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
349 smp_processor_id(), duration, 351 smp_processor_id(), duration,
350 current->comm, task_pid_nr(current)); 352 current->comm, task_pid_nr(current));
351 print_modules(); 353 print_modules();
@@ -484,7 +486,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
484 if (PTR_ERR(event) == -EOPNOTSUPP) 486 if (PTR_ERR(event) == -EOPNOTSUPP)
485 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); 487 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
486 else if (PTR_ERR(event) == -ENOENT) 488 else if (PTR_ERR(event) == -ENOENT)
487 pr_warning("disabled (cpu%i): hardware events not enabled\n", 489 pr_warn("disabled (cpu%i): hardware events not enabled\n",
488 cpu); 490 cpu);
489 else 491 else
490 pr_err("disabled (cpu%i): unable to create perf event: %ld\n", 492 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/Kconfig b/lib/Kconfig
index a8a775730c09..df872659ddd3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -396,6 +396,39 @@ config CPU_RMAP
396config DQL 396config DQL
397 bool 397 bool
398 398
399config GLOB
400 bool
401# This actually supports modular compilation, but the module overhead
402# is ridiculous for the amount of code involved. Until an out-of-tree
403# driver asks for it, we'll just link it directly into the kernel
404# when required. Since we're ignoring out-of-tree users, there's also
405# no need to bother prompting for a manual decision:
406# prompt "glob_match() function"
407 help
408 This option provides a glob_match function for performing
409 simple text pattern matching. It originated in the ATA code
410 to blacklist particular drive models, but other device drivers
411 may need similar functionality.
412
413 All drivers in the Linux kernel tree that require this function
414 should automatically select this option. Say N unless you
415 are compiling an out-of tree driver which tells you that it
416 depends on this.
417
418config GLOB_SELFTEST
419 bool "glob self-test on init"
420 default n
421 depends on GLOB
422 help
423 This option enables a simple self-test of the glob_match
424 function on startup. It is primarily useful for people
425 working on the code to ensure they haven't introduced any
426 regressions.
427
428 It only adds a little bit of code and slows kernel boot (or
429 module load) by a small amount, so you're welcome to play with
430 it, but you probably don't need it.
431
399# 432#
400# Netlink attribute parsing support is select'ed if needed 433# Netlink attribute parsing support is select'ed if needed
401# 434#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cfe7df8f62cc..cb45f59685e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
15 The behavior is also controlled by the kernel command line 15 The behavior is also controlled by the kernel command line
16 parameter printk.time=1. See Documentation/kernel-parameters.txt 16 parameter printk.time=1. See Documentation/kernel-parameters.txt
17 17
18config DEFAULT_MESSAGE_LOGLEVEL 18config MESSAGE_LOGLEVEL_DEFAULT
19 int "Default message log level (1-7)" 19 int "Default message log level (1-7)"
20 range 1 7 20 range 1 7
21 default "4" 21 default "4"
diff --git a/lib/Makefile b/lib/Makefile
index 8427df95dade..d6b4bc496408 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
137 137
138obj-$(CONFIG_DQL) += dynamic_queue_limits.o 138obj-$(CONFIG_DQL) += dynamic_queue_limits.o
139 139
140obj-$(CONFIG_GLOB) += glob.o
141
140obj-$(CONFIG_MPILIB) += mpi/ 142obj-$(CONFIG_MPILIB) += mpi/
141obj-$(CONFIG_SIGNATURE) += digsig.o 143obj-$(CONFIG_SIGNATURE) += digsig.o
142 144
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..1e031f2c9aba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
40 * for the best explanations of this ordering. 40 * for the best explanations of this ordering.
41 */ 41 */
42 42
43int __bitmap_empty(const unsigned long *bitmap, int bits) 43int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
44{ 44{
45 int k, lim = bits/BITS_PER_LONG; 45 unsigned int k, lim = bits/BITS_PER_LONG;
46 for (k = 0; k < lim; ++k) 46 for (k = 0; k < lim; ++k)
47 if (bitmap[k]) 47 if (bitmap[k])
48 return 0; 48 return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
55} 55}
56EXPORT_SYMBOL(__bitmap_empty); 56EXPORT_SYMBOL(__bitmap_empty);
57 57
58int __bitmap_full(const unsigned long *bitmap, int bits) 58int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
59{ 59{
60 int k, lim = bits/BITS_PER_LONG; 60 unsigned int k, lim = bits/BITS_PER_LONG;
61 for (k = 0; k < lim; ++k) 61 for (k = 0; k < lim; ++k)
62 if (~bitmap[k]) 62 if (~bitmap[k])
63 return 0; 63 return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
71EXPORT_SYMBOL(__bitmap_full); 71EXPORT_SYMBOL(__bitmap_full);
72 72
73int __bitmap_equal(const unsigned long *bitmap1, 73int __bitmap_equal(const unsigned long *bitmap1,
74 const unsigned long *bitmap2, int bits) 74 const unsigned long *bitmap2, unsigned int bits)
75{ 75{
76 int k, lim = bits/BITS_PER_LONG; 76 unsigned int k, lim = bits/BITS_PER_LONG;
77 for (k = 0; k < lim; ++k) 77 for (k = 0; k < lim; ++k)
78 if (bitmap1[k] != bitmap2[k]) 78 if (bitmap1[k] != bitmap2[k])
79 return 0; 79 return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
86} 86}
87EXPORT_SYMBOL(__bitmap_equal); 87EXPORT_SYMBOL(__bitmap_equal);
88 88
89void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) 89void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
90{ 90{
91 int k, lim = bits/BITS_PER_LONG; 91 unsigned int k, lim = bits/BITS_PER_LONG;
92 for (k = 0; k < lim; ++k) 92 for (k = 0; k < lim; ++k)
93 dst[k] = ~src[k]; 93 dst[k] = ~src[k];
94 94
95 if (bits % BITS_PER_LONG) 95 if (bits % BITS_PER_LONG)
96 dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); 96 dst[k] = ~src[k];
97} 97}
98EXPORT_SYMBOL(__bitmap_complement); 98EXPORT_SYMBOL(__bitmap_complement);
99 99
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
182EXPORT_SYMBOL(__bitmap_shift_left); 182EXPORT_SYMBOL(__bitmap_shift_left);
183 183
184int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 184int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
185 const unsigned long *bitmap2, int bits) 185 const unsigned long *bitmap2, unsigned int bits)
186{ 186{
187 int k; 187 unsigned int k;
188 int nr = BITS_TO_LONGS(bits); 188 unsigned int lim = bits/BITS_PER_LONG;
189 unsigned long result = 0; 189 unsigned long result = 0;
190 190
191 for (k = 0; k < nr; k++) 191 for (k = 0; k < lim; k++)
192 result |= (dst[k] = bitmap1[k] & bitmap2[k]); 192 result |= (dst[k] = bitmap1[k] & bitmap2[k]);
193 if (bits % BITS_PER_LONG)
194 result |= (dst[k] = bitmap1[k] & bitmap2[k] &
195 BITMAP_LAST_WORD_MASK(bits));
193 return result != 0; 196 return result != 0;
194} 197}
195EXPORT_SYMBOL(__bitmap_and); 198EXPORT_SYMBOL(__bitmap_and);
196 199
197void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 200void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
198 const unsigned long *bitmap2, int bits) 201 const unsigned long *bitmap2, unsigned int bits)
199{ 202{
200 int k; 203 unsigned int k;
201 int nr = BITS_TO_LONGS(bits); 204 unsigned int nr = BITS_TO_LONGS(bits);
202 205
203 for (k = 0; k < nr; k++) 206 for (k = 0; k < nr; k++)
204 dst[k] = bitmap1[k] | bitmap2[k]; 207 dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
206EXPORT_SYMBOL(__bitmap_or); 209EXPORT_SYMBOL(__bitmap_or);
207 210
208void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 211void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
209 const unsigned long *bitmap2, int bits) 212 const unsigned long *bitmap2, unsigned int bits)
210{ 213{
211 int k; 214 unsigned int k;
212 int nr = BITS_TO_LONGS(bits); 215 unsigned int nr = BITS_TO_LONGS(bits);
213 216
214 for (k = 0; k < nr; k++) 217 for (k = 0; k < nr; k++)
215 dst[k] = bitmap1[k] ^ bitmap2[k]; 218 dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
217EXPORT_SYMBOL(__bitmap_xor); 220EXPORT_SYMBOL(__bitmap_xor);
218 221
219int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 222int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
220 const unsigned long *bitmap2, int bits) 223 const unsigned long *bitmap2, unsigned int bits)
221{ 224{
222 int k; 225 unsigned int k;
223 int nr = BITS_TO_LONGS(bits); 226 unsigned int lim = bits/BITS_PER_LONG;
224 unsigned long result = 0; 227 unsigned long result = 0;
225 228
226 for (k = 0; k < nr; k++) 229 for (k = 0; k < lim; k++)
227 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); 230 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
231 if (bits % BITS_PER_LONG)
232 result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
233 BITMAP_LAST_WORD_MASK(bits));
228 return result != 0; 234 return result != 0;
229} 235}
230EXPORT_SYMBOL(__bitmap_andnot); 236EXPORT_SYMBOL(__bitmap_andnot);
231 237
232int __bitmap_intersects(const unsigned long *bitmap1, 238int __bitmap_intersects(const unsigned long *bitmap1,
233 const unsigned long *bitmap2, int bits) 239 const unsigned long *bitmap2, unsigned int bits)
234{ 240{
235 int k, lim = bits/BITS_PER_LONG; 241 unsigned int k, lim = bits/BITS_PER_LONG;
236 for (k = 0; k < lim; ++k) 242 for (k = 0; k < lim; ++k)
237 if (bitmap1[k] & bitmap2[k]) 243 if (bitmap1[k] & bitmap2[k])
238 return 1; 244 return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
245EXPORT_SYMBOL(__bitmap_intersects); 251EXPORT_SYMBOL(__bitmap_intersects);
246 252
247int __bitmap_subset(const unsigned long *bitmap1, 253int __bitmap_subset(const unsigned long *bitmap1,
248 const unsigned long *bitmap2, int bits) 254 const unsigned long *bitmap2, unsigned int bits)
249{ 255{
250 int k, lim = bits/BITS_PER_LONG; 256 unsigned int k, lim = bits/BITS_PER_LONG;
251 for (k = 0; k < lim; ++k) 257 for (k = 0; k < lim; ++k)
252 if (bitmap1[k] & ~bitmap2[k]) 258 if (bitmap1[k] & ~bitmap2[k])
253 return 0; 259 return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
259} 265}
260EXPORT_SYMBOL(__bitmap_subset); 266EXPORT_SYMBOL(__bitmap_subset);
261 267
262int __bitmap_weight(const unsigned long *bitmap, int bits) 268int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
263{ 269{
264 int k, w = 0, lim = bits/BITS_PER_LONG; 270 unsigned int k, lim = bits/BITS_PER_LONG;
271 int w = 0;
265 272
266 for (k = 0; k < lim; k++) 273 for (k = 0; k < lim; k++)
267 w += hweight_long(bitmap[k]); 274 w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
273} 280}
274EXPORT_SYMBOL(__bitmap_weight); 281EXPORT_SYMBOL(__bitmap_weight);
275 282
276void bitmap_set(unsigned long *map, int start, int nr) 283void bitmap_set(unsigned long *map, unsigned int start, int len)
277{ 284{
278 unsigned long *p = map + BIT_WORD(start); 285 unsigned long *p = map + BIT_WORD(start);
279 const int size = start + nr; 286 const unsigned int size = start + len;
280 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); 287 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
281 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); 288 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
282 289
283 while (nr - bits_to_set >= 0) { 290 while (len - bits_to_set >= 0) {
284 *p |= mask_to_set; 291 *p |= mask_to_set;
285 nr -= bits_to_set; 292 len -= bits_to_set;
286 bits_to_set = BITS_PER_LONG; 293 bits_to_set = BITS_PER_LONG;
287 mask_to_set = ~0UL; 294 mask_to_set = ~0UL;
288 p++; 295 p++;
289 } 296 }
290 if (nr) { 297 if (len) {
291 mask_to_set &= BITMAP_LAST_WORD_MASK(size); 298 mask_to_set &= BITMAP_LAST_WORD_MASK(size);
292 *p |= mask_to_set; 299 *p |= mask_to_set;
293 } 300 }
294} 301}
295EXPORT_SYMBOL(bitmap_set); 302EXPORT_SYMBOL(bitmap_set);
296 303
297void bitmap_clear(unsigned long *map, int start, int nr) 304void bitmap_clear(unsigned long *map, unsigned int start, int len)
298{ 305{
299 unsigned long *p = map + BIT_WORD(start); 306 unsigned long *p = map + BIT_WORD(start);
300 const int size = start + nr; 307 const unsigned int size = start + len;
301 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); 308 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
302 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); 309 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
303 310
304 while (nr - bits_to_clear >= 0) { 311 while (len - bits_to_clear >= 0) {
305 *p &= ~mask_to_clear; 312 *p &= ~mask_to_clear;
306 nr -= bits_to_clear; 313 len -= bits_to_clear;
307 bits_to_clear = BITS_PER_LONG; 314 bits_to_clear = BITS_PER_LONG;
308 mask_to_clear = ~0UL; 315 mask_to_clear = ~0UL;
309 p++; 316 p++;
310 } 317 }
311 if (nr) { 318 if (len) {
312 mask_to_clear &= BITMAP_LAST_WORD_MASK(size); 319 mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
313 *p &= ~mask_to_clear; 320 *p &= ~mask_to_clear;
314 } 321 }
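A short sketch (not from this patch) of the two helpers with their new start/len prototypes; DECLARE_BITMAP() and bitmap_weight() come from <linux/bitmap.h>.

    #include <linux/bitmap.h>

    static DECLARE_BITMAP(busy, 64);

    static void bitmap_set_clear_example(void)
    {
    	bitmap_set(busy, 8, 16);	/* mark bits 8..23 */
    	bitmap_clear(busy, 8, 8);	/* release bits 8..15 again */
    	/* bitmap_weight(busy, 64) is now 8 */
    }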
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
664 671
665int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) 672int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
666{ 673{
667 char *nl = strchr(bp, '\n'); 674 char *nl = strchrnul(bp, '\n');
668 int len; 675 int len = nl - bp;
669
670 if (nl)
671 len = nl - bp;
672 else
673 len = strlen(bp);
674 676
675 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); 677 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
676} 678}
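For illustration, a hypothetical caller (not part of the patch); parsing stops at the first newline, if any, so sysfs-style input works directly.

    #include <linux/bitmap.h>

    static int parse_cpus_example(void)
    {
    	static DECLARE_BITMAP(mask, 16);

    	/* sets bits 0-3 and 8; parsing stops at the '\n' */
    	return bitmap_parselist("0-3,8\n", mask, 16);
    }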
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
716 * 718 *
717 * If for example, just bits 4 through 7 are set in @buf, then @pos 719 * If for example, just bits 4 through 7 are set in @buf, then @pos
718 * values 4 through 7 will get mapped to 0 through 3, respectively, 720 * values 4 through 7 will get mapped to 0 through 3, respectively,
719 * and other @pos values will get mapped to 0. When @pos value 7 721 * and other @pos values will get mapped to -1. When @pos value 7
720 * gets mapped to (returns) @ord value 3 in this example, that means 722 * gets mapped to (returns) @ord value 3 in this example, that means
721 * that bit 7 is the 3rd (starting with 0th) set bit in @buf. 723 * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
722 * 724 *
@@ -1046,7 +1048,7 @@ enum {
1046 REG_OP_RELEASE, /* clear all bits in region */ 1048 REG_OP_RELEASE, /* clear all bits in region */
1047}; 1049};
1048 1050
1049static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) 1051static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
1050{ 1052{
1051 int nbits_reg; /* number of bits in region */ 1053 int nbits_reg; /* number of bits in region */
1052 int index; /* index first long of region in bitmap */ 1054 int index; /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
1112 * Return the bit offset in bitmap of the allocated region, 1114 * Return the bit offset in bitmap of the allocated region,
1113 * or -errno on failure. 1115 * or -errno on failure.
1114 */ 1116 */
1115int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) 1117int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
1116{ 1118{
1117 int pos, end; /* scans bitmap by regions of size order */ 1119 unsigned int pos, end; /* scans bitmap by regions of size order */
1118 1120
1119 for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { 1121 for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
1120 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1122 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
1121 continue; 1123 continue;
1122 __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1124 __reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
1137 * 1139 *
1138 * No return value. 1140 * No return value.
1139 */ 1141 */
1140void bitmap_release_region(unsigned long *bitmap, int pos, int order) 1142void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
1141{ 1143{
1142 __reg_op(bitmap, pos, order, REG_OP_RELEASE); 1144 __reg_op(bitmap, pos, order, REG_OP_RELEASE);
1143} 1145}
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
1154 * Return 0 on success, or %-EBUSY if specified region wasn't 1156 * Return 0 on success, or %-EBUSY if specified region wasn't
1155 * free (not all bits were zero). 1157 * free (not all bits were zero).
1156 */ 1158 */
1157int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) 1159int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
1158{ 1160{
1159 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1161 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
1160 return -EBUSY; 1162 return -EBUSY;
1161 __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1163 return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
1162 return 0;
1163} 1164}
1164EXPORT_SYMBOL(bitmap_allocate_region); 1165EXPORT_SYMBOL(bitmap_allocate_region);
1165 1166
diff --git a/lib/cmdline.c b/lib/cmdline.c
index d4932f745e92..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
121 * @retptr: (output) Optional pointer to next char after parse completes 121 * @retptr: (output) Optional pointer to next char after parse completes
122 * 122 *
123 * Parses a string into a number. The number stored at @ptr is 123 * Parses a string into a number. The number stored at @ptr is
124 * potentially suffixed with %K (for kilobytes, or 1024 bytes), 124 * potentially suffixed with K, M, G, T, P, E.
125 * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
126 * 1073741824). If the number is suffixed with K, M, or G, then
127 * the return value is the number multiplied by one kilobyte, one
128 * megabyte, or one gigabyte, respectively.
129 */ 125 */
130 126
131unsigned long long memparse(const char *ptr, char **retptr) 127unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
135 unsigned long long ret = simple_strtoull(ptr, &endptr, 0); 131 unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
136 132
137 switch (*endptr) { 133 switch (*endptr) {
134 case 'E':
135 case 'e':
136 ret <<= 10;
137 case 'P':
138 case 'p':
139 ret <<= 10;
140 case 'T':
141 case 't':
142 ret <<= 10;
138 case 'G': 143 case 'G':
139 case 'g': 144 case 'g':
140 ret <<= 10; 145 ret <<= 10;
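With the new cases, memparse("16T", NULL), for example, falls through the T, G, M and K shifts and returns 16ULL << 40; the absence of break statements is intentional.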
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
1#include <linux/module.h>
2#include <linux/glob.h>
3
4/*
5 * The only reason this code can be compiled as a module is because the
6 * ATA code that depends on it can be as well. In practice, they're
7 * both usually compiled in and the module overhead goes away.
8 */
9MODULE_DESCRIPTION("glob(7) matching");
10MODULE_LICENSE("Dual MIT/GPL");
11
12/**
13 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
14 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
15 * @str: String to match. The pattern must match the entire string.
16 *
17 * Perform shell-style glob matching, returning true (1) if the match
18 * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
19 *
20 * Pattern metacharacters are ?, *, [ and \.
21 * (And, inside character classes, !, - and ].)
22 *
23 * This is a small and simple implementation intended for device blacklists
24 * where a string is matched against a number of patterns. Thus, it
25 * does not preprocess the patterns. It is non-recursive, and run-time
26 * is at most quadratic: strlen(@str)*strlen(@pat).
27 *
28 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
29 * it takes 6 passes over the pattern before matching the string.
30 *
31 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
32 * treat / or leading . specially; it isn't actually used for pathnames.
33 *
34 * Note that according to glob(7) (and unlike bash), character classes
35 * are complemented by a leading !; this does not support the regex-style
36 * [^a-z] syntax.
37 *
38 * An opening bracket without a matching close is matched literally.
39 */
40bool __pure glob_match(char const *pat, char const *str)
41{
42 /*
43 * Backtrack to previous * on mismatch and retry starting one
44 * character later in the string. Because * matches all characters
45 * (no exception for /), it can be easily proved that there's
46 * never a need to backtrack multiple levels.
47 */
48 char const *back_pat = NULL, *back_str = back_str;
49
50 /*
51 * Loop over each token (character or class) in pat, matching
52 * it against the remaining unmatched tail of str. Return false
53 * on mismatch, or true after matching the trailing nul bytes.
54 */
55 for (;;) {
56 unsigned char c = *str++;
57 unsigned char d = *pat++;
58
59 switch (d) {
60 case '?': /* Wildcard: anything but nul */
61 if (c == '\0')
62 return false;
63 break;
64 case '*': /* Any-length wildcard */
65 if (*pat == '\0') /* Optimize trailing * case */
66 return true;
67 back_pat = pat;
68 back_str = --str; /* Allow zero-length match */
69 break;
70 case '[': { /* Character class */
71 bool match = false, inverted = (*pat == '!');
72 char const *class = pat + inverted;
73 unsigned char a = *class++;
74
75 /*
76 * Iterate over each span in the character class.
77 * A span is either a single character a, or a
78 * range a-b. The first span may begin with ']'.
79 */
80 do {
81 unsigned char b = a;
82
83 if (a == '\0') /* Malformed */
84 goto literal;
85
86 if (class[0] == '-' && class[1] != ']') {
87 b = class[1];
88
89 if (b == '\0')
90 goto literal;
91
92 class += 2;
93 /* Any special action if a > b? */
94 }
95 match |= (a <= c && c <= b);
96 } while ((a = *class++) != ']');
97
98 if (match == inverted)
99 goto backtrack;
100 pat = class;
101 }
102 break;
103 case '\\':
104 d = *pat++;
105 /*FALLTHROUGH*/
106 default: /* Literal character */
107literal:
108 if (c == d) {
109 if (d == '\0')
110 return true;
111 break;
112 }
113backtrack:
114 if (c == '\0' || !back_pat)
115 return false; /* No point continuing */
116 /* Try again from last *, one character later in str. */
117 pat = back_pat;
118 str = ++back_str;
119 break;
120 }
121 }
122}
123EXPORT_SYMBOL(glob_match);
124
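A hypothetical caller (not part of this patch), in the spirit of the drive-blacklist use case mentioned in the comment above:

    #include <linux/glob.h>

    static bool model_is_blacklisted(const char *model)
    {
    	/* the pattern must match the entire model string */
    	return glob_match("WDC WD??00JB-*", model) ||
    	       glob_match("OCZ-VERTEX*", model);
    }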
125
126#ifdef CONFIG_GLOB_SELFTEST
127
128#include <linux/printk.h>
129#include <linux/moduleparam.h>
130
131/* Boot with "glob.verbose=1" to show successful tests, too */
132static bool verbose = false;
133module_param(verbose, bool, 0);
134
135struct glob_test {
136 char const *pat, *str;
137 bool expected;
138};
139
140static bool __pure __init test(char const *pat, char const *str, bool expected)
141{
142 bool match = glob_match(pat, str);
143 bool success = match == expected;
144
145 /* Can't get string literals into a particular section, so... */
146 static char const msg_error[] __initconst =
147 KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
148 static char const msg_ok[] __initconst =
149 KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
150 static char const mismatch[] __initconst = "mismatch";
151 char const *message;
152
153 if (!success)
154 message = msg_error;
155 else if (verbose)
156 message = msg_ok;
157 else
158 return success;
159
160 printk(message, pat, str, mismatch + 3*match);
161 return success;
162}
163
164/*
165 * The tests are all jammed together in one array to make it simpler
166 * to place that array in the .init.rodata section. The obvious
167 * "array of structures containing char *" has no way to force the
168 * pointed-to strings to be in a particular section.
169 *
170 * Anyway, a test consists of:
171 * 1. Expected glob_match result: '1' or '0'.
172 * 2. Pattern to match: null-terminated string
173 * 3. String to match against: null-terminated string
174 *
175 * The list of tests is terminated with a final '\0' instead of
176 * a glob_match result character.
177 */
178static char const glob_tests[] __initconst =
179 /* Some basic tests */
180 "1" "a\0" "a\0"
181 "0" "a\0" "b\0"
182 "0" "a\0" "aa\0"
183 "0" "a\0" "\0"
184 "1" "\0" "\0"
185 "0" "\0" "a\0"
186 /* Simple character class tests */
187 "1" "[a]\0" "a\0"
188 "0" "[a]\0" "b\0"
189 "0" "[!a]\0" "a\0"
190 "1" "[!a]\0" "b\0"
191 "1" "[ab]\0" "a\0"
192 "1" "[ab]\0" "b\0"
193 "0" "[ab]\0" "c\0"
194 "1" "[!ab]\0" "c\0"
195 "1" "[a-c]\0" "b\0"
196 "0" "[a-c]\0" "d\0"
197 /* Corner cases in character class parsing */
198 "1" "[a-c-e-g]\0" "-\0"
199 "0" "[a-c-e-g]\0" "d\0"
200 "1" "[a-c-e-g]\0" "f\0"
201 "1" "[]a-ceg-ik[]\0" "a\0"
202 "1" "[]a-ceg-ik[]\0" "]\0"
203 "1" "[]a-ceg-ik[]\0" "[\0"
204 "1" "[]a-ceg-ik[]\0" "h\0"
205 "0" "[]a-ceg-ik[]\0" "f\0"
206 "0" "[!]a-ceg-ik[]\0" "h\0"
207 "0" "[!]a-ceg-ik[]\0" "]\0"
208 "1" "[!]a-ceg-ik[]\0" "f\0"
209 /* Simple wild cards */
210 "1" "?\0" "a\0"
211 "0" "?\0" "aa\0"
212 "0" "??\0" "a\0"
213 "1" "?x?\0" "axb\0"
214 "0" "?x?\0" "abx\0"
215 "0" "?x?\0" "xab\0"
216 /* Asterisk wild cards (backtracking) */
217 "0" "*??\0" "a\0"
218 "1" "*??\0" "ab\0"
219 "1" "*??\0" "abc\0"
220 "1" "*??\0" "abcd\0"
221 "0" "??*\0" "a\0"
222 "1" "??*\0" "ab\0"
223 "1" "??*\0" "abc\0"
224 "1" "??*\0" "abcd\0"
225 "0" "?*?\0" "a\0"
226 "1" "?*?\0" "ab\0"
227 "1" "?*?\0" "abc\0"
228 "1" "?*?\0" "abcd\0"
229 "1" "*b\0" "b\0"
230 "1" "*b\0" "ab\0"
231 "0" "*b\0" "ba\0"
232 "1" "*b\0" "bb\0"
233 "1" "*b\0" "abb\0"
234 "1" "*b\0" "bab\0"
235 "1" "*bc\0" "abbc\0"
236 "1" "*bc\0" "bc\0"
237 "1" "*bc\0" "bbc\0"
238 "1" "*bc\0" "bcbc\0"
239 /* Multiple asterisks (complex backtracking) */
240 "1" "*ac*\0" "abacadaeafag\0"
241 "1" "*ac*ae*ag*\0" "abacadaeafag\0"
242 "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
243 "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
244 "1" "*abcd*\0" "abcabcabcabcdefg\0"
245 "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
246 "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
247 "0" "*abcd*\0" "abcabcabcabcefg\0"
248 "0" "*ab*cd*\0" "abcabcabcabcefg\0";
249
250static int __init glob_init(void)
251{
252 unsigned successes = 0;
253 unsigned n = 0;
254 char const *p = glob_tests;
255 static char const message[] __initconst =
256 KERN_INFO "glob: %u self-tests passed, %u failed\n";
257
258 /*
259 * Tests are jammed together in a string. The first byte is '1'
260 * or '0' to indicate the expected outcome, or '\0' to indicate the
261 * end of the tests. Then come two null-terminated strings: the
262 * pattern and the string to match it against.
263 */
264 while (*p) {
265 bool expected = *p++ & 1;
266 char const *pat = p;
267
268 p += strlen(p) + 1;
269 successes += test(pat, p, expected);
270 p += strlen(p) + 1;
271 n++;
272 }
273
274 n -= successes;
275 printk(message, successes, n);
276
277 /* What's the errno for "kernel bug detected"? Guess... */
278 return n ? -ECANCELED : 0;
279}
280
281/* We need a dummy exit function to allow unload */
282static void __exit glob_fini(void) { }
283
284module_init(glob_init);
285module_exit(glob_fini);
286
287#endif /* CONFIG_GLOB_SELFTEST */
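
For illustration only: a caller of the glob_match() routine added above would look roughly like the sketch below. The helper name and the patterns are invented; only the glob_match() signature comes from this patch, and the <linux/glob.h> header is assumed.

#include <linux/glob.h>

/* Hypothetical example: match an ATA model string against simple globs.
 * '?' matches any single character, '*' matches any run of characters
 * (including none), and [a-z] / [!a-z] are character classes as described
 * in the header comment above. */
static bool model_matches_quirk(const char *model)
{
	return glob_match("WDC WD??00JD-*", model) ||
	       glob_match("ST3[0-9]*AS", model);
}
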
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
140EXPORT_SYMBOL_GPL(klist_add_tail); 140EXPORT_SYMBOL_GPL(klist_add_tail);
141 141
142/** 142/**
143 * klist_add_after - Init a klist_node and add it after an existing node 143 * klist_add_behind - Init a klist_node and add it after an existing node
144 * @n: node we're adding. 144 * @n: node we're adding.
145 * @pos: node to put @n after 145 * @pos: node to put @n after
146 */ 146 */
147void klist_add_after(struct klist_node *n, struct klist_node *pos) 147void klist_add_behind(struct klist_node *n, struct klist_node *pos)
148{ 148{
149 struct klist *k = knode_klist(pos); 149 struct klist *k = knode_klist(pos);
150 150
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
153 list_add(&n->n_node, &pos->n_node); 153 list_add(&n->n_node, &pos->n_node);
154 spin_unlock(&k->k_lock); 154 spin_unlock(&k->k_lock);
155} 155}
156EXPORT_SYMBOL_GPL(klist_add_after); 156EXPORT_SYMBOL_GPL(klist_add_behind);
157 157
158/** 158/**
159 * klist_add_before - Init a klist_node and add it before an existing node 159 * klist_add_before - Init a klist_node and add it before an existing node
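
The klist change above is purely a rename of klist_add_after() to klist_add_behind(); the calling convention is unchanged. A minimal caller, sketched here with made-up nodes and assuming the klist was set up with klist_init():

#include <linux/klist.h>

static struct klist k;			/* assumed: klist_init(&k, NULL, NULL) ran earlier */
static struct klist_node a, b;

static void example(void)
{
	klist_add_head(&a, &k);		/* a becomes the first node */
	klist_add_behind(&b, &a);	/* insert b immediately after a */
}
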
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
1
2#define pr_fmt(fmt) "list_sort_test: " fmt
3
1#include <linux/kernel.h> 4#include <linux/kernel.h>
2#include <linux/module.h> 5#include <linux/module.h>
3#include <linux/list_sort.h> 6#include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
47 struct list_head *a, struct list_head *b) 50 struct list_head *a, struct list_head *b)
48{ 51{
49 struct list_head *tail = head; 52 struct list_head *tail = head;
53 u8 count = 0;
50 54
51 while (a && b) { 55 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */ 56 /* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
70 * element comparison is needed, so the client's cmp() 74 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically. 75 * routine can invoke cond_resched() periodically.
72 */ 76 */
73 (*cmp)(priv, tail->next, tail->next); 77 if (unlikely(!(++count)))
78 (*cmp)(priv, tail->next, tail->next);
74 79
75 tail->next->prev = tail; 80 tail->next->prev = tail;
76 tail = tail->next; 81 tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
123 } 128 }
124 if (lev > max_lev) { 129 if (lev > max_lev) {
125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) { 130 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
126 printk_once(KERN_DEBUG "list passed to" 131 printk_once(KERN_DEBUG "list too long for efficiency\n");
127 " list_sort() too long for"
128 " efficiency\n");
129 lev--; 132 lev--;
130 } 133 }
131 max_lev = lev; 134 max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
168static int __init check(struct debug_el *ela, struct debug_el *elb) 171static int __init check(struct debug_el *ela, struct debug_el *elb)
169{ 172{
170 if (ela->serial >= TEST_LIST_LEN) { 173 if (ela->serial >= TEST_LIST_LEN) {
171 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 174 pr_err("error: incorrect serial %d\n", ela->serial);
172 ela->serial);
173 return -EINVAL; 175 return -EINVAL;
174 } 176 }
175 if (elb->serial >= TEST_LIST_LEN) { 177 if (elb->serial >= TEST_LIST_LEN) {
176 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 178 pr_err("error: incorrect serial %d\n", elb->serial);
177 elb->serial);
178 return -EINVAL; 179 return -EINVAL;
179 } 180 }
180 if (elts[ela->serial] != ela || elts[elb->serial] != elb) { 181 if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
181 printk(KERN_ERR "list_sort_test: error: phantom element\n"); 182 pr_err("error: phantom element\n");
182 return -EINVAL; 183 return -EINVAL;
183 } 184 }
184 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { 185 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
185 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 186 pr_err("error: bad poison: %#x/%#x\n",
186 ela->poison1, ela->poison2); 187 ela->poison1, ela->poison2);
187 return -EINVAL; 188 return -EINVAL;
188 } 189 }
189 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { 190 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
190 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 191 pr_err("error: bad poison: %#x/%#x\n",
191 elb->poison1, elb->poison2); 192 elb->poison1, elb->poison2);
192 return -EINVAL; 193 return -EINVAL;
193 } 194 }
194 return 0; 195 return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
207 208
208static int __init list_sort_test(void) 209static int __init list_sort_test(void)
209{ 210{
210 int i, count = 1, err = -EINVAL; 211 int i, count = 1, err = -ENOMEM;
211 struct debug_el *el; 212 struct debug_el *el;
212 struct list_head *cur, *tmp; 213 struct list_head *cur;
213 LIST_HEAD(head); 214 LIST_HEAD(head);
214 215
215 printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); 216 pr_debug("start testing list_sort()\n");
216 217
217 elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); 218 elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
218 if (!elts) { 219 if (!elts) {
219 printk(KERN_ERR "list_sort_test: error: cannot allocate " 220 pr_err("error: cannot allocate memory\n");
220 "memory\n"); 221 return err;
221 goto exit;
222 } 222 }
223 223
224 for (i = 0; i < TEST_LIST_LEN; i++) { 224 for (i = 0; i < TEST_LIST_LEN; i++) {
225 el = kmalloc(sizeof(*el), GFP_KERNEL); 225 el = kmalloc(sizeof(*el), GFP_KERNEL);
226 if (!el) { 226 if (!el) {
227 printk(KERN_ERR "list_sort_test: error: cannot " 227 pr_err("error: cannot allocate memory\n");
228 "allocate memory\n");
229 goto exit; 228 goto exit;
230 } 229 }
231 /* force some equivalencies */ 230 /* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
239 238
240 list_sort(NULL, &head, cmp); 239 list_sort(NULL, &head, cmp);
241 240
241 err = -EINVAL;
242 for (cur = head.next; cur->next != &head; cur = cur->next) { 242 for (cur = head.next; cur->next != &head; cur = cur->next) {
243 struct debug_el *el1; 243 struct debug_el *el1;
244 int cmp_result; 244 int cmp_result;
245 245
246 if (cur->next->prev != cur) { 246 if (cur->next->prev != cur) {
247 printk(KERN_ERR "list_sort_test: error: list is " 247 pr_err("error: list is corrupted\n");
248 "corrupted\n");
249 goto exit; 248 goto exit;
250 } 249 }
251 250
252 cmp_result = cmp(NULL, cur, cur->next); 251 cmp_result = cmp(NULL, cur, cur->next);
253 if (cmp_result > 0) { 252 if (cmp_result > 0) {
254 printk(KERN_ERR "list_sort_test: error: list is not " 253 pr_err("error: list is not sorted\n");
255 "sorted\n");
256 goto exit; 254 goto exit;
257 } 255 }
258 256
259 el = container_of(cur, struct debug_el, list); 257 el = container_of(cur, struct debug_el, list);
260 el1 = container_of(cur->next, struct debug_el, list); 258 el1 = container_of(cur->next, struct debug_el, list);
261 if (cmp_result == 0 && el->serial >= el1->serial) { 259 if (cmp_result == 0 && el->serial >= el1->serial) {
262 printk(KERN_ERR "list_sort_test: error: order of " 260 pr_err("error: order of equivalent elements not "
263 "equivalent elements not preserved\n"); 261 "preserved\n");
264 goto exit; 262 goto exit;
265 } 263 }
266 264
267 if (check(el, el1)) { 265 if (check(el, el1)) {
268 printk(KERN_ERR "list_sort_test: error: element check " 266 pr_err("error: element check failed\n");
269 "failed\n");
270 goto exit; 267 goto exit;
271 } 268 }
272 count++; 269 count++;
273 } 270 }
271 if (head.prev != cur) {
272 pr_err("error: list is corrupted\n");
273 goto exit;
274 }
275
274 276
275 if (count != TEST_LIST_LEN) { 277 if (count != TEST_LIST_LEN) {
276 printk(KERN_ERR "list_sort_test: error: bad list length %d", 278 pr_err("error: bad list length %d", count);
277 count);
278 goto exit; 279 goto exit;
279 } 280 }
280 281
281 err = 0; 282 err = 0;
282exit: 283exit:
284 for (i = 0; i < TEST_LIST_LEN; i++)
285 kfree(elts[i]);
283 kfree(elts); 286 kfree(elts);
284 list_for_each_safe(cur, tmp, &head) {
285 list_del(cur);
286 kfree(container_of(cur, struct debug_el, list));
287 }
288 return err; 287 return err;
289} 288}
290module_init(list_sort_test); 289module_init(list_sort_test);
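
In the list_sort() hunk above, the new u8 counter rate-limits the debug-only cmp() invocation on the back-link pass to once every 256 nodes (the counter wraps to zero), while still letting the client's cmp() run cond_resched() on long lists. A sketch of such a client, with struct foo and foo_cmp() invented for the example:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_sort.h>
#include <linux/sched.h>

struct foo {
	struct list_head list;
	int key;
};

static int foo_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct foo *fa = container_of(a, struct foo, list);
	struct foo *fb = container_of(b, struct foo, list);

	cond_resched();			/* the point of the periodic dummy cmp() call */
	return fa->key - fb->key;	/* <0, 0, >0 as usual for a cmp callback */
}

/* ...  list_sort(NULL, &my_list, foo_cmp);  ... */
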
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
25int string_get_size(u64 size, const enum string_size_units units, 25int string_get_size(u64 size, const enum string_size_units units,
26 char *buf, int len) 26 char *buf, int len)
27{ 27{
28 static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", 28 static const char *const units_10[] = {
29 "EB", "ZB", "YB", NULL}; 29 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
30 static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", 30 };
31 "EiB", "ZiB", "YiB", NULL }; 31 static const char *const units_2[] = {
32 static const char **units_str[] = { 32 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
33 [STRING_UNITS_10] = units_10, 33 NULL
34 };
35 static const char *const *const units_str[] = {
36 [STRING_UNITS_10] = units_10,
34 [STRING_UNITS_2] = units_2, 37 [STRING_UNITS_2] = units_2,
35 }; 38 };
36 static const unsigned int divisor[] = { 39 static const unsigned int divisor[] = {
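
The string_helpers hunk above only constifies the unit tables; the string_get_size() interface is unchanged. For reference, a sketch of how it is called (the wrapper name is invented for the example):

#include <linux/types.h>
#include <linux/string_helpers.h>

static void show_size(u64 bytes)
{
	char buf[16];

	string_get_size(bytes, STRING_UNITS_10, buf, sizeof(buf));	/* decimal: B, kB, MB, ... */
	string_get_size(bytes, STRING_UNITS_2, buf, sizeof(buf));	/* binary: B, KiB, MiB, ... */
}
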
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4 4
5#define for_each_test(i, test) \ 5#define for_each_test(i, test) \
6 for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) 6 for (i = 0; i < ARRAY_SIZE(test); i++)
7 7
8struct test_fail { 8struct test_fail {
9 const char *str; 9 const char *str;
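
The test-kstrtox hunk above swaps the open-coded sizeof division for ARRAY_SIZE(), which additionally refuses to compile if its argument is a pointer rather than an array. A trivial sketch of the same idiom:

#include <linux/kernel.h>	/* ARRAY_SIZE() */

static const int vals[] = { 1, 2, 3 };

static void walk_vals(void)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(vals); i++)	/* iterates exactly 3 times */
		(void)vals[i];
}
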
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -250,52 +250,6 @@ int zlib_deflateInit2(
250} 250}
251 251
252/* ========================================================================= */ 252/* ========================================================================= */
253#if 0
254int zlib_deflateSetDictionary(
255 z_streamp strm,
256 const Byte *dictionary,
257 uInt dictLength
258)
259{
260 deflate_state *s;
261 uInt length = dictLength;
262 uInt n;
263 IPos hash_head = 0;
264
265 if (strm == NULL || strm->state == NULL || dictionary == NULL)
266 return Z_STREAM_ERROR;
267
268 s = (deflate_state *) strm->state;
269 if (s->status != INIT_STATE) return Z_STREAM_ERROR;
270
271 strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
272
273 if (length < MIN_MATCH) return Z_OK;
274 if (length > MAX_DIST(s)) {
275 length = MAX_DIST(s);
276#ifndef USE_DICT_HEAD
277 dictionary += dictLength - length; /* use the tail of the dictionary */
278#endif
279 }
280 memcpy((char *)s->window, dictionary, length);
281 s->strstart = length;
282 s->block_start = (long)length;
283
284 /* Insert all strings in the hash table (except for the last two bytes).
285 * s->lookahead stays null, so s->ins_h will be recomputed at the next
286 * call of fill_window.
287 */
288 s->ins_h = s->window[0];
289 UPDATE_HASH(s, s->ins_h, s->window[1]);
290 for (n = 0; n <= length - MIN_MATCH; n++) {
291 INSERT_STRING(s, n, hash_head);
292 }
293 if (hash_head) hash_head = 0; /* to make compiler happy */
294 return Z_OK;
295}
296#endif /* 0 */
297
298/* ========================================================================= */
299int zlib_deflateReset( 253int zlib_deflateReset(
300 z_streamp strm 254 z_streamp strm
301) 255)
@@ -326,45 +280,6 @@ int zlib_deflateReset(
326 return Z_OK; 280 return Z_OK;
327} 281}
328 282
329/* ========================================================================= */
330#if 0
331int zlib_deflateParams(
332 z_streamp strm,
333 int level,
334 int strategy
335)
336{
337 deflate_state *s;
338 compress_func func;
339 int err = Z_OK;
340
341 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
342 s = (deflate_state *) strm->state;
343
344 if (level == Z_DEFAULT_COMPRESSION) {
345 level = 6;
346 }
347 if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
348 return Z_STREAM_ERROR;
349 }
350 func = configuration_table[s->level].func;
351
352 if (func != configuration_table[level].func && strm->total_in != 0) {
353 /* Flush the last buffer: */
354 err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
355 }
356 if (s->level != level) {
357 s->level = level;
358 s->max_lazy_match = configuration_table[level].max_lazy;
359 s->good_match = configuration_table[level].good_length;
360 s->nice_match = configuration_table[level].nice_length;
361 s->max_chain_length = configuration_table[level].max_chain;
362 }
363 s->strategy = strategy;
364 return err;
365}
366#endif /* 0 */
367
368/* ========================================================================= 283/* =========================================================================
369 * Put a short in the pending buffer. The 16-bit value is put in MSB order. 284 * Put a short in the pending buffer. The 16-bit value is put in MSB order.
370 * IN assertion: the stream state is correct and there is enough room in 285 * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
568 return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; 483 return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
569} 484}
570 485
571/* =========================================================================
572 * Copy the source state to the destination state.
573 */
574#if 0
575int zlib_deflateCopy (
576 z_streamp dest,
577 z_streamp source
578)
579{
580#ifdef MAXSEG_64K
581 return Z_STREAM_ERROR;
582#else
583 deflate_state *ds;
584 deflate_state *ss;
585 ush *overlay;
586 deflate_workspace *mem;
587
588
589 if (source == NULL || dest == NULL || source->state == NULL) {
590 return Z_STREAM_ERROR;
591 }
592
593 ss = (deflate_state *) source->state;
594
595 *dest = *source;
596
597 mem = (deflate_workspace *) dest->workspace;
598
599 ds = &(mem->deflate_memory);
600
601 dest->state = (struct internal_state *) ds;
602 *ds = *ss;
603 ds->strm = dest;
604
605 ds->window = (Byte *) mem->window_memory;
606 ds->prev = (Pos *) mem->prev_memory;
607 ds->head = (Pos *) mem->head_memory;
608 overlay = (ush *) mem->overlay_memory;
609 ds->pending_buf = (uch *) overlay;
610
611 memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
612 memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
613 memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
614 memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
615
616 ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
617 ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
618 ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
619
620 ds->l_desc.dyn_tree = ds->dyn_ltree;
621 ds->d_desc.dyn_tree = ds->dyn_dtree;
622 ds->bl_desc.dyn_tree = ds->bl_tree;
623
624 return Z_OK;
625#endif
626}
627#endif /* 0 */
628
629/* =========================================================================== 486/* ===========================================================================
630 * Read a new buffer from the current input stream, update the adler32 487 * Read a new buffer from the current input stream, update the adler32
631 * and total number of bytes read. All deflate() input goes through 488 * and total number of bytes read. All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
45 return Z_OK; 45 return Z_OK;
46} 46}
47 47
48#if 0
49int zlib_inflatePrime(z_streamp strm, int bits, int value)
50{
51 struct inflate_state *state;
52
53 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
54 state = (struct inflate_state *)strm->state;
55 if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
56 value &= (1L << bits) - 1;
57 state->hold += value << state->bits;
58 state->bits += bits;
59 return Z_OK;
60}
61#endif
62
63int zlib_inflateInit2(z_streamp strm, int windowBits) 48int zlib_inflateInit2(z_streamp strm, int windowBits)
64{ 49{
65 struct inflate_state *state; 50 struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
761 return Z_OK; 746 return Z_OK;
762} 747}
763 748
764#if 0
765int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
766 uInt dictLength)
767{
768 struct inflate_state *state;
769 unsigned long id;
770
771 /* check state */
772 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
773 state = (struct inflate_state *)strm->state;
774 if (state->wrap != 0 && state->mode != DICT)
775 return Z_STREAM_ERROR;
776
777 /* check for correct dictionary id */
778 if (state->mode == DICT) {
779 id = zlib_adler32(0L, NULL, 0);
780 id = zlib_adler32(id, dictionary, dictLength);
781 if (id != state->check)
782 return Z_DATA_ERROR;
783 }
784
785 /* copy dictionary to window */
786 zlib_updatewindow(strm, strm->avail_out);
787
788 if (dictLength > state->wsize) {
789 memcpy(state->window, dictionary + dictLength - state->wsize,
790 state->wsize);
791 state->whave = state->wsize;
792 }
793 else {
794 memcpy(state->window + state->wsize - dictLength, dictionary,
795 dictLength);
796 state->whave = dictLength;
797 }
798 state->havedict = 1;
799 return Z_OK;
800}
801#endif
802
803#if 0
804/*
805 Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
806 or when out of input. When called, *have is the number of pattern bytes
807 found in order so far, in 0..3. On return *have is updated to the new
808 state. If on return *have equals four, then the pattern was found and the
809 return value is how many bytes were read including the last byte of the
810 pattern. If *have is less than four, then the pattern has not been found
811 yet and the return value is len. In the latter case, zlib_syncsearch() can be
812 called again with more data and the *have state. *have is initialized to
813 zero for the first call.
814 */
815static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
816 unsigned len)
817{
818 unsigned got;
819 unsigned next;
820
821 got = *have;
822 next = 0;
823 while (next < len && got < 4) {
824 if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
825 got++;
826 else if (buf[next])
827 got = 0;
828 else
829 got = 4 - got;
830 next++;
831 }
832 *have = got;
833 return next;
834}
835#endif
836
837#if 0
838int zlib_inflateSync(z_streamp strm)
839{
840 unsigned len; /* number of bytes to look at or looked at */
841 unsigned long in, out; /* temporary to save total_in and total_out */
842 unsigned char buf[4]; /* to restore bit buffer to byte string */
843 struct inflate_state *state;
844
845 /* check parameters */
846 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
847 state = (struct inflate_state *)strm->state;
848 if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
849
850 /* if first time, start search in bit buffer */
851 if (state->mode != SYNC) {
852 state->mode = SYNC;
853 state->hold <<= state->bits & 7;
854 state->bits -= state->bits & 7;
855 len = 0;
856 while (state->bits >= 8) {
857 buf[len++] = (unsigned char)(state->hold);
858 state->hold >>= 8;
859 state->bits -= 8;
860 }
861 state->have = 0;
862 zlib_syncsearch(&(state->have), buf, len);
863 }
864
865 /* search available input */
866 len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
867 strm->avail_in -= len;
868 strm->next_in += len;
869 strm->total_in += len;
870
871 /* return no joy or set up to restart inflate() on a new block */
872 if (state->have != 4) return Z_DATA_ERROR;
873 in = strm->total_in; out = strm->total_out;
874 zlib_inflateReset(strm);
875 strm->total_in = in; strm->total_out = out;
876 state->mode = TYPE;
877 return Z_OK;
878}
879#endif
880
881/* 749/*
882 * This subroutine adds the data at next_in/avail_in to the output history 750 * This subroutine adds the data at next_in/avail_in to the output history
883 * without performing any output. The output buffer must be "caught up"; 751 * without performing any output. The output buffer must be "caught up";
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..886db2158538 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,21 +508,34 @@ config CMA_DEBUG
508 processing calls such as dma_alloc_from_contiguous(). 508 processing calls such as dma_alloc_from_contiguous().
509 This option does not affect warning and error messages. 509 This option does not affect warning and error messages.
510 510
511config ZBUD 511config CMA_AREAS
512 tristate 512 int "Maximum count of the CMA areas"
513 default n 513 depends on CMA
514 default 7
514 help 515 help
515 A special purpose allocator for storing compressed pages. 516 CMA allows creating CMA areas for a particular purpose, mainly
516 It is designed to store up to two compressed pages per physical 517 used as a device private area. This parameter sets the maximum
517 page. While this design limits storage density, it has simple and 518 number of CMA areas in the system.
518 deterministic reclaim properties that make it preferable to a higher 519
519 density approach when reclaim will be used. 520 If unsure, leave the default value "7".
521
522config MEM_SOFT_DIRTY
523 bool "Track memory changes"
524 depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
525 select PROC_PAGE_MONITOR
526 help
527 This option enables memory changes tracking by introducing a
528 soft-dirty bit on pte-s. This bit is set when someone writes
529 into a page, just like the regular dirty bit, but unlike the latter
530 it can be cleared by hand.
531
532 See Documentation/vm/soft-dirty.txt for more details.
520 533
521config ZSWAP 534config ZSWAP
522 bool "Compressed cache for swap pages (EXPERIMENTAL)" 535 bool "Compressed cache for swap pages (EXPERIMENTAL)"
523 depends on FRONTSWAP && CRYPTO=y 536 depends on FRONTSWAP && CRYPTO=y
524 select CRYPTO_LZO 537 select CRYPTO_LZO
525 select ZBUD 538 select ZPOOL
526 default n 539 default n
527 help 540 help
528 A lightweight compressed cache for swap pages. It takes 541 A lightweight compressed cache for swap pages. It takes
@@ -538,17 +551,22 @@ config ZSWAP
538 they have not be fully explored on the large set of potential 551 they have not be fully explored on the large set of potential
539 configurations and workloads that exist. 552 configurations and workloads that exist.
540 553
541config MEM_SOFT_DIRTY 554config ZPOOL
542 bool "Track memory changes" 555 tristate "Common API for compressed memory storage"
543 depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS 556 default n
544 select PROC_PAGE_MONITOR
545 help 557 help
546 This option enables memory changes tracking by introducing a 558 Compressed memory storage API. This allows using either zbud or
547 soft-dirty bit on pte-s. This bit it set when someone writes 559 zsmalloc.
548 into a page just as regular dirty bit, but unlike the latter
549 it can be cleared by hands.
550 560
551 See Documentation/vm/soft-dirty.txt for more details. 561config ZBUD
562 tristate "Low density storage for compressed pages"
563 default n
564 help
565 A special purpose allocator for storing compressed pages.
566 It is designed to store up to two compressed pages per physical
567 page. While this design limits storage density, it has simple and
568 deterministic reclaim properties that make it preferable to a higher
569 density approach when reclaim will be used.
552 570
553config ZSMALLOC 571config ZSMALLOC
554 tristate "Memory allocator for compressed pages" 572 tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..632ae77e6070 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
59obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 59obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
60obj-$(CONFIG_CLEANCACHE) += cleancache.o 60obj-$(CONFIG_CLEANCACHE) += cleancache.o
61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
62obj-$(CONFIG_ZPOOL) += zpool.o
62obj-$(CONFIG_ZBUD) += zbud.o 63obj-$(CONFIG_ZBUD) += zbud.o
63obj-$(CONFIG_ZSMALLOC) += zsmalloc.o 64obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
64obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o 65obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
66obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..c17751c0dcaf
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
1/*
2 * Contiguous Memory Allocator
3 *
4 * Copyright (c) 2010-2011 by Samsung Electronics.
5 * Copyright IBM Corporation, 2013
6 * Copyright LG Electronics Inc., 2014
7 * Written by:
8 * Marek Szyprowski <m.szyprowski@samsung.com>
9 * Michal Nazarewicz <mina86@mina86.com>
10 * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
11 * Joonsoo Kim <iamjoonsoo.kim@lge.com>
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License as
15 * published by the Free Software Foundation; either version 2 of the
16 * License or (at your option) any later version of the license.
17 */
18
19#define pr_fmt(fmt) "cma: " fmt
20
21#ifdef CONFIG_CMA_DEBUG
22#ifndef DEBUG
23# define DEBUG
24#endif
25#endif
26
27#include <linux/memblock.h>
28#include <linux/err.h>
29#include <linux/mm.h>
30#include <linux/mutex.h>
31#include <linux/sizes.h>
32#include <linux/slab.h>
33#include <linux/log2.h>
34#include <linux/cma.h>
35
36struct cma {
37 unsigned long base_pfn;
38 unsigned long count;
39 unsigned long *bitmap;
40 unsigned int order_per_bit; /* Order of pages represented by one bit */
41 struct mutex lock;
42};
43
44static struct cma cma_areas[MAX_CMA_AREAS];
45static unsigned cma_area_count;
46static DEFINE_MUTEX(cma_mutex);
47
48phys_addr_t cma_get_base(struct cma *cma)
49{
50 return PFN_PHYS(cma->base_pfn);
51}
52
53unsigned long cma_get_size(struct cma *cma)
54{
55 return cma->count << PAGE_SHIFT;
56}
57
58static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
59{
60 return (1UL << (align_order >> cma->order_per_bit)) - 1;
61}
62
63static unsigned long cma_bitmap_maxno(struct cma *cma)
64{
65 return cma->count >> cma->order_per_bit;
66}
67
68static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
69 unsigned long pages)
70{
71 return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
72}
73
74static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
75{
76 unsigned long bitmap_no, bitmap_count;
77
78 bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
79 bitmap_count = cma_bitmap_pages_to_bits(cma, count);
80
81 mutex_lock(&cma->lock);
82 bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
83 mutex_unlock(&cma->lock);
84}
85
86static int __init cma_activate_area(struct cma *cma)
87{
88 int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
89 unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
90 unsigned i = cma->count >> pageblock_order;
91 struct zone *zone;
92
93 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
94
95 if (!cma->bitmap)
96 return -ENOMEM;
97
98 WARN_ON_ONCE(!pfn_valid(pfn));
99 zone = page_zone(pfn_to_page(pfn));
100
101 do {
102 unsigned j;
103
104 base_pfn = pfn;
105 for (j = pageblock_nr_pages; j; --j, pfn++) {
106 WARN_ON_ONCE(!pfn_valid(pfn));
107 /*
108 * alloc_contig_range requires the pfn range
109 * specified to be in the same zone. Make this
110 * simple by forcing the entire CMA resv range
111 * to be in the same zone.
112 */
113 if (page_zone(pfn_to_page(pfn)) != zone)
114 goto err;
115 }
116 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
117 } while (--i);
118
119 mutex_init(&cma->lock);
120 return 0;
121
122err:
123 kfree(cma->bitmap);
124 return -EINVAL;
125}
126
127static int __init cma_init_reserved_areas(void)
128{
129 int i;
130
131 for (i = 0; i < cma_area_count; i++) {
132 int ret = cma_activate_area(&cma_areas[i]);
133
134 if (ret)
135 return ret;
136 }
137
138 return 0;
139}
140core_initcall(cma_init_reserved_areas);
141
142/**
143 * cma_declare_contiguous() - reserve custom contiguous area
144 * @base: Base address of the reserved area optional, use 0 for any
145 * @size: Size of the reserved area (in bytes),
146 * @limit: End address of the reserved memory (optional, 0 for any).
147 * @alignment: Alignment for the CMA area, should be power of 2 or zero
148 * @order_per_bit: Order of pages represented by one bit on bitmap.
149 * @fixed: hint about where to place the reserved area
150 * @res_cma: Pointer to store the created cma region.
151 *
152 * This function reserves memory from early allocator. It should be
153 * called by arch specific code once the early allocator (memblock or bootmem)
154 * has been activated and all other subsystems have already allocated/reserved
155 * memory. This function allows the creation of custom reserved areas.
156 *
157 * If @fixed is true, reserve contiguous area at exactly @base. If false,
158 * reserve in range from @base to @limit.
159 */
160int __init cma_declare_contiguous(phys_addr_t base,
161 phys_addr_t size, phys_addr_t limit,
162 phys_addr_t alignment, unsigned int order_per_bit,
163 bool fixed, struct cma **res_cma)
164{
165 struct cma *cma;
166 int ret = 0;
167
168 pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
169 __func__, (unsigned long)size, (unsigned long)base,
170 (unsigned long)limit, (unsigned long)alignment);
171
172 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
173 pr_err("Not enough slots for CMA reserved regions!\n");
174 return -ENOSPC;
175 }
176
177 if (!size)
178 return -EINVAL;
179
180 if (alignment && !is_power_of_2(alignment))
181 return -EINVAL;
182
183 /*
184 * Sanitise input arguments.
185 * Pages at both ends of the CMA area could be merged into adjacent
186 * unmovable migratetype pages by the page allocator's buddy algorithm.
187 * In that case a contiguous allocation could fail, which is not what we want.
188 */
189 alignment = max(alignment,
190 (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
191 base = ALIGN(base, alignment);
192 size = ALIGN(size, alignment);
193 limit &= ~(alignment - 1);
194
195 /* size should be aligned with order_per_bit */
196 if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
197 return -EINVAL;
198
199 /* Reserve memory */
200 if (base && fixed) {
201 if (memblock_is_region_reserved(base, size) ||
202 memblock_reserve(base, size) < 0) {
203 ret = -EBUSY;
204 goto err;
205 }
206 } else {
207 phys_addr_t addr = memblock_alloc_range(size, alignment, base,
208 limit);
209 if (!addr) {
210 ret = -ENOMEM;
211 goto err;
212 } else {
213 base = addr;
214 }
215 }
216
217 /*
218 * Each reserved area must be initialised later, when more kernel
219 * subsystems (like slab allocator) are available.
220 */
221 cma = &cma_areas[cma_area_count];
222 cma->base_pfn = PFN_DOWN(base);
223 cma->count = size >> PAGE_SHIFT;
224 cma->order_per_bit = order_per_bit;
225 *res_cma = cma;
226 cma_area_count++;
227
228 pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
229 (unsigned long)base);
230 return 0;
231
232err:
233 pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
234 return ret;
235}
236
237/**
238 * cma_alloc() - allocate pages from contiguous area
239 * @cma: Contiguous memory region for which the allocation is performed.
240 * @count: Requested number of pages.
241 * @align: Requested alignment of pages (in PAGE_SIZE order).
242 *
243 * This function allocates part of contiguous memory on specific
244 * contiguous memory area.
245 */
246struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
247{
248 unsigned long mask, pfn, start = 0;
249 unsigned long bitmap_maxno, bitmap_no, bitmap_count;
250 struct page *page = NULL;
251 int ret;
252
253 if (!cma || !cma->count)
254 return NULL;
255
256 pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
257 count, align);
258
259 if (!count)
260 return NULL;
261
262 mask = cma_bitmap_aligned_mask(cma, align);
263 bitmap_maxno = cma_bitmap_maxno(cma);
264 bitmap_count = cma_bitmap_pages_to_bits(cma, count);
265
266 for (;;) {
267 mutex_lock(&cma->lock);
268 bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
269 bitmap_maxno, start, bitmap_count, mask);
270 if (bitmap_no >= bitmap_maxno) {
271 mutex_unlock(&cma->lock);
272 break;
273 }
274 bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
275 /*
276 * It's safe to drop the lock here. We've marked this region for
277 * our exclusive use. If the migration fails we will take the
278 * lock again and unmark it.
279 */
280 mutex_unlock(&cma->lock);
281
282 pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
283 mutex_lock(&cma_mutex);
284 ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
285 mutex_unlock(&cma_mutex);
286 if (ret == 0) {
287 page = pfn_to_page(pfn);
288 break;
289 }
290
291 cma_clear_bitmap(cma, pfn, count);
292 if (ret != -EBUSY)
293 break;
294
295 pr_debug("%s(): memory range at %p is busy, retrying\n",
296 __func__, pfn_to_page(pfn));
297 /* try again with a bit different memory target */
298 start = bitmap_no + mask + 1;
299 }
300
301 pr_debug("%s(): returned %p\n", __func__, page);
302 return page;
303}
304
305/**
306 * cma_release() - release allocated pages
307 * @cma: Contiguous memory region for which the allocation is performed.
308 * @pages: Allocated pages.
309 * @count: Number of allocated pages.
310 *
311 * This function releases memory allocated by cma_alloc().
312 * It returns false when provided pages do not belong to contiguous area and
313 * true otherwise.
314 */
315bool cma_release(struct cma *cma, struct page *pages, int count)
316{
317 unsigned long pfn;
318
319 if (!cma || !pages)
320 return false;
321
322 pr_debug("%s(page %p)\n", __func__, (void *)pages);
323
324 pfn = page_to_pfn(pages);
325
326 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
327 return false;
328
329 VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
330
331 free_contig_range(pfn, count);
332 cma_clear_bitmap(cma, pfn, count);
333
334 return true;
335}
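
Tying the new mm/cma.c together: early arch code reserves an area with cma_declare_contiguous(), and a driver later allocates and releases pages from it. The sketch below is illustrative only; my_cma and the sizes are invented, and per the kerneldoc above the reservation must happen while the early allocator (memblock or bootmem) is still active.

#include <linux/cma.h>
#include <linux/sizes.h>

static struct cma *my_cma;

/* Early reservation: 16 MiB, any base (0), any limit (0), 1 bitmap bit per page. */
static int __init my_reserve_cma(void)
{
	return cma_declare_contiguous(0, SZ_16M, 0, 0, 0, false, &my_cma);
}

/* Driver side: 64 physically contiguous pages, aligned to order 4. */
static struct page *my_alloc_buffer(void)
{
	return cma_alloc(my_cma, 64, 4);
}

static void my_free_buffer(struct page *pages)
{
	cma_release(my_cma, pages, 64);
}
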
diff --git a/mm/filemap.c b/mm/filemap.c
index 65d44fd88c78..af19a6b079f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
808} 808}
809EXPORT_SYMBOL_GPL(__lock_page_killable); 809EXPORT_SYMBOL_GPL(__lock_page_killable);
810 810
811/*
812 * Return values:
813 * 1 - page is locked; mmap_sem is still held.
814 * 0 - page is not locked.
815 * mmap_sem has been released (up_read()), unless flags had both
816 * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
817 * which case mmap_sem is still held.
818 *
819 * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
820 * with the page locked and the mmap_sem unperturbed.
821 */
811int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 822int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
812 unsigned int flags) 823 unsigned int flags)
813{ 824{
@@ -1091,9 +1102,9 @@ no_page:
1091 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) 1102 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
1092 fgp_flags |= FGP_LOCK; 1103 fgp_flags |= FGP_LOCK;
1093 1104
1094 /* Init accessed so avoit atomic mark_page_accessed later */ 1105 /* Init accessed so avoid atomic mark_page_accessed later */
1095 if (fgp_flags & FGP_ACCESSED) 1106 if (fgp_flags & FGP_ACCESSED)
1096 init_page_accessed(page); 1107 __SetPageReferenced(page);
1097 1108
1098 err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); 1109 err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
1099 if (unlikely(err)) { 1110 if (unlikely(err)) {
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
1827 * The goto's are kind of ugly, but this streamlines the normal case of having 1838 * The goto's are kind of ugly, but this streamlines the normal case of having
1828 * it in the page cache, and handles the special cases reasonably without 1839 * it in the page cache, and handles the special cases reasonably without
1829 * having a lot of duplicated code. 1840 * having a lot of duplicated code.
1841 *
1842 * vma->vm_mm->mmap_sem must be held on entry.
1843 *
1844 * If our return value has VM_FAULT_RETRY set, it's because
1845 * lock_page_or_retry() returned 0.
1846 * The mmap_sem has usually been released in this case.
1847 * See __lock_page_or_retry() for the exception.
1848 *
1849 * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
1850 * has not been released.
1851 *
1852 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
1830 */ 1853 */
1831int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1854int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1832{ 1855{
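
The comments added above spell out who owns mmap_sem when a fault returns VM_FAULT_RETRY. A caller of handle_mm_fault() therefore typically follows the pattern sketched here; the function name is invented, and a real caller must also check that find_vma() found a VMA covering the address and must handle the VM_FAULT_ERROR bits.

#include <linux/mm.h>
#include <linux/sched.h>

static int fault_in_one_page(struct mm_struct *mm, unsigned long address)
{
	struct vm_area_struct *vma;
	int ret;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	ret = handle_mm_fault(mm, vma, address, FAULT_FLAG_ALLOW_RETRY);
	if (ret & VM_FAULT_RETRY) {
		/* __lock_page_or_retry() released mmap_sem (see above):
		 * re-take it, look the VMA up afresh, retry without ALLOW_RETRY. */
		down_read(&mm->mmap_sem);
		vma = find_vma(mm, address);
		ret = handle_mm_fault(mm, vma, address, 0);
	}
	up_read(&mm->mmap_sem);
	return ret;
}
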
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
258 return ret; 258 return ret;
259} 259}
260 260
261/*
262 * mmap_sem must be held on entry. If @nonblocking != NULL and
263 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
264 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
265 */
261static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, 266static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
262 unsigned long address, unsigned int *flags, int *nonblocking) 267 unsigned long address, unsigned int *flags, int *nonblocking)
263{ 268{
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
373 * with a put_page() call when it is finished with. vmas will only 378 * with a put_page() call when it is finished with. vmas will only
374 * remain valid while mmap_sem is held. 379 * remain valid while mmap_sem is held.
375 * 380 *
376 * Must be called with mmap_sem held for read or write. 381 * Must be called with mmap_sem held. It may be released. See below.
377 * 382 *
378 * __get_user_pages walks a process's page tables and takes a reference to 383 * __get_user_pages walks a process's page tables and takes a reference to
379 * each struct page that each user address corresponds to at a given 384 * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
396 * 401 *
397 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO 402 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
398 * or mmap_sem contention, and if waiting is needed to pin all pages, 403 * or mmap_sem contention, and if waiting is needed to pin all pages,
399 * *@nonblocking will be set to 0. 404 * *@nonblocking will be set to 0. Further, if @gup_flags does not
405 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
406 * this case.
407 *
408 * A caller using such a combination of @nonblocking and @gup_flags
409 * must therefore hold the mmap_sem for reading only, and recognize
410 * when it's been released. Otherwise, it must be held for either
411 * reading or writing and will not be released.
400 * 412 *
401 * In most cases, get_user_pages or get_user_pages_fast should be used 413 * In most cases, get_user_pages or get_user_pages_fast should be used
402 * instead of __get_user_pages. __get_user_pages should be used only if 414 * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
528 * such architectures, gup() will not be enough to make a subsequent access 540 * such architectures, gup() will not be enough to make a subsequent access
529 * succeed. 541 * succeed.
530 * 542 *
531 * This should be called with the mm_sem held for read. 543 * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
532 */ 544 */
533int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, 545int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
534 unsigned long address, unsigned int fault_flags) 546 unsigned long address, unsigned int fault_flags)
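
The reworked __get_user_pages() comment above defines the @nonblocking contract: if the caller passes a non-NULL int and does not set FOLL_NOWAIT, a zero in that int on return means mmap_sem has already been dropped. The matching caller pattern, sketched with an invented wrapper name:

#include <linux/mm.h>
#include <linux/sched.h>

static long pin_user_range(struct task_struct *tsk, struct mm_struct *mm,
			   unsigned long start, unsigned long nr_pages,
			   struct page **pages)
{
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages(tsk, mm, start, nr_pages,
			       FOLL_WRITE | FOLL_TOUCH, pages, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	/* else: __get_user_pages() already did the up_read() while faulting */
	return ret;
}
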
diff --git a/mm/highmem.c b/mm/highmem.c
index b32b70cdaed6..123bcd3ed4f2 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -44,6 +44,66 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
44 */ 44 */
45#ifdef CONFIG_HIGHMEM 45#ifdef CONFIG_HIGHMEM
46 46
47/*
48 * Architecture with aliasing data cache may define the following family of
49 * helper functions in its asm/highmem.h to control cache color of virtual
50 * addresses where physical memory pages are mapped by kmap.
51 */
52#ifndef get_pkmap_color
53
54/*
55 * Determine color of virtual address where the page should be mapped.
56 */
57static inline unsigned int get_pkmap_color(struct page *page)
58{
59 return 0;
60}
61#define get_pkmap_color get_pkmap_color
62
63/*
64 * Get next index for mapping inside PKMAP region for page with given color.
65 */
66static inline unsigned int get_next_pkmap_nr(unsigned int color)
67{
68 static unsigned int last_pkmap_nr;
69
70 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
71 return last_pkmap_nr;
72}
73
74/*
75 * Determine if page index inside PKMAP region (pkmap_nr) of given color
76 * has wrapped around PKMAP region end. When this happens an attempt to
77 * flush all unused PKMAP slots is made.
78 */
79static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
80{
81 return pkmap_nr == 0;
82}
83
84/*
85 * Get the number of PKMAP entries of the given color. If no free slot is
86 * found after checking that many entries, kmap will sleep waiting for
87 * someone to call kunmap and free PKMAP slot.
88 */
89static inline int get_pkmap_entries_count(unsigned int color)
90{
91 return LAST_PKMAP;
92}
93
94/*
95 * Get head of a wait queue for PKMAP entries of the given color.
96 * Wait queues for different mapping colors should be independent to avoid
97 * unnecessary wakeups caused by freeing of slots of other colors.
98 */
99static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
100{
101 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
102
103 return &pkmap_map_wait;
104}
105#endif
106
47unsigned long totalhigh_pages __read_mostly; 107unsigned long totalhigh_pages __read_mostly;
48EXPORT_SYMBOL(totalhigh_pages); 108EXPORT_SYMBOL(totalhigh_pages);
49 109
@@ -68,13 +128,10 @@ unsigned int nr_free_highpages (void)
68} 128}
69 129
70static int pkmap_count[LAST_PKMAP]; 130static int pkmap_count[LAST_PKMAP];
71static unsigned int last_pkmap_nr;
72static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 131static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
73 132
74pte_t * pkmap_page_table; 133pte_t * pkmap_page_table;
75 134
76static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
77
78/* 135/*
79 * Most architectures have no use for kmap_high_get(), so let's abstract 136 * Most architectures have no use for kmap_high_get(), so let's abstract
80 * the disabling of IRQ out of the locking in that case to save on a 137 * the disabling of IRQ out of the locking in that case to save on a
@@ -161,15 +218,17 @@ static inline unsigned long map_new_virtual(struct page *page)
161{ 218{
162 unsigned long vaddr; 219 unsigned long vaddr;
163 int count; 220 int count;
221 unsigned int last_pkmap_nr;
222 unsigned int color = get_pkmap_color(page);
164 223
165start: 224start:
166 count = LAST_PKMAP; 225 count = get_pkmap_entries_count(color);
167 /* Find an empty entry */ 226 /* Find an empty entry */
168 for (;;) { 227 for (;;) {
169 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 228 last_pkmap_nr = get_next_pkmap_nr(color);
170 if (!last_pkmap_nr) { 229 if (no_more_pkmaps(last_pkmap_nr, color)) {
171 flush_all_zero_pkmaps(); 230 flush_all_zero_pkmaps();
172 count = LAST_PKMAP; 231 count = get_pkmap_entries_count(color);
173 } 232 }
174 if (!pkmap_count[last_pkmap_nr]) 233 if (!pkmap_count[last_pkmap_nr])
175 break; /* Found a usable entry */ 234 break; /* Found a usable entry */
@@ -181,12 +240,14 @@ start:
181 */ 240 */
182 { 241 {
183 DECLARE_WAITQUEUE(wait, current); 242 DECLARE_WAITQUEUE(wait, current);
243 wait_queue_head_t *pkmap_map_wait =
244 get_pkmap_wait_queue_head(color);
184 245
185 __set_current_state(TASK_UNINTERRUPTIBLE); 246 __set_current_state(TASK_UNINTERRUPTIBLE);
186 add_wait_queue(&pkmap_map_wait, &wait); 247 add_wait_queue(pkmap_map_wait, &wait);
187 unlock_kmap(); 248 unlock_kmap();
188 schedule(); 249 schedule();
189 remove_wait_queue(&pkmap_map_wait, &wait); 250 remove_wait_queue(pkmap_map_wait, &wait);
190 lock_kmap(); 251 lock_kmap();
191 252
192 /* Somebody else might have mapped it while we slept */ 253 /* Somebody else might have mapped it while we slept */
@@ -274,6 +335,8 @@ void kunmap_high(struct page *page)
274 unsigned long nr; 335 unsigned long nr;
275 unsigned long flags; 336 unsigned long flags;
276 int need_wakeup; 337 int need_wakeup;
338 unsigned int color = get_pkmap_color(page);
339 wait_queue_head_t *pkmap_map_wait;
277 340
278 lock_kmap_any(flags); 341 lock_kmap_any(flags);
279 vaddr = (unsigned long)page_address(page); 342 vaddr = (unsigned long)page_address(page);
@@ -299,13 +362,14 @@ void kunmap_high(struct page *page)
299 * no need for the wait-queue-head's lock. Simply 362 * no need for the wait-queue-head's lock. Simply
300 * test if the queue is empty. 363 * test if the queue is empty.
301 */ 364 */
302 need_wakeup = waitqueue_active(&pkmap_map_wait); 365 pkmap_map_wait = get_pkmap_wait_queue_head(color);
366 need_wakeup = waitqueue_active(pkmap_map_wait);
303 } 367 }
304 unlock_kmap_any(flags); 368 unlock_kmap_any(flags);
305 369
306 /* do wake-up, if needed, race-free outside of the spin lock */ 370 /* do wake-up, if needed, race-free outside of the spin lock */
307 if (need_wakeup) 371 if (need_wakeup)
308 wake_up(&pkmap_map_wait); 372 wake_up(pkmap_map_wait);
309} 373}
310 374
311EXPORT_SYMBOL(kunmap_high); 375EXPORT_SYMBOL(kunmap_high);
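
The get_pkmap_*() hooks above are meant to be overridden from asm/highmem.h by architectures whose data caches alias on virtual address. Purely as a hypothetical illustration, a 4-colour variant could look like the partial sketch below; no in-tree architecture is implied, and the per-colour wait-queue helper is only described in the trailing comment, since defining get_pkmap_color disables every generic fallback above and the whole family must then be provided.

#define PKMAP_COLORS	4	/* hypothetical 4-way aliasing data cache */

static inline unsigned int get_pkmap_color(struct page *page)
{
	return page_to_pfn(page) & (PKMAP_COLORS - 1);
}
#define get_pkmap_color get_pkmap_color

/* Step only through PKMAP slots whose index has the requested colour. */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
	static unsigned int last[PKMAP_COLORS];

	last[color] = (last[color] + PKMAP_COLORS) & LAST_PKMAP_MASK;
	return last[color] + color;
}

/* We have wrapped when we are back at the first slot of this colour. */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return pkmap_nr == color;
}

static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP / PKMAP_COLORS;
}

/* get_pkmap_wait_queue_head(color) would return the address of one of
 * PKMAP_COLORS statically declared wait queues, one per colour. */
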
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..3630d577e987 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
827 count_vm_event(THP_FAULT_FALLBACK); 827 count_vm_event(THP_FAULT_FALLBACK);
828 return VM_FAULT_FALLBACK; 828 return VM_FAULT_FALLBACK;
829 } 829 }
830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) { 830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
831 put_page(page); 831 put_page(page);
832 count_vm_event(THP_FAULT_FALLBACK); 832 count_vm_event(THP_FAULT_FALLBACK);
833 return VM_FAULT_FALLBACK; 833 return VM_FAULT_FALLBACK;
@@ -1132,7 +1132,7 @@ alloc:
1132 goto out; 1132 goto out;
1133 } 1133 }
1134 1134
1135 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) { 1135 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
1136 put_page(new_page); 1136 put_page(new_page);
1137 if (page) { 1137 if (page) {
1138 split_huge_page(page); 1138 split_huge_page(page);
@@ -1681,7 +1681,7 @@ static void __split_huge_page_refcount(struct page *page,
1681 &page_tail->_count); 1681 &page_tail->_count);
1682 1682
1683 /* after clearing PageTail the gup refcount can be released */ 1683 /* after clearing PageTail the gup refcount can be released */
1684 smp_mb(); 1684 smp_mb__after_atomic();
1685 1685
1686 /* 1686 /*
1687 * retain hwpoison flag of the poisoned tail page: 1687 * retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1775,8 @@ static int __split_huge_page_map(struct page *page,
1775 if (pmd) { 1775 if (pmd) {
1776 pgtable = pgtable_trans_huge_withdraw(mm, pmd); 1776 pgtable = pgtable_trans_huge_withdraw(mm, pmd);
1777 pmd_populate(mm, &_pmd, pgtable); 1777 pmd_populate(mm, &_pmd, pgtable);
1778 if (pmd_write(*pmd))
1779 BUG_ON(page_mapcount(page) != 1);
1778 1780
1779 haddr = address; 1781 haddr = address;
1780 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { 1782 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1786,6 @@ static int __split_huge_page_map(struct page *page,
1784 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1786 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1785 if (!pmd_write(*pmd)) 1787 if (!pmd_write(*pmd))
1786 entry = pte_wrprotect(entry); 1788 entry = pte_wrprotect(entry);
1787 else
1788 BUG_ON(page_mapcount(page) != 1);
1789 if (!pmd_young(*pmd)) 1789 if (!pmd_young(*pmd))
1790 entry = pte_mkold(entry); 1790 entry = pte_mkold(entry);
1791 if (pmd_numa(*pmd)) 1791 if (pmd_numa(*pmd))
@@ -2233,6 +2233,30 @@ static void khugepaged_alloc_sleep(void)
2233 2233
2234static int khugepaged_node_load[MAX_NUMNODES]; 2234static int khugepaged_node_load[MAX_NUMNODES];
2235 2235
2236static bool khugepaged_scan_abort(int nid)
2237{
2238 int i;
2239
2240 /*
2241 * If zone_reclaim_mode is disabled, then no extra effort is made to
2242 * allocate memory locally.
2243 */
2244 if (!zone_reclaim_mode)
2245 return false;
2246
2247 /* If there is a count for this node already, it must be acceptable */
2248 if (khugepaged_node_load[nid])
2249 return false;
2250
2251 for (i = 0; i < MAX_NUMNODES; i++) {
2252 if (!khugepaged_node_load[i])
2253 continue;
2254 if (node_distance(nid, i) > RECLAIM_DISTANCE)
2255 return true;
2256 }
2257 return false;
2258}
2259
2236#ifdef CONFIG_NUMA 2260#ifdef CONFIG_NUMA
2237static int khugepaged_find_target_node(void) 2261static int khugepaged_find_target_node(void)
2238{ 2262{
@@ -2399,7 +2423,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2399 if (!new_page) 2423 if (!new_page)
2400 return; 2424 return;
2401 2425
2402 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) 2426 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
2403 return; 2427 return;
2404 2428
2405 /* 2429 /*
@@ -2545,6 +2569,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
2545 * hit record. 2569 * hit record.
2546 */ 2570 */
2547 node = page_to_nid(page); 2571 node = page_to_nid(page);
2572 if (khugepaged_scan_abort(node))
2573 goto out_unmap;
2548 khugepaged_node_load[node]++; 2574 khugepaged_node_load[node]++;
2549 VM_BUG_ON_PAGE(PageCompound(page), page); 2575 VM_BUG_ON_PAGE(PageCompound(page), page);
2550 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 2576 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7a0a73d2fcff..eeceeeb09019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
35#include <linux/node.h> 35#include <linux/node.h>
36#include "internal.h" 36#include "internal.h"
37 37
38const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
39unsigned long hugepages_treat_as_movable; 38unsigned long hugepages_treat_as_movable;
40 39
41int hugetlb_max_hstate __read_mostly; 40int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
1089 unsigned long pfn; 1088 unsigned long pfn;
1090 struct hstate *h; 1089 struct hstate *h;
1091 1090
1091 if (!hugepages_supported())
1092 return;
1093
1092 /* Set scan step to minimum hugepage size */ 1094 /* Set scan step to minimum hugepage size */
1093 for_each_hstate(h) 1095 for_each_hstate(h)
1094 if (order > huge_page_order(h)) 1096 if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
1734 return sprintf(buf, "%lu\n", nr_huge_pages); 1736 return sprintf(buf, "%lu\n", nr_huge_pages);
1735} 1737}
1736 1738
1737static ssize_t nr_hugepages_store_common(bool obey_mempolicy, 1739static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
1738 struct kobject *kobj, struct kobj_attribute *attr, 1740 struct hstate *h, int nid,
1739 const char *buf, size_t len) 1741 unsigned long count, size_t len)
1740{ 1742{
1741 int err; 1743 int err;
1742 int nid;
1743 unsigned long count;
1744 struct hstate *h;
1745 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); 1744 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
1746 1745
1747 err = kstrtoul(buf, 10, &count);
1748 if (err)
1749 goto out;
1750
1751 h = kobj_to_hstate(kobj, &nid);
1752 if (hstate_is_gigantic(h) && !gigantic_page_supported()) { 1746 if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
1753 err = -EINVAL; 1747 err = -EINVAL;
1754 goto out; 1748 goto out;
@@ -1784,6 +1778,23 @@ out:
1784 return err; 1778 return err;
1785} 1779}
1786 1780
1781static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1782 struct kobject *kobj, const char *buf,
1783 size_t len)
1784{
1785 struct hstate *h;
1786 unsigned long count;
1787 int nid;
1788 int err;
1789
1790 err = kstrtoul(buf, 10, &count);
1791 if (err)
1792 return err;
1793
1794 h = kobj_to_hstate(kobj, &nid);
1795 return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
1796}
1797
1787static ssize_t nr_hugepages_show(struct kobject *kobj, 1798static ssize_t nr_hugepages_show(struct kobject *kobj,
1788 struct kobj_attribute *attr, char *buf) 1799 struct kobj_attribute *attr, char *buf)
1789{ 1800{
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
1793static ssize_t nr_hugepages_store(struct kobject *kobj, 1804static ssize_t nr_hugepages_store(struct kobject *kobj,
1794 struct kobj_attribute *attr, const char *buf, size_t len) 1805 struct kobj_attribute *attr, const char *buf, size_t len)
1795{ 1806{
1796 return nr_hugepages_store_common(false, kobj, attr, buf, len); 1807 return nr_hugepages_store_common(false, kobj, buf, len);
1797} 1808}
1798HSTATE_ATTR(nr_hugepages); 1809HSTATE_ATTR(nr_hugepages);
1799 1810
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
1812static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, 1823static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
1813 struct kobj_attribute *attr, const char *buf, size_t len) 1824 struct kobj_attribute *attr, const char *buf, size_t len)
1814{ 1825{
1815 return nr_hugepages_store_common(true, kobj, attr, buf, len); 1826 return nr_hugepages_store_common(true, kobj, buf, len);
1816} 1827}
1817HSTATE_ATTR(nr_hugepages_mempolicy); 1828HSTATE_ATTR(nr_hugepages_mempolicy);
1818#endif 1829#endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
2248 void __user *buffer, size_t *length, loff_t *ppos) 2259 void __user *buffer, size_t *length, loff_t *ppos)
2249{ 2260{
2250 struct hstate *h = &default_hstate; 2261 struct hstate *h = &default_hstate;
2251 unsigned long tmp; 2262 unsigned long tmp = h->max_huge_pages;
2252 int ret; 2263 int ret;
2253 2264
2254 if (!hugepages_supported()) 2265 if (!hugepages_supported())
2255 return -ENOTSUPP; 2266 return -ENOTSUPP;
2256 2267
2257 tmp = h->max_huge_pages;
2258
2259 if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
2260 return -EINVAL;
2261
2262 table->data = &tmp; 2268 table->data = &tmp;
2263 table->maxlen = sizeof(unsigned long); 2269 table->maxlen = sizeof(unsigned long);
2264 ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); 2270 ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
2265 if (ret) 2271 if (ret)
2266 goto out; 2272 goto out;
2267 2273
2268 if (write) { 2274 if (write)
2269 NODEMASK_ALLOC(nodemask_t, nodes_allowed, 2275 ret = __nr_hugepages_store_common(obey_mempolicy, h,
2270 GFP_KERNEL | __GFP_NORETRY); 2276 NUMA_NO_NODE, tmp, *length);
2271 if (!(obey_mempolicy &&
2272 init_nodemask_of_mempolicy(nodes_allowed))) {
2273 NODEMASK_FREE(nodes_allowed);
2274 nodes_allowed = &node_states[N_MEMORY];
2275 }
2276 h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
2277
2278 if (nodes_allowed != &node_states[N_MEMORY])
2279 NODEMASK_FREE(nodes_allowed);
2280 }
2281out: 2277out:
2282 return ret; 2278 return ret;
2283} 2279}
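The refactoring in the hunks above separates input parsing from the actual resize so that the sysfs store handlers and the sysctl handler can funnel into one helper. A toy sketch of that shape, with hypothetical names: set_count_common() stands in for __nr_hugepages_store_common(), and the resize itself is reduced to an assignment.

#include <stdio.h>
#include <stdlib.h>

static unsigned long max_huge_pages;

static long set_count_common(int nid, unsigned long count, long len)
{
        /* Stand-in for __nr_hugepages_store_common(): apply the count
         * against one node or all nodes, then report bytes consumed. */
        max_huge_pages = count;
        printf("nid=%d -> max_huge_pages=%lu\n", nid, max_huge_pages);
        return len;
}

static long sysfs_store(const char *buf, long len)
{
        unsigned long count = strtoul(buf, NULL, 10); /* kstrtoul() in the kernel */
        return set_count_common(0, count, len);       /* nid from kobj_to_hstate() */
}

static long sysctl_write(unsigned long tmp, long len)
{
        return set_count_common(-1, tmp, len);        /* -1 models NUMA_NO_NODE */
}

int main(void)
{
        sysfs_store("128", 4);
        sysctl_write(256, 4);
        return 0;
}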
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
2754 * from other VMAs and let the children be SIGKILLed if they are faulting the 2750 * from other VMAs and let the children be SIGKILLed if they are faulting the
2755 * same region. 2751 * same region.
2756 */ 2752 */
2757static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 2753static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2758 struct page *page, unsigned long address) 2754 struct page *page, unsigned long address)
2759{ 2755{
2760 struct hstate *h = hstate_vma(vma); 2756 struct hstate *h = hstate_vma(vma);
2761 struct vm_area_struct *iter_vma; 2757 struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2794 address + huge_page_size(h), page); 2790 address + huge_page_size(h), page);
2795 } 2791 }
2796 mutex_unlock(&mapping->i_mmap_mutex); 2792 mutex_unlock(&mapping->i_mmap_mutex);
2797
2798 return 1;
2799} 2793}
2800 2794
2801/* 2795/*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
2810{ 2804{
2811 struct hstate *h = hstate_vma(vma); 2805 struct hstate *h = hstate_vma(vma);
2812 struct page *old_page, *new_page; 2806 struct page *old_page, *new_page;
2813 int outside_reserve = 0; 2807 int ret = 0, outside_reserve = 0;
2814 unsigned long mmun_start; /* For mmu_notifiers */ 2808 unsigned long mmun_start; /* For mmu_notifiers */
2815 unsigned long mmun_end; /* For mmu_notifiers */ 2809 unsigned long mmun_end; /* For mmu_notifiers */
2816 2810
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
2840 2834
2841 page_cache_get(old_page); 2835 page_cache_get(old_page);
2842 2836
2843 /* Drop page table lock as buddy allocator may be called */ 2837 /*
2838 * Drop page table lock as buddy allocator may be called. It will
2839 * be acquired again before returning to the caller, as expected.
2840 */
2844 spin_unlock(ptl); 2841 spin_unlock(ptl);
2845 new_page = alloc_huge_page(vma, address, outside_reserve); 2842 new_page = alloc_huge_page(vma, address, outside_reserve);
2846 2843
2847 if (IS_ERR(new_page)) { 2844 if (IS_ERR(new_page)) {
2848 long err = PTR_ERR(new_page);
2849 page_cache_release(old_page);
2850
2851 /* 2845 /*
2852 * If a process owning a MAP_PRIVATE mapping fails to COW, 2846 * If a process owning a MAP_PRIVATE mapping fails to COW,
2853 * it is due to references held by a child and an insufficient 2847 * it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
2856 * may get SIGKILLed if it later faults. 2850 * may get SIGKILLed if it later faults.
2857 */ 2851 */
2858 if (outside_reserve) { 2852 if (outside_reserve) {
2853 page_cache_release(old_page);
2859 BUG_ON(huge_pte_none(pte)); 2854 BUG_ON(huge_pte_none(pte));
2860 if (unmap_ref_private(mm, vma, old_page, address)) { 2855 unmap_ref_private(mm, vma, old_page, address);
2861 BUG_ON(huge_pte_none(pte)); 2856 BUG_ON(huge_pte_none(pte));
2862 spin_lock(ptl); 2857 spin_lock(ptl);
2863 ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2858 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
2864 if (likely(ptep && 2859 if (likely(ptep &&
2865 pte_same(huge_ptep_get(ptep), pte))) 2860 pte_same(huge_ptep_get(ptep), pte)))
2866 goto retry_avoidcopy; 2861 goto retry_avoidcopy;
2867 /* 2862 /*
2868 * race occurs while re-acquiring page table 2863 * race occurs while re-acquiring page table
2869 * lock, and our job is done. 2864 * lock, and our job is done.
2870 */ 2865 */
2871 return 0; 2866 return 0;
2872 }
2873 WARN_ON_ONCE(1);
2874 } 2867 }
2875 2868
2876 /* Caller expects lock to be held */ 2869 ret = (PTR_ERR(new_page) == -ENOMEM) ?
2877 spin_lock(ptl); 2870 VM_FAULT_OOM : VM_FAULT_SIGBUS;
2878 if (err == -ENOMEM) 2871 goto out_release_old;
2879 return VM_FAULT_OOM;
2880 else
2881 return VM_FAULT_SIGBUS;
2882 } 2872 }
2883 2873
2884 /* 2874 /*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
2886 * anon_vma prepared. 2876 * anon_vma prepared.
2887 */ 2877 */
2888 if (unlikely(anon_vma_prepare(vma))) { 2878 if (unlikely(anon_vma_prepare(vma))) {
2889 page_cache_release(new_page); 2879 ret = VM_FAULT_OOM;
2890 page_cache_release(old_page); 2880 goto out_release_all;
2891 /* Caller expects lock to be held */
2892 spin_lock(ptl);
2893 return VM_FAULT_OOM;
2894 } 2881 }
2895 2882
2896 copy_user_huge_page(new_page, old_page, address, vma, 2883 copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
2900 mmun_start = address & huge_page_mask(h); 2887 mmun_start = address & huge_page_mask(h);
2901 mmun_end = mmun_start + huge_page_size(h); 2888 mmun_end = mmun_start + huge_page_size(h);
2902 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 2889 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2890
2903 /* 2891 /*
2904 * Retake the page table lock to check for racing updates 2892 * Retake the page table lock to check for racing updates
2905 * before the page tables are altered 2893 * before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
2920 } 2908 }
2921 spin_unlock(ptl); 2909 spin_unlock(ptl);
2922 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 2910 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2911out_release_all:
2923 page_cache_release(new_page); 2912 page_cache_release(new_page);
2913out_release_old:
2924 page_cache_release(old_page); 2914 page_cache_release(old_page);
2925 2915
2926 /* Caller expects lock to be held */ 2916 spin_lock(ptl); /* Caller expects lock to be held */
2927 spin_lock(ptl); 2917 return ret;
2928 return 0;
2929} 2918}
2930 2919
2931/* Return the pagecache page at a given address within a VMA */ 2920/* Return the pagecache page at a given address within a VMA */
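The hugetlb_cow() rework above replaces ad-hoc release/re-lock sequences on each failure path with two exit labels, so every path releases exactly what it took and retakes the lock the caller expects. A generic sketch of that unwind shape, with stub acquire/release/lock helpers; nothing here is hugetlb-specific.

#include <stdio.h>

static int  acquire(const char *what) { printf("get %s\n", what); return 1; }
static void release(const char *what) { printf("put %s\n", what); }
static void lock(void)   { printf("lock\n"); }
static void unlock(void) { printf("unlock\n"); }

static int cow(int fail_alloc, int fail_prepare)
{
        int ret = 0;

        acquire("old_page");
        unlock();                       /* drop the lock around allocation */

        if (fail_alloc) {
                ret = -12;              /* -ENOMEM */
                goto out_release_old;   /* new_page was never taken */
        }
        acquire("new_page");

        if (fail_prepare) {
                ret = -12;
                goto out_release_all;
        }
        /* success path falls through and drops its references too */

out_release_all:
        release("new_page");
out_release_old:
        release("old_page");
        lock();                         /* caller expects the lock held */
        return ret;
}

int main(void)
{
        printf("cow -> %d\n", cow(0, 1));
        return 0;
}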
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 95487c71cad5..329caf56df22 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
72 72
73static void pfn_inject_exit(void) 73static void pfn_inject_exit(void)
74{ 74{
75 if (hwpoison_dir) 75 debugfs_remove_recursive(hwpoison_dir);
76 debugfs_remove_recursive(hwpoison_dir);
77} 76}
78 77
79static int pfn_inject_init(void) 78static int pfn_inject_init(void)
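The hwpoison change above works because debugfs_remove_recursive() tolerates a NULL dentry, so the caller-side check is redundant. The same idiom in a standalone sketch, with a hypothetical destroy() helper.

#include <stdlib.h>

struct dir { int dummy; };

static void destroy(struct dir *d)
{
        if (!d)                 /* tolerate NULL so callers need not check */
                return;
        free(d);
}

int main(void)
{
        struct dir *d = NULL;   /* e.g. when setup never ran */
        destroy(d);             /* safe either way */
        return 0;
}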
diff --git a/mm/internal.h b/mm/internal.h
index 7f22a11fcc66..a1b651b11c5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
247static inline struct page *mem_map_offset(struct page *base, int offset) 247static inline struct page *mem_map_offset(struct page *base, int offset)
248{ 248{
249 if (unlikely(offset >= MAX_ORDER_NR_PAGES)) 249 if (unlikely(offset >= MAX_ORDER_NR_PAGES))
250 return pfn_to_page(page_to_pfn(base) + offset); 250 return nth_page(base, offset);
251 return base + offset; 251 return base + offset;
252} 252}
253 253
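mem_map_offset() above now goes through nth_page() once an offset crosses MAX_ORDER_NR_PAGES, because raw pointer arithmetic is only valid within one contiguous chunk of the page array. A toy model of that round trip; the two fixed chunks stand in for a sparse mem_map and the sizes are illustrative.

#include <stdio.h>

#define CHUNK 4

struct page { int idx; };

static struct page chunk_a[CHUNK] = { {0}, {1}, {2}, {3} };
static struct page chunk_b[CHUNK] = { {4}, {5}, {6}, {7} };

static struct page *index_to_page(int idx)      /* pfn_to_page() stand-in */
{
        return idx < CHUNK ? &chunk_a[idx] : &chunk_b[idx - CHUNK];
}

static struct page *map_offset(struct page *base, int offset)
{
        if (base->idx + offset >= CHUNK)        /* would cross a chunk boundary */
                return index_to_page(base->idx + offset);
        return base + offset;                   /* safe inside one chunk */
}

int main(void)
{
        printf("%d\n", map_offset(&chunk_a[2], 3)->idx);   /* prints 5 */
        return 0;
}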
diff --git a/mm/madvise.c b/mm/madvise.c
index a402f8fdc68e..0938b30da4ab 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
292/* 292/*
293 * Application wants to free up the pages and associated backing store. 293 * Application wants to free up the pages and associated backing store.
294 * This is effectively punching a hole into the middle of a file. 294 * This is effectively punching a hole into the middle of a file.
295 *
296 * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
297 * Other filesystems return -ENOSYS.
298 */ 295 */
299static long madvise_remove(struct vm_area_struct *vma, 296static long madvise_remove(struct vm_area_struct *vma,
300 struct vm_area_struct **prev, 297 struct vm_area_struct **prev,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f009a14918d2..90dc501eaf3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,55 +2551,72 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
2551 return NOTIFY_OK; 2551 return NOTIFY_OK;
2552} 2552}
2553 2553
2554 2554/**
2555/* See mem_cgroup_try_charge() for details */ 2555 * mem_cgroup_try_charge - try charging a memcg
2556enum { 2556 * @memcg: memcg to charge
2557 CHARGE_OK, /* success */ 2557 * @nr_pages: number of pages to charge
2558 CHARGE_RETRY, /* need to retry but retry is not bad */ 2558 *
2559 CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ 2559 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2560 CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */ 2560 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2561}; 2561 */
2562 2562static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2563static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, 2563 gfp_t gfp_mask,
2564 unsigned int nr_pages, unsigned int min_pages, 2564 unsigned int nr_pages)
2565 bool invoke_oom)
2566{ 2565{
2567 unsigned long csize = nr_pages * PAGE_SIZE; 2566 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2567 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
2568 struct mem_cgroup *mem_over_limit; 2568 struct mem_cgroup *mem_over_limit;
2569 struct res_counter *fail_res; 2569 struct res_counter *fail_res;
2570 unsigned long nr_reclaimed;
2570 unsigned long flags = 0; 2571 unsigned long flags = 0;
2571 int ret; 2572 unsigned long long size;
2573 int ret = 0;
2572 2574
2573 ret = res_counter_charge(&memcg->res, csize, &fail_res); 2575retry:
2576 if (consume_stock(memcg, nr_pages))
2577 goto done;
2574 2578
2575 if (likely(!ret)) { 2579 size = batch * PAGE_SIZE;
2580 if (!res_counter_charge(&memcg->res, size, &fail_res)) {
2576 if (!do_swap_account) 2581 if (!do_swap_account)
2577 return CHARGE_OK; 2582 goto done_restock;
2578 ret = res_counter_charge(&memcg->memsw, csize, &fail_res); 2583 if (!res_counter_charge(&memcg->memsw, size, &fail_res))
2579 if (likely(!ret)) 2584 goto done_restock;
2580 return CHARGE_OK; 2585 res_counter_uncharge(&memcg->res, size);
2581
2582 res_counter_uncharge(&memcg->res, csize);
2583 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2586 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
2584 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2587 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
2585 } else 2588 } else
2586 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); 2589 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
2590
2591 if (batch > nr_pages) {
2592 batch = nr_pages;
2593 goto retry;
2594 }
2595
2587 /* 2596 /*
2588 * Never reclaim on behalf of optional batching, retry with a 2597 * Unlike in global OOM situations, memcg is not in a physical
2589 * single page instead. 2598 * memory shortage. Allow dying and OOM-killed tasks to
2599 * bypass the last charges so that they can exit quickly and
2600 * free their memory.
2590 */ 2601 */
2591 if (nr_pages > min_pages) 2602 if (unlikely(test_thread_flag(TIF_MEMDIE) ||
2592 return CHARGE_RETRY; 2603 fatal_signal_pending(current) ||
2604 current->flags & PF_EXITING))
2605 goto bypass;
2606
2607 if (unlikely(task_in_memcg_oom(current)))
2608 goto nomem;
2593 2609
2594 if (!(gfp_mask & __GFP_WAIT)) 2610 if (!(gfp_mask & __GFP_WAIT))
2595 return CHARGE_WOULDBLOCK; 2611 goto nomem;
2596 2612
2597 if (gfp_mask & __GFP_NORETRY) 2613 nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2598 return CHARGE_NOMEM;
2599 2614
2600 ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2601 if (mem_cgroup_margin(mem_over_limit) >= nr_pages) 2615 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
2602 return CHARGE_RETRY; 2616 goto retry;
2617
2618 if (gfp_mask & __GFP_NORETRY)
2619 goto nomem;
2603 /* 2620 /*
2604 * Even though the limit is exceeded at this point, reclaim 2621 * Even though the limit is exceeded at this point, reclaim
2605 * may have been able to free some pages. Retry the charge 2622 * may have been able to free some pages. Retry the charge
@@ -2609,96 +2626,38 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2609 * unlikely to succeed so close to the limit, and we fall back 2626 * unlikely to succeed so close to the limit, and we fall back
2610 * to regular pages anyway in case of failure. 2627 * to regular pages anyway in case of failure.
2611 */ 2628 */
2612 if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret) 2629 if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
2613 return CHARGE_RETRY; 2630 goto retry;
2614
2615 /* 2631 /*
2616 * At task move, charge accounts can be doubly counted. So, it's 2632 * At task move, charge accounts can be doubly counted. So, it's
2617 * better to wait until the end of task_move if something is going on. 2633 * better to wait until the end of task_move if something is going on.
2618 */ 2634 */
2619 if (mem_cgroup_wait_acct_move(mem_over_limit)) 2635 if (mem_cgroup_wait_acct_move(mem_over_limit))
2620 return CHARGE_RETRY; 2636 goto retry;
2621
2622 if (invoke_oom)
2623 mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
2624
2625 return CHARGE_NOMEM;
2626}
2627
2628/**
2629 * mem_cgroup_try_charge - try charging a memcg
2630 * @memcg: memcg to charge
2631 * @nr_pages: number of pages to charge
2632 * @oom: trigger OOM if reclaim fails
2633 *
2634 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2635 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2636 */
2637static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2638 gfp_t gfp_mask,
2639 unsigned int nr_pages,
2640 bool oom)
2641{
2642 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2643 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2644 int ret;
2645
2646 if (mem_cgroup_is_root(memcg))
2647 goto done;
2648 /*
2649 * Unlike in global OOM situations, memcg is not in a physical
2650 * memory shortage. Allow dying and OOM-killed tasks to
2651 * bypass the last charges so that they can exit quickly and
2652 * free their memory.
2653 */
2654 if (unlikely(test_thread_flag(TIF_MEMDIE) ||
2655 fatal_signal_pending(current) ||
2656 current->flags & PF_EXITING))
2657 goto bypass;
2658 2637
2659 if (unlikely(task_in_memcg_oom(current))) 2638 if (nr_retries--)
2660 goto nomem; 2639 goto retry;
2661 2640
2662 if (gfp_mask & __GFP_NOFAIL) 2641 if (gfp_mask & __GFP_NOFAIL)
2663 oom = false; 2642 goto bypass;
2664again:
2665 if (consume_stock(memcg, nr_pages))
2666 goto done;
2667
2668 do {
2669 bool invoke_oom = oom && !nr_oom_retries;
2670
2671 /* If killed, bypass charge */
2672 if (fatal_signal_pending(current))
2673 goto bypass;
2674 2643
2675 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, 2644 if (fatal_signal_pending(current))
2676 nr_pages, invoke_oom); 2645 goto bypass;
2677 switch (ret) {
2678 case CHARGE_OK:
2679 break;
2680 case CHARGE_RETRY: /* not in OOM situation but retry */
2681 batch = nr_pages;
2682 goto again;
2683 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
2684 goto nomem;
2685 case CHARGE_NOMEM: /* OOM routine works */
2686 if (!oom || invoke_oom)
2687 goto nomem;
2688 nr_oom_retries--;
2689 break;
2690 }
2691 } while (ret != CHARGE_OK);
2692 2646
2693 if (batch > nr_pages) 2647 mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
2694 refill_stock(memcg, batch - nr_pages);
2695done:
2696 return 0;
2697nomem: 2648nomem:
2698 if (!(gfp_mask & __GFP_NOFAIL)) 2649 if (!(gfp_mask & __GFP_NOFAIL))
2699 return -ENOMEM; 2650 return -ENOMEM;
2700bypass: 2651bypass:
2701 return -EINTR; 2652 memcg = root_mem_cgroup;
2653 ret = -EINTR;
2654 goto retry;
2655
2656done_restock:
2657 if (batch > nr_pages)
2658 refill_stock(memcg, batch - nr_pages);
2659done:
2660 return ret;
2702} 2661}
2703 2662
2704/** 2663/**
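The memcontrol rework above folds mem_cgroup_do_charge() into a single mem_cgroup_try_charge() with explicit retry / nomem / done_restock paths: charge a batch at once, shrink the batch before reclaiming, retry reclaim a bounded number of times, and restock any surplus. A condensed user-space model of that control flow follows; the integer budget, reclaim() behaviour and constants are illustrative, and the stock, bypass and OOM handling of the real function are omitted.

#include <stdbool.h>
#include <stdio.h>

#define BATCH   32
#define RETRIES 5

static long budget = 40;                 /* stand-in for the memcg limit */

static bool charge(long pages)           /* res_counter_charge() stand-in */
{
        if (budget < pages)
                return false;
        budget -= pages;
        return true;
}

static long reclaim(void) { budget += 8; return 8; }   /* pretend 8 pages freed */

static int try_charge(long nr_pages, bool can_block)
{
        long batch = nr_pages > BATCH ? nr_pages : BATCH;
        int retries = RETRIES;

retry:
        if (charge(batch))
                goto done_restock;
        if (batch > nr_pages) {          /* never reclaim for optional batching */
                batch = nr_pages;
                goto retry;
        }
        if (!can_block)
                return -1;               /* -ENOMEM without __GFP_WAIT */
        if (reclaim() && retries--)
                goto retry;
        return -1;

done_restock:
        if (batch > nr_pages)            /* surplus refills the per-cpu stock */
                printf("restock %ld pages\n", batch - nr_pages);
        return 0;
}

int main(void)
{
        int ret = try_charge(4, true);
        printf("charge 4: %d, budget left %ld\n", ret, budget);
        ret = try_charge(64, true);
        printf("charge 64: %d, budget left %ld\n", ret, budget);
        return 0;
}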
@@ -2712,15 +2671,14 @@ bypass:
2712 */ 2671 */
2713static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, 2672static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2714 gfp_t gfp_mask, 2673 gfp_t gfp_mask,
2715 unsigned int nr_pages, 2674 unsigned int nr_pages)
2716 bool oom)
2717 2675
2718{ 2676{
2719 struct mem_cgroup *memcg; 2677 struct mem_cgroup *memcg;
2720 int ret; 2678 int ret;
2721 2679
2722 memcg = get_mem_cgroup_from_mm(mm); 2680 memcg = get_mem_cgroup_from_mm(mm);
2723 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom); 2681 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
2724 css_put(&memcg->css); 2682 css_put(&memcg->css);
2725 if (ret == -EINTR) 2683 if (ret == -EINTR)
2726 memcg = root_mem_cgroup; 2684 memcg = root_mem_cgroup;
@@ -2738,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2738static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, 2696static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2739 unsigned int nr_pages) 2697 unsigned int nr_pages)
2740{ 2698{
2741 if (!mem_cgroup_is_root(memcg)) { 2699 unsigned long bytes = nr_pages * PAGE_SIZE;
2742 unsigned long bytes = nr_pages * PAGE_SIZE;
2743 2700
2744 res_counter_uncharge(&memcg->res, bytes); 2701 res_counter_uncharge(&memcg->res, bytes);
2745 if (do_swap_account) 2702 if (do_swap_account)
2746 res_counter_uncharge(&memcg->memsw, bytes); 2703 res_counter_uncharge(&memcg->memsw, bytes);
2747 }
2748} 2704}
2749 2705
2750/* 2706/*
@@ -2756,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2756{ 2712{
2757 unsigned long bytes = nr_pages * PAGE_SIZE; 2713 unsigned long bytes = nr_pages * PAGE_SIZE;
2758 2714
2759 if (mem_cgroup_is_root(memcg))
2760 return;
2761
2762 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); 2715 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
2763 if (do_swap_account) 2716 if (do_swap_account)
2764 res_counter_uncharge_until(&memcg->memsw, 2717 res_counter_uncharge_until(&memcg->memsw,
@@ -2842,14 +2795,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2842 } 2795 }
2843 2796
2844 pc->mem_cgroup = memcg; 2797 pc->mem_cgroup = memcg;
2845 /*
2846 * We access a page_cgroup asynchronously without lock_page_cgroup().
2847 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
2848 * is accessed after testing USED bit. To make pc->mem_cgroup visible
2849 * before USED bit, we need memory barrier here.
2850 * See mem_cgroup_add_lru_list(), etc.
2851 */
2852 smp_wmb();
2853 SetPageCgroupUsed(pc); 2798 SetPageCgroupUsed(pc);
2854 2799
2855 if (lrucare) { 2800 if (lrucare) {
@@ -2937,8 +2882,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
2937 if (ret) 2882 if (ret)
2938 return ret; 2883 return ret;
2939 2884
2940 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT, 2885 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
2941 oom_gfp_allowed(gfp));
2942 if (ret == -EINTR) { 2886 if (ret == -EINTR) {
2943 /* 2887 /*
 2944 * mem_cgroup_try_charge() chose to bypass to root due to 2888
@@ -3463,12 +3407,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
3463 memcg_uncharge_kmem(memcg, PAGE_SIZE << order); 3407 memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
3464 return; 3408 return;
3465 } 3409 }
3466 3410 /*
3411 * The page is freshly allocated and not visible to any
3412 * outside callers yet. Set up pc non-atomically.
3413 */
3467 pc = lookup_page_cgroup(page); 3414 pc = lookup_page_cgroup(page);
3468 lock_page_cgroup(pc);
3469 pc->mem_cgroup = memcg; 3415 pc->mem_cgroup = memcg;
3470 SetPageCgroupUsed(pc); 3416 pc->flags = PCG_USED;
3471 unlock_page_cgroup(pc);
3472} 3417}
3473 3418
3474void __memcg_kmem_uncharge_pages(struct page *page, int order) 3419void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3423,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
3478 3423
3479 3424
3480 pc = lookup_page_cgroup(page); 3425 pc = lookup_page_cgroup(page);
3481 /*
3482 * Fast unlocked return. Theoretically might have changed, have to
3483 * check again after locking.
3484 */
3485 if (!PageCgroupUsed(pc)) 3426 if (!PageCgroupUsed(pc))
3486 return; 3427 return;
3487 3428
3488 lock_page_cgroup(pc); 3429 memcg = pc->mem_cgroup;
3489 if (PageCgroupUsed(pc)) { 3430 pc->flags = 0;
3490 memcg = pc->mem_cgroup;
3491 ClearPageCgroupUsed(pc);
3492 }
3493 unlock_page_cgroup(pc);
3494 3431
3495 /* 3432 /*
3496 * We trust that only if there is a memcg associated with the page, it 3433 * We trust that only if there is a memcg associated with the page, it
@@ -3531,7 +3468,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
3531 for (i = 1; i < HPAGE_PMD_NR; i++) { 3468 for (i = 1; i < HPAGE_PMD_NR; i++) {
3532 pc = head_pc + i; 3469 pc = head_pc + i;
3533 pc->mem_cgroup = memcg; 3470 pc->mem_cgroup = memcg;
3534 smp_wmb();/* see __commit_charge() */
3535 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; 3471 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
3536 } 3472 }
3537 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], 3473 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
@@ -3687,7 +3623,6 @@ int mem_cgroup_charge_anon(struct page *page,
3687{ 3623{
3688 unsigned int nr_pages = 1; 3624 unsigned int nr_pages = 1;
3689 struct mem_cgroup *memcg; 3625 struct mem_cgroup *memcg;
3690 bool oom = true;
3691 3626
3692 if (mem_cgroup_disabled()) 3627 if (mem_cgroup_disabled())
3693 return 0; 3628 return 0;
@@ -3699,14 +3634,9 @@ int mem_cgroup_charge_anon(struct page *page,
3699 if (PageTransHuge(page)) { 3634 if (PageTransHuge(page)) {
3700 nr_pages <<= compound_order(page); 3635 nr_pages <<= compound_order(page);
3701 VM_BUG_ON_PAGE(!PageTransHuge(page), page); 3636 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3702 /*
3703 * Never OOM-kill a process for a huge page. The
3704 * fault handler will fall back to regular pages.
3705 */
3706 oom = false;
3707 } 3637 }
3708 3638
3709 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom); 3639 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
3710 if (!memcg) 3640 if (!memcg)
3711 return -ENOMEM; 3641 return -ENOMEM;
3712 __mem_cgroup_commit_charge(memcg, page, nr_pages, 3642 __mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3743,7 +3673,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3743 memcg = try_get_mem_cgroup_from_page(page); 3673 memcg = try_get_mem_cgroup_from_page(page);
3744 if (!memcg) 3674 if (!memcg)
3745 memcg = get_mem_cgroup_from_mm(mm); 3675 memcg = get_mem_cgroup_from_mm(mm);
3746 ret = mem_cgroup_try_charge(memcg, mask, 1, true); 3676 ret = mem_cgroup_try_charge(memcg, mask, 1);
3747 css_put(&memcg->css); 3677 css_put(&memcg->css);
3748 if (ret == -EINTR) 3678 if (ret == -EINTR)
3749 memcg = root_mem_cgroup; 3679 memcg = root_mem_cgroup;
@@ -3770,7 +3700,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
3770 if (!PageSwapCache(page)) { 3700 if (!PageSwapCache(page)) {
3771 struct mem_cgroup *memcg; 3701 struct mem_cgroup *memcg;
3772 3702
3773 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3703 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3774 if (!memcg) 3704 if (!memcg)
3775 return -ENOMEM; 3705 return -ENOMEM;
3776 *memcgp = memcg; 3706 *memcgp = memcg;
@@ -3839,7 +3769,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
3839 return 0; 3769 return 0;
3840 } 3770 }
3841 3771
3842 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3772 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3843 if (!memcg) 3773 if (!memcg)
3844 return -ENOMEM; 3774 return -ENOMEM;
3845 __mem_cgroup_commit_charge(memcg, page, 1, type, false); 3775 __mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -3993,7 +3923,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
3993 * replacement page, so leave it alone when phasing out the 3923 * replacement page, so leave it alone when phasing out the
3994 * page that is unused after the migration. 3924 * page that is unused after the migration.
3995 */ 3925 */
3996 if (!end_migration && !mem_cgroup_is_root(memcg)) 3926 if (!end_migration)
3997 mem_cgroup_do_uncharge(memcg, nr_pages, ctype); 3927 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3998 3928
3999 return memcg; 3929 return memcg;
@@ -4126,8 +4056,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
4126 * We uncharge this because swap is freed. This memcg can 4056 * We uncharge this because swap is freed. This memcg can
4127 * be obsolete one. We avoid calling css_tryget_online(). 4057 * be obsolete one. We avoid calling css_tryget_online().
4128 */ 4058 */
4129 if (!mem_cgroup_is_root(memcg)) 4059 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
4130 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
4131 mem_cgroup_swap_statistics(memcg, false); 4060 mem_cgroup_swap_statistics(memcg, false);
4132 css_put(&memcg->css); 4061 css_put(&memcg->css);
4133 } 4062 }
@@ -4817,78 +4746,24 @@ out:
4817 return retval; 4746 return retval;
4818} 4747}
4819 4748
4820
4821static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
4822 enum mem_cgroup_stat_index idx)
4823{
4824 struct mem_cgroup *iter;
4825 long val = 0;
4826
4827 /* Per-cpu values can be negative, use a signed accumulator */
4828 for_each_mem_cgroup_tree(iter, memcg)
4829 val += mem_cgroup_read_stat(iter, idx);
4830
4831 if (val < 0) /* race ? */
4832 val = 0;
4833 return val;
4834}
4835
4836static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
4837{
4838 u64 val;
4839
4840 if (!mem_cgroup_is_root(memcg)) {
4841 if (!swap)
4842 return res_counter_read_u64(&memcg->res, RES_USAGE);
4843 else
4844 return res_counter_read_u64(&memcg->memsw, RES_USAGE);
4845 }
4846
4847 /*
4848 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
4849 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
4850 */
4851 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
4852 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
4853
4854 if (swap)
4855 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
4856
4857 return val << PAGE_SHIFT;
4858}
4859
4860static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, 4749static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
4861 struct cftype *cft) 4750 struct cftype *cft)
4862{ 4751{
4863 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4752 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4864 u64 val; 4753 enum res_type type = MEMFILE_TYPE(cft->private);
4865 int name; 4754 int name = MEMFILE_ATTR(cft->private);
4866 enum res_type type;
4867
4868 type = MEMFILE_TYPE(cft->private);
4869 name = MEMFILE_ATTR(cft->private);
4870 4755
4871 switch (type) { 4756 switch (type) {
4872 case _MEM: 4757 case _MEM:
4873 if (name == RES_USAGE) 4758 return res_counter_read_u64(&memcg->res, name);
4874 val = mem_cgroup_usage(memcg, false);
4875 else
4876 val = res_counter_read_u64(&memcg->res, name);
4877 break;
4878 case _MEMSWAP: 4759 case _MEMSWAP:
4879 if (name == RES_USAGE) 4760 return res_counter_read_u64(&memcg->memsw, name);
4880 val = mem_cgroup_usage(memcg, true);
4881 else
4882 val = res_counter_read_u64(&memcg->memsw, name);
4883 break;
4884 case _KMEM: 4761 case _KMEM:
4885 val = res_counter_read_u64(&memcg->kmem, name); 4762 return res_counter_read_u64(&memcg->kmem, name);
4886 break; 4763 break;
4887 default: 4764 default:
4888 BUG(); 4765 BUG();
4889 } 4766 }
4890
4891 return val;
4892} 4767}
4893 4768
4894#ifdef CONFIG_MEMCG_KMEM 4769#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +5225,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
5350 if (!t) 5225 if (!t)
5351 goto unlock; 5226 goto unlock;
5352 5227
5353 usage = mem_cgroup_usage(memcg, swap); 5228 if (!swap)
5229 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5230 else
5231 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5354 5232
5355 /* 5233 /*
5356 * current_threshold points to threshold just below or equal to usage. 5234 * current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +5324,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
5446 5324
5447 mutex_lock(&memcg->thresholds_lock); 5325 mutex_lock(&memcg->thresholds_lock);
5448 5326
5449 if (type == _MEM) 5327 if (type == _MEM) {
5450 thresholds = &memcg->thresholds; 5328 thresholds = &memcg->thresholds;
5451 else if (type == _MEMSWAP) 5329 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5330 } else if (type == _MEMSWAP) {
5452 thresholds = &memcg->memsw_thresholds; 5331 thresholds = &memcg->memsw_thresholds;
5453 else 5332 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5333 } else
5454 BUG(); 5334 BUG();
5455 5335
5456 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5457
5458 /* Check if a threshold crossed before adding a new one */ 5336 /* Check if a threshold crossed before adding a new one */
5459 if (thresholds->primary) 5337 if (thresholds->primary)
5460 __mem_cgroup_threshold(memcg, type == _MEMSWAP); 5338 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +5412,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
5534 int i, j, size; 5412 int i, j, size;
5535 5413
5536 mutex_lock(&memcg->thresholds_lock); 5414 mutex_lock(&memcg->thresholds_lock);
5537 if (type == _MEM) 5415
5416 if (type == _MEM) {
5538 thresholds = &memcg->thresholds; 5417 thresholds = &memcg->thresholds;
5539 else if (type == _MEMSWAP) 5418 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5419 } else if (type == _MEMSWAP) {
5540 thresholds = &memcg->memsw_thresholds; 5420 thresholds = &memcg->memsw_thresholds;
5541 else 5421 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5422 } else
5542 BUG(); 5423 BUG();
5543 5424
5544 if (!thresholds->primary) 5425 if (!thresholds->primary)
5545 goto unlock; 5426 goto unlock;
5546 5427
5547 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5548
5549 /* Check if a threshold crossed before removing */ 5428 /* Check if a threshold crossed before removing */
5550 __mem_cgroup_threshold(memcg, type == _MEMSWAP); 5429 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
5551 5430
@@ -6299,9 +6178,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
6299 * core guarantees its existence. 6178 * core guarantees its existence.
6300 */ 6179 */
6301 } else { 6180 } else {
6302 res_counter_init(&memcg->res, NULL); 6181 res_counter_init(&memcg->res, &root_mem_cgroup->res);
6303 res_counter_init(&memcg->memsw, NULL); 6182 res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
6304 res_counter_init(&memcg->kmem, NULL); 6183 res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
6305 /* 6184 /*
6306 * Deeper hierachy with use_hierarchy == false doesn't make 6185 * Deeper hierachy with use_hierarchy == false doesn't make
6307 * much sense so let cgroup subsystem know about this 6186 * much sense so let cgroup subsystem know about this
@@ -6435,55 +6314,39 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
6435 6314
6436#ifdef CONFIG_MMU 6315#ifdef CONFIG_MMU
6437/* Handlers for move charge at task migration. */ 6316/* Handlers for move charge at task migration. */
6438#define PRECHARGE_COUNT_AT_ONCE 256
6439static int mem_cgroup_do_precharge(unsigned long count) 6317static int mem_cgroup_do_precharge(unsigned long count)
6440{ 6318{
6441 int ret = 0; 6319 int ret;
6442 int batch_count = PRECHARGE_COUNT_AT_ONCE;
6443 struct mem_cgroup *memcg = mc.to;
6444 6320
6445 if (mem_cgroup_is_root(memcg)) { 6321 /* Try a single bulk charge without reclaim first */
6322 ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
6323 if (!ret) {
6446 mc.precharge += count; 6324 mc.precharge += count;
6447 /* we don't need css_get for root */
6448 return ret; 6325 return ret;
6449 } 6326 }
6450 /* try to charge at once */ 6327 if (ret == -EINTR) {
6451 if (count > 1) { 6328 __mem_cgroup_cancel_charge(root_mem_cgroup, count);
6452 struct res_counter *dummy;
6453 /*
6454 * "memcg" cannot be under rmdir() because we've already checked
6455 * by cgroup_lock_live_cgroup() that it is not removed and we
6456 * are still under the same cgroup_mutex. So we can postpone
6457 * css_get().
6458 */
6459 if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
6460 goto one_by_one;
6461 if (do_swap_account && res_counter_charge(&memcg->memsw,
6462 PAGE_SIZE * count, &dummy)) {
6463 res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
6464 goto one_by_one;
6465 }
6466 mc.precharge += count;
6467 return ret; 6329 return ret;
6468 } 6330 }
6469one_by_one: 6331
6470 /* fall back to one by one charge */ 6332 /* Try charges one by one with reclaim */
6471 while (count--) { 6333 while (count--) {
6472 if (signal_pending(current)) { 6334 ret = mem_cgroup_try_charge(mc.to,
6473 ret = -EINTR; 6335 GFP_KERNEL & ~__GFP_NORETRY, 1);
6474 break; 6336 /*
6475 } 6337 * In case of failure, any residual charges against
6476 if (!batch_count--) { 6338 * mc.to will be dropped by mem_cgroup_clear_mc()
6477 batch_count = PRECHARGE_COUNT_AT_ONCE; 6339 * later on. However, cancel any charges that are
6478 cond_resched(); 6340 * bypassed to root right away or they'll be lost.
6479 } 6341 */
6480 ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false); 6342 if (ret == -EINTR)
6343 __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
6481 if (ret) 6344 if (ret)
6482 /* mem_cgroup_clear_mc() will do uncharge later */
6483 return ret; 6345 return ret;
6484 mc.precharge++; 6346 mc.precharge++;
6347 cond_resched();
6485 } 6348 }
6486 return ret; 6349 return 0;
6487} 6350}
6488 6351
6489/** 6352/**
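mem_cgroup_do_precharge() above now tries one opportunistic bulk charge without reclaim and only falls back to page-by-page charging with reclaim if that fails. The strategy, modeled with a plain integer budget and a toy try_charge() helper:

#include <stdio.h>

static long budget = 10;

static int try_charge(long pages, int can_reclaim)
{
        if (budget >= pages) {
                budget -= pages;
                return 0;
        }
        if (can_reclaim && pages == 1 && budget >= 1) {
                budget -= 1;            /* pretend reclaim freed just enough */
                return 0;
        }
        return -1;
}

static long precharge(long count)
{
        long precharged = 0;

        if (!try_charge(count, 0))      /* one bulk charge, no reclaim */
                return count;

        while (count--) {               /* fall back to one page at a time */
                if (try_charge(1, 1))
                        break;
                precharged++;
        }
        return precharged;
}

int main(void)
{
        printf("precharged %ld of 16\n", precharge(16));
        return 0;
}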
@@ -6760,21 +6623,18 @@ static void __mem_cgroup_clear_mc(void)
6760 /* we must fixup refcnts and charges */ 6623 /* we must fixup refcnts and charges */
6761 if (mc.moved_swap) { 6624 if (mc.moved_swap) {
6762 /* uncharge swap account from the old cgroup */ 6625 /* uncharge swap account from the old cgroup */
6763 if (!mem_cgroup_is_root(mc.from)) 6626 res_counter_uncharge(&mc.from->memsw,
6764 res_counter_uncharge(&mc.from->memsw, 6627 PAGE_SIZE * mc.moved_swap);
6765 PAGE_SIZE * mc.moved_swap);
6766 6628
6767 for (i = 0; i < mc.moved_swap; i++) 6629 for (i = 0; i < mc.moved_swap; i++)
6768 css_put(&mc.from->css); 6630 css_put(&mc.from->css);
6769 6631
6770 if (!mem_cgroup_is_root(mc.to)) { 6632 /*
6771 /* 6633 * we charged both to->res and to->memsw, so we should
6772 * we charged both to->res and to->memsw, so we should 6634 * uncharge to->res.
6773 * uncharge to->res. 6635 */
6774 */ 6636 res_counter_uncharge(&mc.to->res,
6775 res_counter_uncharge(&mc.to->res, 6637 PAGE_SIZE * mc.moved_swap);
6776 PAGE_SIZE * mc.moved_swap);
6777 }
6778 /* we've already done css_get(mc.to) */ 6638 /* we've already done css_get(mc.to) */
6779 mc.moved_swap = 0; 6639 mc.moved_swap = 0;
6780 } 6640 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a013bc94ebbe..44c6bd201d3a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1173,6 +1173,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1173 lock_page(hpage); 1173 lock_page(hpage);
1174 1174
1175 /* 1175 /*
1176 * The page could have changed compound pages during the locking.
1177 * If this happens just bail out.
1178 */
1179 if (compound_head(p) != hpage) {
1180 action_result(pfn, "different compound page after locking", IGNORED);
1181 res = -EBUSY;
1182 goto out;
1183 }
1184
1185 /*
1176 * We use page flags to determine what action should be taken, but 1186 * We use page flags to determine what action should be taken, but
1177 * the flags can be modified by the error containment action. One 1187 * the flags can be modified by the error containment action. One
1178 * example is an mlocked page, where PG_mlocked is cleared by 1188 * example is an mlocked page, where PG_mlocked is cleared by
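The memory_failure() hunk above is a check-after-lock: the compound head sampled before lock_page() may have changed by the time the lock is held, so it is re-read and the handler bails out with -EBUSY if it no longer matches. The same pattern in a small threaded sketch, with an integer standing in for compound_head(p).

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int head = 1;                     /* may be changed by other threads */

static int handle(int expected_head)
{
        pthread_mutex_lock(&lock);
        if (head != expected_head) {     /* re-check under the lock */
                pthread_mutex_unlock(&lock);
                return -16;              /* -EBUSY: someone raced with us */
        }
        /* ... safe to act on the page here ... */
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        int expected = head;             /* sampled before locking */
        head = 2;                        /* simulate a racing split/merge */
        printf("handle: %d\n", handle(expected));
        return 0;
}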
diff --git a/mm/memory.c b/mm/memory.c
index 8b44f765b645..5c55270729f7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
884 return 0; 884 return 0;
885} 885}
886 886
887int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, 887static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
888 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, 888 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
889 unsigned long addr, unsigned long end) 889 unsigned long addr, unsigned long end)
890{ 890{
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
2399/* 2399/*
2400 * We enter with non-exclusive mmap_sem (to exclude vma changes, 2400 * We enter with non-exclusive mmap_sem (to exclude vma changes,
2401 * but allow concurrent faults), and pte mapped but not yet locked. 2401 * but allow concurrent faults), and pte mapped but not yet locked.
2402 * We return with mmap_sem still held, but pte unmapped and unlocked. 2402 * We return with pte unmapped and unlocked.
2403 *
2404 * We return with the mmap_sem locked or unlocked in the same cases
2405 * as does filemap_fault().
2403 */ 2406 */
2404static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, 2407static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2405 unsigned long address, pte_t *page_table, pmd_t *pmd, 2408 unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
2688 return VM_FAULT_OOM; 2691 return VM_FAULT_OOM;
2689} 2692}
2690 2693
2694/*
2695 * The mmap_sem must have been held on entry, and may have been
2696 * released depending on flags and vma->vm_ops->fault() return value.
 2697 * See filemap_fault() and __lock_page_or_retry().
2698 */
2691static int __do_fault(struct vm_area_struct *vma, unsigned long address, 2699static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2692 pgoff_t pgoff, unsigned int flags, struct page **page) 2700 pgoff_t pgoff, unsigned int flags, struct page **page)
2693{ 2701{
@@ -2744,7 +2752,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
2744 if (write) 2752 if (write)
2745 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2753 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2746 else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) 2754 else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
2747 pte_mksoft_dirty(entry); 2755 entry = pte_mksoft_dirty(entry);
2748 if (anon) { 2756 if (anon) {
2749 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); 2757 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2750 page_add_new_anon_rmap(page, vma, address); 2758 page_add_new_anon_rmap(page, vma, address);
@@ -2758,17 +2766,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
2758 update_mmu_cache(vma, address, pte); 2766 update_mmu_cache(vma, address, pte);
2759} 2767}
2760 2768
2761static unsigned long fault_around_bytes = rounddown_pow_of_two(65536); 2769static unsigned long fault_around_bytes __read_mostly =
2762 2770 rounddown_pow_of_two(65536);
2763static inline unsigned long fault_around_pages(void)
2764{
2765 return fault_around_bytes >> PAGE_SHIFT;
2766}
2767
2768static inline unsigned long fault_around_mask(void)
2769{
2770 return ~(fault_around_bytes - 1) & PAGE_MASK;
2771}
2772 2771
2773#ifdef CONFIG_DEBUG_FS 2772#ifdef CONFIG_DEBUG_FS
2774static int fault_around_bytes_get(void *data, u64 *val) 2773static int fault_around_bytes_get(void *data, u64 *val)
@@ -2834,12 +2833,15 @@ late_initcall(fault_around_debugfs);
2834static void do_fault_around(struct vm_area_struct *vma, unsigned long address, 2833static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
2835 pte_t *pte, pgoff_t pgoff, unsigned int flags) 2834 pte_t *pte, pgoff_t pgoff, unsigned int flags)
2836{ 2835{
2837 unsigned long start_addr; 2836 unsigned long start_addr, nr_pages, mask;
2838 pgoff_t max_pgoff; 2837 pgoff_t max_pgoff;
2839 struct vm_fault vmf; 2838 struct vm_fault vmf;
2840 int off; 2839 int off;
2841 2840
2842 start_addr = max(address & fault_around_mask(), vma->vm_start); 2841 nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
2842 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
2843
2844 start_addr = max(address & mask, vma->vm_start);
2843 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 2845 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
2844 pte -= off; 2846 pte -= off;
2845 pgoff -= off; 2847 pgoff -= off;
@@ -2851,7 +2853,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
2851 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + 2853 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
2852 PTRS_PER_PTE - 1; 2854 PTRS_PER_PTE - 1;
2853 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, 2855 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
2854 pgoff + fault_around_pages() - 1); 2856 pgoff + nr_pages - 1);
2855 2857
2856 /* Check if it makes any sense to call ->map_pages */ 2858 /* Check if it makes any sense to call ->map_pages */
2857 while (!pte_none(*pte)) { 2859 while (!pte_none(*pte)) {
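The do_fault_around() change above derives both the window size and the alignment mask from a single snapshot of fault_around_bytes, so a concurrent debugfs write cannot leave the two inconsistent. The arithmetic, pulled out into a standalone program with illustrative values (4 KiB pages, 64 KiB window):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long fault_around_bytes = 65536;       /* power of two */
        unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;
        unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
        unsigned long address = 0x7f1234567abcUL;

        printf("nr_pages=%lu mask=%#lx start=%#lx\n",
               nr_pages, mask, address & mask);
        return 0;
}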
@@ -2886,7 +2888,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2886 * something). 2888 * something).
2887 */ 2889 */
2888 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && 2890 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
2889 fault_around_pages() > 1) { 2891 fault_around_bytes >> PAGE_SHIFT > 1) {
2890 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 2892 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
2891 do_fault_around(vma, address, pte, pgoff, flags); 2893 do_fault_around(vma, address, pte, pgoff, flags);
2892 if (!pte_same(*pte, orig_pte)) 2894 if (!pte_same(*pte, orig_pte))
@@ -3016,6 +3018,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3016 return ret; 3018 return ret;
3017} 3019}
3018 3020
3021/*
3022 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3023 * but allow concurrent faults).
3024 * The mmap_sem may have been released depending on flags and our
3025 * return value. See filemap_fault() and __lock_page_or_retry().
3026 */
3019static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3027static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3020 unsigned long address, pte_t *page_table, pmd_t *pmd, 3028 unsigned long address, pte_t *page_table, pmd_t *pmd,
3021 unsigned int flags, pte_t orig_pte) 3029 unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3048,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3040 * 3048 *
3041 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3049 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3042 * but allow concurrent faults), and pte mapped but not yet locked. 3050 * but allow concurrent faults), and pte mapped but not yet locked.
3043 * We return with mmap_sem still held, but pte unmapped and unlocked. 3051 * We return with pte unmapped and unlocked.
3052 * The mmap_sem may have been released depending on flags and our
3053 * return value. See filemap_fault() and __lock_page_or_retry().
3044 */ 3054 */
3045static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3055static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3046 unsigned long address, pte_t *page_table, pmd_t *pmd, 3056 unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3182,10 @@ out:
3172 * 3182 *
3173 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3183 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3174 * but allow concurrent faults), and pte mapped but not yet locked. 3184 * but allow concurrent faults), and pte mapped but not yet locked.
3175 * We return with mmap_sem still held, but pte unmapped and unlocked. 3185 * We return with pte unmapped and unlocked.
3186 *
3187 * The mmap_sem may have been released depending on flags and our
3188 * return value. See filemap_fault() and __lock_page_or_retry().
3176 */ 3189 */
3177static int handle_pte_fault(struct mm_struct *mm, 3190static int handle_pte_fault(struct mm_struct *mm,
3178 struct vm_area_struct *vma, unsigned long address, 3191 struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3194,7 @@ static int handle_pte_fault(struct mm_struct *mm,
3181 pte_t entry; 3194 pte_t entry;
3182 spinlock_t *ptl; 3195 spinlock_t *ptl;
3183 3196
3184 entry = *pte; 3197 entry = ACCESS_ONCE(*pte);
3185 if (!pte_present(entry)) { 3198 if (!pte_present(entry)) {
3186 if (pte_none(entry)) { 3199 if (pte_none(entry)) {
3187 if (vma->vm_ops) { 3200 if (vma->vm_ops) {
@@ -3232,6 +3245,9 @@ unlock:
3232 3245
3233/* 3246/*
3234 * By the time we get here, we already hold the mm semaphore 3247 * By the time we get here, we already hold the mm semaphore
3248 *
3249 * The mmap_sem may have been released depending on flags and our
3250 * return value. See filemap_fault() and __lock_page_or_retry().
3235 */ 3251 */
3236static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3252static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3237 unsigned long address, unsigned int flags) 3253 unsigned long address, unsigned int flags)
@@ -3313,6 +3329,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3313 return handle_pte_fault(mm, vma, address, pte, pmd, flags); 3329 return handle_pte_fault(mm, vma, address, pte, pmd, flags);
3314} 3330}
3315 3331
3332/*
3333 * By the time we get here, we already hold the mm semaphore
3334 *
3335 * The mmap_sem may have been released depending on flags and our
3336 * return value. See filemap_fault() and __lock_page_or_retry().
3337 */
3316int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3338int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3317 unsigned long address, unsigned int flags) 3339 unsigned long address, unsigned int flags)
3318{ 3340{
@@ -3591,11 +3613,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
3591 ret = get_user_pages(tsk, mm, addr, 1, 3613 ret = get_user_pages(tsk, mm, addr, 1,
3592 write, 1, &page, &vma); 3614 write, 1, &page, &vma);
3593 if (ret <= 0) { 3615 if (ret <= 0) {
3616#ifndef CONFIG_HAVE_IOREMAP_PROT
3617 break;
3618#else
3594 /* 3619 /*
3595 * Check if this is a VM_IO | VM_PFNMAP VMA, which 3620 * Check if this is a VM_IO | VM_PFNMAP VMA, which
3596 * we can access using slightly different code. 3621 * we can access using slightly different code.
3597 */ 3622 */
3598#ifdef CONFIG_HAVE_IOREMAP_PROT
3599 vma = find_vma(mm, addr); 3623 vma = find_vma(mm, addr);
3600 if (!vma || vma->vm_start > addr) 3624 if (!vma || vma->vm_start > addr)
3601 break; 3625 break;
@@ -3603,9 +3627,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
3603 ret = vma->vm_ops->access(vma, addr, buf, 3627 ret = vma->vm_ops->access(vma, addr, buf,
3604 len, write); 3628 len, write);
3605 if (ret <= 0) 3629 if (ret <= 0)
3606#endif
3607 break; 3630 break;
3608 bytes = ret; 3631 bytes = ret;
3632#endif
3609 } else { 3633 } else {
3610 bytes = len; 3634 bytes = len;
3611 offset = addr & (PAGE_SIZE-1); 3635 offset = addr & (PAGE_SIZE-1);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 469bbf505f85..2ff8c2325e96 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
284} 284}
285#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ 285#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
286 286
287static void grow_zone_span(struct zone *zone, unsigned long start_pfn, 287static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
288 unsigned long end_pfn) 288 unsigned long end_pfn)
289{ 289{
290 unsigned long old_zone_end_pfn; 290 unsigned long old_zone_end_pfn;
291 291
@@ -427,8 +427,8 @@ out_fail:
427 return -1; 427 return -1;
428} 428}
429 429
430static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, 430static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
431 unsigned long end_pfn) 431 unsigned long end_pfn)
432{ 432{
433 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat); 433 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
434 434
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
977 zone = page_zone(pfn_to_page(pfn)); 977 zone = page_zone(pfn_to_page(pfn));
978 978
979 ret = -EINVAL; 979 ret = -EINVAL;
980 if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && 980 if ((zone_idx(zone) > ZONE_NORMAL ||
981 online_type == MMOP_ONLINE_MOVABLE) &&
981 !can_online_high_movable(zone)) 982 !can_online_high_movable(zone))
982 goto out; 983 goto out;
983 984
984 if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { 985 if (online_type == MMOP_ONLINE_KERNEL &&
986 zone_idx(zone) == ZONE_MOVABLE) {
985 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) 987 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
986 goto out; 988 goto out;
987 } 989 }
988 if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { 990 if (online_type == MMOP_ONLINE_MOVABLE &&
991 zone_idx(zone) == ZONE_MOVABLE - 1) {
989 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) 992 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
990 goto out; 993 goto out;
991 } 994 }
@@ -1156,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
1156 return 0; 1159 return 0;
1157} 1160}
1158 1161
1162/*
 1163 * If a movable zone has already been set up, newly added memory should be checked.
 1164 * If its address is at or above the movable zone's start, it should be added as movable.
 1165 * Without this check, the movable zone may overlap with another zone.
1166 */
1167static int should_add_memory_movable(int nid, u64 start, u64 size)
1168{
1169 unsigned long start_pfn = start >> PAGE_SHIFT;
1170 pg_data_t *pgdat = NODE_DATA(nid);
1171 struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
1172
1173 if (zone_is_empty(movable_zone))
1174 return 0;
1175
1176 if (movable_zone->zone_start_pfn <= start_pfn)
1177 return 1;
1178
1179 return 0;
1180}
1181
1182int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
1183{
1184 if (should_add_memory_movable(nid, start, size))
1185 return ZONE_MOVABLE;
1186
1187 return zone_default;
1188}
1189
1159/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ 1190/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
1160int __ref add_memory(int nid, u64 start, u64 size) 1191int __ref add_memory(int nid, u64 start, u64 size)
1161{ 1192{
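The new zone_for_memory()/should_add_memory_movable() pair above keeps hot-added memory out of lower zones once it lies at or beyond the start of a populated ZONE_MOVABLE. The decision, modeled with plain pfn numbers and illustrative constants:

#include <stdio.h>

#define ZONE_NORMAL  2
#define ZONE_MOVABLE 3

static unsigned long movable_start_pfn = 0x100000;   /* 0 would mean "empty" */

static int zone_for_memory(unsigned long start_pfn, int zone_default)
{
        if (movable_start_pfn && movable_start_pfn <= start_pfn)
                return ZONE_MOVABLE;
        return zone_default;
}

int main(void)
{
        printf("%d\n", zone_for_memory(0x080000, ZONE_NORMAL));  /* below: 2 */
        printf("%d\n", zone_for_memory(0x180000, ZONE_NORMAL));  /* above: 3 */
        return 0;
}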
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
210 * @vma: target vma 210 * @vma: target vma
211 * @start: start address 211 * @start: start address
212 * @end: end address 212 * @end: end address
213 * @nonblocking:
213 * 214 *
214 * This takes care of making the pages present too. 215 * This takes care of making the pages present too.
215 * 216 *
216 * return 0 on success, negative error code on error. 217 * return 0 on success, negative error code on error.
217 * 218 *
218 * vma->vm_mm->mmap_sem must be held for at least read. 219 * vma->vm_mm->mmap_sem must be held.
220 *
221 * If @nonblocking is NULL, it may be held for read or write and will
222 * be unperturbed.
223 *
 224 * If @nonblocking is non-NULL, it must be held for read only and may be
225 * released. If it's released, *@nonblocking will be set to 0.
219 */ 226 */
220long __mlock_vma_pages_range(struct vm_area_struct *vma, 227long __mlock_vma_pages_range(struct vm_area_struct *vma,
221 unsigned long start, unsigned long end, int *nonblocking) 228 unsigned long start, unsigned long end, int *nonblocking)
diff --git a/mm/mmap.c b/mm/mmap.c
index 129b847d30cc..64c9d736155c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
31#include <linux/mempolicy.h> 31#include <linux/mempolicy.h>
32#include <linux/rmap.h> 32#include <linux/rmap.h>
33#include <linux/mmu_notifier.h> 33#include <linux/mmu_notifier.h>
34#include <linux/mmdebug.h>
34#include <linux/perf_event.h> 35#include <linux/perf_event.h>
35#include <linux/audit.h> 36#include <linux/audit.h>
36#include <linux/khugepaged.h> 37#include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
134{ 135{
135 unsigned long free, allowed, reserve; 136 unsigned long free, allowed, reserve;
136 137
138 VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
139 -(s64)vm_committed_as_batch * num_online_cpus(),
140 "memory commitment underflow");
141
137 vm_acct_memory(pages); 142 vm_acct_memory(pages);
138 143
139 /* 144 /*
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 41cefdf0aadd..950813b1eb36 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -23,6 +23,25 @@
23static struct srcu_struct srcu; 23static struct srcu_struct srcu;
24 24
25/* 25/*
26 * This function allows mmu_notifier::release callback to delay a call to
27 * a function that will free appropriate resources. The function must be
28 * quick and must not block.
29 */
30void mmu_notifier_call_srcu(struct rcu_head *rcu,
31 void (*func)(struct rcu_head *rcu))
32{
33 call_srcu(&srcu, rcu, func);
34}
35EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
36
37void mmu_notifier_synchronize(void)
38{
39 /* Wait for any running method to finish. */
40 srcu_barrier(&srcu);
41}
42EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
43
44/*
26 * This function can't run concurrently against mmu_notifier_register 45 * This function can't run concurrently against mmu_notifier_register
27 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 46 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
28 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers 47 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
53 */ 72 */
54 if (mn->ops->release) 73 if (mn->ops->release)
55 mn->ops->release(mn, mm); 74 mn->ops->release(mn, mm);
56 srcu_read_unlock(&srcu, id);
57 75
58 spin_lock(&mm->mmu_notifier_mm->lock); 76 spin_lock(&mm->mmu_notifier_mm->lock);
59 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { 77 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
69 hlist_del_init_rcu(&mn->hlist); 87 hlist_del_init_rcu(&mn->hlist);
70 } 88 }
71 spin_unlock(&mm->mmu_notifier_mm->lock); 89 spin_unlock(&mm->mmu_notifier_mm->lock);
90 srcu_read_unlock(&srcu, id);
72 91
73 /* 92 /*
74 * synchronize_srcu here prevents mmu_notifier_release from returning to 93 * synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
325} 344}
326EXPORT_SYMBOL_GPL(mmu_notifier_unregister); 345EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
327 346
347/*
348 * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
349 */
350void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
351 struct mm_struct *mm)
352{
353 spin_lock(&mm->mmu_notifier_mm->lock);
354 /*
355 * Can not use list_del_rcu() since __mmu_notifier_release
356 * can delete it before we hold the lock.
357 */
358 hlist_del_init_rcu(&mn->hlist);
359 spin_unlock(&mm->mmu_notifier_mm->lock);
360
361 BUG_ON(atomic_read(&mm->mm_count) <= 0);
362 mmdrop(mm);
363}
364EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
365
328static int __init mmu_notifier_init(void) 366static int __init mmu_notifier_init(void)
329{ 367{
330 return init_srcu_struct(&srcu); 368 return init_srcu_struct(&srcu);
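
For the two exported helpers added at the top of this file, a sketch of the intended driver-side usage. The example_* names and the context struct are assumptions; only mmu_notifier_call_srcu() and the ->release() semantics come from the patch.

#include <linux/mmu_notifier.h>
#include <linux/slab.h>

struct example_ctx {				/* hypothetical driver state */
	struct mmu_notifier mn;
	struct rcu_head rcu;
};

static void example_free_ctx(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct example_ctx, rcu));
}

/* ->release() must be quick and must not block, so defer the free until
 * SRCU readers of the notifier list are done.
 */
static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct example_ctx *ctx = container_of(mn, struct example_ctx, mn);

	mmu_notifier_call_srcu(&ctx->rcu, example_free_ctx);
}

static const struct mmu_notifier_ops example_ops = {
	.release = example_release,
};

A teardown path that already knows ->release() has run can then detach with mmu_notifier_unregister_no_release() and free through the same SRCU callback, which appears to be the use case these helpers target.
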
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3291e82d4352..1e11df8fa7ec 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -258,8 +258,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
258 unsigned long totalpages, const nodemask_t *nodemask, 258 unsigned long totalpages, const nodemask_t *nodemask,
259 bool force_kill) 259 bool force_kill)
260{ 260{
261 if (task->exit_state)
262 return OOM_SCAN_CONTINUE;
263 if (oom_unkillable_task(task, NULL, nodemask)) 261 if (oom_unkillable_task(task, NULL, nodemask))
264 return OOM_SCAN_CONTINUE; 262 return OOM_SCAN_CONTINUE;
265 263
@@ -559,28 +557,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
559 * if a parallel OOM killing is already taking place that includes a zone in 557 * if a parallel OOM killing is already taking place that includes a zone in
560 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. 558 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
561 */ 559 */
562int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 560bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
563{ 561{
564 struct zoneref *z; 562 struct zoneref *z;
565 struct zone *zone; 563 struct zone *zone;
566 int ret = 1; 564 bool ret = true;
567 565
568 spin_lock(&zone_scan_lock); 566 spin_lock(&zone_scan_lock);
569 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 567 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
570 if (zone_is_oom_locked(zone)) { 568 if (zone_is_oom_locked(zone)) {
571 ret = 0; 569 ret = false;
572 goto out; 570 goto out;
573 } 571 }
574 }
575 572
576 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 573 /*
577 /* 574 * Lock each zone in the zonelist under zone_scan_lock so a parallel
578 * Lock each zone in the zonelist under zone_scan_lock so a 575 * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
579 * parallel invocation of try_set_zonelist_oom() doesn't succeed 576 */
580 * when it shouldn't. 577 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
581 */
582 zone_set_flag(zone, ZONE_OOM_LOCKED); 578 zone_set_flag(zone, ZONE_OOM_LOCKED);
583 }
584 579
585out: 580out:
586 spin_unlock(&zone_scan_lock); 581 spin_unlock(&zone_scan_lock);
@@ -592,15 +587,14 @@ out:
592 * allocation attempts with zonelists containing them may now recall the OOM 587 * allocation attempts with zonelists containing them may now recall the OOM
593 * killer, if necessary. 588 * killer, if necessary.
594 */ 589 */
595void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 590void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
596{ 591{
597 struct zoneref *z; 592 struct zoneref *z;
598 struct zone *zone; 593 struct zone *zone;
599 594
600 spin_lock(&zone_scan_lock); 595 spin_lock(&zone_scan_lock);
601 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 596 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
602 zone_clear_flag(zone, ZONE_OOM_LOCKED); 597 zone_clear_flag(zone, ZONE_OOM_LOCKED);
603 }
604 spin_unlock(&zone_scan_lock); 598 spin_unlock(&zone_scan_lock);
605} 599}
606 600
@@ -694,9 +688,9 @@ void pagefault_out_of_memory(void)
694 if (mem_cgroup_oom_synchronize(true)) 688 if (mem_cgroup_oom_synchronize(true))
695 return; 689 return;
696 690
697 zonelist = node_zonelist(first_online_node, GFP_KERNEL); 691 zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
698 if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { 692 if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
699 out_of_memory(NULL, 0, 0, NULL, false); 693 out_of_memory(NULL, 0, 0, NULL, false);
700 clear_zonelist_oom(zonelist, GFP_KERNEL); 694 oom_zonelist_unlock(zonelist, GFP_KERNEL);
701 } 695 }
702} 696}
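
With try_set_zonelist_oom()/clear_zonelist_oom() renamed and switched to bool, the calling convention is worth spelling out once. A condensed sketch with an invented wrapper name; the out_of_memory() arguments mirror the hunk above:

/* Sketch: take the per-zone OOM locks for the whole zonelist, back off
 * if another OOM kill in these zones is already in flight, and always
 * unlock after out_of_memory().
 */
static void example_trigger_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
		schedule_timeout_uninterruptible(1);
		return;
	}
	out_of_memory(zonelist, gfp_mask, 0, NULL, false);
	oom_zonelist_unlock(zonelist, gfp_mask);
}
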
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e0c943014eb7..91d73ef1744d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -261,14 +261,11 @@ static unsigned long global_dirtyable_memory(void)
261 */ 261 */
262void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) 262void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
263{ 263{
264 const unsigned long available_memory = global_dirtyable_memory();
264 unsigned long background; 265 unsigned long background;
265 unsigned long dirty; 266 unsigned long dirty;
266 unsigned long uninitialized_var(available_memory);
267 struct task_struct *tsk; 267 struct task_struct *tsk;
268 268
269 if (!vm_dirty_bytes || !dirty_background_bytes)
270 available_memory = global_dirtyable_memory();
271
272 if (vm_dirty_bytes) 269 if (vm_dirty_bytes)
273 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); 270 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
274 else 271 else
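
The uninitialized_var() dance is gone because available_memory is now computed unconditionally. A sketch of the resulting shape of global_dirty_limits() (abridged; the percentage-of-dirtyable-memory arithmetic is reproduced from memory of this kernel, not from the hunk itself):

static void example_dirty_limits(unsigned long *pbackground,
				 unsigned long *pdirty)
{
	const unsigned long available_memory = global_dirtyable_memory();

	if (vm_dirty_bytes)
		*pdirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else
		*pdirty = (vm_dirty_ratio * available_memory) / 100;

	if (dirty_background_bytes)
		*pbackground = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		*pbackground = (dirty_background_ratio * available_memory) / 100;
}
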
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef44ad736ca1..18cee0d4c8a2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
680 int migratetype = 0; 680 int migratetype = 0;
681 int batch_free = 0; 681 int batch_free = 0;
682 int to_free = count; 682 int to_free = count;
683 unsigned long nr_scanned;
683 684
684 spin_lock(&zone->lock); 685 spin_lock(&zone->lock);
685 zone->pages_scanned = 0; 686 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
687 if (nr_scanned)
688 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
686 689
687 while (to_free) { 690 while (to_free) {
688 struct page *page; 691 struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
731 unsigned int order, 734 unsigned int order,
732 int migratetype) 735 int migratetype)
733{ 736{
737 unsigned long nr_scanned;
734 spin_lock(&zone->lock); 738 spin_lock(&zone->lock);
735 zone->pages_scanned = 0; 739 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
740 if (nr_scanned)
741 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
736 742
737 __free_one_page(page, pfn, zone, order, migratetype); 743 __free_one_page(page, pfn, zone, order, migratetype);
738 if (unlikely(!is_migrate_isolate(migratetype))) 744 if (unlikely(!is_migrate_isolate(migratetype)))
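
Both hunks above replace the ad-hoc zone->pages_scanned field with the NR_PAGES_SCANNED vmstat counter, so "reset" becomes "subtract the current value". The recurring idiom, extracted into a hypothetical helper for clarity (callers hold zone->lock, as in the hunks):

static void example_reset_pages_scanned(struct zone *zone)
{
	unsigned long nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);

	if (nr_scanned)
		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
}
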
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1257void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) 1263void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
1258{ 1264{
1259 unsigned long flags; 1265 unsigned long flags;
1260 int to_drain; 1266 int to_drain, batch;
1261 unsigned long batch;
1262 1267
1263 local_irq_save(flags); 1268 local_irq_save(flags);
1264 batch = ACCESS_ONCE(pcp->batch); 1269 batch = ACCESS_ONCE(pcp->batch);
1265 if (pcp->count >= batch) 1270 to_drain = min(pcp->count, batch);
1266 to_drain = batch;
1267 else
1268 to_drain = pcp->count;
1269 if (to_drain > 0) { 1271 if (to_drain > 0) {
1270 free_pcppages_bulk(zone, to_drain, pcp); 1272 free_pcppages_bulk(zone, to_drain, pcp);
1271 pcp->count -= to_drain; 1273 pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
1610 } 1612 }
1611 1613
1612 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); 1614 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
1615 if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
1616 !zone_is_fair_depleted(zone))
1617 zone_set_flag(zone, ZONE_FAIR_DEPLETED);
1613 1618
1614 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1619 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1615 zone_statistics(preferred_zone, zone, gfp_flags); 1620 zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
1712{ 1717{
1713 /* free_pages may go negative - that's OK */ 1718 /* free_pages may go negative - that's OK */
1714 long min = mark; 1719 long min = mark;
1715 long lowmem_reserve = z->lowmem_reserve[classzone_idx];
1716 int o; 1720 int o;
1717 long free_cma = 0; 1721 long free_cma = 0;
1718 1722
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
1727 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); 1731 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
1728#endif 1732#endif
1729 1733
1730 if (free_pages - free_cma <= min + lowmem_reserve) 1734 if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
1731 return false; 1735 return false;
1732 for (o = 0; o < order; o++) { 1736 for (o = 0; o < order; o++) {
1733 /* At the next order, this order's pages become unavailable */ 1737 /* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
1922 1926
1923#endif /* CONFIG_NUMA */ 1927#endif /* CONFIG_NUMA */
1924 1928
1929static void reset_alloc_batches(struct zone *preferred_zone)
1930{
1931 struct zone *zone = preferred_zone->zone_pgdat->node_zones;
1932
1933 do {
1934 mod_zone_page_state(zone, NR_ALLOC_BATCH,
1935 high_wmark_pages(zone) - low_wmark_pages(zone) -
1936 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
1937 zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
1938 } while (zone++ != preferred_zone);
1939}
1940
1925/* 1941/*
1926 * get_page_from_freelist goes through the zonelist trying to allocate 1942 * get_page_from_freelist goes through the zonelist trying to allocate
1927 * a page. 1943 * a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
1939 int did_zlc_setup = 0; /* just call zlc_setup() one time */ 1955 int did_zlc_setup = 0; /* just call zlc_setup() one time */
1940 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && 1956 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
1941 (gfp_mask & __GFP_WRITE); 1957 (gfp_mask & __GFP_WRITE);
1958 int nr_fair_skipped = 0;
1959 bool zonelist_rescan;
1942 1960
1943zonelist_scan: 1961zonelist_scan:
1962 zonelist_rescan = false;
1963
1944 /* 1964 /*
1945 * Scan zonelist, looking for a zone with enough free. 1965 * Scan zonelist, looking for a zone with enough free.
1946 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. 1966 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
1964 */ 1984 */
1965 if (alloc_flags & ALLOC_FAIR) { 1985 if (alloc_flags & ALLOC_FAIR) {
1966 if (!zone_local(preferred_zone, zone)) 1986 if (!zone_local(preferred_zone, zone))
1987 break;
1988 if (zone_is_fair_depleted(zone)) {
1989 nr_fair_skipped++;
1967 continue; 1990 continue;
1968 if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) 1991 }
1969 continue;
1970 } 1992 }
1971 /* 1993 /*
1972 * When allocating a page cache page for writing, we 1994 * When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
2072 zlc_mark_zone_full(zonelist, z); 2094 zlc_mark_zone_full(zonelist, z);
2073 } 2095 }
2074 2096
2075 if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { 2097 if (page) {
2076 /* Disable zlc cache for second zonelist scan */
2077 zlc_active = 0;
2078 goto zonelist_scan;
2079 }
2080
2081 if (page)
2082 /* 2098 /*
2083 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was 2099 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
2084 * necessary to allocate the page. The expectation is 2100 * necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
2087 * for !PFMEMALLOC purposes. 2103 * for !PFMEMALLOC purposes.
2088 */ 2104 */
2089 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); 2105 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
2106 return page;
2107 }
2090 2108
2091 return page; 2109 /*
2110 * The first pass makes sure allocations are spread fairly within the
2111 * local node. However, the local node might have free pages left
2112 * after the fairness batches are exhausted, and remote zones haven't
2113 * even been considered yet. Try once more without fairness, and
2114 * include remote zones now, before entering the slowpath and waking
2115 * kswapd: prefer spilling to a remote zone over swapping locally.
2116 */
2117 if (alloc_flags & ALLOC_FAIR) {
2118 alloc_flags &= ~ALLOC_FAIR;
2119 if (nr_fair_skipped) {
2120 zonelist_rescan = true;
2121 reset_alloc_batches(preferred_zone);
2122 }
2123 if (nr_online_nodes > 1)
2124 zonelist_rescan = true;
2125 }
2126
2127 if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
2128 /* Disable zlc cache for second zonelist scan */
2129 zlc_active = 0;
2130 zonelist_rescan = true;
2131 }
2132
2133 if (zonelist_rescan)
2134 goto zonelist_scan;
2135
2136 return NULL;
2092} 2137}
2093 2138
2094/* 2139/*
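
Together with the removal of the retry: loop from __alloc_pages_nodemask() further down, the fairness retry now lives entirely inside get_page_from_freelist(). A condensed pseudo-C outline of the resulting two-pass logic; scan_zonelist() is a placeholder for the zone loop, everything else uses the names from the hunks above:

/* Pass 1 (ALLOC_FAIR): local zones only, skipping fairness-depleted ones.
 * Pass 2: if pass 1 found nothing, drop ALLOC_FAIR, refill the local
 * batches when any zone was skipped, and let remote zones join in.
 */
zonelist_scan:
	zonelist_rescan = false;
	page = scan_zonelist(...);		/* placeholder for the zone loop */
	if (page)
		return page;

	if (alloc_flags & ALLOC_FAIR) {
		alloc_flags &= ~ALLOC_FAIR;
		if (nr_fair_skipped) {
			reset_alloc_batches(preferred_zone);
			zonelist_rescan = true;
		}
		if (nr_online_nodes > 1)
			zonelist_rescan = true;
	}
	if (zonelist_rescan)
		goto zonelist_scan;
	return NULL;
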
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2201{ 2246{
2202 struct page *page; 2247 struct page *page;
2203 2248
2204 /* Acquire the OOM killer lock for the zones in zonelist */ 2249 /* Acquire the per-zone oom lock for each zone */
2205 if (!try_set_zonelist_oom(zonelist, gfp_mask)) { 2250 if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
2206 schedule_timeout_uninterruptible(1); 2251 schedule_timeout_uninterruptible(1);
2207 return NULL; 2252 return NULL;
2208 } 2253 }
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2240 out_of_memory(zonelist, gfp_mask, order, nodemask, false); 2285 out_of_memory(zonelist, gfp_mask, order, nodemask, false);
2241 2286
2242out: 2287out:
2243 clear_zonelist_oom(zonelist, gfp_mask); 2288 oom_zonelist_unlock(zonelist, gfp_mask);
2244 return page; 2289 return page;
2245} 2290}
2246 2291
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
2409 return page; 2454 return page;
2410} 2455}
2411 2456
2412static void reset_alloc_batches(struct zonelist *zonelist,
2413 enum zone_type high_zoneidx,
2414 struct zone *preferred_zone)
2415{
2416 struct zoneref *z;
2417 struct zone *zone;
2418
2419 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2420 /*
2421 * Only reset the batches of zones that were actually
2422 * considered in the fairness pass, we don't want to
2423 * trash fairness information for zones that are not
2424 * actually part of this zonelist's round-robin cycle.
2425 */
2426 if (!zone_local(preferred_zone, zone))
2427 continue;
2428 mod_zone_page_state(zone, NR_ALLOC_BATCH,
2429 high_wmark_pages(zone) - low_wmark_pages(zone) -
2430 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
2431 }
2432}
2433
2434static void wake_all_kswapds(unsigned int order, 2457static void wake_all_kswapds(unsigned int order,
2435 struct zonelist *zonelist, 2458 struct zonelist *zonelist,
2436 enum zone_type high_zoneidx, 2459 enum zone_type high_zoneidx,
@@ -2616,14 +2639,6 @@ rebalance:
2616 goto got_pg; 2639 goto got_pg;
2617 2640
2618 /* 2641 /*
2619 * It can become very expensive to allocate transparent hugepages at
2620 * fault, so use asynchronous memory compaction for THP unless it is
2621 * khugepaged trying to collapse.
2622 */
2623 if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
2624 migration_mode = MIGRATE_SYNC_LIGHT;
2625
2626 /*
2627 * If compaction is deferred for high-order allocations, it is because 2642 * If compaction is deferred for high-order allocations, it is because
2628 * sync compaction recently failed. If this is the case and the caller 2643 * sync compaction recently failed. If this is the case and the caller
2629 * requested a movable allocation that does not heavily disrupt the 2644 * requested a movable allocation that does not heavily disrupt the
@@ -2633,6 +2648,15 @@ rebalance:
2633 (gfp_mask & __GFP_NO_KSWAPD)) 2648 (gfp_mask & __GFP_NO_KSWAPD))
2634 goto nopage; 2649 goto nopage;
2635 2650
2651 /*
2652 * It can become very expensive to allocate transparent hugepages at
2653 * fault, so use asynchronous memory compaction for THP unless it is
2654 * khugepaged trying to collapse.
2655 */
2656 if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
2657 (current->flags & PF_KTHREAD))
2658 migration_mode = MIGRATE_SYNC_LIGHT;
2659
2636 /* Try direct reclaim and then allocating */ 2660 /* Try direct reclaim and then allocating */
2637 page = __alloc_pages_direct_reclaim(gfp_mask, order, 2661 page = __alloc_pages_direct_reclaim(gfp_mask, order,
2638 zonelist, high_zoneidx, 2662 zonelist, high_zoneidx,
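
The THP heuristic has not changed in spirit, but the moved block now tests GFP_TRANSHUGE explicitly instead of relying on __GFP_NO_KSWAPD, and it runs after the deferred-compaction bailout. The condition, rewritten as a hypothetical predicate to make the intent explicit:

/* Use lightweight synchronous compaction unless this is a THP page
 * fault from a user task (khugepaged, a kthread, still gets sync-light).
 */
static inline bool example_use_sync_light_compaction(gfp_t gfp_mask)
{
	bool thp_fault = (gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE &&
			 !(current->flags & PF_KTHREAD);

	return !thp_fault;
}
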
@@ -2766,29 +2790,12 @@ retry_cpuset:
2766 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) 2790 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
2767 alloc_flags |= ALLOC_CMA; 2791 alloc_flags |= ALLOC_CMA;
2768#endif 2792#endif
2769retry:
2770 /* First allocation attempt */ 2793 /* First allocation attempt */
2771 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, 2794 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
2772 zonelist, high_zoneidx, alloc_flags, 2795 zonelist, high_zoneidx, alloc_flags,
2773 preferred_zone, classzone_idx, migratetype); 2796 preferred_zone, classzone_idx, migratetype);
2774 if (unlikely(!page)) { 2797 if (unlikely(!page)) {
2775 /* 2798 /*
2776 * The first pass makes sure allocations are spread
2777 * fairly within the local node. However, the local
2778 * node might have free pages left after the fairness
2779 * batches are exhausted, and remote zones haven't
2780 * even been considered yet. Try once more without
2781 * fairness, and include remote zones now, before
2782 * entering the slowpath and waking kswapd: prefer
2783 * spilling to a remote zone over swapping locally.
2784 */
2785 if (alloc_flags & ALLOC_FAIR) {
2786 reset_alloc_batches(zonelist, high_zoneidx,
2787 preferred_zone);
2788 alloc_flags &= ~ALLOC_FAIR;
2789 goto retry;
2790 }
2791 /*
2792 * Runtime PM, block IO and its error handling path 2799 * Runtime PM, block IO and its error handling path
2793 * can deadlock because I/O on the device might not 2800 * can deadlock because I/O on the device might not
2794 * complete. 2801 * complete.
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
2962 * Note this is not alloc_pages_exact_node() which allocates on a specific node, 2969 * Note this is not alloc_pages_exact_node() which allocates on a specific node,
2963 * but is not exact. 2970 * but is not exact.
2964 */ 2971 */
2965void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 2972void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
2966{ 2973{
2967 unsigned order = get_order(size); 2974 unsigned order = get_order(size);
2968 struct page *p = alloc_pages_node(nid, gfp_mask, order); 2975 struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
2970 return NULL; 2977 return NULL;
2971 return make_alloc_exact((unsigned long)page_address(p), order, size); 2978 return make_alloc_exact((unsigned long)page_address(p), order, size);
2972} 2979}
2973EXPORT_SYMBOL(alloc_pages_exact_nid);
2974 2980
2975/** 2981/**
2976 * free_pages_exact - release memory allocated via alloc_pages_exact() 2982 * free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
3052void si_meminfo(struct sysinfo *val) 3058void si_meminfo(struct sysinfo *val)
3053{ 3059{
3054 val->totalram = totalram_pages; 3060 val->totalram = totalram_pages;
3055 val->sharedram = 0; 3061 val->sharedram = global_page_state(NR_SHMEM);
3056 val->freeram = global_page_state(NR_FREE_PAGES); 3062 val->freeram = global_page_state(NR_FREE_PAGES);
3057 val->bufferram = nr_blockdev_pages(); 3063 val->bufferram = nr_blockdev_pages();
3058 val->totalhigh = totalhigh_pages; 3064 val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
3072 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) 3078 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
3073 managed_pages += pgdat->node_zones[zone_type].managed_pages; 3079 managed_pages += pgdat->node_zones[zone_type].managed_pages;
3074 val->totalram = managed_pages; 3080 val->totalram = managed_pages;
3081 val->sharedram = node_page_state(nid, NR_SHMEM);
3075 val->freeram = node_page_state(nid, NR_FREE_PAGES); 3082 val->freeram = node_page_state(nid, NR_FREE_PAGES);
3076#ifdef CONFIG_HIGHMEM 3083#ifdef CONFIG_HIGHMEM
3077 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages; 3084 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
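
With these two hunks, sysinfo()'s previously zeroed sharedram field reports shmem/tmpfs pages, globally and per node. A small, runnable userspace check (illustrative program, not part of the series):

#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
	struct sysinfo si;

	if (sysinfo(&si))
		return 1;
	/* Should track Shmem: in /proc/meminfo on a kernel with this change. */
	printf("shared: %lu kB\n", si.sharedram * si.mem_unit / 1024);
	return 0;
}
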
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
3253 K(zone_page_state(zone, NR_BOUNCE)), 3260 K(zone_page_state(zone, NR_BOUNCE)),
3254 K(zone_page_state(zone, NR_FREE_CMA_PAGES)), 3261 K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
3255 K(zone_page_state(zone, NR_WRITEBACK_TEMP)), 3262 K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
3256 zone->pages_scanned, 3263 K(zone_page_state(zone, NR_PAGES_SCANNED)),
3257 (!zone_reclaimable(zone) ? "yes" : "no") 3264 (!zone_reclaimable(zone) ? "yes" : "no")
3258 ); 3265 );
3259 printk("lowmem_reserve[]:"); 3266 printk("lowmem_reserve[]:");
3260 for (i = 0; i < MAX_NR_ZONES; i++) 3267 for (i = 0; i < MAX_NR_ZONES; i++)
3261 printk(" %lu", zone->lowmem_reserve[i]); 3268 printk(" %ld", zone->lowmem_reserve[i]);
3262 printk("\n"); 3269 printk("\n");
3263 } 3270 }
3264 3271
@@ -5579,7 +5586,7 @@ static void calculate_totalreserve_pages(void)
5579 for_each_online_pgdat(pgdat) { 5586 for_each_online_pgdat(pgdat) {
5580 for (i = 0; i < MAX_NR_ZONES; i++) { 5587 for (i = 0; i < MAX_NR_ZONES; i++) {
5581 struct zone *zone = pgdat->node_zones + i; 5588 struct zone *zone = pgdat->node_zones + i;
5582 unsigned long max = 0; 5589 long max = 0;
5583 5590
5584 /* Find valid and maximum lowmem_reserve in the zone */ 5591 /* Find valid and maximum lowmem_reserve in the zone */
5585 for (j = i; j < MAX_NR_ZONES; j++) { 5592 for (j = i; j < MAX_NR_ZONES; j++) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 0ca36a7770b1..17b9172ec37f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
326 * - thrashing threshold in memory tight systems 326 * - thrashing threshold in memory tight systems
327 */ 327 */
328static pgoff_t count_history_pages(struct address_space *mapping, 328static pgoff_t count_history_pages(struct address_space *mapping,
329 struct file_ra_state *ra,
330 pgoff_t offset, unsigned long max) 329 pgoff_t offset, unsigned long max)
331{ 330{
332 pgoff_t head; 331 pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
349{ 348{
350 pgoff_t size; 349 pgoff_t size;
351 350
352 size = count_history_pages(mapping, ra, offset, max); 351 size = count_history_pages(mapping, offset, max);
353 352
354 /* 353 /*
355 * not enough history pages: 354 * not enough history pages:
diff --git a/mm/shmem.c b/mm/shmem.c
index af68b15a8fc1..302d1cf7ad07 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
149 vm_unacct_memory(VM_ACCT(size)); 149 vm_unacct_memory(VM_ACCT(size));
150} 150}
151 151
152static inline int shmem_reacct_size(unsigned long flags,
153 loff_t oldsize, loff_t newsize)
154{
155 if (!(flags & VM_NORESERVE)) {
156 if (VM_ACCT(newsize) > VM_ACCT(oldsize))
157 return security_vm_enough_memory_mm(current->mm,
158 VM_ACCT(newsize) - VM_ACCT(oldsize));
159 else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
160 vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
161 }
162 return 0;
163}
164
152/* 165/*
153 * ... whereas tmpfs objects are accounted incrementally as 166 * ... whereas tmpfs objects are accounted incrementally as
154 * pages are allocated, in order to allow huge sparse files. 167 * pages are allocated, in order to allow huge sparse files.
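
shmem_reacct_size() charges or uncharges only the difference between the old and new object size for !VM_NORESERVE objects: growing a SysV SHM segment from 1 MiB to 3 MiB charges roughly 2 MiB of commit, and shrinking it back releases that again. A sketch of the truncate-path usage, a simplified version of the shmem_setattr() hunk further down (the function name is invented):

static int example_resize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = i_size_read(inode);
	int error;

	error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize);
	if (error)			/* growing would exceed the commit limit */
		return error;
	i_size_write(inode, newsize);
	return 0;
}
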
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
280 */ 293 */
281static int shmem_add_to_page_cache(struct page *page, 294static int shmem_add_to_page_cache(struct page *page,
282 struct address_space *mapping, 295 struct address_space *mapping,
283 pgoff_t index, gfp_t gfp, void *expected) 296 pgoff_t index, void *expected)
284{ 297{
285 int error; 298 int error;
286 299
@@ -549,6 +562,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
549 loff_t newsize = attr->ia_size; 562 loff_t newsize = attr->ia_size;
550 563
551 if (newsize != oldsize) { 564 if (newsize != oldsize) {
565 error = shmem_reacct_size(SHMEM_I(inode)->flags,
566 oldsize, newsize);
567 if (error)
568 return error;
552 i_size_write(inode, newsize); 569 i_size_write(inode, newsize);
553 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 570 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
554 } 571 }
@@ -649,7 +666,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
649 */ 666 */
650 if (!error) 667 if (!error)
651 error = shmem_add_to_page_cache(*pagep, mapping, index, 668 error = shmem_add_to_page_cache(*pagep, mapping, index,
652 GFP_NOWAIT, radswap); 669 radswap);
653 if (error != -ENOMEM) { 670 if (error != -ENOMEM) {
654 /* 671 /*
655 * Truncation and eviction use free_swap_and_cache(), which 672 * Truncation and eviction use free_swap_and_cache(), which
@@ -1095,7 +1112,7 @@ repeat:
1095 gfp & GFP_RECLAIM_MASK); 1112 gfp & GFP_RECLAIM_MASK);
1096 if (!error) { 1113 if (!error) {
1097 error = shmem_add_to_page_cache(page, mapping, index, 1114 error = shmem_add_to_page_cache(page, mapping, index,
1098 gfp, swp_to_radix_entry(swap)); 1115 swp_to_radix_entry(swap));
1099 /* 1116 /*
1100 * We already confirmed swap under page lock, and make 1117 * We already confirmed swap under page lock, and make
1101 * no memory allocation here, so usually no possibility 1118 * no memory allocation here, so usually no possibility
@@ -1149,7 +1166,7 @@ repeat:
1149 __SetPageSwapBacked(page); 1166 __SetPageSwapBacked(page);
1150 __set_page_locked(page); 1167 __set_page_locked(page);
1151 if (sgp == SGP_WRITE) 1168 if (sgp == SGP_WRITE)
1152 init_page_accessed(page); 1169 __SetPageReferenced(page);
1153 1170
1154 error = mem_cgroup_charge_file(page, current->mm, 1171 error = mem_cgroup_charge_file(page, current->mm,
1155 gfp & GFP_RECLAIM_MASK); 1172 gfp & GFP_RECLAIM_MASK);
@@ -1158,7 +1175,7 @@ repeat:
1158 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); 1175 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1159 if (!error) { 1176 if (!error) {
1160 error = shmem_add_to_page_cache(page, mapping, index, 1177 error = shmem_add_to_page_cache(page, mapping, index,
1161 gfp, NULL); 1178 NULL);
1162 radix_tree_preload_end(); 1179 radix_tree_preload_end();
1163 } 1180 }
1164 if (error) { 1181 if (error) {
@@ -2932,16 +2949,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
2932 this.len = strlen(name); 2949 this.len = strlen(name);
2933 this.hash = 0; /* will go */ 2950 this.hash = 0; /* will go */
2934 sb = shm_mnt->mnt_sb; 2951 sb = shm_mnt->mnt_sb;
2952 path.mnt = mntget(shm_mnt);
2935 path.dentry = d_alloc_pseudo(sb, &this); 2953 path.dentry = d_alloc_pseudo(sb, &this);
2936 if (!path.dentry) 2954 if (!path.dentry)
2937 goto put_memory; 2955 goto put_memory;
2938 d_set_d_op(path.dentry, &anon_ops); 2956 d_set_d_op(path.dentry, &anon_ops);
2939 path.mnt = mntget(shm_mnt);
2940 2957
2941 res = ERR_PTR(-ENOSPC); 2958 res = ERR_PTR(-ENOSPC);
2942 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); 2959 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
2943 if (!inode) 2960 if (!inode)
2944 goto put_dentry; 2961 goto put_memory;
2945 2962
2946 inode->i_flags |= i_flags; 2963 inode->i_flags |= i_flags;
2947 d_instantiate(path.dentry, inode); 2964 d_instantiate(path.dentry, inode);
@@ -2949,19 +2966,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
2949 clear_nlink(inode); /* It is unlinked */ 2966 clear_nlink(inode); /* It is unlinked */
2950 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 2967 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
2951 if (IS_ERR(res)) 2968 if (IS_ERR(res))
2952 goto put_dentry; 2969 goto put_path;
2953 2970
2954 res = alloc_file(&path, FMODE_WRITE | FMODE_READ, 2971 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2955 &shmem_file_operations); 2972 &shmem_file_operations);
2956 if (IS_ERR(res)) 2973 if (IS_ERR(res))
2957 goto put_dentry; 2974 goto put_path;
2958 2975
2959 return res; 2976 return res;
2960 2977
2961put_dentry:
2962 path_put(&path);
2963put_memory: 2978put_memory:
2964 shmem_unacct_size(flags, size); 2979 shmem_unacct_size(flags, size);
2980put_path:
2981 path_put(&path);
2965 return res; 2982 return res;
2966} 2983}
2967 2984
diff --git a/mm/slab.c b/mm/slab.c
index 3070b929a1bf..2e60bf3dedbb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
191 unsigned int limit; 191 unsigned int limit;
192 unsigned int batchcount; 192 unsigned int batchcount;
193 unsigned int touched; 193 unsigned int touched;
194 spinlock_t lock;
195 void *entry[]; /* 194 void *entry[]; /*
196 * Must have this definition in here for the proper 195 * Must have this definition in here for the proper
197 * alignment of array_cache. Also simplifies accessing 196 * alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
203 */ 202 */
204}; 203};
205 204
205struct alien_cache {
206 spinlock_t lock;
207 struct array_cache ac;
208};
209
206#define SLAB_OBJ_PFMEMALLOC 1 210#define SLAB_OBJ_PFMEMALLOC 1
207static inline bool is_obj_pfmemalloc(void *objp) 211static inline bool is_obj_pfmemalloc(void *objp)
208{ 212{
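
The spinlock removed from struct array_cache a few hunks up reappears here, but only in the new alien_cache wrapper: plain per-CPU array caches are touched with interrupts off on their own CPU and need no lock, while alien caches are shared by the CPUs of a node and keep one. A sketch of the resulting usage, mirroring the drain_alien_cache() conversion later in this file (the wrapper name is invented):

static void example_flush_alien(struct kmem_cache *cachep,
				struct alien_cache *alc, int node)
{
	unsigned long flags;
	LIST_HEAD(list);

	spin_lock_irqsave(&alc->lock, flags);
	__drain_alien_cache(cachep, &alc->ac, node, &list);
	spin_unlock_irqrestore(&alc->lock, flags);
	slabs_destroy(cachep, &list);	/* free slab pages outside the lock */
}
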
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
242static int drain_freelist(struct kmem_cache *cache, 246static int drain_freelist(struct kmem_cache *cache,
243 struct kmem_cache_node *n, int tofree); 247 struct kmem_cache_node *n, int tofree);
244static void free_block(struct kmem_cache *cachep, void **objpp, int len, 248static void free_block(struct kmem_cache *cachep, void **objpp, int len,
245 int node); 249 int node, struct list_head *list);
250static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
246static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); 251static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
247static void cache_reap(struct work_struct *unused); 252static void cache_reap(struct work_struct *unused);
248 253
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
267#define MAKE_LIST(cachep, listp, slab, nodeid) \ 272#define MAKE_LIST(cachep, listp, slab, nodeid) \
268 do { \ 273 do { \
269 INIT_LIST_HEAD(listp); \ 274 INIT_LIST_HEAD(listp); \
270 list_splice(&(cachep->node[nodeid]->slab), listp); \ 275 list_splice(&get_node(cachep, nodeid)->slab, listp); \
271 } while (0) 276 } while (0)
272 277
273#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 278#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
465 .name = "kmem_cache", 470 .name = "kmem_cache",
466}; 471};
467 472
468#define BAD_ALIEN_MAGIC 0x01020304ul
469
470#ifdef CONFIG_LOCKDEP
471
472/*
473 * Slab sometimes uses the kmalloc slabs to store the slab headers
474 * for other slabs "off slab".
475 * The locking for this is tricky in that it nests within the locks
476 * of all other slabs in a few places; to deal with this special
477 * locking we put on-slab caches into a separate lock-class.
478 *
479 * We set lock class for alien array caches which are up during init.
480 * The lock annotation will be lost if all cpus of a node goes down and
481 * then comes back up during hotplug
482 */
483static struct lock_class_key on_slab_l3_key;
484static struct lock_class_key on_slab_alc_key;
485
486static struct lock_class_key debugobj_l3_key;
487static struct lock_class_key debugobj_alc_key;
488
489static void slab_set_lock_classes(struct kmem_cache *cachep,
490 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
491 int q)
492{
493 struct array_cache **alc;
494 struct kmem_cache_node *n;
495 int r;
496
497 n = cachep->node[q];
498 if (!n)
499 return;
500
501 lockdep_set_class(&n->list_lock, l3_key);
502 alc = n->alien;
503 /*
504 * FIXME: This check for BAD_ALIEN_MAGIC
505 * should go away when common slab code is taught to
506 * work even without alien caches.
507 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
508 * for alloc_alien_cache,
509 */
510 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
511 return;
512 for_each_node(r) {
513 if (alc[r])
514 lockdep_set_class(&alc[r]->lock, alc_key);
515 }
516}
517
518static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
519{
520 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
521}
522
523static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
524{
525 int node;
526
527 for_each_online_node(node)
528 slab_set_debugobj_lock_classes_node(cachep, node);
529}
530
531static void init_node_lock_keys(int q)
532{
533 int i;
534
535 if (slab_state < UP)
536 return;
537
538 for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
539 struct kmem_cache_node *n;
540 struct kmem_cache *cache = kmalloc_caches[i];
541
542 if (!cache)
543 continue;
544
545 n = cache->node[q];
546 if (!n || OFF_SLAB(cache))
547 continue;
548
549 slab_set_lock_classes(cache, &on_slab_l3_key,
550 &on_slab_alc_key, q);
551 }
552}
553
554static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
555{
556 if (!cachep->node[q])
557 return;
558
559 slab_set_lock_classes(cachep, &on_slab_l3_key,
560 &on_slab_alc_key, q);
561}
562
563static inline void on_slab_lock_classes(struct kmem_cache *cachep)
564{
565 int node;
566
567 VM_BUG_ON(OFF_SLAB(cachep));
568 for_each_node(node)
569 on_slab_lock_classes_node(cachep, node);
570}
571
572static inline void init_lock_keys(void)
573{
574 int node;
575
576 for_each_node(node)
577 init_node_lock_keys(node);
578}
579#else
580static void init_node_lock_keys(int q)
581{
582}
583
584static inline void init_lock_keys(void)
585{
586}
587
588static inline void on_slab_lock_classes(struct kmem_cache *cachep)
589{
590}
591
592static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
593{
594}
595
596static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
597{
598}
599
600static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
601{
602}
603#endif
604
605static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 473static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
606 474
607static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 475static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
792 } 660 }
793} 661}
794 662
795static struct array_cache *alloc_arraycache(int node, int entries, 663static void init_arraycache(struct array_cache *ac, int limit, int batch)
796 int batchcount, gfp_t gfp)
797{ 664{
798 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
799 struct array_cache *nc = NULL;
800
801 nc = kmalloc_node(memsize, gfp, node);
802 /* 665 /*
803 * The array_cache structures contain pointers to free object. 666 * The array_cache structures contain pointers to free object.
804 * However, when such objects are allocated or transferred to another 667 * However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
806 * valid references during a kmemleak scan. Therefore, kmemleak must 669 * valid references during a kmemleak scan. Therefore, kmemleak must
807 * not scan such objects. 670 * not scan such objects.
808 */ 671 */
809 kmemleak_no_scan(nc); 672 kmemleak_no_scan(ac);
810 if (nc) { 673 if (ac) {
811 nc->avail = 0; 674 ac->avail = 0;
812 nc->limit = entries; 675 ac->limit = limit;
813 nc->batchcount = batchcount; 676 ac->batchcount = batch;
814 nc->touched = 0; 677 ac->touched = 0;
815 spin_lock_init(&nc->lock);
816 } 678 }
817 return nc; 679}
680
681static struct array_cache *alloc_arraycache(int node, int entries,
682 int batchcount, gfp_t gfp)
683{
684 size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
685 struct array_cache *ac = NULL;
686
687 ac = kmalloc_node(memsize, gfp, node);
688 init_arraycache(ac, entries, batchcount);
689 return ac;
818} 690}
819 691
820static inline bool is_slab_pfmemalloc(struct page *page) 692static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
826static void recheck_pfmemalloc_active(struct kmem_cache *cachep, 698static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
827 struct array_cache *ac) 699 struct array_cache *ac)
828{ 700{
829 struct kmem_cache_node *n = cachep->node[numa_mem_id()]; 701 struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
830 struct page *page; 702 struct page *page;
831 unsigned long flags; 703 unsigned long flags;
832 704
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
881 * If there are empty slabs on the slabs_free list and we are 753 * If there are empty slabs on the slabs_free list and we are
882 * being forced to refill the cache, mark this one !pfmemalloc. 754 * being forced to refill the cache, mark this one !pfmemalloc.
883 */ 755 */
884 n = cachep->node[numa_mem_id()]; 756 n = get_node(cachep, numa_mem_id());
885 if (!list_empty(&n->slabs_free) && force_refill) { 757 if (!list_empty(&n->slabs_free) && force_refill) {
886 struct page *page = virt_to_head_page(objp); 758 struct page *page = virt_to_head_page(objp);
887 ClearPageSlabPfmemalloc(page); 759 ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
961#define drain_alien_cache(cachep, alien) do { } while (0) 833#define drain_alien_cache(cachep, alien) do { } while (0)
962#define reap_alien(cachep, n) do { } while (0) 834#define reap_alien(cachep, n) do { } while (0)
963 835
964static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 836static inline struct alien_cache **alloc_alien_cache(int node,
837 int limit, gfp_t gfp)
965{ 838{
966 return (struct array_cache **)BAD_ALIEN_MAGIC; 839 return NULL;
967} 840}
968 841
969static inline void free_alien_cache(struct array_cache **ac_ptr) 842static inline void free_alien_cache(struct alien_cache **ac_ptr)
970{ 843{
971} 844}
972 845
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
992static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 865static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
993static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 866static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
994 867
995static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 868static struct alien_cache *__alloc_alien_cache(int node, int entries,
869 int batch, gfp_t gfp)
870{
871 size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
872 struct alien_cache *alc = NULL;
873
874 alc = kmalloc_node(memsize, gfp, node);
875 init_arraycache(&alc->ac, entries, batch);
876 spin_lock_init(&alc->lock);
877 return alc;
878}
879
880static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
996{ 881{
997 struct array_cache **ac_ptr; 882 struct alien_cache **alc_ptr;
998 int memsize = sizeof(void *) * nr_node_ids; 883 size_t memsize = sizeof(void *) * nr_node_ids;
999 int i; 884 int i;
1000 885
1001 if (limit > 1) 886 if (limit > 1)
1002 limit = 12; 887 limit = 12;
1003 ac_ptr = kzalloc_node(memsize, gfp, node); 888 alc_ptr = kzalloc_node(memsize, gfp, node);
1004 if (ac_ptr) { 889 if (!alc_ptr)
1005 for_each_node(i) { 890 return NULL;
1006 if (i == node || !node_online(i)) 891
1007 continue; 892 for_each_node(i) {
1008 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); 893 if (i == node || !node_online(i))
1009 if (!ac_ptr[i]) { 894 continue;
1010 for (i--; i >= 0; i--) 895 alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
1011 kfree(ac_ptr[i]); 896 if (!alc_ptr[i]) {
1012 kfree(ac_ptr); 897 for (i--; i >= 0; i--)
1013 return NULL; 898 kfree(alc_ptr[i]);
1014 } 899 kfree(alc_ptr);
900 return NULL;
1015 } 901 }
1016 } 902 }
1017 return ac_ptr; 903 return alc_ptr;
1018} 904}
1019 905
1020static void free_alien_cache(struct array_cache **ac_ptr) 906static void free_alien_cache(struct alien_cache **alc_ptr)
1021{ 907{
1022 int i; 908 int i;
1023 909
1024 if (!ac_ptr) 910 if (!alc_ptr)
1025 return; 911 return;
1026 for_each_node(i) 912 for_each_node(i)
1027 kfree(ac_ptr[i]); 913 kfree(alc_ptr[i]);
1028 kfree(ac_ptr); 914 kfree(alc_ptr);
1029} 915}
1030 916
1031static void __drain_alien_cache(struct kmem_cache *cachep, 917static void __drain_alien_cache(struct kmem_cache *cachep,
1032 struct array_cache *ac, int node) 918 struct array_cache *ac, int node,
919 struct list_head *list)
1033{ 920{
1034 struct kmem_cache_node *n = cachep->node[node]; 921 struct kmem_cache_node *n = get_node(cachep, node);
1035 922
1036 if (ac->avail) { 923 if (ac->avail) {
1037 spin_lock(&n->list_lock); 924 spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
1043 if (n->shared) 930 if (n->shared)
1044 transfer_objects(n->shared, ac, ac->limit); 931 transfer_objects(n->shared, ac, ac->limit);
1045 932
1046 free_block(cachep, ac->entry, ac->avail, node); 933 free_block(cachep, ac->entry, ac->avail, node, list);
1047 ac->avail = 0; 934 ac->avail = 0;
1048 spin_unlock(&n->list_lock); 935 spin_unlock(&n->list_lock);
1049 } 936 }
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
1057 int node = __this_cpu_read(slab_reap_node); 944 int node = __this_cpu_read(slab_reap_node);
1058 945
1059 if (n->alien) { 946 if (n->alien) {
1060 struct array_cache *ac = n->alien[node]; 947 struct alien_cache *alc = n->alien[node];
948 struct array_cache *ac;
949
950 if (alc) {
951 ac = &alc->ac;
952 if (ac->avail && spin_trylock_irq(&alc->lock)) {
953 LIST_HEAD(list);
1061 954
1062 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { 955 __drain_alien_cache(cachep, ac, node, &list);
1063 __drain_alien_cache(cachep, ac, node); 956 spin_unlock_irq(&alc->lock);
1064 spin_unlock_irq(&ac->lock); 957 slabs_destroy(cachep, &list);
958 }
1065 } 959 }
1066 } 960 }
1067} 961}
1068 962
1069static void drain_alien_cache(struct kmem_cache *cachep, 963static void drain_alien_cache(struct kmem_cache *cachep,
1070 struct array_cache **alien) 964 struct alien_cache **alien)
1071{ 965{
1072 int i = 0; 966 int i = 0;
967 struct alien_cache *alc;
1073 struct array_cache *ac; 968 struct array_cache *ac;
1074 unsigned long flags; 969 unsigned long flags;
1075 970
1076 for_each_online_node(i) { 971 for_each_online_node(i) {
1077 ac = alien[i]; 972 alc = alien[i];
1078 if (ac) { 973 if (alc) {
1079 spin_lock_irqsave(&ac->lock, flags); 974 LIST_HEAD(list);
1080 __drain_alien_cache(cachep, ac, i); 975
1081 spin_unlock_irqrestore(&ac->lock, flags); 976 ac = &alc->ac;
977 spin_lock_irqsave(&alc->lock, flags);
978 __drain_alien_cache(cachep, ac, i, &list);
979 spin_unlock_irqrestore(&alc->lock, flags);
980 slabs_destroy(cachep, &list);
1082 } 981 }
1083 } 982 }
1084} 983}
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1087{ 986{
1088 int nodeid = page_to_nid(virt_to_page(objp)); 987 int nodeid = page_to_nid(virt_to_page(objp));
1089 struct kmem_cache_node *n; 988 struct kmem_cache_node *n;
1090 struct array_cache *alien = NULL; 989 struct alien_cache *alien = NULL;
990 struct array_cache *ac;
1091 int node; 991 int node;
992 LIST_HEAD(list);
1092 993
1093 node = numa_mem_id(); 994 node = numa_mem_id();
1094 995
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1099 if (likely(nodeid == node)) 1000 if (likely(nodeid == node))
1100 return 0; 1001 return 0;
1101 1002
1102 n = cachep->node[node]; 1003 n = get_node(cachep, node);
1103 STATS_INC_NODEFREES(cachep); 1004 STATS_INC_NODEFREES(cachep);
1104 if (n->alien && n->alien[nodeid]) { 1005 if (n->alien && n->alien[nodeid]) {
1105 alien = n->alien[nodeid]; 1006 alien = n->alien[nodeid];
1007 ac = &alien->ac;
1106 spin_lock(&alien->lock); 1008 spin_lock(&alien->lock);
1107 if (unlikely(alien->avail == alien->limit)) { 1009 if (unlikely(ac->avail == ac->limit)) {
1108 STATS_INC_ACOVERFLOW(cachep); 1010 STATS_INC_ACOVERFLOW(cachep);
1109 __drain_alien_cache(cachep, alien, nodeid); 1011 __drain_alien_cache(cachep, ac, nodeid, &list);
1110 } 1012 }
1111 ac_put_obj(cachep, alien, objp); 1013 ac_put_obj(cachep, ac, objp);
1112 spin_unlock(&alien->lock); 1014 spin_unlock(&alien->lock);
1015 slabs_destroy(cachep, &list);
1113 } else { 1016 } else {
1114 spin_lock(&(cachep->node[nodeid])->list_lock); 1017 n = get_node(cachep, nodeid);
1115 free_block(cachep, &objp, 1, nodeid); 1018 spin_lock(&n->list_lock);
1116 spin_unlock(&(cachep->node[nodeid])->list_lock); 1019 free_block(cachep, &objp, 1, nodeid, &list);
1020 spin_unlock(&n->list_lock);
1021 slabs_destroy(cachep, &list);
1117 } 1022 }
1118 return 1; 1023 return 1;
1119} 1024}
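
The other recurring conversion in this file: free_block() no longer frees empty slab pages itself but collects them on a caller-supplied list, so the page allocator is only entered after n->list_lock has been dropped, via the new slabs_destroy(). The pattern, pulled out into a hypothetical helper:

static void example_flush_objects(struct kmem_cache *cachep,
				  void **objpp, int nr, int node)
{
	struct kmem_cache_node *n = get_node(cachep, node);
	LIST_HEAD(list);

	spin_lock(&n->list_lock);
	free_block(cachep, objpp, nr, node, &list);
	spin_unlock(&n->list_lock);
	slabs_destroy(cachep, &list);	/* slab_destroy() runs without the lock */
}
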
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
1132{ 1037{
1133 struct kmem_cache *cachep; 1038 struct kmem_cache *cachep;
1134 struct kmem_cache_node *n; 1039 struct kmem_cache_node *n;
1135 const int memsize = sizeof(struct kmem_cache_node); 1040 const size_t memsize = sizeof(struct kmem_cache_node);
1136 1041
1137 list_for_each_entry(cachep, &slab_caches, list) { 1042 list_for_each_entry(cachep, &slab_caches, list) {
1138 /* 1043 /*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
1140 * begin anything. Make sure some other cpu on this 1045 * begin anything. Make sure some other cpu on this
1141 * node has not already allocated this 1046 * node has not already allocated this
1142 */ 1047 */
1143 if (!cachep->node[node]) { 1048 n = get_node(cachep, node);
1049 if (!n) {
1144 n = kmalloc_node(memsize, GFP_KERNEL, node); 1050 n = kmalloc_node(memsize, GFP_KERNEL, node);
1145 if (!n) 1051 if (!n)
1146 return -ENOMEM; 1052 return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
1156 cachep->node[node] = n; 1062 cachep->node[node] = n;
1157 } 1063 }
1158 1064
1159 spin_lock_irq(&cachep->node[node]->list_lock); 1065 spin_lock_irq(&n->list_lock);
1160 cachep->node[node]->free_limit = 1066 n->free_limit =
1161 (1 + nr_cpus_node(node)) * 1067 (1 + nr_cpus_node(node)) *
1162 cachep->batchcount + cachep->num; 1068 cachep->batchcount + cachep->num;
1163 spin_unlock_irq(&cachep->node[node]->list_lock); 1069 spin_unlock_irq(&n->list_lock);
1164 } 1070 }
1165 return 0; 1071 return 0;
1166} 1072}
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
1181 list_for_each_entry(cachep, &slab_caches, list) { 1087 list_for_each_entry(cachep, &slab_caches, list) {
1182 struct array_cache *nc; 1088 struct array_cache *nc;
1183 struct array_cache *shared; 1089 struct array_cache *shared;
1184 struct array_cache **alien; 1090 struct alien_cache **alien;
1091 LIST_HEAD(list);
1185 1092
1186 /* cpu is dead; no one can alloc from it. */ 1093 /* cpu is dead; no one can alloc from it. */
1187 nc = cachep->array[cpu]; 1094 nc = cachep->array[cpu];
1188 cachep->array[cpu] = NULL; 1095 cachep->array[cpu] = NULL;
1189 n = cachep->node[node]; 1096 n = get_node(cachep, node);
1190 1097
1191 if (!n) 1098 if (!n)
1192 goto free_array_cache; 1099 goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
1196 /* Free limit for this kmem_cache_node */ 1103 /* Free limit for this kmem_cache_node */
1197 n->free_limit -= cachep->batchcount; 1104 n->free_limit -= cachep->batchcount;
1198 if (nc) 1105 if (nc)
1199 free_block(cachep, nc->entry, nc->avail, node); 1106 free_block(cachep, nc->entry, nc->avail, node, &list);
1200 1107
1201 if (!cpumask_empty(mask)) { 1108 if (!cpumask_empty(mask)) {
1202 spin_unlock_irq(&n->list_lock); 1109 spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
1206 shared = n->shared; 1113 shared = n->shared;
1207 if (shared) { 1114 if (shared) {
1208 free_block(cachep, shared->entry, 1115 free_block(cachep, shared->entry,
1209 shared->avail, node); 1116 shared->avail, node, &list);
1210 n->shared = NULL; 1117 n->shared = NULL;
1211 } 1118 }
1212 1119
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
1221 free_alien_cache(alien); 1128 free_alien_cache(alien);
1222 } 1129 }
1223free_array_cache: 1130free_array_cache:
1131 slabs_destroy(cachep, &list);
1224 kfree(nc); 1132 kfree(nc);
1225 } 1133 }
1226 /* 1134 /*
@@ -1229,7 +1137,7 @@ free_array_cache:
1229 * shrink each nodelist to its limit. 1137 * shrink each nodelist to its limit.
1230 */ 1138 */
1231 list_for_each_entry(cachep, &slab_caches, list) { 1139 list_for_each_entry(cachep, &slab_caches, list) {
1232 n = cachep->node[node]; 1140 n = get_node(cachep, node);
1233 if (!n) 1141 if (!n)
1234 continue; 1142 continue;
1235 drain_freelist(cachep, n, slabs_tofree(cachep, n)); 1143 drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
1260 list_for_each_entry(cachep, &slab_caches, list) { 1168 list_for_each_entry(cachep, &slab_caches, list) {
1261 struct array_cache *nc; 1169 struct array_cache *nc;
1262 struct array_cache *shared = NULL; 1170 struct array_cache *shared = NULL;
1263 struct array_cache **alien = NULL; 1171 struct alien_cache **alien = NULL;
1264 1172
1265 nc = alloc_arraycache(node, cachep->limit, 1173 nc = alloc_arraycache(node, cachep->limit,
1266 cachep->batchcount, GFP_KERNEL); 1174 cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
1284 } 1192 }
1285 } 1193 }
1286 cachep->array[cpu] = nc; 1194 cachep->array[cpu] = nc;
1287 n = cachep->node[node]; 1195 n = get_node(cachep, node);
1288 BUG_ON(!n); 1196 BUG_ON(!n);
1289 1197
1290 spin_lock_irq(&n->list_lock); 1198 spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
1305 spin_unlock_irq(&n->list_lock); 1213 spin_unlock_irq(&n->list_lock);
1306 kfree(shared); 1214 kfree(shared);
1307 free_alien_cache(alien); 1215 free_alien_cache(alien);
1308 if (cachep->flags & SLAB_DEBUG_OBJECTS)
1309 slab_set_debugobj_lock_classes_node(cachep, node);
1310 else if (!OFF_SLAB(cachep) &&
1311 !(cachep->flags & SLAB_DESTROY_BY_RCU))
1312 on_slab_lock_classes_node(cachep, node);
1313 } 1216 }
1314 init_node_lock_keys(node);
1315 1217
1316 return 0; 1218 return 0;
1317bad: 1219bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
1395 list_for_each_entry(cachep, &slab_caches, list) { 1297 list_for_each_entry(cachep, &slab_caches, list) {
1396 struct kmem_cache_node *n; 1298 struct kmem_cache_node *n;
1397 1299
1398 n = cachep->node[node]; 1300 n = get_node(cachep, node);
1399 if (!n) 1301 if (!n)
1400 continue; 1302 continue;
1401 1303
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
1575 1477
1576 memcpy(ptr, cpu_cache_get(kmem_cache), 1478 memcpy(ptr, cpu_cache_get(kmem_cache),
1577 sizeof(struct arraycache_init)); 1479 sizeof(struct arraycache_init));
1578 /*
1579 * Do not assume that spinlocks can be initialized via memcpy:
1580 */
1581 spin_lock_init(&ptr->lock);
1582 1480
1583 kmem_cache->array[smp_processor_id()] = ptr; 1481 kmem_cache->array[smp_processor_id()] = ptr;
1584 1482
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
1588 != &initarray_generic.cache); 1486 != &initarray_generic.cache);
1589 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), 1487 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
1590 sizeof(struct arraycache_init)); 1488 sizeof(struct arraycache_init));
1591 /*
1592 * Do not assume that spinlocks can be initialized via memcpy:
1593 */
1594 spin_lock_init(&ptr->lock);
1595 1489
1596 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; 1490 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
1597 } 1491 }
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
1628 BUG(); 1522 BUG();
1629 mutex_unlock(&slab_mutex); 1523 mutex_unlock(&slab_mutex);
1630 1524
1631 /* Annotate slab for lockdep -- annotate the malloc caches */
1632 init_lock_keys();
1633
1634 /* Done! */ 1525 /* Done! */
1635 slab_state = FULL; 1526 slab_state = FULL;
1636 1527
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1690 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", 1581 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1691 cachep->name, cachep->size, cachep->gfporder); 1582 cachep->name, cachep->size, cachep->gfporder);
1692 1583
1693 for_each_online_node(node) { 1584 for_each_kmem_cache_node(cachep, node, n) {
1694 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1585 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1695 unsigned long active_slabs = 0, num_slabs = 0; 1586 unsigned long active_slabs = 0, num_slabs = 0;
1696 1587
1697 n = cachep->node[node];
1698 if (!n)
1699 continue;
1700
1701 spin_lock_irqsave(&n->list_lock, flags); 1588 spin_lock_irqsave(&n->list_lock, flags);
1702 list_for_each_entry(page, &n->slabs_full, lru) { 1589 list_for_each_entry(page, &n->slabs_full, lru) {
1703 active_objs += cachep->num; 1590 active_objs += cachep->num;
@@ -1724,7 +1611,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1724} 1611}
1725 1612
1726/* 1613/*
1727 * Interface to system's page allocator. No need to hold the cache-lock. 1614 * Interface to system's page allocator. No need to hold the
1615 * kmem_cache_node ->list_lock.
1728 * 1616 *
1729 * If we requested dmaable memory, we will get it. Even if we 1617 * If we requested dmaable memory, we will get it. Even if we
1730 * did not request dmaable memory, we might get it, but that 1618 * did not request dmaable memory, we might get it, but that
@@ -2026,9 +1914,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
2026 * @cachep: cache pointer being destroyed 1914 * @cachep: cache pointer being destroyed
2027 * @page: page pointer being destroyed 1915 * @page: page pointer being destroyed
2028 * 1916 *
2029 * Destroy all the objs in a slab, and release the mem back to the system. 1917 * Destroy all the objs in a slab page, and release the mem back to the system.
2030 * Before calling the slab must have been unlinked from the cache. The 1918 * Before calling the slab page must have been unlinked from the cache. The
2031 * cache-lock is not held/needed. 1919 * kmem_cache_node ->list_lock is not held/needed.
2032 */ 1920 */
2033static void slab_destroy(struct kmem_cache *cachep, struct page *page) 1921static void slab_destroy(struct kmem_cache *cachep, struct page *page)
2034{ 1922{
@@ -2060,6 +1948,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
2060 kmem_cache_free(cachep->freelist_cache, freelist); 1948 kmem_cache_free(cachep->freelist_cache, freelist);
2061} 1949}
2062 1950
1951static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
1952{
1953 struct page *page, *n;
1954
1955 list_for_each_entry_safe(page, n, list, lru) {
1956 list_del(&page->lru);
1957 slab_destroy(cachep, page);
1958 }
1959}
1960
2063/** 1961/**
2064 * calculate_slab_order - calculate size (page order) of slabs 1962 * calculate_slab_order - calculate size (page order) of slabs
2065 * @cachep: pointer to the cache that is being created 1963 * @cachep: pointer to the cache that is being created
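
The hunk above introduces slabs_destroy(): free_block() now detaches fully-free slab pages onto a caller-supplied list while the node's list_lock is held, and the pages are only destroyed after the lock is dropped. A minimal userspace sketch of that collect-under-lock, free-after-unlock pattern, using a pthread mutex and a plain doubly linked list instead of the kernel's struct page/list_head machinery (all names below are illustrative, not kernel APIs):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
    struct item *next;
    struct item *prev;
    int payload;
};

/* A tiny circular doubly linked list, modeled loosely on list_head. */
static void list_init(struct item *head) { head->next = head->prev = head; }
static void list_add(struct item *head, struct item *it)
{
    it->next = head->next; it->prev = head;
    head->next->prev = it; head->next = it;
}
static void list_del(struct item *it)
{
    it->prev->next = it->next; it->next->prev = it->prev;
}
static int list_empty(struct item *head) { return head->next == head; }

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item pool;                 /* protected by pool_lock */

/* Move everything that should die onto 'graveyard' while holding the lock... */
static void detach_all(struct item *graveyard)
{
    pthread_mutex_lock(&pool_lock);
    while (!list_empty(&pool)) {
        struct item *it = pool.next;
        list_del(it);
        list_add(graveyard, it);         /* cheap: pure list surgery */
    }
    pthread_mutex_unlock(&pool_lock);
}

/* ...and do the expensive free() calls only after the lock is dropped. */
static void destroy_all(struct item *graveyard)
{
    while (!list_empty(graveyard)) {
        struct item *it = graveyard->next;
        list_del(it);
        free(it);
    }
}

int main(void)
{
    struct item graveyard;
    list_init(&pool);
    list_init(&graveyard);
    for (int i = 0; i < 4; i++) {
        struct item *it = malloc(sizeof(*it));
        if (!it)
            return 1;
        it->payload = i;
        list_add(&pool, it);
    }
    detach_all(&graveyard);
    destroy_all(&graveyard);
    puts("freed outside the lock");
    return 0;
}

Keeping slab_destroy() out of the locked region shortens list_lock hold times and lets the page-freeing path take locks of its own without nesting under the node lock; the later do_drain(), cache_flusharray() and drain_array() hunks all follow the same shape.
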
@@ -2405,17 +2303,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2405 return err; 2303 return err;
2406 } 2304 }
2407 2305
2408 if (flags & SLAB_DEBUG_OBJECTS) {
2409 /*
2410 * Would deadlock through slab_destroy()->call_rcu()->
2411 * debug_object_activate()->kmem_cache_alloc().
2412 */
2413 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
2414
2415 slab_set_debugobj_lock_classes(cachep);
2416 } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
2417 on_slab_lock_classes(cachep);
2418
2419 return 0; 2306 return 0;
2420} 2307}
2421 2308
@@ -2434,7 +2321,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
2434{ 2321{
2435#ifdef CONFIG_SMP 2322#ifdef CONFIG_SMP
2436 check_irq_off(); 2323 check_irq_off();
2437 assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); 2324 assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
2438#endif 2325#endif
2439} 2326}
2440 2327
@@ -2442,7 +2329,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2442{ 2329{
2443#ifdef CONFIG_SMP 2330#ifdef CONFIG_SMP
2444 check_irq_off(); 2331 check_irq_off();
2445 assert_spin_locked(&cachep->node[node]->list_lock); 2332 assert_spin_locked(&get_node(cachep, node)->list_lock);
2446#endif 2333#endif
2447} 2334}
2448 2335
@@ -2462,12 +2349,16 @@ static void do_drain(void *arg)
2462 struct kmem_cache *cachep = arg; 2349 struct kmem_cache *cachep = arg;
2463 struct array_cache *ac; 2350 struct array_cache *ac;
2464 int node = numa_mem_id(); 2351 int node = numa_mem_id();
2352 struct kmem_cache_node *n;
2353 LIST_HEAD(list);
2465 2354
2466 check_irq_off(); 2355 check_irq_off();
2467 ac = cpu_cache_get(cachep); 2356 ac = cpu_cache_get(cachep);
2468 spin_lock(&cachep->node[node]->list_lock); 2357 n = get_node(cachep, node);
2469 free_block(cachep, ac->entry, ac->avail, node); 2358 spin_lock(&n->list_lock);
2470 spin_unlock(&cachep->node[node]->list_lock); 2359 free_block(cachep, ac->entry, ac->avail, node, &list);
2360 spin_unlock(&n->list_lock);
2361 slabs_destroy(cachep, &list);
2471 ac->avail = 0; 2362 ac->avail = 0;
2472} 2363}
2473 2364
@@ -2478,17 +2369,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2478 2369
2479 on_each_cpu(do_drain, cachep, 1); 2370 on_each_cpu(do_drain, cachep, 1);
2480 check_irq_on(); 2371 check_irq_on();
2481 for_each_online_node(node) { 2372 for_each_kmem_cache_node(cachep, node, n)
2482 n = cachep->node[node]; 2373 if (n->alien)
2483 if (n && n->alien)
2484 drain_alien_cache(cachep, n->alien); 2374 drain_alien_cache(cachep, n->alien);
2485 }
2486 2375
2487 for_each_online_node(node) { 2376 for_each_kmem_cache_node(cachep, node, n)
2488 n = cachep->node[node]; 2377 drain_array(cachep, n, n->shared, 1, node);
2489 if (n)
2490 drain_array(cachep, n, n->shared, 1, node);
2491 }
2492} 2378}
2493 2379
2494/* 2380/*
@@ -2534,17 +2420,14 @@ out:
2534 2420
2535int __kmem_cache_shrink(struct kmem_cache *cachep) 2421int __kmem_cache_shrink(struct kmem_cache *cachep)
2536{ 2422{
2537 int ret = 0, i = 0; 2423 int ret = 0;
2424 int node;
2538 struct kmem_cache_node *n; 2425 struct kmem_cache_node *n;
2539 2426
2540 drain_cpu_caches(cachep); 2427 drain_cpu_caches(cachep);
2541 2428
2542 check_irq_on(); 2429 check_irq_on();
2543 for_each_online_node(i) { 2430 for_each_kmem_cache_node(cachep, node, n) {
2544 n = cachep->node[i];
2545 if (!n)
2546 continue;
2547
2548 drain_freelist(cachep, n, slabs_tofree(cachep, n)); 2431 drain_freelist(cachep, n, slabs_tofree(cachep, n));
2549 2432
2550 ret += !list_empty(&n->slabs_full) || 2433 ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2449,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
2566 kfree(cachep->array[i]); 2449 kfree(cachep->array[i]);
2567 2450
2568 /* NUMA: free the node structures */ 2451 /* NUMA: free the node structures */
2569 for_each_online_node(i) { 2452 for_each_kmem_cache_node(cachep, i, n) {
2570 n = cachep->node[i]; 2453 kfree(n->shared);
2571 if (n) { 2454 free_alien_cache(n->alien);
2572 kfree(n->shared); 2455 kfree(n);
2573 free_alien_cache(n->alien); 2456 cachep->node[i] = NULL;
2574 kfree(n);
2575 }
2576 } 2457 }
2577 return 0; 2458 return 0;
2578} 2459}
@@ -2751,7 +2632,7 @@ static int cache_grow(struct kmem_cache *cachep,
2751 2632
2752 /* Take the node list lock to change the colour_next on this node */ 2633 /* Take the node list lock to change the colour_next on this node */
2753 check_irq_off(); 2634 check_irq_off();
2754 n = cachep->node[nodeid]; 2635 n = get_node(cachep, nodeid);
2755 spin_lock(&n->list_lock); 2636 spin_lock(&n->list_lock);
2756 2637
2757 /* Get colour for the slab, and cal the next value. */ 2638 /* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2801,7 @@ retry:
2920 */ 2801 */
2921 batchcount = BATCHREFILL_LIMIT; 2802 batchcount = BATCHREFILL_LIMIT;
2922 } 2803 }
2923 n = cachep->node[node]; 2804 n = get_node(cachep, node);
2924 2805
2925 BUG_ON(ac->avail > 0 || !n); 2806 BUG_ON(ac->avail > 0 || !n);
2926 spin_lock(&n->list_lock); 2807 spin_lock(&n->list_lock);
@@ -3060,7 +2941,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3060 2941
3061static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) 2942static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3062{ 2943{
3063 if (cachep == kmem_cache) 2944 if (unlikely(cachep == kmem_cache))
3064 return false; 2945 return false;
3065 2946
3066 return should_failslab(cachep->object_size, flags, cachep->flags); 2947 return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3050,8 @@ retry:
3169 nid = zone_to_nid(zone); 3050 nid = zone_to_nid(zone);
3170 3051
3171 if (cpuset_zone_allowed_hardwall(zone, flags) && 3052 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3172 cache->node[nid] && 3053 get_node(cache, nid) &&
3173 cache->node[nid]->free_objects) { 3054 get_node(cache, nid)->free_objects) {
3174 obj = ____cache_alloc_node(cache, 3055 obj = ____cache_alloc_node(cache,
3175 flags | GFP_THISNODE, nid); 3056 flags | GFP_THISNODE, nid);
3176 if (obj) 3057 if (obj)
@@ -3233,7 +3114,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3233 int x; 3114 int x;
3234 3115
3235 VM_BUG_ON(nodeid > num_online_nodes()); 3116 VM_BUG_ON(nodeid > num_online_nodes());
3236 n = cachep->node[nodeid]; 3117 n = get_node(cachep, nodeid);
3237 BUG_ON(!n); 3118 BUG_ON(!n);
3238 3119
3239retry: 3120retry:
@@ -3304,7 +3185,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3304 if (nodeid == NUMA_NO_NODE) 3185 if (nodeid == NUMA_NO_NODE)
3305 nodeid = slab_node; 3186 nodeid = slab_node;
3306 3187
3307 if (unlikely(!cachep->node[nodeid])) { 3188 if (unlikely(!get_node(cachep, nodeid))) {
3308 /* Node not bootstrapped yet */ 3189 /* Node not bootstrapped yet */
3309 ptr = fallback_alloc(cachep, flags); 3190 ptr = fallback_alloc(cachep, flags);
3310 goto out; 3191 goto out;
@@ -3405,12 +3286,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
3405 3286
3406/* 3287/*
3407 * Caller needs to acquire correct kmem_cache_node's list_lock 3288 * Caller needs to acquire correct kmem_cache_node's list_lock
3289 * @list: List of detached free slabs should be freed by caller
3408 */ 3290 */
3409static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, 3291static void free_block(struct kmem_cache *cachep, void **objpp,
3410 int node) 3292 int nr_objects, int node, struct list_head *list)
3411{ 3293{
3412 int i; 3294 int i;
3413 struct kmem_cache_node *n; 3295 struct kmem_cache_node *n = get_node(cachep, node);
3414 3296
3415 for (i = 0; i < nr_objects; i++) { 3297 for (i = 0; i < nr_objects; i++) {
3416 void *objp; 3298 void *objp;
@@ -3420,7 +3302,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3420 objp = objpp[i]; 3302 objp = objpp[i];
3421 3303
3422 page = virt_to_head_page(objp); 3304 page = virt_to_head_page(objp);
3423 n = cachep->node[node];
3424 list_del(&page->lru); 3305 list_del(&page->lru);
3425 check_spinlock_acquired_node(cachep, node); 3306 check_spinlock_acquired_node(cachep, node);
3426 slab_put_obj(cachep, page, objp, node); 3307 slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3312,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3431 if (page->active == 0) { 3312 if (page->active == 0) {
3432 if (n->free_objects > n->free_limit) { 3313 if (n->free_objects > n->free_limit) {
3433 n->free_objects -= cachep->num; 3314 n->free_objects -= cachep->num;
3434 /* No need to drop any previously held 3315 list_add_tail(&page->lru, list);
3435 * lock here, even if we have a off-slab slab
3436 * descriptor it is guaranteed to come from
3437 * a different cache, refer to comments before
3438 * alloc_slabmgmt.
3439 */
3440 slab_destroy(cachep, page);
3441 } else { 3316 } else {
3442 list_add(&page->lru, &n->slabs_free); 3317 list_add(&page->lru, &n->slabs_free);
3443 } 3318 }
@@ -3456,13 +3331,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3456 int batchcount; 3331 int batchcount;
3457 struct kmem_cache_node *n; 3332 struct kmem_cache_node *n;
3458 int node = numa_mem_id(); 3333 int node = numa_mem_id();
3334 LIST_HEAD(list);
3459 3335
3460 batchcount = ac->batchcount; 3336 batchcount = ac->batchcount;
3461#if DEBUG 3337#if DEBUG
3462 BUG_ON(!batchcount || batchcount > ac->avail); 3338 BUG_ON(!batchcount || batchcount > ac->avail);
3463#endif 3339#endif
3464 check_irq_off(); 3340 check_irq_off();
3465 n = cachep->node[node]; 3341 n = get_node(cachep, node);
3466 spin_lock(&n->list_lock); 3342 spin_lock(&n->list_lock);
3467 if (n->shared) { 3343 if (n->shared) {
3468 struct array_cache *shared_array = n->shared; 3344 struct array_cache *shared_array = n->shared;
@@ -3477,7 +3353,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3477 } 3353 }
3478 } 3354 }
3479 3355
3480 free_block(cachep, ac->entry, batchcount, node); 3356 free_block(cachep, ac->entry, batchcount, node, &list);
3481free_done: 3357free_done:
3482#if STATS 3358#if STATS
3483 { 3359 {
@@ -3498,6 +3374,7 @@ free_done:
3498 } 3374 }
3499#endif 3375#endif
3500 spin_unlock(&n->list_lock); 3376 spin_unlock(&n->list_lock);
3377 slabs_destroy(cachep, &list);
3501 ac->avail -= batchcount; 3378 ac->avail -= batchcount;
3502 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3379 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3503} 3380}
@@ -3754,7 +3631,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3754 int node; 3631 int node;
3755 struct kmem_cache_node *n; 3632 struct kmem_cache_node *n;
3756 struct array_cache *new_shared; 3633 struct array_cache *new_shared;
3757 struct array_cache **new_alien = NULL; 3634 struct alien_cache **new_alien = NULL;
3758 3635
3759 for_each_online_node(node) { 3636 for_each_online_node(node) {
3760 3637
@@ -3775,15 +3652,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3775 } 3652 }
3776 } 3653 }
3777 3654
3778 n = cachep->node[node]; 3655 n = get_node(cachep, node);
3779 if (n) { 3656 if (n) {
3780 struct array_cache *shared = n->shared; 3657 struct array_cache *shared = n->shared;
3658 LIST_HEAD(list);
3781 3659
3782 spin_lock_irq(&n->list_lock); 3660 spin_lock_irq(&n->list_lock);
3783 3661
3784 if (shared) 3662 if (shared)
3785 free_block(cachep, shared->entry, 3663 free_block(cachep, shared->entry,
3786 shared->avail, node); 3664 shared->avail, node, &list);
3787 3665
3788 n->shared = new_shared; 3666 n->shared = new_shared;
3789 if (!n->alien) { 3667 if (!n->alien) {
@@ -3793,6 +3671,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3793 n->free_limit = (1 + nr_cpus_node(node)) * 3671 n->free_limit = (1 + nr_cpus_node(node)) *
3794 cachep->batchcount + cachep->num; 3672 cachep->batchcount + cachep->num;
3795 spin_unlock_irq(&n->list_lock); 3673 spin_unlock_irq(&n->list_lock);
3674 slabs_destroy(cachep, &list);
3796 kfree(shared); 3675 kfree(shared);
3797 free_alien_cache(new_alien); 3676 free_alien_cache(new_alien);
3798 continue; 3677 continue;
@@ -3820,9 +3699,8 @@ fail:
3820 /* Cache is not active yet. Roll back what we did */ 3699 /* Cache is not active yet. Roll back what we did */
3821 node--; 3700 node--;
3822 while (node >= 0) { 3701 while (node >= 0) {
3823 if (cachep->node[node]) { 3702 n = get_node(cachep, node);
3824 n = cachep->node[node]; 3703 if (n) {
3825
3826 kfree(n->shared); 3704 kfree(n->shared);
3827 free_alien_cache(n->alien); 3705 free_alien_cache(n->alien);
3828 kfree(n); 3706 kfree(n);
@@ -3883,12 +3761,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
3883 cachep->shared = shared; 3761 cachep->shared = shared;
3884 3762
3885 for_each_online_cpu(i) { 3763 for_each_online_cpu(i) {
3764 LIST_HEAD(list);
3886 struct array_cache *ccold = new->new[i]; 3765 struct array_cache *ccold = new->new[i];
3766 int node;
3767 struct kmem_cache_node *n;
3768
3887 if (!ccold) 3769 if (!ccold)
3888 continue; 3770 continue;
3889 spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3771
3890 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); 3772 node = cpu_to_mem(i);
3891 spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3773 n = get_node(cachep, node);
3774 spin_lock_irq(&n->list_lock);
3775 free_block(cachep, ccold->entry, ccold->avail, node, &list);
3776 spin_unlock_irq(&n->list_lock);
3777 slabs_destroy(cachep, &list);
3892 kfree(ccold); 3778 kfree(ccold);
3893 } 3779 }
3894 kfree(new); 3780 kfree(new);
@@ -3996,6 +3882,7 @@ skip_setup:
3996static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, 3882static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
3997 struct array_cache *ac, int force, int node) 3883 struct array_cache *ac, int force, int node)
3998{ 3884{
3885 LIST_HEAD(list);
3999 int tofree; 3886 int tofree;
4000 3887
4001 if (!ac || !ac->avail) 3888 if (!ac || !ac->avail)
@@ -4008,12 +3895,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
4008 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3895 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4009 if (tofree > ac->avail) 3896 if (tofree > ac->avail)
4010 tofree = (ac->avail + 1) / 2; 3897 tofree = (ac->avail + 1) / 2;
4011 free_block(cachep, ac->entry, tofree, node); 3898 free_block(cachep, ac->entry, tofree, node, &list);
4012 ac->avail -= tofree; 3899 ac->avail -= tofree;
4013 memmove(ac->entry, &(ac->entry[tofree]), 3900 memmove(ac->entry, &(ac->entry[tofree]),
4014 sizeof(void *) * ac->avail); 3901 sizeof(void *) * ac->avail);
4015 } 3902 }
4016 spin_unlock_irq(&n->list_lock); 3903 spin_unlock_irq(&n->list_lock);
3904 slabs_destroy(cachep, &list);
4017 } 3905 }
4018} 3906}
4019 3907
@@ -4048,7 +3936,7 @@ static void cache_reap(struct work_struct *w)
4048 * have established with reasonable certainty that 3936 * have established with reasonable certainty that
4049 * we can do some work if the lock was obtained. 3937 * we can do some work if the lock was obtained.
4050 */ 3938 */
4051 n = searchp->node[node]; 3939 n = get_node(searchp, node);
4052 3940
4053 reap_alien(searchp, n); 3941 reap_alien(searchp, n);
4054 3942
@@ -4100,10 +3988,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4100 3988
4101 active_objs = 0; 3989 active_objs = 0;
4102 num_slabs = 0; 3990 num_slabs = 0;
4103 for_each_online_node(node) { 3991 for_each_kmem_cache_node(cachep, node, n) {
4104 n = cachep->node[node];
4105 if (!n)
4106 continue;
4107 3992
4108 check_irq_on(); 3993 check_irq_on();
4109 spin_lock_irq(&n->list_lock); 3994 spin_lock_irq(&n->list_lock);
@@ -4328,10 +4213,7 @@ static int leaks_show(struct seq_file *m, void *p)
4328 4213
4329 x[1] = 0; 4214 x[1] = 0;
4330 4215
4331 for_each_online_node(node) { 4216 for_each_kmem_cache_node(cachep, node, n) {
4332 n = cachep->node[node];
4333 if (!n)
4334 continue;
4335 4217
4336 check_irq_on(); 4218 check_irq_on();
4337 spin_lock_irq(&n->list_lock); 4219 spin_lock_irq(&n->list_lock);
diff --git a/mm/slab.h b/mm/slab.h
index 961a3fb1f5a2..0e0fdd365840 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -256,13 +256,12 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
256 return cachep; 256 return cachep;
257 257
258 pr_err("%s: Wrong slab cache. %s but object is from %s\n", 258 pr_err("%s: Wrong slab cache. %s but object is from %s\n",
259 __FUNCTION__, cachep->name, s->name); 259 __func__, cachep->name, s->name);
260 WARN_ON_ONCE(1); 260 WARN_ON_ONCE(1);
261 return s; 261 return s;
262} 262}
263#endif
264
265 263
264#ifndef CONFIG_SLOB
266/* 265/*
267 * The slab lists for all objects. 266 * The slab lists for all objects.
268 */ 267 */
@@ -277,7 +276,7 @@ struct kmem_cache_node {
277 unsigned int free_limit; 276 unsigned int free_limit;
278 unsigned int colour_next; /* Per-node cache coloring */ 277 unsigned int colour_next; /* Per-node cache coloring */
279 struct array_cache *shared; /* shared per node */ 278 struct array_cache *shared; /* shared per node */
280 struct array_cache **alien; /* on other nodes */ 279 struct alien_cache **alien; /* on other nodes */
281 unsigned long next_reap; /* updated without locking */ 280 unsigned long next_reap; /* updated without locking */
282 int free_touched; /* updated without locking */ 281 int free_touched; /* updated without locking */
283#endif 282#endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
294 293
295}; 294};
296 295
296static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
297{
298 return s->node[node];
299}
300
301/*
302 * Iterator over all nodes. The body will be executed for each node that has
303 * a kmem_cache_node structure allocated (which is true for all online nodes)
304 */
305#define for_each_kmem_cache_node(__s, __node, __n) \
306 for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \
307 if (__n)
308
309#endif
310
297void *slab_next(struct seq_file *m, void *p, loff_t *pos); 311void *slab_next(struct seq_file *m, void *p, loff_t *pos);
298void slab_stop(struct seq_file *m, void *p); 312void slab_stop(struct seq_file *m, void *p);
313
314#endif /* MM_SLAB_H */
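
mm/slab.h now provides get_node() and for_each_kmem_cache_node(), so callers stop open-coding the array lookup plus NULL check. The macro refreshes the node pointer through the comma operator in the loop condition and filters unallocated slots with a trailing if. A standalone sketch of the same shape over an invented sparse pointer array (the bound is checked before the lookup here simply to keep the sketch strictly in-bounds):

#include <stdio.h>

#define MAX_NODES 8

struct node_info { long objects; };

static struct node_info *nodes[MAX_NODES];   /* sparse: some slots stay NULL */

/*
 * Refresh __n via the comma operator each iteration and skip NULL slots
 * with the trailing if, mirroring for_each_kmem_cache_node().
 */
#define for_each_present_node(__i, __n)                                  \
    for (__i = 0; (__i) < MAX_NODES && ((__n) = nodes[__i], 1); (__i)++) \
        if (__n)

int main(void)
{
    static struct node_info a = { 10 }, b = { 32 };
    struct node_info *n;
    int i;
    long total = 0;

    nodes[1] = &a;
    nodes[5] = &b;

    for_each_present_node(i, n)
        total += n->objects;

    printf("total objects: %ld\n", total);   /* 42 */
    return 0;
}

Because both SLAB and SLUB need the helper, moving it into slab.h lets the dozens of for_each_online_node()/for_each_node_state() loops in the rest of this patch collapse into one-liners.
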
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d31c4bacc6a2..d319502b2403 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/page.h> 20#include <asm/page.h>
21#include <linux/memcontrol.h> 21#include <linux/memcontrol.h>
22
23#define CREATE_TRACE_POINTS
22#include <trace/events/kmem.h> 24#include <trace/events/kmem.h>
23 25
24#include "slab.h" 26#include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
787} 789}
788module_init(slab_proc_init); 790module_init(slab_proc_init);
789#endif /* CONFIG_SLABINFO */ 791#endif /* CONFIG_SLABINFO */
792
793static __always_inline void *__do_krealloc(const void *p, size_t new_size,
794 gfp_t flags)
795{
796 void *ret;
797 size_t ks = 0;
798
799 if (p)
800 ks = ksize(p);
801
802 if (ks >= new_size)
803 return (void *)p;
804
805 ret = kmalloc_track_caller(new_size, flags);
806 if (ret && p)
807 memcpy(ret, p, ks);
808
809 return ret;
810}
811
812/**
813 * __krealloc - like krealloc() but don't free @p.
814 * @p: object to reallocate memory for.
815 * @new_size: how many bytes of memory are required.
816 * @flags: the type of memory to allocate.
817 *
818 * This function is like krealloc() except it never frees the originally
819 * allocated buffer. Use this if you don't want to free the buffer immediately
820 * like, for example, with RCU.
821 */
822void *__krealloc(const void *p, size_t new_size, gfp_t flags)
823{
824 if (unlikely(!new_size))
825 return ZERO_SIZE_PTR;
826
827 return __do_krealloc(p, new_size, flags);
828
829}
830EXPORT_SYMBOL(__krealloc);
831
832/**
833 * krealloc - reallocate memory. The contents will remain unchanged.
834 * @p: object to reallocate memory for.
835 * @new_size: how many bytes of memory are required.
836 * @flags: the type of memory to allocate.
837 *
838 * The contents of the object pointed to are preserved up to the
839 * lesser of the new and old sizes. If @p is %NULL, krealloc()
840 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
841 * %NULL pointer, the object pointed to is freed.
842 */
843void *krealloc(const void *p, size_t new_size, gfp_t flags)
844{
845 void *ret;
846
847 if (unlikely(!new_size)) {
848 kfree(p);
849 return ZERO_SIZE_PTR;
850 }
851
852 ret = __do_krealloc(p, new_size, flags);
853 if (ret && p != ret)
854 kfree(p);
855
856 return ret;
857}
858EXPORT_SYMBOL(krealloc);
859
860/**
861 * kzfree - like kfree but zero memory
862 * @p: object to free memory of
863 *
864 * The memory of the object @p points to is zeroed before freed.
865 * If @p is %NULL, kzfree() does nothing.
866 *
867 * Note: this function zeroes the whole allocated buffer which can be a good
868 * deal bigger than the requested buffer size passed to kmalloc(). So be
869 * careful when using this function in performance sensitive code.
870 */
871void kzfree(const void *p)
872{
873 size_t ks;
874 void *mem = (void *)p;
875
876 if (unlikely(ZERO_OR_NULL_PTR(mem)))
877 return;
878 ks = ksize(mem);
879 memset(mem, 0, ks);
880 kfree(mem);
881}
882EXPORT_SYMBOL(kzfree);
883
884/* Tracepoints definitions. */
885EXPORT_TRACEPOINT_SYMBOL(kmalloc);
886EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
887EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
888EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
889EXPORT_TRACEPOINT_SYMBOL(kfree);
890EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
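
The krealloc()/__krealloc()/kzfree() helpers, previously in mm/util.c, now live next to the allocator in mm/slab_common.c, together with the kmem tracepoint definitions. Outside the kernel there is no ksize(), so a userspace analogue has to carry the usable size itself; the sketch below models the same reuse-if-big-enough / copy-then-free and zero-before-free behaviour with an explicit size header (names are illustrative and the zero-length corner cases are omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct hdr { size_t size; };

static void *xalloc(size_t size)
{
    struct hdr *h = malloc(sizeof(*h) + size);
    if (!h)
        return NULL;
    h->size = size;
    return h + 1;
}

static size_t xsize(const void *p) { return ((const struct hdr *)p - 1)->size; }
static void xfree(void *p) { if (p) free((struct hdr *)p - 1); }

/* Like krealloc(): reuse the buffer when it is already big enough. */
static void *xrealloc(void *p, size_t new_size)
{
    void *ret;

    if (p && xsize(p) >= new_size)
        return p;
    ret = xalloc(new_size);
    if (ret && p) {
        memcpy(ret, p, xsize(p));
        xfree(p);
    }
    return ret;
}

/* Like kzfree(): scrub the whole usable buffer before releasing it. */
static void xzfree(void *p)
{
    if (!p)
        return;
    memset(p, 0, xsize(p));
    xfree(p);
}

int main(void)
{
    char *s = xalloc(8);
    if (!s)
        return 1;
    strcpy(s, "secret");
    s = xrealloc(s, 4);          /* 8 >= 4: the same buffer comes back */
    s = xrealloc(s, 64);         /* grows: contents are copied over */
    printf("%s (%zu usable)\n", s, xsize(s));
    xzfree(s);                   /* zeroed before free */
    return 0;
}
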
diff --git a/mm/slub.c b/mm/slub.c
index 73004808537e..3e8afcc07a76 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
233 * Core slab cache functions 233 * Core slab cache functions
234 *******************************************************************/ 234 *******************************************************************/
235 235
236static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
237{
238 return s->node[node];
239}
240
241/* Verify that a pointer has an address that is valid within a slab page */ 236/* Verify that a pointer has an address that is valid within a slab page */
242static inline int check_valid_pointer(struct kmem_cache *s, 237static inline int check_valid_pointer(struct kmem_cache *s,
243 struct page *page, const void *object) 238 struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
288 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ 283 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
289 __p += (__s)->size) 284 __p += (__s)->size)
290 285
286#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
287 for (__p = (__addr), __idx = 1; __idx <= __objects;\
288 __p += (__s)->size, __idx++)
289
291/* Determine object index from a given position */ 290/* Determine object index from a given position */
292static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 291static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
293{ 292{
@@ -382,9 +381,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 381 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
383 if (s->flags & __CMPXCHG_DOUBLE) { 382 if (s->flags & __CMPXCHG_DOUBLE) {
384 if (cmpxchg_double(&page->freelist, &page->counters, 383 if (cmpxchg_double(&page->freelist, &page->counters,
385 freelist_old, counters_old, 384 freelist_old, counters_old,
386 freelist_new, counters_new)) 385 freelist_new, counters_new))
387 return 1; 386 return 1;
388 } else 387 } else
389#endif 388#endif
390 { 389 {
@@ -418,9 +417,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 417 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
419 if (s->flags & __CMPXCHG_DOUBLE) { 418 if (s->flags & __CMPXCHG_DOUBLE) {
420 if (cmpxchg_double(&page->freelist, &page->counters, 419 if (cmpxchg_double(&page->freelist, &page->counters,
421 freelist_old, counters_old, 420 freelist_old, counters_old,
422 freelist_new, counters_new)) 421 freelist_new, counters_new))
423 return 1; 422 return 1;
424 } else 423 } else
425#endif 424#endif
426 { 425 {
@@ -945,60 +944,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
945} 944}
946 945
947/* 946/*
948 * Hooks for other subsystems that check memory allocations. In a typical
949 * production configuration these hooks all should produce no code at all.
950 */
951static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
952{
953 kmemleak_alloc(ptr, size, 1, flags);
954}
955
956static inline void kfree_hook(const void *x)
957{
958 kmemleak_free(x);
959}
960
961static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
962{
963 flags &= gfp_allowed_mask;
964 lockdep_trace_alloc(flags);
965 might_sleep_if(flags & __GFP_WAIT);
966
967 return should_failslab(s->object_size, flags, s->flags);
968}
969
970static inline void slab_post_alloc_hook(struct kmem_cache *s,
971 gfp_t flags, void *object)
972{
973 flags &= gfp_allowed_mask;
974 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
975 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
976}
977
978static inline void slab_free_hook(struct kmem_cache *s, void *x)
979{
980 kmemleak_free_recursive(x, s->flags);
981
982 /*
983 * Trouble is that we may no longer disable interrupts in the fast path
984 * So in order to make the debug calls that expect irqs to be
985 * disabled we need to disable interrupts temporarily.
986 */
987#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
988 {
989 unsigned long flags;
990
991 local_irq_save(flags);
992 kmemcheck_slab_free(s, x, s->object_size);
993 debug_check_no_locks_freed(x, s->object_size);
994 local_irq_restore(flags);
995 }
996#endif
997 if (!(s->flags & SLAB_DEBUG_OBJECTS))
998 debug_check_no_obj_freed(x, s->object_size);
999}
1000
1001/*
1002 * Tracking of fully allocated slabs for debugging purposes. 947 * Tracking of fully allocated slabs for debugging purposes.
1003 */ 948 */
1004static void add_full(struct kmem_cache *s, 949static void add_full(struct kmem_cache *s,
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
1282static inline void dec_slabs_node(struct kmem_cache *s, int node, 1227static inline void dec_slabs_node(struct kmem_cache *s, int node,
1283 int objects) {} 1228 int objects) {}
1284 1229
1230#endif /* CONFIG_SLUB_DEBUG */
1231
1232/*
1233 * Hooks for other subsystems that check memory allocations. In a typical
1234 * production configuration these hooks all should produce no code at all.
1235 */
1285static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 1236static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1286{ 1237{
1287 kmemleak_alloc(ptr, size, 1, flags); 1238 kmemleak_alloc(ptr, size, 1, flags);
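
In mm/slub.c the kmemleak/kmemcheck/failslab hooks move out of the CONFIG_SLUB_DEBUG block so the call sites always compile; only the hook bodies depend on the individual debugging features, and in a production build they shrink to nothing. A small sketch of that pattern, where the hook always exists but collapses unless a feature macro is defined (the TRACE_ALLOC macro is invented for the example):

#include <stdio.h>
#include <stdlib.h>

/* Build with -DTRACE_ALLOC to get output; without it the hooks compile away. */
static inline void alloc_hook(void *obj, size_t size)
{
#ifdef TRACE_ALLOC
    fprintf(stderr, "alloc %p (%zu bytes)\n", obj, size);
#else
    (void)obj;
    (void)size;
#endif
}

static inline void free_hook(void *obj)
{
#ifdef TRACE_ALLOC
    fprintf(stderr, "free  %p\n", obj);
#else
    (void)obj;
#endif
}

/* Call sites stay identical in both configurations. */
int main(void)
{
    void *p = malloc(32);
    alloc_hook(p, 32);
    free_hook(p);
    free(p);
    return 0;
}
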
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
1293} 1244}
1294 1245
1295static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1246static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1296 { return 0; } 1247{
1248 flags &= gfp_allowed_mask;
1249 lockdep_trace_alloc(flags);
1250 might_sleep_if(flags & __GFP_WAIT);
1251
1252 return should_failslab(s->object_size, flags, s->flags);
1253}
1297 1254
1298static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1255static inline void slab_post_alloc_hook(struct kmem_cache *s,
1299 void *object) 1256 gfp_t flags, void *object)
1300{ 1257{
1301 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, 1258 flags &= gfp_allowed_mask;
1302 flags & gfp_allowed_mask); 1259 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
1260 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
1303} 1261}
1304 1262
1305static inline void slab_free_hook(struct kmem_cache *s, void *x) 1263static inline void slab_free_hook(struct kmem_cache *s, void *x)
1306{ 1264{
1307 kmemleak_free_recursive(x, s->flags); 1265 kmemleak_free_recursive(x, s->flags);
1308}
1309 1266
1310#endif /* CONFIG_SLUB_DEBUG */ 1267 /*
1268 * Trouble is that we may no longer disable interrupts in the fast path
1269 * So in order to make the debug calls that expect irqs to be
1270 * disabled we need to disable interrupts temporarily.
1271 */
1272#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1273 {
1274 unsigned long flags;
1275
1276 local_irq_save(flags);
1277 kmemcheck_slab_free(s, x, s->object_size);
1278 debug_check_no_locks_freed(x, s->object_size);
1279 local_irq_restore(flags);
1280 }
1281#endif
1282 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1283 debug_check_no_obj_freed(x, s->object_size);
1284}
1311 1285
1312/* 1286/*
1313 * Slab allocation and freeing 1287 * Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1409{ 1383{
1410 struct page *page; 1384 struct page *page;
1411 void *start; 1385 void *start;
1412 void *last;
1413 void *p; 1386 void *p;
1414 int order; 1387 int order;
1388 int idx;
1415 1389
1416 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1390 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1417 1391
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1432 if (unlikely(s->flags & SLAB_POISON)) 1406 if (unlikely(s->flags & SLAB_POISON))
1433 memset(start, POISON_INUSE, PAGE_SIZE << order); 1407 memset(start, POISON_INUSE, PAGE_SIZE << order);
1434 1408
1435 last = start; 1409 for_each_object_idx(p, idx, s, start, page->objects) {
1436 for_each_object(p, s, start, page->objects) { 1410 setup_object(s, page, p);
1437 setup_object(s, page, last); 1411 if (likely(idx < page->objects))
1438 set_freepointer(s, last, p); 1412 set_freepointer(s, p, p + s->size);
1439 last = p; 1413 else
1414 set_freepointer(s, p, NULL);
1440 } 1415 }
1441 setup_object(s, page, last);
1442 set_freepointer(s, last, NULL);
1443 1416
1444 page->freelist = start; 1417 page->freelist = start;
1445 page->inuse = page->objects; 1418 page->inuse = page->objects;
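
new_slab() now uses for_each_object_idx() and the running index to decide whether the current object is the last one, instead of trailing a "last" pointer and patching it up after the loop: each object's free pointer simply points at the next object, and the final one gets NULL. A self-contained sketch that threads a freelist through a contiguous block of fixed-size objects the same way (the layout and helpers below are illustrative, not the real SLUB metadata):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define OBJ_SIZE   64u   /* object size; the free pointer lives at offset 0 */
#define NR_OBJECTS 8u

/* Write the free pointer at the start of an object (memcpy avoids aliasing issues). */
static void set_freepointer(char *obj, void *next)
{
    memcpy(obj, &next, sizeof(next));
}

static void *get_freepointer(char *obj)
{
    void *next;
    memcpy(&next, obj, sizeof(next));
    return next;
}

int main(void)
{
    char *start = calloc(NR_OBJECTS, OBJ_SIZE);
    char *p;
    unsigned int idx;
    unsigned int count = 0;

    if (!start)
        return 1;

    /* Index-based construction: the last object terminates the list. */
    for (p = start, idx = 1; idx <= NR_OBJECTS; p += OBJ_SIZE, idx++) {
        if (idx < NR_OBJECTS)
            set_freepointer(p, p + OBJ_SIZE);
        else
            set_freepointer(p, NULL);
    }

    /* Walk the freelist to show it covers every object exactly once. */
    for (p = start; p; p = get_freepointer(p))
        count++;
    printf("%u objects on the freelist\n", count);

    free(start);
    return 0;
}
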
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2162 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, 2135 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2163 DEFAULT_RATELIMIT_BURST); 2136 DEFAULT_RATELIMIT_BURST);
2164 int node; 2137 int node;
2138 struct kmem_cache_node *n;
2165 2139
2166 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) 2140 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2167 return; 2141 return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2176 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n", 2150 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2177 s->name); 2151 s->name);
2178 2152
2179 for_each_online_node(node) { 2153 for_each_kmem_cache_node(s, node, n) {
2180 struct kmem_cache_node *n = get_node(s, node);
2181 unsigned long nr_slabs; 2154 unsigned long nr_slabs;
2182 unsigned long nr_objs; 2155 unsigned long nr_objs;
2183 unsigned long nr_free; 2156 unsigned long nr_free;
2184 2157
2185 if (!n)
2186 continue;
2187
2188 nr_free = count_partial(n, count_free); 2158 nr_free = count_partial(n, count_free);
2189 nr_slabs = node_nr_slabs(n); 2159 nr_slabs = node_nr_slabs(n);
2190 nr_objs = node_nr_objs(n); 2160 nr_objs = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
2928static void free_kmem_cache_nodes(struct kmem_cache *s) 2898static void free_kmem_cache_nodes(struct kmem_cache *s)
2929{ 2899{
2930 int node; 2900 int node;
2901 struct kmem_cache_node *n;
2931 2902
2932 for_each_node_state(node, N_NORMAL_MEMORY) { 2903 for_each_kmem_cache_node(s, node, n) {
2933 struct kmem_cache_node *n = s->node[node]; 2904 kmem_cache_free(kmem_cache_node, n);
2934
2935 if (n)
2936 kmem_cache_free(kmem_cache_node, n);
2937
2938 s->node[node] = NULL; 2905 s->node[node] = NULL;
2939 } 2906 }
2940} 2907}
@@ -3222,12 +3189,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3222static inline int kmem_cache_close(struct kmem_cache *s) 3189static inline int kmem_cache_close(struct kmem_cache *s)
3223{ 3190{
3224 int node; 3191 int node;
3192 struct kmem_cache_node *n;
3225 3193
3226 flush_all(s); 3194 flush_all(s);
3227 /* Attempt to free all objects */ 3195 /* Attempt to free all objects */
3228 for_each_node_state(node, N_NORMAL_MEMORY) { 3196 for_each_kmem_cache_node(s, node, n) {
3229 struct kmem_cache_node *n = get_node(s, node);
3230
3231 free_partial(s, n); 3197 free_partial(s, n);
3232 if (n->nr_partial || slabs_node(s, node)) 3198 if (n->nr_partial || slabs_node(s, node))
3233 return 1; 3199 return 1;
@@ -3412,9 +3378,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
3412 return -ENOMEM; 3378 return -ENOMEM;
3413 3379
3414 flush_all(s); 3380 flush_all(s);
3415 for_each_node_state(node, N_NORMAL_MEMORY) { 3381 for_each_kmem_cache_node(s, node, n) {
3416 n = get_node(s, node);
3417
3418 if (!n->nr_partial) 3382 if (!n->nr_partial)
3419 continue; 3383 continue;
3420 3384
@@ -3586,6 +3550,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3586{ 3550{
3587 int node; 3551 int node;
3588 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 3552 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3553 struct kmem_cache_node *n;
3589 3554
3590 memcpy(s, static_cache, kmem_cache->object_size); 3555 memcpy(s, static_cache, kmem_cache->object_size);
3591 3556
@@ -3595,19 +3560,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3595 * IPIs around. 3560 * IPIs around.
3596 */ 3561 */
3597 __flush_cpu_slab(s, smp_processor_id()); 3562 __flush_cpu_slab(s, smp_processor_id());
3598 for_each_node_state(node, N_NORMAL_MEMORY) { 3563 for_each_kmem_cache_node(s, node, n) {
3599 struct kmem_cache_node *n = get_node(s, node);
3600 struct page *p; 3564 struct page *p;
3601 3565
3602 if (n) { 3566 list_for_each_entry(p, &n->partial, lru)
3603 list_for_each_entry(p, &n->partial, lru) 3567 p->slab_cache = s;
3604 p->slab_cache = s;
3605 3568
3606#ifdef CONFIG_SLUB_DEBUG 3569#ifdef CONFIG_SLUB_DEBUG
3607 list_for_each_entry(p, &n->full, lru) 3570 list_for_each_entry(p, &n->full, lru)
3608 p->slab_cache = s; 3571 p->slab_cache = s;
3609#endif 3572#endif
3610 }
3611 } 3573 }
3612 list_add(&s->list, &slab_caches); 3574 list_add(&s->list, &slab_caches);
3613 return s; 3575 return s;
@@ -3960,16 +3922,14 @@ static long validate_slab_cache(struct kmem_cache *s)
3960 unsigned long count = 0; 3922 unsigned long count = 0;
3961 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 3923 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3962 sizeof(unsigned long), GFP_KERNEL); 3924 sizeof(unsigned long), GFP_KERNEL);
3925 struct kmem_cache_node *n;
3963 3926
3964 if (!map) 3927 if (!map)
3965 return -ENOMEM; 3928 return -ENOMEM;
3966 3929
3967 flush_all(s); 3930 flush_all(s);
3968 for_each_node_state(node, N_NORMAL_MEMORY) { 3931 for_each_kmem_cache_node(s, node, n)
3969 struct kmem_cache_node *n = get_node(s, node);
3970
3971 count += validate_slab_node(s, n, map); 3932 count += validate_slab_node(s, n, map);
3972 }
3973 kfree(map); 3933 kfree(map);
3974 return count; 3934 return count;
3975} 3935}
@@ -4123,6 +4083,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4123 int node; 4083 int node;
4124 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4084 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4125 sizeof(unsigned long), GFP_KERNEL); 4085 sizeof(unsigned long), GFP_KERNEL);
4086 struct kmem_cache_node *n;
4126 4087
4127 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4088 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4128 GFP_TEMPORARY)) { 4089 GFP_TEMPORARY)) {
@@ -4132,8 +4093,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4132 /* Push back cpu slabs */ 4093 /* Push back cpu slabs */
4133 flush_all(s); 4094 flush_all(s);
4134 4095
4135 for_each_node_state(node, N_NORMAL_MEMORY) { 4096 for_each_kmem_cache_node(s, node, n) {
4136 struct kmem_cache_node *n = get_node(s, node);
4137 unsigned long flags; 4097 unsigned long flags;
4138 struct page *page; 4098 struct page *page;
4139 4099
@@ -4205,7 +4165,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4205#endif 4165#endif
4206 4166
4207#ifdef SLUB_RESILIENCY_TEST 4167#ifdef SLUB_RESILIENCY_TEST
4208static void resiliency_test(void) 4168static void __init resiliency_test(void)
4209{ 4169{
4210 u8 *p; 4170 u8 *p;
4211 4171
@@ -4332,8 +4292,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4332 get_online_mems(); 4292 get_online_mems();
4333#ifdef CONFIG_SLUB_DEBUG 4293#ifdef CONFIG_SLUB_DEBUG
4334 if (flags & SO_ALL) { 4294 if (flags & SO_ALL) {
4335 for_each_node_state(node, N_NORMAL_MEMORY) { 4295 struct kmem_cache_node *n;
4336 struct kmem_cache_node *n = get_node(s, node); 4296
4297 for_each_kmem_cache_node(s, node, n) {
4337 4298
4338 if (flags & SO_TOTAL) 4299 if (flags & SO_TOTAL)
4339 x = atomic_long_read(&n->total_objects); 4300 x = atomic_long_read(&n->total_objects);
@@ -4349,9 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4349 } else 4310 } else
4350#endif 4311#endif
4351 if (flags & SO_PARTIAL) { 4312 if (flags & SO_PARTIAL) {
4352 for_each_node_state(node, N_NORMAL_MEMORY) { 4313 struct kmem_cache_node *n;
4353 struct kmem_cache_node *n = get_node(s, node);
4354 4314
4315 for_each_kmem_cache_node(s, node, n) {
4355 if (flags & SO_TOTAL) 4316 if (flags & SO_TOTAL)
4356 x = count_partial(n, count_total); 4317 x = count_partial(n, count_total);
4357 else if (flags & SO_OBJECTS) 4318 else if (flags & SO_OBJECTS)
@@ -4364,7 +4325,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4364 } 4325 }
4365 x = sprintf(buf, "%lu", total); 4326 x = sprintf(buf, "%lu", total);
4366#ifdef CONFIG_NUMA 4327#ifdef CONFIG_NUMA
4367 for_each_node_state(node, N_NORMAL_MEMORY) 4328 for (node = 0; node < nr_node_ids; node++)
4368 if (nodes[node]) 4329 if (nodes[node])
4369 x += sprintf(buf + x, " N%d=%lu", 4330 x += sprintf(buf + x, " N%d=%lu",
4370 node, nodes[node]); 4331 node, nodes[node]);
@@ -4378,16 +4339,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4378static int any_slab_objects(struct kmem_cache *s) 4339static int any_slab_objects(struct kmem_cache *s)
4379{ 4340{
4380 int node; 4341 int node;
4342 struct kmem_cache_node *n;
4381 4343
4382 for_each_online_node(node) { 4344 for_each_kmem_cache_node(s, node, n)
4383 struct kmem_cache_node *n = get_node(s, node);
4384
4385 if (!n)
4386 continue;
4387
4388 if (atomic_long_read(&n->total_objects)) 4345 if (atomic_long_read(&n->total_objects))
4389 return 1; 4346 return 1;
4390 } 4347
4391 return 0; 4348 return 0;
4392} 4349}
4393#endif 4350#endif
@@ -4509,7 +4466,7 @@ SLAB_ATTR_RO(ctor);
4509 4466
4510static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4467static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4511{ 4468{
4512 return sprintf(buf, "%d\n", s->refcount - 1); 4469 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
4513} 4470}
4514SLAB_ATTR_RO(aliases); 4471SLAB_ATTR_RO(aliases);
4515 4472
@@ -5171,12 +5128,6 @@ static char *create_unique_id(struct kmem_cache *s)
5171 *p++ = '-'; 5128 *p++ = '-';
5172 p += sprintf(p, "%07d", s->size); 5129 p += sprintf(p, "%07d", s->size);
5173 5130
5174#ifdef CONFIG_MEMCG_KMEM
5175 if (!is_root_cache(s))
5176 p += sprintf(p, "-%08d",
5177 memcg_cache_id(s->memcg_params->memcg));
5178#endif
5179
5180 BUG_ON(p > name + ID_STR_LENGTH - 1); 5131 BUG_ON(p > name + ID_STR_LENGTH - 1);
5181 return name; 5132 return name;
5182} 5133}
@@ -5342,13 +5293,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5342 unsigned long nr_objs = 0; 5293 unsigned long nr_objs = 0;
5343 unsigned long nr_free = 0; 5294 unsigned long nr_free = 0;
5344 int node; 5295 int node;
5296 struct kmem_cache_node *n;
5345 5297
5346 for_each_online_node(node) { 5298 for_each_kmem_cache_node(s, node, n) {
5347 struct kmem_cache_node *n = get_node(s, node);
5348
5349 if (!n)
5350 continue;
5351
5352 nr_slabs += node_nr_slabs(n); 5299 nr_slabs += node_nr_slabs(n);
5353 nr_objs += node_nr_objs(n); 5300 nr_objs += node_nr_objs(n);
5354 nr_free += count_partial(n, count_free); 5301 nr_free += count_partial(n, count_free);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..c789d01c9ec3 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -501,7 +501,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
501 SetPageActive(page); 501 SetPageActive(page);
502 lru += LRU_ACTIVE; 502 lru += LRU_ACTIVE;
503 add_page_to_lru_list(page, lruvec, lru); 503 add_page_to_lru_list(page, lruvec, lru);
504 trace_mm_lru_activate(page, page_to_pfn(page)); 504 trace_mm_lru_activate(page);
505 505
506 __count_vm_event(PGACTIVATE); 506 __count_vm_event(PGACTIVATE);
507 update_page_reclaim_stat(lruvec, file, 1); 507 update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page)
589 * inactive,unreferenced -> inactive,referenced 589 * inactive,unreferenced -> inactive,referenced
590 * inactive,referenced -> active,unreferenced 590 * inactive,referenced -> active,unreferenced
591 * active,unreferenced -> active,referenced 591 * active,unreferenced -> active,referenced
592 *
593 * When a newly allocated page is not yet visible, so safe for non-atomic ops,
594 * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
592 */ 595 */
593void mark_page_accessed(struct page *page) 596void mark_page_accessed(struct page *page)
594{ 597{
@@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page)
614} 617}
615EXPORT_SYMBOL(mark_page_accessed); 618EXPORT_SYMBOL(mark_page_accessed);
616 619
617/*
618 * Used to mark_page_accessed(page) that is not visible yet and when it is
619 * still safe to use non-atomic ops
620 */
621void init_page_accessed(struct page *page)
622{
623 if (!PageReferenced(page))
624 __SetPageReferenced(page);
625}
626EXPORT_SYMBOL(init_page_accessed);
627
628static void __lru_cache_add(struct page *page) 620static void __lru_cache_add(struct page *page)
629{ 621{
630 struct pagevec *pvec = &get_cpu_var(lru_add_pvec); 622 struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -996,7 +988,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
996 SetPageLRU(page); 988 SetPageLRU(page);
997 add_page_to_lru_list(page, lruvec, lru); 989 add_page_to_lru_list(page, lruvec, lru);
998 update_page_reclaim_stat(lruvec, file, active); 990 update_page_reclaim_stat(lruvec, file, active);
999 trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); 991 trace_mm_lru_insertion(page, lru);
1000} 992}
1001 993
1002/* 994/*
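
The new mark_page_accessed() comment and the removal of init_page_accessed() codify a simple rule: a page that is not yet visible to anyone else may use the non-atomic __SetPageReferenced() instead of the locked bit operation. The same publish-before-sharing reasoning applies to any flag word, so plain or relaxed stores suffice while an object is still thread-private, and atomic read-modify-write operations are needed only once it can be observed concurrently. A hedged C11 sketch of the distinction (the flag names are invented):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_REFERENCED (1u << 0)
#define FLAG_ACTIVE     (1u << 1)

struct object {
    atomic_uint flags;
};

/* Only the creating thread can see 'obj' yet: a relaxed store is enough. */
static struct object *object_create(void)
{
    struct object *obj = malloc(sizeof(*obj));
    if (!obj)
        return NULL;
    atomic_init(&obj->flags, 0);
    atomic_store_explicit(&obj->flags, FLAG_REFERENCED, memory_order_relaxed);
    return obj;
}

/* After publication, concurrent updaters must use an atomic read-modify-write. */
static void object_mark_active(struct object *obj)
{
    atomic_fetch_or_explicit(&obj->flags, FLAG_ACTIVE, memory_order_relaxed);
}

int main(void)
{
    struct object *obj = object_create();
    if (!obj)
        return 1;
    /* ...publish obj to other threads here (release store or under a lock)... */
    object_mark_active(obj);
    printf("flags = 0x%x\n", atomic_load(&obj->flags));
    free(obj);
    return 0;
}
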
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..7b6608df2ee8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,9 +16,6 @@
16 16
17#include "internal.h" 17#include "internal.h"
18 18
19#define CREATE_TRACE_POINTS
20#include <trace/events/kmem.h>
21
22/** 19/**
23 * kstrdup - allocate space for and copy an existing string 20 * kstrdup - allocate space for and copy an existing string
24 * @s: the string to duplicate 21 * @s: the string to duplicate
@@ -112,97 +109,6 @@ void *memdup_user(const void __user *src, size_t len)
112} 109}
113EXPORT_SYMBOL(memdup_user); 110EXPORT_SYMBOL(memdup_user);
114 111
115static __always_inline void *__do_krealloc(const void *p, size_t new_size,
116 gfp_t flags)
117{
118 void *ret;
119 size_t ks = 0;
120
121 if (p)
122 ks = ksize(p);
123
124 if (ks >= new_size)
125 return (void *)p;
126
127 ret = kmalloc_track_caller(new_size, flags);
128 if (ret && p)
129 memcpy(ret, p, ks);
130
131 return ret;
132}
133
134/**
135 * __krealloc - like krealloc() but don't free @p.
136 * @p: object to reallocate memory for.
137 * @new_size: how many bytes of memory are required.
138 * @flags: the type of memory to allocate.
139 *
140 * This function is like krealloc() except it never frees the originally
141 * allocated buffer. Use this if you don't want to free the buffer immediately
142 * like, for example, with RCU.
143 */
144void *__krealloc(const void *p, size_t new_size, gfp_t flags)
145{
146 if (unlikely(!new_size))
147 return ZERO_SIZE_PTR;
148
149 return __do_krealloc(p, new_size, flags);
150
151}
152EXPORT_SYMBOL(__krealloc);
153
154/**
155 * krealloc - reallocate memory. The contents will remain unchanged.
156 * @p: object to reallocate memory for.
157 * @new_size: how many bytes of memory are required.
158 * @flags: the type of memory to allocate.
159 *
160 * The contents of the object pointed to are preserved up to the
161 * lesser of the new and old sizes. If @p is %NULL, krealloc()
162 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
163 * %NULL pointer, the object pointed to is freed.
164 */
165void *krealloc(const void *p, size_t new_size, gfp_t flags)
166{
167 void *ret;
168
169 if (unlikely(!new_size)) {
170 kfree(p);
171 return ZERO_SIZE_PTR;
172 }
173
174 ret = __do_krealloc(p, new_size, flags);
175 if (ret && p != ret)
176 kfree(p);
177
178 return ret;
179}
180EXPORT_SYMBOL(krealloc);
181
182/**
183 * kzfree - like kfree but zero memory
184 * @p: object to free memory of
185 *
186 * The memory of the object @p points to is zeroed before freed.
187 * If @p is %NULL, kzfree() does nothing.
188 *
189 * Note: this function zeroes the whole allocated buffer which can be a good
190 * deal bigger than the requested buffer size passed to kmalloc(). So be
191 * careful when using this function in performance sensitive code.
192 */
193void kzfree(const void *p)
194{
195 size_t ks;
196 void *mem = (void *)p;
197
198 if (unlikely(ZERO_OR_NULL_PTR(mem)))
199 return;
200 ks = ksize(mem);
201 memset(mem, 0, ks);
202 kfree(mem);
203}
204EXPORT_SYMBOL(kzfree);
205
206/* 112/*
207 * strndup_user - duplicate an existing string from user space 113 * strndup_user - duplicate an existing string from user space
208 * @s: The string to duplicate 114 * @s: The string to duplicate
@@ -504,11 +410,3 @@ out_mm:
504out: 410out:
505 return res; 411 return res;
506} 412}
507
508/* Tracepoints definitions. */
509EXPORT_TRACEPOINT_SYMBOL(kmalloc);
510EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
511EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
512EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
513EXPORT_TRACEPOINT_SYMBOL(kfree);
514EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f64632b67196..2b0aa5486092 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
1270} 1270}
1271EXPORT_SYMBOL_GPL(unmap_kernel_range); 1271EXPORT_SYMBOL_GPL(unmap_kernel_range);
1272 1272
1273int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) 1273int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
1274{ 1274{
1275 unsigned long addr = (unsigned long)area->addr; 1275 unsigned long addr = (unsigned long)area->addr;
1276 unsigned long end = addr + get_vm_area_size(area); 1276 unsigned long end = addr + get_vm_area_size(area);
1277 int err; 1277 int err;
1278 1278
1279 err = vmap_page_range(addr, end, prot, *pages); 1279 err = vmap_page_range(addr, end, prot, pages);
1280 if (err > 0) {
1281 *pages += err;
1282 err = 0;
1283 }
1284 1280
1285 return err; 1281 return err > 0 ? 0 : err;
1286} 1282}
1287EXPORT_SYMBOL_GPL(map_vm_area); 1283EXPORT_SYMBOL_GPL(map_vm_area);
1288 1284
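
map_vm_area() used to take struct page ***pages and advance the caller's cursor by however many pages it consumed; since it now always consumes the whole area, the patch flattens the parameter to a plain array and folds the old "positive return means progress" convention into err > 0 ? 0 : err. A small sketch of the same interface clean-up on an invented consume_all() helper:

#include <stdio.h>

/* Old shape: report progress by advancing the caller's cursor. */
static int consume_all_old(int **cursor, int count)
{
    int done = count;            /* pretend we processed everything */
    if (done > 0) {
        *cursor += done;
        done = 0;
    }
    return done;
}

/* New shape: take the array directly, return 0 on success or a negative error. */
static int consume_all(int *items, int count)
{
    int done = count;            /* same work as above */
    (void)items;
    return done > 0 ? 0 : done;
}

int main(void)
{
    int items[4] = { 1, 2, 3, 4 };
    int *cursor = items;

    printf("old: %d (cursor advanced by %td)\n",
           consume_all_old(&cursor, 4), cursor - items);
    printf("new: %d\n", consume_all(items, 4));
    return 0;
}

Dropping the extra level of indirection also lets the vmap() and __vmalloc_area_node() callers in the following hunks pass their page arrays straight through.
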
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
1548 if (!area) 1544 if (!area)
1549 return NULL; 1545 return NULL;
1550 1546
1551 if (map_vm_area(area, prot, &pages)) { 1547 if (map_vm_area(area, prot, pages)) {
1552 vunmap(area->addr); 1548 vunmap(area->addr);
1553 return NULL; 1549 return NULL;
1554 } 1550 }
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1566 const int order = 0; 1562 const int order = 0;
1567 struct page **pages; 1563 struct page **pages;
1568 unsigned int nr_pages, array_size, i; 1564 unsigned int nr_pages, array_size, i;
1569 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; 1565 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1566 const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
1570 1567
1571 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; 1568 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1572 array_size = (nr_pages * sizeof(struct page *)); 1569 array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1589 1586
1590 for (i = 0; i < area->nr_pages; i++) { 1587 for (i = 0; i < area->nr_pages; i++) {
1591 struct page *page; 1588 struct page *page;
1592 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1593 1589
1594 if (node == NUMA_NO_NODE) 1590 if (node == NUMA_NO_NODE)
1595 page = alloc_page(tmp_mask); 1591 page = alloc_page(alloc_mask);
1596 else 1592 else
1597 page = alloc_pages_node(node, tmp_mask, order); 1593 page = alloc_pages_node(node, alloc_mask, order);
1598 1594
1599 if (unlikely(!page)) { 1595 if (unlikely(!page)) {
1600 /* Successfully allocated i pages, free them in __vunmap() */ 1596 /* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1602 goto fail; 1598 goto fail;
1603 } 1599 }
1604 area->pages[i] = page; 1600 area->pages[i] = page;
1601 if (gfp_mask & __GFP_WAIT)
1602 cond_resched();
1605 } 1603 }
1606 1604
1607 if (map_vm_area(area, prot, &pages)) 1605 if (map_vm_area(area, prot, pages))
1608 goto fail; 1606 goto fail;
1609 return area->addr; 1607 return area->addr;
1610 1608
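
__vmalloc_area_node() now hoists the __GFP_NOWARN mask out of the per-page loop and calls cond_resched() between allocations when the request is allowed to sleep, so very large vmalloc()s no longer monopolize the CPU. A rough userspace approximation of the same "yield periodically inside a long, sleepable loop" idea, with sched_yield() standing in for cond_resched() (the MAY_SLEEP flag is invented for the sketch):

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

#define MAY_SLEEP 0x1u

static int fill_pages(void **pages, size_t nr, unsigned int flags)
{
    /* Loop-invariant setup hoisted out of the loop, like alloc_mask. */
    const size_t chunk = 4096;

    for (size_t i = 0; i < nr; i++) {
        pages[i] = malloc(chunk);
        if (!pages[i])
            return -1;           /* caller frees whatever was already allocated */
        if (flags & MAY_SLEEP)
            sched_yield();       /* be polite between expensive iterations */
    }
    return 0;
}

int main(void)
{
    enum { NR = 256 };
    void *pages[NR] = { 0 };

    if (fill_pages(pages, NR, MAY_SLEEP))
        fprintf(stderr, "allocation failed\n");
    for (size_t i = 0; i < NR; i++)
        free(pages[i]);
    return 0;
}
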
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
2690 2688
2691 prev_end = VMALLOC_START; 2689 prev_end = VMALLOC_START;
2692 2690
2693 spin_lock(&vmap_area_lock); 2691 rcu_read_lock();
2694 2692
2695 if (list_empty(&vmap_area_list)) { 2693 if (list_empty(&vmap_area_list)) {
2696 vmi->largest_chunk = VMALLOC_TOTAL; 2694 vmi->largest_chunk = VMALLOC_TOTAL;
2697 goto out; 2695 goto out;
2698 } 2696 }
2699 2697
2700 list_for_each_entry(va, &vmap_area_list, list) { 2698 list_for_each_entry_rcu(va, &vmap_area_list, list) {
2701 unsigned long addr = va->va_start; 2699 unsigned long addr = va->va_start;
2702 2700
2703 /* 2701 /*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
2724 vmi->largest_chunk = VMALLOC_END - prev_end; 2722 vmi->largest_chunk = VMALLOC_END - prev_end;
2725 2723
2726out: 2724out:
2727 spin_unlock(&vmap_area_lock); 2725 rcu_read_unlock();
2728} 2726}
2729#endif 2727#endif
2730 2728
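
get_vmalloc_info() switches from taking vmap_area_lock to an RCU read-side critical section around list_for_each_entry_rcu(), so readers no longer serialize against updates. The underlying idea is that a read-mostly linked list can be traversed without a lock as long as insertions publish fully initialized nodes and reclamation waits for readers to finish; the kernel gets the latter from RCU grace periods. The sketch below shows only the lockless-reader half with C11 release/acquire publication, assumes a single writer, and deliberately never frees nodes because it has no grace-period machinery:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct area {
    unsigned long start, end;
    struct area *_Atomic next;
};

static struct area *_Atomic head;

/* Single writer: initialize the node fully, then publish it with a release store. */
static void add_area(unsigned long start, unsigned long end)
{
    struct area *a = malloc(sizeof(*a));
    if (!a)
        return;
    a->start = start;
    a->end = end;
    atomic_store_explicit(&a->next,
                          atomic_load_explicit(&head, memory_order_relaxed),
                          memory_order_relaxed);
    atomic_store_explicit(&head, a, memory_order_release);
}

/* Readers: acquire loads pair with the release store; no lock is taken. */
static unsigned long used_bytes(void)
{
    unsigned long total = 0;

    for (struct area *a = atomic_load_explicit(&head, memory_order_acquire);
         a;
         a = atomic_load_explicit(&a->next, memory_order_acquire))
        total += a->end - a->start;
    return total;
}

int main(void)
{
    add_area(0x1000, 0x3000);
    add_area(0x8000, 0x9000);
    printf("used: %lu bytes\n", used_bytes());
    return 0;
}
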
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..d2f65c856350 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
59#include <trace/events/vmscan.h> 59#include <trace/events/vmscan.h>
60 60
61struct scan_control { 61struct scan_control {
62 /* Incremented by the number of inactive pages that were scanned */
63 unsigned long nr_scanned;
64
65 /* Number of pages freed so far during a call to shrink_zones() */
66 unsigned long nr_reclaimed;
67
68 /* How many pages shrink_list() should reclaim */ 62 /* How many pages shrink_list() should reclaim */
69 unsigned long nr_to_reclaim; 63 unsigned long nr_to_reclaim;
70 64
71 unsigned long hibernation_mode;
72
73 /* This context's GFP mask */ 65 /* This context's GFP mask */
74 gfp_t gfp_mask; 66 gfp_t gfp_mask;
75 67
76 int may_writepage; 68 /* Allocation order */
77
78 /* Can mapped pages be reclaimed? */
79 int may_unmap;
80
81 /* Can pages be swapped as part of reclaim? */
82 int may_swap;
83
84 int order; 69 int order;
85 70
86 /* Scan (total_size >> priority) pages at once */ 71 /*
87 int priority; 72 * Nodemask of nodes allowed by the caller. If NULL, all nodes
88 73 * are scanned.
89 /* anon vs. file LRUs scanning "ratio" */ 74 */
90 int swappiness; 75 nodemask_t *nodemask;
91 76
92 /* 77 /*
93 * The memory cgroup that hit its limit and as a result is the 78 * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
95 */ 80 */
96 struct mem_cgroup *target_mem_cgroup; 81 struct mem_cgroup *target_mem_cgroup;
97 82
98 /* 83 /* Scan (total_size >> priority) pages at once */
99 * Nodemask of nodes allowed by the caller. If NULL, all nodes 84 int priority;
100 * are scanned. 85
101 */ 86 unsigned int may_writepage:1;
102 nodemask_t *nodemask; 87
88 /* Can mapped pages be reclaimed? */
89 unsigned int may_unmap:1;
90
91 /* Can pages be swapped as part of reclaim? */
92 unsigned int may_swap:1;
93
94 unsigned int hibernation_mode:1;
95
96 /* One of the zones is ready for compaction */
97 unsigned int compaction_ready:1;
98
99 /* Incremented by the number of inactive pages that were scanned */
100 unsigned long nr_scanned;
101
102 /* Number of pages freed so far during a call to shrink_zones() */
103 unsigned long nr_reclaimed;
103}; 104};
104 105
105#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 106#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
136 * From 0 .. 100. Higher means more swappy. 137 * From 0 .. 100. Higher means more swappy.
137 */ 138 */
138int vm_swappiness = 60; 139int vm_swappiness = 60;
139unsigned long vm_total_pages; /* The total number of pages which the VM controls */ 140/*
141 * The total number of pages which are beyond the high watermark within all
142 * zones.
143 */
144unsigned long vm_total_pages;
140 145
141static LIST_HEAD(shrinker_list); 146static LIST_HEAD(shrinker_list);
142static DECLARE_RWSEM(shrinker_rwsem); 147static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
169 174
170bool zone_reclaimable(struct zone *zone) 175bool zone_reclaimable(struct zone *zone)
171{ 176{
172 return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; 177 return zone_page_state(zone, NR_PAGES_SCANNED) <
178 zone_reclaimable_pages(zone) * 6;
173} 179}
174 180
175static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) 181static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1503,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1503 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); 1509 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1504 1510
1505 if (global_reclaim(sc)) { 1511 if (global_reclaim(sc)) {
1506 zone->pages_scanned += nr_scanned; 1512 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
1507 if (current_is_kswapd()) 1513 if (current_is_kswapd())
1508 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); 1514 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
1509 else 1515 else
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1693 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, 1699 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
1694 &nr_scanned, sc, isolate_mode, lru); 1700 &nr_scanned, sc, isolate_mode, lru);
1695 if (global_reclaim(sc)) 1701 if (global_reclaim(sc))
1696 zone->pages_scanned += nr_scanned; 1702 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
1697 1703
1698 reclaim_stat->recent_scanned[file] += nr_taken; 1704 reclaim_stat->recent_scanned[file] += nr_taken;
1699 1705
@@ -1750,7 +1756,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1750 * Count referenced pages from currently used mappings as rotated, 1756 * Count referenced pages from currently used mappings as rotated,
1751 * even though only some of them are actually re-activated. This 1757 * even though only some of them are actually re-activated. This
1752 * helps balance scan pressure between file and anonymous pages in 1758 * helps balance scan pressure between file and anonymous pages in
1753 * get_scan_ratio. 1759 * get_scan_count.
1754 */ 1760 */
1755 reclaim_stat->recent_rotated[file] += nr_rotated; 1761 reclaim_stat->recent_rotated[file] += nr_rotated;
1756 1762
@@ -1865,8 +1871,8 @@ enum scan_balance {
1865 * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan 1871 * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
1866 * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan 1872 * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
1867 */ 1873 */
1868static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, 1874static void get_scan_count(struct lruvec *lruvec, int swappiness,
1869 unsigned long *nr) 1875 struct scan_control *sc, unsigned long *nr)
1870{ 1876{
1871 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1877 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1872 u64 fraction[2]; 1878 u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1909 * using the memory controller's swap limit feature would be 1915 * using the memory controller's swap limit feature would be
1910 * too expensive. 1916 * too expensive.
1911 */ 1917 */
1912 if (!global_reclaim(sc) && !sc->swappiness) { 1918 if (!global_reclaim(sc) && !swappiness) {
1913 scan_balance = SCAN_FILE; 1919 scan_balance = SCAN_FILE;
1914 goto out; 1920 goto out;
1915 } 1921 }
@@ -1919,16 +1925,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1919 * system is close to OOM, scan both anon and file equally 1925 * system is close to OOM, scan both anon and file equally
1920 * (unless the swappiness setting disagrees with swapping). 1926 * (unless the swappiness setting disagrees with swapping).
1921 */ 1927 */
1922 if (!sc->priority && sc->swappiness) { 1928 if (!sc->priority && swappiness) {
1923 scan_balance = SCAN_EQUAL; 1929 scan_balance = SCAN_EQUAL;
1924 goto out; 1930 goto out;
1925 } 1931 }
1926 1932
1927 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1928 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1929 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1930 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1931
1932 /* 1933 /*
1933 * Prevent the reclaimer from falling into the cache trap: as 1934 * Prevent the reclaimer from falling into the cache trap: as
1934 * cache pages start out inactive, every cache fault will tip 1935 * cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1940,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1939 * anon pages. Try to detect this based on file LRU size. 1940 * anon pages. Try to detect this based on file LRU size.
1940 */ 1941 */
1941 if (global_reclaim(sc)) { 1942 if (global_reclaim(sc)) {
1942 unsigned long free = zone_page_state(zone, NR_FREE_PAGES); 1943 unsigned long zonefile;
1944 unsigned long zonefree;
1943 1945
1944 if (unlikely(file + free <= high_wmark_pages(zone))) { 1946 zonefree = zone_page_state(zone, NR_FREE_PAGES);
1947 zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
1948 zone_page_state(zone, NR_INACTIVE_FILE);
1949
1950 if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
1945 scan_balance = SCAN_ANON; 1951 scan_balance = SCAN_ANON;
1946 goto out; 1952 goto out;
1947 } 1953 }
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1962 * With swappiness at 100, anonymous and file have the same priority. 1968 * With swappiness at 100, anonymous and file have the same priority.
1963 * This scanning priority is essentially the inverse of IO cost. 1969 * This scanning priority is essentially the inverse of IO cost.
1964 */ 1970 */
1965 anon_prio = sc->swappiness; 1971 anon_prio = swappiness;
1966 file_prio = 200 - anon_prio; 1972 file_prio = 200 - anon_prio;
1967 1973
1968 /* 1974 /*
@@ -1976,6 +1982,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1976 * 1982 *
1977 * anon in [0], file in [1] 1983 * anon in [0], file in [1]
1978 */ 1984 */
1985
1986 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1987 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1988 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1989 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1990
1979 spin_lock_irq(&zone->lru_lock); 1991 spin_lock_irq(&zone->lru_lock);
1980 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 1992 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1981 reclaim_stat->recent_scanned[0] /= 2; 1993 reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2064,8 @@ out:
2052/* 2064/*
2053 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 2065 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
2054 */ 2066 */
2055static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) 2067static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
2068 struct scan_control *sc)
2056{ 2069{
2057 unsigned long nr[NR_LRU_LISTS]; 2070 unsigned long nr[NR_LRU_LISTS];
2058 unsigned long targets[NR_LRU_LISTS]; 2071 unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2076,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
2063 struct blk_plug plug; 2076 struct blk_plug plug;
2064 bool scan_adjusted; 2077 bool scan_adjusted;
2065 2078
2066 get_scan_count(lruvec, sc, nr); 2079 get_scan_count(lruvec, swappiness, sc, nr);
2067 2080
2068 /* Record the original scan target for proportional adjustments later */ 2081 /* Record the original scan target for proportional adjustments later */
2069 memcpy(targets, nr, sizeof(nr)); 2082 memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2254,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
2241 } 2254 }
2242} 2255}
2243 2256
2244static void shrink_zone(struct zone *zone, struct scan_control *sc) 2257static bool shrink_zone(struct zone *zone, struct scan_control *sc)
2245{ 2258{
2246 unsigned long nr_reclaimed, nr_scanned; 2259 unsigned long nr_reclaimed, nr_scanned;
2260 bool reclaimable = false;
2247 2261
2248 do { 2262 do {
2249 struct mem_cgroup *root = sc->target_mem_cgroup; 2263 struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2273,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
2259 memcg = mem_cgroup_iter(root, NULL, &reclaim); 2273 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2260 do { 2274 do {
2261 struct lruvec *lruvec; 2275 struct lruvec *lruvec;
2276 int swappiness;
2262 2277
2263 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2278 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2279 swappiness = mem_cgroup_swappiness(memcg);
2264 2280
2265 sc->swappiness = mem_cgroup_swappiness(memcg); 2281 shrink_lruvec(lruvec, swappiness, sc);
2266 shrink_lruvec(lruvec, sc);
2267 2282
2268 /* 2283 /*
2269 * Direct reclaim and kswapd have to scan all memory 2284 * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2302,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
2287 sc->nr_scanned - nr_scanned, 2302 sc->nr_scanned - nr_scanned,
2288 sc->nr_reclaimed - nr_reclaimed); 2303 sc->nr_reclaimed - nr_reclaimed);
2289 2304
2305 if (sc->nr_reclaimed - nr_reclaimed)
2306 reclaimable = true;
2307
2290 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, 2308 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
2291 sc->nr_scanned - nr_scanned, sc)); 2309 sc->nr_scanned - nr_scanned, sc));
2310
2311 return reclaimable;
2292} 2312}
2293 2313
2294/* Returns true if compaction should go ahead for a high-order request */ 2314/* Returns true if compaction should go ahead for a high-order request */
2295static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) 2315static inline bool compaction_ready(struct zone *zone, int order)
2296{ 2316{
2297 unsigned long balance_gap, watermark; 2317 unsigned long balance_gap, watermark;
2298 bool watermark_ok; 2318 bool watermark_ok;
2299 2319
2300 /* Do not consider compaction for orders reclaim is meant to satisfy */
2301 if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
2302 return false;
2303
2304 /* 2320 /*
2305 * Compaction takes time to run and there are potentially other 2321 * Compaction takes time to run and there are potentially other
2306 * callers using the pages just freed. Continue reclaiming until 2322 * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2325,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2309 */ 2325 */
2310 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( 2326 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
2311 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); 2327 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
2312 watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); 2328 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
2313 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); 2329 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
2314 2330
2315 /* 2331 /*
2316 * If compaction is deferred, reclaim up to a point where 2332 * If compaction is deferred, reclaim up to a point where
2317 * compaction will have a chance of success when re-enabled 2333 * compaction will have a chance of success when re-enabled
2318 */ 2334 */
2319 if (compaction_deferred(zone, sc->order)) 2335 if (compaction_deferred(zone, order))
2320 return watermark_ok; 2336 return watermark_ok;
2321 2337
2322 /* If compaction is not ready to start, keep reclaiming */ 2338 /* If compaction is not ready to start, keep reclaiming */
2323 if (!compaction_suitable(zone, sc->order)) 2339 if (!compaction_suitable(zone, order))
2324 return false; 2340 return false;
2325 2341
2326 return watermark_ok; 2342 return watermark_ok;
@@ -2342,10 +2358,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2342 * If a zone is deemed to be full of pinned pages then just give it a light 2358 * If a zone is deemed to be full of pinned pages then just give it a light
2343 * scan then give up on it. 2359 * scan then give up on it.
2344 * 2360 *
2345 * This function returns true if a zone is being reclaimed for a costly 2361 * Returns true if a zone was reclaimable.
2346 * high-order allocation and compaction is ready to begin. This indicates to
2347 * the caller that it should consider retrying the allocation instead of
2348 * further reclaim.
2349 */ 2362 */
2350static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) 2363static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2351{ 2364{
@@ -2354,13 +2367,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2354 unsigned long nr_soft_reclaimed; 2367 unsigned long nr_soft_reclaimed;
2355 unsigned long nr_soft_scanned; 2368 unsigned long nr_soft_scanned;
2356 unsigned long lru_pages = 0; 2369 unsigned long lru_pages = 0;
2357 bool aborted_reclaim = false;
2358 struct reclaim_state *reclaim_state = current->reclaim_state; 2370 struct reclaim_state *reclaim_state = current->reclaim_state;
2359 gfp_t orig_mask; 2371 gfp_t orig_mask;
2360 struct shrink_control shrink = { 2372 struct shrink_control shrink = {
2361 .gfp_mask = sc->gfp_mask, 2373 .gfp_mask = sc->gfp_mask,
2362 }; 2374 };
2363 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask); 2375 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
2376 bool reclaimable = false;
2364 2377
2365 /* 2378 /*
2366 * If the number of buffer_heads in the machine exceeds the maximum 2379 * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2404,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2391 if (sc->priority != DEF_PRIORITY && 2404 if (sc->priority != DEF_PRIORITY &&
2392 !zone_reclaimable(zone)) 2405 !zone_reclaimable(zone))
2393 continue; /* Let kswapd poll it */ 2406 continue; /* Let kswapd poll it */
2394 if (IS_ENABLED(CONFIG_COMPACTION)) { 2407
2395 /* 2408 /*
2396 * If we already have plenty of memory free for 2409 * If we already have plenty of memory free for
2397 * compaction in this zone, don't free any more. 2410 * compaction in this zone, don't free any more.
2398 * Even though compaction is invoked for any 2411 * Even though compaction is invoked for any
2399 * non-zero order, only frequent costly order 2412 * non-zero order, only frequent costly order
2400 * reclamation is disruptive enough to become a 2413 * reclamation is disruptive enough to become a
2401 * noticeable problem, like transparent huge 2414 * noticeable problem, like transparent huge
2402 * page allocations. 2415 * page allocations.
2403 */ 2416 */
2404 if ((zonelist_zone_idx(z) <= requested_highidx) 2417 if (IS_ENABLED(CONFIG_COMPACTION) &&
2405 && compaction_ready(zone, sc)) { 2418 sc->order > PAGE_ALLOC_COSTLY_ORDER &&
2406 aborted_reclaim = true; 2419 zonelist_zone_idx(z) <= requested_highidx &&
2407 continue; 2420 compaction_ready(zone, sc->order)) {
2408 } 2421 sc->compaction_ready = true;
2422 continue;
2409 } 2423 }
2424
2410 /* 2425 /*
2411 * This steals pages from memory cgroups over softlimit 2426 * This steals pages from memory cgroups over softlimit
2412 * and returns the number of reclaimed pages and 2427 * and returns the number of reclaimed pages and
@@ -2419,10 +2434,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2419 &nr_soft_scanned); 2434 &nr_soft_scanned);
2420 sc->nr_reclaimed += nr_soft_reclaimed; 2435 sc->nr_reclaimed += nr_soft_reclaimed;
2421 sc->nr_scanned += nr_soft_scanned; 2436 sc->nr_scanned += nr_soft_scanned;
2437 if (nr_soft_reclaimed)
2438 reclaimable = true;
2422 /* need some check to avoid more shrink_zone() */ 2439 /* need some check to avoid more shrink_zone() */
2423 } 2440 }
2424 2441
2425 shrink_zone(zone, sc); 2442 if (shrink_zone(zone, sc))
2443 reclaimable = true;
2444
2445 if (global_reclaim(sc) &&
2446 !reclaimable && zone_reclaimable(zone))
2447 reclaimable = true;
2426 } 2448 }
2427 2449
2428 /* 2450 /*
@@ -2445,27 +2467,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2445 */ 2467 */
2446 sc->gfp_mask = orig_mask; 2468 sc->gfp_mask = orig_mask;
2447 2469
2448 return aborted_reclaim; 2470 return reclaimable;
2449}
2450
2451/* All zones in zonelist are unreclaimable? */
2452static bool all_unreclaimable(struct zonelist *zonelist,
2453 struct scan_control *sc)
2454{
2455 struct zoneref *z;
2456 struct zone *zone;
2457
2458 for_each_zone_zonelist_nodemask(zone, z, zonelist,
2459 gfp_zone(sc->gfp_mask), sc->nodemask) {
2460 if (!populated_zone(zone))
2461 continue;
2462 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2463 continue;
2464 if (zone_reclaimable(zone))
2465 return false;
2466 }
2467
2468 return true;
2469} 2471}
2470 2472
2471/* 2473/*
@@ -2489,7 +2491,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2489{ 2491{
2490 unsigned long total_scanned = 0; 2492 unsigned long total_scanned = 0;
2491 unsigned long writeback_threshold; 2493 unsigned long writeback_threshold;
2492 bool aborted_reclaim; 2494 bool zones_reclaimable;
2493 2495
2494 delayacct_freepages_start(); 2496 delayacct_freepages_start();
2495 2497
@@ -2500,11 +2502,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2500 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, 2502 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
2501 sc->priority); 2503 sc->priority);
2502 sc->nr_scanned = 0; 2504 sc->nr_scanned = 0;
2503 aborted_reclaim = shrink_zones(zonelist, sc); 2505 zones_reclaimable = shrink_zones(zonelist, sc);
2504 2506
2505 total_scanned += sc->nr_scanned; 2507 total_scanned += sc->nr_scanned;
2506 if (sc->nr_reclaimed >= sc->nr_to_reclaim) 2508 if (sc->nr_reclaimed >= sc->nr_to_reclaim)
2507 goto out; 2509 break;
2510
2511 if (sc->compaction_ready)
2512 break;
2508 2513
2509 /* 2514 /*
2510 * If we're getting trouble reclaiming, start doing 2515 * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2531,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2526 WB_REASON_TRY_TO_FREE_PAGES); 2531 WB_REASON_TRY_TO_FREE_PAGES);
2527 sc->may_writepage = 1; 2532 sc->may_writepage = 1;
2528 } 2533 }
2529 } while (--sc->priority >= 0 && !aborted_reclaim); 2534 } while (--sc->priority >= 0);
2530 2535
2531out:
2532 delayacct_freepages_end(); 2536 delayacct_freepages_end();
2533 2537
2534 if (sc->nr_reclaimed) 2538 if (sc->nr_reclaimed)
2535 return sc->nr_reclaimed; 2539 return sc->nr_reclaimed;
2536 2540
2537 /*
2538 * As hibernation is going on, kswapd is freezed so that it can't mark
2539 * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
2540 * check.
2541 */
2542 if (oom_killer_disabled)
2543 return 0;
2544
2545 /* Aborted reclaim to try compaction? don't OOM, then */ 2541 /* Aborted reclaim to try compaction? don't OOM, then */
2546 if (aborted_reclaim) 2542 if (sc->compaction_ready)
2547 return 1; 2543 return 1;
2548 2544
2549 /* top priority shrink_zones still had more to do? don't OOM, then */ 2545 /* Any of the zones still reclaimable? Don't OOM. */
2550 if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) 2546 if (zones_reclaimable)
2551 return 1; 2547 return 1;
2552 2548
2553 return 0; 2549 return 0;
@@ -2684,15 +2680,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2684{ 2680{
2685 unsigned long nr_reclaimed; 2681 unsigned long nr_reclaimed;
2686 struct scan_control sc = { 2682 struct scan_control sc = {
2683 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2687 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 2684 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
2685 .order = order,
2686 .nodemask = nodemask,
2687 .priority = DEF_PRIORITY,
2688 .may_writepage = !laptop_mode, 2688 .may_writepage = !laptop_mode,
2689 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2690 .may_unmap = 1, 2689 .may_unmap = 1,
2691 .may_swap = 1, 2690 .may_swap = 1,
2692 .order = order,
2693 .priority = DEF_PRIORITY,
2694 .target_mem_cgroup = NULL,
2695 .nodemask = nodemask,
2696 }; 2691 };
2697 2692
2698 /* 2693 /*
@@ -2722,17 +2717,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2722 unsigned long *nr_scanned) 2717 unsigned long *nr_scanned)
2723{ 2718{
2724 struct scan_control sc = { 2719 struct scan_control sc = {
2725 .nr_scanned = 0,
2726 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2720 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2721 .target_mem_cgroup = memcg,
2727 .may_writepage = !laptop_mode, 2722 .may_writepage = !laptop_mode,
2728 .may_unmap = 1, 2723 .may_unmap = 1,
2729 .may_swap = !noswap, 2724 .may_swap = !noswap,
2730 .order = 0,
2731 .priority = 0,
2732 .swappiness = mem_cgroup_swappiness(memcg),
2733 .target_mem_cgroup = memcg,
2734 }; 2725 };
2735 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2726 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2727 int swappiness = mem_cgroup_swappiness(memcg);
2736 2728
2737 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2729 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2738 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2730 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2740,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2748 * will pick up pages from other mem cgroup's as well. We hack 2740 * will pick up pages from other mem cgroup's as well. We hack
2749 * the priority and make it zero. 2741 * the priority and make it zero.
2750 */ 2742 */
2751 shrink_lruvec(lruvec, &sc); 2743 shrink_lruvec(lruvec, swappiness, &sc);
2752 2744
2753 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2745 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2754 2746
@@ -2764,16 +2756,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2764 unsigned long nr_reclaimed; 2756 unsigned long nr_reclaimed;
2765 int nid; 2757 int nid;
2766 struct scan_control sc = { 2758 struct scan_control sc = {
2767 .may_writepage = !laptop_mode,
2768 .may_unmap = 1,
2769 .may_swap = !noswap,
2770 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2759 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2771 .order = 0,
2772 .priority = DEF_PRIORITY,
2773 .target_mem_cgroup = memcg,
2774 .nodemask = NULL, /* we don't care the placement */
2775 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2760 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2776 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2761 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
2762 .target_mem_cgroup = memcg,
2763 .priority = DEF_PRIORITY,
2764 .may_writepage = !laptop_mode,
2765 .may_unmap = 1,
2766 .may_swap = !noswap,
2777 }; 2767 };
2778 2768
2779 /* 2769 /*
@@ -3031,12 +3021,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
3031 unsigned long nr_soft_scanned; 3021 unsigned long nr_soft_scanned;
3032 struct scan_control sc = { 3022 struct scan_control sc = {
3033 .gfp_mask = GFP_KERNEL, 3023 .gfp_mask = GFP_KERNEL,
3024 .order = order,
3034 .priority = DEF_PRIORITY, 3025 .priority = DEF_PRIORITY,
3026 .may_writepage = !laptop_mode,
3035 .may_unmap = 1, 3027 .may_unmap = 1,
3036 .may_swap = 1, 3028 .may_swap = 1,
3037 .may_writepage = !laptop_mode,
3038 .order = order,
3039 .target_mem_cgroup = NULL,
3040 }; 3029 };
3041 count_vm_event(PAGEOUTRUN); 3030 count_vm_event(PAGEOUTRUN);
3042 3031
@@ -3417,14 +3406,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
3417{ 3406{
3418 struct reclaim_state reclaim_state; 3407 struct reclaim_state reclaim_state;
3419 struct scan_control sc = { 3408 struct scan_control sc = {
3409 .nr_to_reclaim = nr_to_reclaim,
3420 .gfp_mask = GFP_HIGHUSER_MOVABLE, 3410 .gfp_mask = GFP_HIGHUSER_MOVABLE,
3421 .may_swap = 1, 3411 .priority = DEF_PRIORITY,
3422 .may_unmap = 1,
3423 .may_writepage = 1, 3412 .may_writepage = 1,
3424 .nr_to_reclaim = nr_to_reclaim, 3413 .may_unmap = 1,
3414 .may_swap = 1,
3425 .hibernation_mode = 1, 3415 .hibernation_mode = 1,
3426 .order = 0,
3427 .priority = DEF_PRIORITY,
3428 }; 3416 };
3429 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); 3417 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
3430 struct task_struct *p = current; 3418 struct task_struct *p = current;
@@ -3604,13 +3592,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3604 struct task_struct *p = current; 3592 struct task_struct *p = current;
3605 struct reclaim_state reclaim_state; 3593 struct reclaim_state reclaim_state;
3606 struct scan_control sc = { 3594 struct scan_control sc = {
3607 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
3608 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
3609 .may_swap = 1,
3610 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), 3595 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
3611 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 3596 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
3612 .order = order, 3597 .order = order,
3613 .priority = ZONE_RECLAIM_PRIORITY, 3598 .priority = ZONE_RECLAIM_PRIORITY,
3599 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
3600 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
3601 .may_swap = 1,
3614 }; 3602 };
3615 struct shrink_control shrink = { 3603 struct shrink_control shrink = {
3616 .gfp_mask = sc.gfp_mask, 3604 .gfp_mask = sc.gfp_mask,
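
The scan_control rework above turns the may_writepage/may_unmap/may_swap flags into single-bit bitfields and drops the explicit zero assignments (.order = 0, .nodemask = NULL, .target_mem_cgroup = NULL and so on) from the initializers. That is safe because C designated initializers zero-fill every member that is not named. A minimal standalone sketch of that guarantee, using an invented struct rather than the kernel's scan_control:

#include <stdio.h>

struct ctl {
	unsigned long nr_to_reclaim;
	int order;
	unsigned int may_swap:1;	/* single-bit flag, as in the patch */
	unsigned int may_unmap:1;
};

int main(void)
{
	/* Members not named here (order, may_unmap) are zero-initialized. */
	struct ctl sc = {
		.nr_to_reclaim = 32,
		.may_swap = 1,
	};

	printf("order=%d may_unmap=%u\n", sc.order, sc.may_unmap);	/* prints 0 0 */
	return 0;
}
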
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b37bd49bfd55..e9ab104b956f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
200 continue; 200 continue;
201 201
202 threshold = (*calculate_pressure)(zone); 202 threshold = (*calculate_pressure)(zone);
203 for_each_possible_cpu(cpu) 203 for_each_online_cpu(cpu)
204 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 204 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
205 = threshold; 205 = threshold;
206 } 206 }
@@ -763,6 +763,7 @@ const char * const vmstat_text[] = {
763 "nr_shmem", 763 "nr_shmem",
764 "nr_dirtied", 764 "nr_dirtied",
765 "nr_written", 765 "nr_written",
766 "nr_pages_scanned",
766 767
767#ifdef CONFIG_NUMA 768#ifdef CONFIG_NUMA
768 "numa_hit", 769 "numa_hit",
@@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1067 min_wmark_pages(zone), 1068 min_wmark_pages(zone),
1068 low_wmark_pages(zone), 1069 low_wmark_pages(zone),
1069 high_wmark_pages(zone), 1070 high_wmark_pages(zone),
1070 zone->pages_scanned, 1071 zone_page_state(zone, NR_PAGES_SCANNED),
1071 zone->spanned_pages, 1072 zone->spanned_pages,
1072 zone->present_pages, 1073 zone->present_pages,
1073 zone->managed_pages); 1074 zone->managed_pages);
@@ -1077,10 +1078,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1077 zone_page_state(zone, i)); 1078 zone_page_state(zone, i));
1078 1079
1079 seq_printf(m, 1080 seq_printf(m,
1080 "\n protection: (%lu", 1081 "\n protection: (%ld",
1081 zone->lowmem_reserve[0]); 1082 zone->lowmem_reserve[0]);
1082 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 1083 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1083 seq_printf(m, ", %lu", zone->lowmem_reserve[i]); 1084 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1084 seq_printf(m, 1085 seq_printf(m,
1085 ")" 1086 ")"
1086 "\n pagesets"); 1087 "\n pagesets");
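
The zoneinfo change from "%lu" to "%ld" for the lowmem_reserve (protection) values matters because printing a signed quantity with an unsigned conversion turns a negative value into a huge positive one; the switch suggests these values are treated as signed here. A tiny standalone illustration, unrelated to kernel internals:

#include <stdio.h>

int main(void)
{
	long reserve = -1;	/* e.g. a protection value that has gone negative */

	/* What an unsigned conversion shows for that bit pattern on 64-bit: */
	printf("unsigned: %lu\n", (unsigned long)reserve);	/* 18446744073709551615 */
	/* What the signed conversion shows: */
	printf("signed:   %ld\n", reserve);			/* -1 */
	return 0;
}
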
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..a05790b1915e 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/spinlock.h> 52#include <linux/spinlock.h>
53#include <linux/zbud.h> 53#include <linux/zbud.h>
54#include <linux/zpool.h>
54 55
55/***************** 56/*****************
56 * Structures 57 * Structures
@@ -113,6 +114,90 @@ struct zbud_header {
113}; 114};
114 115
115/***************** 116/*****************
117 * zpool
118 ****************/
119
120#ifdef CONFIG_ZPOOL
121
122static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
123{
124 return zpool_evict(pool, handle);
125}
126
127static struct zbud_ops zbud_zpool_ops = {
128 .evict = zbud_zpool_evict
129};
130
131static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
132{
133 return zbud_create_pool(gfp, &zbud_zpool_ops);
134}
135
136static void zbud_zpool_destroy(void *pool)
137{
138 zbud_destroy_pool(pool);
139}
140
141static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
142 unsigned long *handle)
143{
144 return zbud_alloc(pool, size, gfp, handle);
145}
146static void zbud_zpool_free(void *pool, unsigned long handle)
147{
148 zbud_free(pool, handle);
149}
150
151static int zbud_zpool_shrink(void *pool, unsigned int pages,
152 unsigned int *reclaimed)
153{
154 unsigned int total = 0;
155 int ret = -EINVAL;
156
157 while (total < pages) {
158 ret = zbud_reclaim_page(pool, 8);
159 if (ret < 0)
160 break;
161 total++;
162 }
163
164 if (reclaimed)
165 *reclaimed = total;
166
167 return ret;
168}
169
170static void *zbud_zpool_map(void *pool, unsigned long handle,
171 enum zpool_mapmode mm)
172{
173 return zbud_map(pool, handle);
174}
175static void zbud_zpool_unmap(void *pool, unsigned long handle)
176{
177 zbud_unmap(pool, handle);
178}
179
180static u64 zbud_zpool_total_size(void *pool)
181{
182 return zbud_get_pool_size(pool) * PAGE_SIZE;
183}
184
185static struct zpool_driver zbud_zpool_driver = {
186 .type = "zbud",
187 .owner = THIS_MODULE,
188 .create = zbud_zpool_create,
189 .destroy = zbud_zpool_destroy,
190 .malloc = zbud_zpool_malloc,
191 .free = zbud_zpool_free,
192 .shrink = zbud_zpool_shrink,
193 .map = zbud_zpool_map,
194 .unmap = zbud_zpool_unmap,
195 .total_size = zbud_zpool_total_size,
196};
197
198#endif /* CONFIG_ZPOOL */
199
200/*****************
116 * Helpers 201 * Helpers
117*****************/ 202*****************/
118/* Just to make the code easier to read */ 203/* Just to make the code easier to read */
@@ -122,7 +207,7 @@ enum buddy {
122}; 207};
123 208
124/* Converts an allocation size in bytes to size in zbud chunks */ 209/* Converts an allocation size in bytes to size in zbud chunks */
125static int size_to_chunks(int size) 210static int size_to_chunks(size_t size)
126{ 211{
127 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 212 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
128} 213}
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
247 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 332 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
248 * a new page. 333 * a new page.
249 */ 334 */
250int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 335int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
251 unsigned long *handle) 336 unsigned long *handle)
252{ 337{
253 int chunks, i, freechunks; 338 int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
511 /* Make sure the zbud header will fit in one chunk */ 596 /* Make sure the zbud header will fit in one chunk */
512 BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED); 597 BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
513 pr_info("loaded\n"); 598 pr_info("loaded\n");
599
600#ifdef CONFIG_ZPOOL
601 zpool_register_driver(&zbud_zpool_driver);
602#endif
603
514 return 0; 604 return 0;
515} 605}
516 606
517static void __exit exit_zbud(void) 607static void __exit exit_zbud(void)
518{ 608{
609#ifdef CONFIG_ZPOOL
610 zpool_unregister_driver(&zbud_zpool_driver);
611#endif
612
519 pr_info("unloaded\n"); 613 pr_info("unloaded\n");
520} 614}
521 615
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..e40612a1df00
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,364 @@
1/*
2 * zpool memory storage api
3 *
4 * Copyright (C) 2014 Dan Streetman
5 *
6 * This is a common frontend for memory storage pool implementations.
7 * Typically, this is used to store compressed memory.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/list.h>
13#include <linux/types.h>
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/spinlock.h>
17#include <linux/module.h>
18#include <linux/zpool.h>
19
20struct zpool {
21 char *type;
22
23 struct zpool_driver *driver;
24 void *pool;
25 struct zpool_ops *ops;
26
27 struct list_head list;
28};
29
30static LIST_HEAD(drivers_head);
31static DEFINE_SPINLOCK(drivers_lock);
32
33static LIST_HEAD(pools_head);
34static DEFINE_SPINLOCK(pools_lock);
35
36/**
37 * zpool_register_driver() - register a zpool implementation.
38 * @driver: driver to register
39 */
40void zpool_register_driver(struct zpool_driver *driver)
41{
42 spin_lock(&drivers_lock);
43 atomic_set(&driver->refcount, 0);
44 list_add(&driver->list, &drivers_head);
45 spin_unlock(&drivers_lock);
46}
47EXPORT_SYMBOL(zpool_register_driver);
48
49/**
50 * zpool_unregister_driver() - unregister a zpool implementation.
51 * @driver: driver to unregister.
52 *
53 * Module usage counting is used to prevent using a driver
54 * while/after unloading, so if this is called from module
55 * exit function, this should never fail; if called from
56 * other than the module exit function, and this returns
57 * failure, the driver is in use and must remain available.
58 */
59int zpool_unregister_driver(struct zpool_driver *driver)
60{
61 int ret = 0, refcount;
62
63 spin_lock(&drivers_lock);
64 refcount = atomic_read(&driver->refcount);
65 WARN_ON(refcount < 0);
66 if (refcount > 0)
67 ret = -EBUSY;
68 else
69 list_del(&driver->list);
70 spin_unlock(&drivers_lock);
71
72 return ret;
73}
74EXPORT_SYMBOL(zpool_unregister_driver);
75
76/**
77 * zpool_evict() - evict callback from a zpool implementation.
78 * @pool: pool to evict from.
79 * @handle: handle to evict.
80 *
81 * This can be used by zpool implementations to call the
82 * user's evict zpool_ops struct evict callback.
83 */
84int zpool_evict(void *pool, unsigned long handle)
85{
86 struct zpool *zpool;
87
88 spin_lock(&pools_lock);
89 list_for_each_entry(zpool, &pools_head, list) {
90 if (zpool->pool == pool) {
91 spin_unlock(&pools_lock);
92 if (!zpool->ops || !zpool->ops->evict)
93 return -EINVAL;
94 return zpool->ops->evict(zpool, handle);
95 }
96 }
97 spin_unlock(&pools_lock);
98
99 return -ENOENT;
100}
101EXPORT_SYMBOL(zpool_evict);
102
103static struct zpool_driver *zpool_get_driver(char *type)
104{
105 struct zpool_driver *driver;
106
107 spin_lock(&drivers_lock);
108 list_for_each_entry(driver, &drivers_head, list) {
109 if (!strcmp(driver->type, type)) {
110 bool got = try_module_get(driver->owner);
111
112 if (got)
113 atomic_inc(&driver->refcount);
114 spin_unlock(&drivers_lock);
115 return got ? driver : NULL;
116 }
117 }
118
119 spin_unlock(&drivers_lock);
120 return NULL;
121}
122
123static void zpool_put_driver(struct zpool_driver *driver)
124{
125 atomic_dec(&driver->refcount);
126 module_put(driver->owner);
127}
128
129/**
130 * zpool_create_pool() - Create a new zpool
131 * @type The type of the zpool to create (e.g. zbud, zsmalloc)
132 * @gfp The GFP flags to use when allocating the pool.
133 * @ops The optional ops callback.
134 *
135 * This creates a new zpool of the specified type. The gfp flags will be
136 * used when allocating memory, if the implementation supports it. If the
137 * ops param is NULL, then the created zpool will not be shrinkable.
138 *
139 * Implementations must guarantee this to be thread-safe.
140 *
141 * Returns: New zpool on success, NULL on failure.
142 */
143struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
144{
145 struct zpool_driver *driver;
146 struct zpool *zpool;
147
148 pr_info("creating pool type %s\n", type);
149
150 driver = zpool_get_driver(type);
151
152 if (!driver) {
153 request_module(type);
154 driver = zpool_get_driver(type);
155 }
156
157 if (!driver) {
158 pr_err("no driver for type %s\n", type);
159 return NULL;
160 }
161
162 zpool = kmalloc(sizeof(*zpool), gfp);
163 if (!zpool) {
164 pr_err("couldn't create zpool - out of memory\n");
165 zpool_put_driver(driver);
166 return NULL;
167 }
168
169 zpool->type = driver->type;
170 zpool->driver = driver;
171 zpool->pool = driver->create(gfp, ops);
172 zpool->ops = ops;
173
174 if (!zpool->pool) {
175 pr_err("couldn't create %s pool\n", type);
176 zpool_put_driver(driver);
177 kfree(zpool);
178 return NULL;
179 }
180
181 pr_info("created %s pool\n", type);
182
183 spin_lock(&pools_lock);
184 list_add(&zpool->list, &pools_head);
185 spin_unlock(&pools_lock);
186
187 return zpool;
188}
189
190/**
191 * zpool_destroy_pool() - Destroy a zpool
192 * @pool The zpool to destroy.
193 *
194 * Implementations must guarantee this to be thread-safe,
195 * however only when destroying different pools. The same
196 * pool should only be destroyed once, and should not be used
197 * after it is destroyed.
198 *
199 * This destroys an existing zpool. The zpool should not be in use.
200 */
201void zpool_destroy_pool(struct zpool *zpool)
202{
203 pr_info("destroying pool type %s\n", zpool->type);
204
205 spin_lock(&pools_lock);
206 list_del(&zpool->list);
207 spin_unlock(&pools_lock);
208 zpool->driver->destroy(zpool->pool);
209 zpool_put_driver(zpool->driver);
210 kfree(zpool);
211}
212
213/**
214 * zpool_get_type() - Get the type of the zpool
215 * @pool The zpool to check
216 *
217 * This returns the type of the pool.
218 *
219 * Implementations must guarantee this to be thread-safe.
220 *
221 * Returns: The type of zpool.
222 */
223char *zpool_get_type(struct zpool *zpool)
224{
225 return zpool->type;
226}
227
228/**
229 * zpool_malloc() - Allocate memory
230 * @pool The zpool to allocate from.
231 * @size The amount of memory to allocate.
232 * @gfp The GFP flags to use when allocating memory.
233 * @handle Pointer to the handle to set
234 *
235 * This allocates the requested amount of memory from the pool.
236 * The gfp flags will be used when allocating memory, if the
237 * implementation supports it. The provided @handle will be
238 * set to the allocated object handle.
239 *
240 * Implementations must guarantee this to be thread-safe.
241 *
242 * Returns: 0 on success, negative value on error.
243 */
244int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
245 unsigned long *handle)
246{
247 return zpool->driver->malloc(zpool->pool, size, gfp, handle);
248}
249
250/**
251 * zpool_free() - Free previously allocated memory
252 * @pool The zpool that allocated the memory.
253 * @handle The handle to the memory to free.
254 *
255 * This frees previously allocated memory. This does not guarantee
256 * that the pool will actually free memory, only that the memory
257 * in the pool will become available for use by the pool.
258 *
259 * Implementations must guarantee this to be thread-safe,
260 * however only when freeing different handles. The same
261 * handle should only be freed once, and should not be used
262 * after freeing.
263 */
264void zpool_free(struct zpool *zpool, unsigned long handle)
265{
266 zpool->driver->free(zpool->pool, handle);
267}
268
269/**
270 * zpool_shrink() - Shrink the pool size
271 * @pool The zpool to shrink.
272 * @pages The number of pages to shrink the pool.
273 * @reclaimed The number of pages successfully evicted.
274 *
275 * This attempts to shrink the actual memory size of the pool
276 * by evicting currently used handle(s). If the pool was
277 * created with no zpool_ops, or the evict call fails for any
278 * of the handles, this will fail. If non-NULL, the @reclaimed
279 * parameter will be set to the number of pages reclaimed,
280 * which may be more than the number of pages requested.
281 *
282 * Implementations must guarantee this to be thread-safe.
283 *
284 * Returns: 0 on success, negative value on error/failure.
285 */
286int zpool_shrink(struct zpool *zpool, unsigned int pages,
287 unsigned int *reclaimed)
288{
289 return zpool->driver->shrink(zpool->pool, pages, reclaimed);
290}
291
292/**
293 * zpool_map_handle() - Map a previously allocated handle into memory
294 * @pool The zpool that the handle was allocated from
295 * @handle The handle to map
296 * @mm How the memory should be mapped
297 *
298 * This maps a previously allocated handle into memory. The @mm
299 * param indicates to the implementation how the memory will be
300 * used, i.e. read-only, write-only, read-write. If the
301 * implementation does not support it, the memory will be treated
302 * as read-write.
303 *
304 * This may hold locks, disable interrupts, and/or preemption,
305 * and the zpool_unmap_handle() must be called to undo those
306 * actions. The code that uses the mapped handle should complete
307 * its operations on the mapped handle memory quickly and unmap
308 * as soon as possible. As the implementation may use per-cpu
309 * data, multiple handles should not be mapped concurrently on
310 * any cpu.
311 *
312 * Returns: A pointer to the handle's mapped memory area.
313 */
314void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
315 enum zpool_mapmode mapmode)
316{
317 return zpool->driver->map(zpool->pool, handle, mapmode);
318}
319
320/**
321 * zpool_unmap_handle() - Unmap a previously mapped handle
322 * @pool The zpool that the handle was allocated from
323 * @handle The handle to unmap
324 *
325 * This unmaps a previously mapped handle. Any locks or other
326 * actions that the implementation took in zpool_map_handle()
327 * will be undone here. The memory area returned from
328 * zpool_map_handle() should no longer be used after this.
329 */
330void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
331{
332 zpool->driver->unmap(zpool->pool, handle);
333}
334
335/**
336 * zpool_get_total_size() - The total size of the pool
337 * @pool The zpool to check
338 *
339 * This returns the total size in bytes of the pool.
340 *
341 * Returns: Total size of the zpool in bytes.
342 */
343u64 zpool_get_total_size(struct zpool *zpool)
344{
345 return zpool->driver->total_size(zpool->pool);
346}
347
348static int __init init_zpool(void)
349{
350 pr_info("loaded\n");
351 return 0;
352}
353
354static void __exit exit_zpool(void)
355{
356 pr_info("unloaded\n");
357}
358
359module_init(init_zpool);
360module_exit(exit_zpool);
361
362MODULE_LICENSE("GPL");
363MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
364MODULE_DESCRIPTION("Common API for compressed memory storage");
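
The kernel-doc above amounts to a small create/allocate/map/unmap lifecycle. A minimal sketch of a hypothetical in-kernel client of this API, assuming CONFIG_ZPOOL and a registered "zbud" backend; the function and buffer names are invented for illustration and error handling is abbreviated:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zpool.h>

static int store_blob(const void *src, size_t len)
{
	struct zpool *pool;
	unsigned long handle;
	void *dst;

	/* NULL ops means the pool will not be shrinkable. */
	pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
	if (!pool)
		return -ENOMEM;

	if (zpool_malloc(pool, len, __GFP_NORETRY | __GFP_NOWARN, &handle)) {
		zpool_destroy_pool(pool);
		return -ENOMEM;
	}

	/* Map write-only, copy the data in, and unmap promptly. */
	dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
	memcpy(dst, src, len);
	zpool_unmap_handle(pool, handle);

	pr_info("pool now holds %llu bytes\n",
		(unsigned long long)zpool_get_total_size(pool));

	zpool_free(pool, handle);
	zpool_destroy_pool(pool);
	return 0;
}

zswap, converted below, follows the same pattern, except that it keeps one long-lived pool and passes a zpool_ops with an evict callback so the pool can be shrunk.
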
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..4e2fc83cb394 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
92#include <linux/spinlock.h> 92#include <linux/spinlock.h>
93#include <linux/types.h> 93#include <linux/types.h>
94#include <linux/zsmalloc.h> 94#include <linux/zsmalloc.h>
95#include <linux/zpool.h>
95 96
96/* 97/*
97 * This must be a power of 2 and greater than or equal to sizeof(link_free). 98 * This must be a power of 2 and greater than or equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
240 enum zs_mapmode vm_mm; /* mapping mode */ 241 enum zs_mapmode vm_mm; /* mapping mode */
241}; 242};
242 243
244/* zpool driver */
245
246#ifdef CONFIG_ZPOOL
247
248static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
249{
250 return zs_create_pool(gfp);
251}
252
253static void zs_zpool_destroy(void *pool)
254{
255 zs_destroy_pool(pool);
256}
257
258static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
259 unsigned long *handle)
260{
261 *handle = zs_malloc(pool, size);
262 return *handle ? 0 : -1;
263}
264static void zs_zpool_free(void *pool, unsigned long handle)
265{
266 zs_free(pool, handle);
267}
268
269static int zs_zpool_shrink(void *pool, unsigned int pages,
270 unsigned int *reclaimed)
271{
272 return -EINVAL;
273}
274
275static void *zs_zpool_map(void *pool, unsigned long handle,
276 enum zpool_mapmode mm)
277{
278 enum zs_mapmode zs_mm;
279
280 switch (mm) {
281 case ZPOOL_MM_RO:
282 zs_mm = ZS_MM_RO;
283 break;
284 case ZPOOL_MM_WO:
285 zs_mm = ZS_MM_WO;
286 break;
287 case ZPOOL_MM_RW: /* fallthru */
288 default:
289 zs_mm = ZS_MM_RW;
290 break;
291 }
292
293 return zs_map_object(pool, handle, zs_mm);
294}
295static void zs_zpool_unmap(void *pool, unsigned long handle)
296{
297 zs_unmap_object(pool, handle);
298}
299
300static u64 zs_zpool_total_size(void *pool)
301{
302 return zs_get_total_size_bytes(pool);
303}
304
305static struct zpool_driver zs_zpool_driver = {
306 .type = "zsmalloc",
307 .owner = THIS_MODULE,
308 .create = zs_zpool_create,
309 .destroy = zs_zpool_destroy,
310 .malloc = zs_zpool_malloc,
311 .free = zs_zpool_free,
312 .shrink = zs_zpool_shrink,
313 .map = zs_zpool_map,
314 .unmap = zs_zpool_unmap,
315 .total_size = zs_zpool_total_size,
316};
317
318#endif /* CONFIG_ZPOOL */
243 319
244/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 320/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
245static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 321static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
690static inline void *__zs_map_object(struct mapping_area *area, 766static inline void *__zs_map_object(struct mapping_area *area,
691 struct page *pages[2], int off, int size) 767 struct page *pages[2], int off, int size)
692{ 768{
693 BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); 769 BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
694 area->vm_addr = area->vm->addr; 770 area->vm_addr = area->vm->addr;
695 return area->vm_addr + off; 771 return area->vm_addr + off;
696} 772}
@@ -814,6 +890,10 @@ static void zs_exit(void)
814{ 890{
815 int cpu; 891 int cpu;
816 892
893#ifdef CONFIG_ZPOOL
894 zpool_unregister_driver(&zs_zpool_driver);
895#endif
896
817 cpu_notifier_register_begin(); 897 cpu_notifier_register_begin();
818 898
819 for_each_online_cpu(cpu) 899 for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
840 920
841 cpu_notifier_register_done(); 921 cpu_notifier_register_done();
842 922
923#ifdef CONFIG_ZPOOL
924 zpool_register_driver(&zs_zpool_driver);
925#endif
926
843 return 0; 927 return 0;
844fail: 928fail:
845 zs_exit(); 929 zs_exit();
diff --git a/mm/zswap.c b/mm/zswap.c
index 008388fe7b0f..032c21eeab2b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
34#include <linux/swap.h> 34#include <linux/swap.h>
35#include <linux/crypto.h> 35#include <linux/crypto.h>
36#include <linux/mempool.h> 36#include <linux/mempool.h>
37#include <linux/zbud.h> 37#include <linux/zpool.h>
38 38
39#include <linux/mm_types.h> 39#include <linux/mm_types.h>
40#include <linux/page-flags.h> 40#include <linux/page-flags.h>
@@ -45,8 +45,8 @@
45/********************************* 45/*********************************
46* statistics 46* statistics
47**********************************/ 47**********************************/
48/* Number of memory pages used by the compressed pool */ 48/* Total bytes used by the compressed storage */
49static u64 zswap_pool_pages; 49static u64 zswap_pool_total_size;
50/* The number of compressed pages currently stored in zswap */ 50/* The number of compressed pages currently stored in zswap */
51static atomic_t zswap_stored_pages = ATOMIC_INIT(0); 51static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
52 52
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
89module_param_named(max_pool_percent, 89module_param_named(max_pool_percent,
90 zswap_max_pool_percent, uint, 0644); 90 zswap_max_pool_percent, uint, 0644);
91 91
92/* zbud_pool is shared by all of zswap backend */ 92/* Compressed storage to use */
93static struct zbud_pool *zswap_pool; 93#define ZSWAP_ZPOOL_DEFAULT "zbud"
94static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
95module_param_named(zpool, zswap_zpool_type, charp, 0444);
96
97/* zpool is shared by all of zswap backend */
98static struct zpool *zswap_pool;
94 99
95/********************************* 100/*********************************
96* compression functions 101* compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
168 * be held while changing the refcount. Since the lock must 173 * be held while changing the refcount. Since the lock must
169 * be held, there is no reason to also make refcount atomic. 174 * be held, there is no reason to also make refcount atomic.
170 * offset - the swap offset for the entry. Index into the red-black tree. 175 * offset - the swap offset for the entry. Index into the red-black tree.
171 * handle - zbud allocation handle that stores the compressed page data 176 * handle - zpool allocation handle that stores the compressed page data
172 * length - the length in bytes of the compressed page data. Needed during 177 * length - the length in bytes of the compressed page data. Needed during
173 * decompression 178 * decompression
174 */ 179 */
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
284} 289}
285 290
286/* 291/*
287 * Carries out the common pattern of freeing an entry's zbud allocation, 292 * Carries out the common pattern of freeing an entry's zpool allocation,
288 * freeing the entry itself, and decrementing the number of stored pages. 293 * freeing the entry itself, and decrementing the number of stored pages.
289 */ 294 */
290static void zswap_free_entry(struct zswap_entry *entry) 295static void zswap_free_entry(struct zswap_entry *entry)
291{ 296{
292 zbud_free(zswap_pool, entry->handle); 297 zpool_free(zswap_pool, entry->handle);
293 zswap_entry_cache_free(entry); 298 zswap_entry_cache_free(entry);
294 atomic_dec(&zswap_stored_pages); 299 atomic_dec(&zswap_stored_pages);
295 zswap_pool_pages = zbud_get_pool_size(zswap_pool); 300 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
296} 301}
297 302
298/* caller must hold the tree lock */ 303/* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
409static bool zswap_is_full(void) 414static bool zswap_is_full(void)
410{ 415{
411 return totalram_pages * zswap_max_pool_percent / 100 < 416 return totalram_pages * zswap_max_pool_percent / 100 <
412 zswap_pool_pages; 417 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
413} 418}
414 419
415/********************************* 420/*********************************
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
525 * the swap cache, the compressed version stored by zswap can be 530 * the swap cache, the compressed version stored by zswap can be
526 * freed. 531 * freed.
527 */ 532 */
528static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle) 533static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
529{ 534{
530 struct zswap_header *zhdr; 535 struct zswap_header *zhdr;
531 swp_entry_t swpentry; 536 swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
541 }; 546 };
542 547
543 /* extract swpentry from data */ 548 /* extract swpentry from data */
544 zhdr = zbud_map(pool, handle); 549 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
545 swpentry = zhdr->swpentry; /* here */ 550 swpentry = zhdr->swpentry; /* here */
546 zbud_unmap(pool, handle); 551 zpool_unmap_handle(pool, handle);
547 tree = zswap_trees[swp_type(swpentry)]; 552 tree = zswap_trees[swp_type(swpentry)];
548 offset = swp_offset(swpentry); 553 offset = swp_offset(swpentry);
549 554
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 578 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
574 /* decompress */ 579 /* decompress */
575 dlen = PAGE_SIZE; 580 dlen = PAGE_SIZE;
576 src = (u8 *)zbud_map(zswap_pool, entry->handle) + 581 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
577 sizeof(struct zswap_header); 582 ZPOOL_MM_RO) + sizeof(struct zswap_header);
578 dst = kmap_atomic(page); 583 dst = kmap_atomic(page);
579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 584 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
580 entry->length, dst, &dlen); 585 entry->length, dst, &dlen);
581 kunmap_atomic(dst); 586 kunmap_atomic(dst);
582 zbud_unmap(zswap_pool, entry->handle); 587 zpool_unmap_handle(zswap_pool, entry->handle);
583 BUG_ON(ret); 588 BUG_ON(ret);
584 BUG_ON(dlen != PAGE_SIZE); 589 BUG_ON(dlen != PAGE_SIZE);
585 590
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
652 /* reclaim space if needed */ 657 /* reclaim space if needed */
653 if (zswap_is_full()) { 658 if (zswap_is_full()) {
654 zswap_pool_limit_hit++; 659 zswap_pool_limit_hit++;
655 if (zbud_reclaim_page(zswap_pool, 8)) { 660 if (zpool_shrink(zswap_pool, 1, NULL)) {
656 zswap_reject_reclaim_fail++; 661 zswap_reject_reclaim_fail++;
657 ret = -ENOMEM; 662 ret = -ENOMEM;
658 goto reject; 663 goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
679 684
680 /* store */ 685 /* store */
681 len = dlen + sizeof(struct zswap_header); 686 len = dlen + sizeof(struct zswap_header);
682 ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, 687 ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
683 &handle); 688 &handle);
684 if (ret == -ENOSPC) { 689 if (ret == -ENOSPC) {
685 zswap_reject_compress_poor++; 690 zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
689 zswap_reject_alloc_fail++; 694 zswap_reject_alloc_fail++;
690 goto freepage; 695 goto freepage;
691 } 696 }
692 zhdr = zbud_map(zswap_pool, handle); 697 zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
693 zhdr->swpentry = swp_entry(type, offset); 698 zhdr->swpentry = swp_entry(type, offset);
694 buf = (u8 *)(zhdr + 1); 699 buf = (u8 *)(zhdr + 1);
695 memcpy(buf, dst, dlen); 700 memcpy(buf, dst, dlen);
696 zbud_unmap(zswap_pool, handle); 701 zpool_unmap_handle(zswap_pool, handle);
697 put_cpu_var(zswap_dstmem); 702 put_cpu_var(zswap_dstmem);
698 703
699 /* populate entry */ 704 /* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
716 721
717 /* update stats */ 722 /* update stats */
718 atomic_inc(&zswap_stored_pages); 723 atomic_inc(&zswap_stored_pages);
719 zswap_pool_pages = zbud_get_pool_size(zswap_pool); 724 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
720 725
721 return 0; 726 return 0;
722 727
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
752 757
753 /* decompress */ 758 /* decompress */
754 dlen = PAGE_SIZE; 759 dlen = PAGE_SIZE;
755 src = (u8 *)zbud_map(zswap_pool, entry->handle) + 760 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
756 sizeof(struct zswap_header); 761 ZPOOL_MM_RO) + sizeof(struct zswap_header);
757 dst = kmap_atomic(page); 762 dst = kmap_atomic(page);
758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, 763 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
759 dst, &dlen); 764 dst, &dlen);
760 kunmap_atomic(dst); 765 kunmap_atomic(dst);
761 zbud_unmap(zswap_pool, entry->handle); 766 zpool_unmap_handle(zswap_pool, entry->handle);
762 BUG_ON(ret); 767 BUG_ON(ret);
763 768
764 spin_lock(&tree->lock); 769 spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
811 zswap_trees[type] = NULL; 816 zswap_trees[type] = NULL;
812} 817}
813 818
814static struct zbud_ops zswap_zbud_ops = { 819static struct zpool_ops zswap_zpool_ops = {
815 .evict = zswap_writeback_entry 820 .evict = zswap_writeback_entry
816}; 821};
817 822
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
869 zswap_debugfs_root, &zswap_written_back_pages); 874 zswap_debugfs_root, &zswap_written_back_pages);
870 debugfs_create_u64("duplicate_entry", S_IRUGO, 875 debugfs_create_u64("duplicate_entry", S_IRUGO,
871 zswap_debugfs_root, &zswap_duplicate_entry); 876 zswap_debugfs_root, &zswap_duplicate_entry);
872 debugfs_create_u64("pool_pages", S_IRUGO, 877 debugfs_create_u64("pool_total_size", S_IRUGO,
873 zswap_debugfs_root, &zswap_pool_pages); 878 zswap_debugfs_root, &zswap_pool_total_size);
874 debugfs_create_atomic_t("stored_pages", S_IRUGO, 879 debugfs_create_atomic_t("stored_pages", S_IRUGO,
875 zswap_debugfs_root, &zswap_stored_pages); 880 zswap_debugfs_root, &zswap_stored_pages);
876 881
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
895**********************************/ 900**********************************/
896static int __init init_zswap(void) 901static int __init init_zswap(void)
897{ 902{
903 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
904
898 if (!zswap_enabled) 905 if (!zswap_enabled)
899 return 0; 906 return 0;
900 907
901 pr_info("loading zswap\n"); 908 pr_info("loading zswap\n");
902 909
903 zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); 910 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
911 if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
912 pr_info("%s zpool not available\n", zswap_zpool_type);
913 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
914 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
915 &zswap_zpool_ops);
916 }
904 if (!zswap_pool) { 917 if (!zswap_pool) {
905 pr_err("zbud pool creation failed\n"); 918 pr_err("%s zpool not available\n", zswap_zpool_type);
919 pr_err("zpool creation failed\n");
906 goto error; 920 goto error;
907 } 921 }
922 pr_info("using %s pool\n", zswap_zpool_type);
908 923
909 if (zswap_entry_cache_create()) { 924 if (zswap_entry_cache_create()) {
910 pr_err("entry cache creation failed\n"); 925 pr_err("entry cache creation failed\n");
@@ -928,7 +943,7 @@ pcpufail:
928compfail: 943compfail:
929 zswap_entry_cache_destory(); 944 zswap_entry_cache_destory();
930cachefail: 945cachefail:
931 zbud_destroy_pool(zswap_pool); 946 zpool_destroy_pool(zswap_pool);
932error: 947error:
933 return -ENOMEM; 948 return -ENOMEM;
934} 949}
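
The conversion above also shows the reclaim half of the zpool contract: a client that wants a shrinkable pool passes a struct zpool_ops with an evict callback at creation time, and later calls zpool_shrink() when it is over budget, as zswap_frontswap_store() now does. A hedged sketch of that wiring; my_evict and my_pool are invented names, not part of the patch:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/zpool.h>

static struct zpool *my_pool;	/* hypothetical long-lived client pool */

/* Invoked by the backend for each handle it wants written back. */
static int my_evict(struct zpool *pool, unsigned long handle)
{
	/*
	 * A real client would write the object identified by @handle back to
	 * its backing store here (compare zswap_writeback_entry()), then drop
	 * it.  Returning nonzero tells the backend the eviction failed.
	 */
	zpool_free(pool, handle);
	return 0;
}

static struct zpool_ops my_zpool_ops = {
	.evict = my_evict,
};

static int my_pool_init(void)
{
	my_pool = zpool_create_pool("zbud", __GFP_NORETRY | __GFP_NOWARN,
				    &my_zpool_ops);
	return my_pool ? 0 : -ENOMEM;
}

static int my_reclaim_one(void)
{
	/* Ask the backend to evict roughly one page's worth of handles. */
	return zpool_shrink(my_pool, 1, NULL);
}
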
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 022d18ab27a6..52c43f904220 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
188 188
189 /* Reached the end of the list, so insert after 'frag_entry_last'. */ 189 /* Reached the end of the list, so insert after 'frag_entry_last'. */
190 if (likely(frag_entry_last)) { 190 if (likely(frag_entry_last)) {
191 hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); 191 hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
192 chain->size += skb->len - hdr_size; 192 chain->size += skb->len - hdr_size;
193 chain->timestamp = jiffies; 193 chain->timestamp = jiffies;
194 ret = true; 194 ret = true;
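
This and the following networking hunks are mechanical fallout of the list API rename: hlist_add_after(_rcu)() becomes hlist_add_behind(_rcu)(), and the argument order flips so the node being inserted comes first and the node it is placed behind comes second. Roughly, with variable names echoing the hunks below:

	/* old: existing node first, node being added second */
	hlist_add_after(&last->list, &new->list);
	hlist_add_after_rcu(&last->list, &new->list);

	/* new: node being added first, the node it goes behind second */
	hlist_add_behind(&new->list, &last->list);
	hlist_add_behind_rcu(&new->list, &last->list);
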
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4845f4b2bb4..7751c92c8c57 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
1174 } 1174 }
1175 1175
1176 if (slot) 1176 if (slot)
1177 hlist_add_after_rcu(slot, &port->rlist); 1177 hlist_add_behind_rcu(&port->rlist, slot);
1178 else 1178 else
1179 hlist_add_head_rcu(&port->rlist, &br->router_list); 1179 hlist_add_head_rcu(&port->rlist, &br->router_list);
1180} 1180}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
940 last = li; 940 last = li;
941 } 941 }
942 if (last) 942 if (last)
943 hlist_add_after_rcu(&last->hlist, &new->hlist); 943 hlist_add_behind_rcu(&new->hlist, &last->hlist);
944 else 944 else
945 hlist_add_before_rcu(&new->hlist, &li->hlist); 945 hlist_add_before_rcu(&new->hlist, &li->hlist);
946 } 946 }
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
277 last = p; 277 last = p;
278 } 278 }
279 if (last) 279 if (last)
280 hlist_add_after_rcu(&last->list, &newp->list); 280 hlist_add_behind_rcu(&newp->list, &last->list);
281 else 281 else
282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
283out: 283out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..beeed602aeb3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
389 if (h != h0) 389 if (h != h0)
390 continue; 390 continue;
391 hlist_del(&pol->bydst); 391 hlist_del(&pol->bydst);
392 hlist_add_after(entry0, &pol->bydst); 392 hlist_add_behind(&pol->bydst, entry0);
393 } 393 }
394 entry0 = &pol->bydst; 394 entry0 = &pol->bydst;
395 } 395 }
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
654 break; 654 break;
655 } 655 }
656 if (newpos) 656 if (newpos)
657 hlist_add_after(newpos, &policy->bydst); 657 hlist_add_behind(&policy->bydst, newpos);
658 else 658 else
659 hlist_add_head(&policy->bydst, chain); 659 hlist_add_head(&policy->bydst, chain);
660 xfrm_pol_hold(policy); 660 xfrm_pol_hold(policy);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 182be0f12407..31a731e06f50 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -309,9 +309,12 @@ our $Operators = qr{
309our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x; 309our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
310 310
311our $NonptrType; 311our $NonptrType;
312our $NonptrTypeMisordered;
312our $NonptrTypeWithAttr; 313our $NonptrTypeWithAttr;
313our $Type; 314our $Type;
315our $TypeMisordered;
314our $Declare; 316our $Declare;
317our $DeclareMisordered;
315 318
316our $NON_ASCII_UTF8 = qr{ 319our $NON_ASCII_UTF8 = qr{
317 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte 320 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
@@ -353,16 +356,36 @@ our $signature_tags = qr{(?xi:
353 Cc: 356 Cc:
354)}; 357)};
355 358
359our @typeListMisordered = (
360 qr{char\s+(?:un)?signed},
361 qr{int\s+(?:(?:un)?signed\s+)?short\s},
362 qr{int\s+short(?:\s+(?:un)?signed)},
363 qr{short\s+int(?:\s+(?:un)?signed)},
364 qr{(?:un)?signed\s+int\s+short},
365 qr{short\s+(?:un)?signed},
366 qr{long\s+int\s+(?:un)?signed},
367 qr{int\s+long\s+(?:un)?signed},
368 qr{long\s+(?:un)?signed\s+int},
369 qr{int\s+(?:un)?signed\s+long},
370 qr{int\s+(?:un)?signed},
371 qr{int\s+long\s+long\s+(?:un)?signed},
372 qr{long\s+long\s+int\s+(?:un)?signed},
373 qr{long\s+long\s+(?:un)?signed\s+int},
374 qr{long\s+long\s+(?:un)?signed},
375 qr{long\s+(?:un)?signed},
376);
377
356our @typeList = ( 378our @typeList = (
357 qr{void}, 379 qr{void},
358 qr{(?:unsigned\s+)?char}, 380 qr{(?:(?:un)?signed\s+)?char},
359 qr{(?:unsigned\s+)?short}, 381 qr{(?:(?:un)?signed\s+)?short\s+int},
360 qr{(?:unsigned\s+)?int}, 382 qr{(?:(?:un)?signed\s+)?short},
361 qr{(?:unsigned\s+)?long}, 383 qr{(?:(?:un)?signed\s+)?int},
362 qr{(?:unsigned\s+)?long\s+int}, 384 qr{(?:(?:un)?signed\s+)?long\s+int},
363 qr{(?:unsigned\s+)?long\s+long}, 385 qr{(?:(?:un)?signed\s+)?long\s+long\s+int},
364 qr{(?:unsigned\s+)?long\s+long\s+int}, 386 qr{(?:(?:un)?signed\s+)?long\s+long},
365 qr{unsigned}, 387 qr{(?:(?:un)?signed\s+)?long},
388 qr{(?:un)?signed},
366 qr{float}, 389 qr{float},
367 qr{double}, 390 qr{double},
368 qr{bool}, 391 qr{bool},
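
The new @typeListMisordered table (and the $TypeMisordered/$DeclareMisordered regexes built from it) enumerates spellings where signed/unsigned or int appear out of the canonical order; it feeds the MISORDERED_TYPE warning added further down in this patch. A few declarations it is meant to catch, with illustrative variable names:

	int unsigned counter;		/* warned: write "unsigned int counter" */
	short signed int depth;		/* warned: write "signed short int depth" */
	long long unsigned int bytes;	/* warned: write "unsigned long long int bytes" */
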
@@ -372,6 +395,7 @@ our @typeList = (
372 qr{${Ident}_t}, 395 qr{${Ident}_t},
373 qr{${Ident}_handler}, 396 qr{${Ident}_handler},
374 qr{${Ident}_handler_fn}, 397 qr{${Ident}_handler_fn},
398 @typeListMisordered,
375); 399);
376our @typeListWithAttr = ( 400our @typeListWithAttr = (
377 @typeList, 401 @typeList,
@@ -399,11 +423,6 @@ foreach my $entry (@mode_permission_funcs) {
399 $mode_perms_search .= $entry->[0]; 423 $mode_perms_search .= $entry->[0];
400} 424}
401 425
402our $declaration_macros = qr{(?x:
403 (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
404 (?:$Storage\s+)?LIST_HEAD\s*\(
405)};
406
407our $allowed_asm_includes = qr{(?x: 426our $allowed_asm_includes = qr{(?x:
408 irq| 427 irq|
409 memory 428 memory
@@ -413,6 +432,7 @@ our $allowed_asm_includes = qr{(?x:
413sub build_types { 432sub build_types {
414 my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; 433 my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)";
415 my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; 434 my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)";
435 my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)";
416 my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; 436 my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)";
417 $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; 437 $Modifier = qr{(?:$Attribute|$Sparse|$mods)};
418 $NonptrType = qr{ 438 $NonptrType = qr{
@@ -424,6 +444,13 @@ sub build_types {
424 ) 444 )
425 (?:\s+$Modifier|\s+const)* 445 (?:\s+$Modifier|\s+const)*
426 }x; 446 }x;
447 $NonptrTypeMisordered = qr{
448 (?:$Modifier\s+|const\s+)*
449 (?:
450 (?:${Misordered}\b)
451 )
452 (?:\s+$Modifier|\s+const)*
453 }x;
427 $NonptrTypeWithAttr = qr{ 454 $NonptrTypeWithAttr = qr{
428 (?:$Modifier\s+|const\s+)* 455 (?:$Modifier\s+|const\s+)*
429 (?: 456 (?:
@@ -435,10 +462,16 @@ sub build_types {
435 }x; 462 }x;
436 $Type = qr{ 463 $Type = qr{
437 $NonptrType 464 $NonptrType
438 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)? 465 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
466 (?:\s+$Inline|\s+$Modifier)*
467 }x;
468 $TypeMisordered = qr{
469 $NonptrTypeMisordered
470 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
439 (?:\s+$Inline|\s+$Modifier)* 471 (?:\s+$Inline|\s+$Modifier)*
440 }x; 472 }x;
441 $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type}; 473 $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
474 $DeclareMisordered = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered};
442} 475}
443build_types(); 476build_types();
444 477
@@ -452,6 +485,12 @@ our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
452our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; 485our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
453our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)}; 486our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
454 487
488our $declaration_macros = qr{(?x:
489 (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
490 (?:$Storage\s+)?LIST_HEAD\s*\(|
491 (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
492)};
493
455sub deparenthesize { 494sub deparenthesize {
456 my ($string) = @_; 495 my ($string) = @_;
457 return "" if (!defined($string)); 496 return "" if (!defined($string));
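
The $declaration_macros pattern moves below build_types(), apparently because it now interpolates ${Type}, and it grows two cases: DEFINE/DECLARE macros with optional uppercase prefixes and suffixes, and uninitialized_var() wrappers. Lines like the following (identifiers illustrative) are now treated as declarations by the blank-line checks:

	static DEFINE_MUTEX(probe_lock);
	static LIST_HEAD(pending_list);
	unsigned long uninitialized_var(flags);
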
@@ -550,11 +589,43 @@ sub seed_camelcase_includes {
550 } 589 }
551} 590}
552 591
592sub git_commit_info {
593 my ($commit, $id, $desc) = @_;
594
595 return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
596
597 my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
598 $output =~ s/^\s*//gm;
599 my @lines = split("\n", $output);
600
601 if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
602# Maybe one day convert this block of bash into something that returns
603# all matching commit ids, but it's very slow...
604#
605# echo "checking commits $1..."
606# git rev-list --remotes | grep -i "^$1" |
607# while read line ; do
608# git log --format='%H %s' -1 $line |
609# echo "commit $(cut -c 1-12,41-)"
610# done
611 } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
612 } else {
613 $id = substr($lines[0], 0, 12);
614 $desc = substr($lines[0], 41);
615 }
616
617 return ($id, $desc);
618}
619
553$chk_signoff = 0 if ($file); 620$chk_signoff = 0 if ($file);
554 621
555my @rawlines = (); 622my @rawlines = ();
556my @lines = (); 623my @lines = ();
557my @fixed = (); 624my @fixed = ();
625my @fixed_inserted = ();
626my @fixed_deleted = ();
627my $fixlinenr = -1;
628
558my $vname; 629my $vname;
559for my $filename (@ARGV) { 630for my $filename (@ARGV) {
560 my $FILE; 631 my $FILE;
@@ -583,6 +654,9 @@ for my $filename (@ARGV) {
583 @rawlines = (); 654 @rawlines = ();
584 @lines = (); 655 @lines = ();
585 @fixed = (); 656 @fixed = ();
657 @fixed_inserted = ();
658 @fixed_deleted = ();
659 $fixlinenr = -1;
586} 660}
587 661
588exit($exit); 662exit($exit);
@@ -674,6 +748,18 @@ sub format_email {
674 return $formatted_email; 748 return $formatted_email;
675} 749}
676 750
751sub which {
752 my ($bin) = @_;
753
754 foreach my $path (split(/:/, $ENV{PATH})) {
755 if (-e "$path/$bin") {
756 return "$path/$bin";
757 }
758 }
759
760 return "";
761}
762
677sub which_conf { 763sub which_conf {
678 my ($conf) = @_; 764 my ($conf) = @_;
679 765
@@ -1483,6 +1569,90 @@ sub report_dump {
1483 our @report; 1569 our @report;
1484} 1570}
1485 1571
1572sub fixup_current_range {
1573 my ($lineRef, $offset, $length) = @_;
1574
1575 if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
1576 my $o = $1;
1577 my $l = $2;
1578 my $no = $o + $offset;
1579 my $nl = $l + $length;
1580 $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
1581 }
1582}
1583
1584sub fix_inserted_deleted_lines {
1585 my ($linesRef, $insertedRef, $deletedRef) = @_;
1586
1587 my $range_last_linenr = 0;
1588 my $delta_offset = 0;
1589
1590 my $old_linenr = 0;
1591 my $new_linenr = 0;
1592
1593 my $next_insert = 0;
1594 my $next_delete = 0;
1595
1596 my @lines = ();
1597
1598 my $inserted = @{$insertedRef}[$next_insert++];
1599 my $deleted = @{$deletedRef}[$next_delete++];
1600
1601 foreach my $old_line (@{$linesRef}) {
1602 my $save_line = 1;
1603 my $line = $old_line; #don't modify the array
1604 if ($line =~ /^(?:\+\+\+\|\-\-\-)\s+\S+/) { #new filename
1605 $delta_offset = 0;
1606 } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) { #new hunk
1607 $range_last_linenr = $new_linenr;
1608 fixup_current_range(\$line, $delta_offset, 0);
1609 }
1610
1611 while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
1612 $deleted = @{$deletedRef}[$next_delete++];
1613 $save_line = 0;
1614 fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
1615 }
1616
1617 while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
1618 push(@lines, ${$inserted}{'LINE'});
1619 $inserted = @{$insertedRef}[$next_insert++];
1620 $new_linenr++;
1621 fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
1622 }
1623
1624 if ($save_line) {
1625 push(@lines, $line);
1626 $new_linenr++;
1627 }
1628
1629 $old_linenr++;
1630 }
1631
1632 return @lines;
1633}
1634
1635sub fix_insert_line {
1636 my ($linenr, $line) = @_;
1637
1638 my $inserted = {
1639 LINENR => $linenr,
1640 LINE => $line,
1641 };
1642 push(@fixed_inserted, $inserted);
1643}
1644
1645sub fix_delete_line {
1646 my ($linenr, $line) = @_;
1647
1648 my $deleted = {
1649 LINENR => $linenr,
1650 LINE => $line,
1651 };
1652
1653 push(@fixed_deleted, $deleted);
1654}
1655
1486sub ERROR { 1656sub ERROR {
1487 my ($type, $msg) = @_; 1657 my ($type, $msg) = @_;
1488 1658
@@ -1637,11 +1807,13 @@ sub process {
1637 my $signoff = 0; 1807 my $signoff = 0;
1638 my $is_patch = 0; 1808 my $is_patch = 0;
1639 1809
1640 my $in_header_lines = 1; 1810 my $in_header_lines = $file ? 0 : 1;
1641 my $in_commit_log = 0; #Scanning lines before patch 1811 my $in_commit_log = 0; #Scanning lines before patch
1642 1812 my $reported_maintainer_file = 0;
1643 my $non_utf8_charset = 0; 1813 my $non_utf8_charset = 0;
1644 1814
1815 my $last_blank_line = 0;
1816
1645 our @report = (); 1817 our @report = ();
1646 our $cnt_lines = 0; 1818 our $cnt_lines = 0;
1647 our $cnt_error = 0; 1819 our $cnt_error = 0;
@@ -1759,8 +1931,10 @@ sub process {
1759 1931
1760 $realcnt = 0; 1932 $realcnt = 0;
1761 $linenr = 0; 1933 $linenr = 0;
1934 $fixlinenr = -1;
1762 foreach my $line (@lines) { 1935 foreach my $line (@lines) {
1763 $linenr++; 1936 $linenr++;
1937 $fixlinenr++;
1764 my $sline = $line; #copy of $line 1938 my $sline = $line; #copy of $line
1765 $sline =~ s/$;/ /g; #with comments as spaces 1939 $sline =~ s/$;/ /g; #with comments as spaces
1766 1940
@@ -1891,7 +2065,7 @@ sub process {
1891 if (WARN("BAD_SIGN_OFF", 2065 if (WARN("BAD_SIGN_OFF",
1892 "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) && 2066 "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
1893 $fix) { 2067 $fix) {
1894 $fixed[$linenr - 1] = 2068 $fixed[$fixlinenr] =
1895 "$ucfirst_sign_off $email"; 2069 "$ucfirst_sign_off $email";
1896 } 2070 }
1897 } 2071 }
@@ -1899,7 +2073,7 @@ sub process {
1899 if (WARN("BAD_SIGN_OFF", 2073 if (WARN("BAD_SIGN_OFF",
1900 "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) && 2074 "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
1901 $fix) { 2075 $fix) {
1902 $fixed[$linenr - 1] = 2076 $fixed[$fixlinenr] =
1903 "$ucfirst_sign_off $email"; 2077 "$ucfirst_sign_off $email";
1904 } 2078 }
1905 2079
@@ -1908,7 +2082,7 @@ sub process {
1908 if (WARN("BAD_SIGN_OFF", 2082 if (WARN("BAD_SIGN_OFF",
1909 "Use a single space after $ucfirst_sign_off\n" . $herecurr) && 2083 "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
1910 $fix) { 2084 $fix) {
1911 $fixed[$linenr - 1] = 2085 $fixed[$fixlinenr] =
1912 "$ucfirst_sign_off $email"; 2086 "$ucfirst_sign_off $email";
1913 } 2087 }
1914 } 2088 }
@@ -1956,6 +2130,31 @@ sub process {
1956 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr); 2130 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
1957 } 2131 }
1958 2132
2133# Check for improperly formed commit descriptions
2134 if ($in_commit_log &&
2135 $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
2136 $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
2137 $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
2138 my $init_char = $1;
2139 my $orig_commit = lc($2);
2140 my $id = '01234567890ab';
2141 my $desc = 'commit description';
2142 ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
2143 ERROR("GIT_COMMIT_ID",
2144 "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
2145 }
2146
2147# Check for added, moved or deleted files
2148 if (!$reported_maintainer_file && !$in_commit_log &&
2149 ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
2150 $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
2151 ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
2152 (defined($1) || defined($2))))) {
2153 $reported_maintainer_file = 1;
2154 WARN("FILE_PATH_CHANGES",
2155 "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
2156 }
2157
1959# Check for wrappage within a valid hunk of the file 2158# Check for wrappage within a valid hunk of the file
1960 if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { 2159 if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
1961 ERROR("CORRUPTED_PATCH", 2160 ERROR("CORRUPTED_PATCH",
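
The two checks added in the hunk above look at the patch metadata rather than the code: a bare SHA-1 reference in the changelog now has to use 12 to 16 characters plus the quoted subject, in the form commit 0123456789ab ("subsystem: short description") (id and subject here are placeholders), and the first added, renamed or deleted file in a patch triggers a single FILE_PATH_CHANGES reminder to check whether MAINTAINERS needs updating.
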
@@ -1993,7 +2192,8 @@ sub process {
1993# Check if it's the start of a commit log 2192# Check if it's the start of a commit log
1994# (not a header line and we haven't seen the patch filename) 2193# (not a header line and we haven't seen the patch filename)
1995 if ($in_header_lines && $realfile =~ /^$/ && 2194 if ($in_header_lines && $realfile =~ /^$/ &&
1996 $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) { 2195 !($rawline =~ /^\s+\S/ ||
2196 $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
1997 $in_header_lines = 0; 2197 $in_header_lines = 0;
1998 $in_commit_log = 1; 2198 $in_commit_log = 1;
1999 } 2199 }
@@ -2021,14 +2221,14 @@ sub process {
2021 if (ERROR("DOS_LINE_ENDINGS", 2221 if (ERROR("DOS_LINE_ENDINGS",
2022 "DOS line endings\n" . $herevet) && 2222 "DOS line endings\n" . $herevet) &&
2023 $fix) { 2223 $fix) {
2024 $fixed[$linenr - 1] =~ s/[\s\015]+$//; 2224 $fixed[$fixlinenr] =~ s/[\s\015]+$//;
2025 } 2225 }
2026 } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { 2226 } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
2027 my $herevet = "$here\n" . cat_vet($rawline) . "\n"; 2227 my $herevet = "$here\n" . cat_vet($rawline) . "\n";
2028 if (ERROR("TRAILING_WHITESPACE", 2228 if (ERROR("TRAILING_WHITESPACE",
2029 "trailing whitespace\n" . $herevet) && 2229 "trailing whitespace\n" . $herevet) &&
2030 $fix) { 2230 $fix) {
2031 $fixed[$linenr - 1] =~ s/\s+$//; 2231 $fixed[$fixlinenr] =~ s/\s+$//;
2032 } 2232 }
2033 2233
2034 $rpt_cleaners = 1; 2234 $rpt_cleaners = 1;
@@ -2049,7 +2249,7 @@ sub process {
2049# Only applies when adding the entry originally, after that we do not have 2249# Only applies when adding the entry originally, after that we do not have
2050# sufficient context to determine whether it is indeed long enough. 2250# sufficient context to determine whether it is indeed long enough.
2051 if ($realfile =~ /Kconfig/ && 2251 if ($realfile =~ /Kconfig/ &&
2052 $line =~ /.\s*config\s+/) { 2252 $line =~ /^\+\s*config\s+/) {
2053 my $length = 0; 2253 my $length = 0;
2054 my $cnt = $realcnt; 2254 my $cnt = $realcnt;
2055 my $ln = $linenr + 1; 2255 my $ln = $linenr + 1;
@@ -2062,10 +2262,11 @@ sub process {
2062 $is_end = $lines[$ln - 1] =~ /^\+/; 2262 $is_end = $lines[$ln - 1] =~ /^\+/;
2063 2263
2064 next if ($f =~ /^-/); 2264 next if ($f =~ /^-/);
2265 last if (!$file && $f =~ /^\@\@/);
2065 2266
2066 if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) { 2267 if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
2067 $is_start = 1; 2268 $is_start = 1;
2068 } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) { 2269 } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
2069 $length = -1; 2270 $length = -1;
2070 } 2271 }
2071 2272
@@ -2161,12 +2362,18 @@ sub process {
2161 "quoted string split across lines\n" . $hereprev); 2362 "quoted string split across lines\n" . $hereprev);
2162 } 2363 }
2163 2364
2365# check for missing a space in a string concatination
2366 if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) {
2367 WARN('MISSING_SPACE',
2368 "break quoted strings at a space character\n" . $hereprev);
2369 }
2370
2164# check for spaces before a quoted newline 2371# check for spaces before a quoted newline
2165 if ($rawline =~ /^.*\".*\s\\n/) { 2372 if ($rawline =~ /^.*\".*\s\\n/) {
2166 if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", 2373 if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
2167 "unnecessary whitespace before a quoted newline\n" . $herecurr) && 2374 "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
2168 $fix) { 2375 $fix) {
2169 $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/; 2376 $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
2170 } 2377 }
2171 2378
2172 } 2379 }
@@ -2203,7 +2410,7 @@ sub process {
2203 if (ERROR("CODE_INDENT", 2410 if (ERROR("CODE_INDENT",
2204 "code indent should use tabs where possible\n" . $herevet) && 2411 "code indent should use tabs where possible\n" . $herevet) &&
2205 $fix) { 2412 $fix) {
2206 $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2413 $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
2207 } 2414 }
2208 } 2415 }
2209 2416
@@ -2213,9 +2420,9 @@ sub process {
2213 if (WARN("SPACE_BEFORE_TAB", 2420 if (WARN("SPACE_BEFORE_TAB",
2214 "please, no space before tabs\n" . $herevet) && 2421 "please, no space before tabs\n" . $herevet) &&
2215 $fix) { 2422 $fix) {
2216 while ($fixed[$linenr - 1] =~ 2423 while ($fixed[$fixlinenr] =~
2217 s/(^\+.*) {8,8}+\t/$1\t\t/) {} 2424 s/(^\+.*) {8,8}+\t/$1\t\t/) {}
2218 while ($fixed[$linenr - 1] =~ 2425 while ($fixed[$fixlinenr] =~
2219 s/(^\+.*) +\t/$1\t/) {} 2426 s/(^\+.*) +\t/$1\t/) {}
2220 } 2427 }
2221 } 2428 }
@@ -2249,19 +2456,19 @@ sub process {
2249 if (CHK("PARENTHESIS_ALIGNMENT", 2456 if (CHK("PARENTHESIS_ALIGNMENT",
2250 "Alignment should match open parenthesis\n" . $hereprev) && 2457 "Alignment should match open parenthesis\n" . $hereprev) &&
2251 $fix && $line =~ /^\+/) { 2458 $fix && $line =~ /^\+/) {
2252 $fixed[$linenr - 1] =~ 2459 $fixed[$fixlinenr] =~
2253 s/^\+[ \t]*/\+$goodtabindent/; 2460 s/^\+[ \t]*/\+$goodtabindent/;
2254 } 2461 }
2255 } 2462 }
2256 } 2463 }
2257 } 2464 }
2258 2465
2259 if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) { 2466 if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|{)/) {
2260 if (CHK("SPACING", 2467 if (CHK("SPACING",
2261 "No space is necessary after a cast\n" . $hereprev) && 2468 "No space is necessary after a cast\n" . $herecurr) &&
2262 $fix) { 2469 $fix) {
2263 $fixed[$linenr - 1] =~ 2470 $fixed[$fixlinenr] =~
2264 s/^(\+.*\*[ \t]*\))[ \t]+/$1/; 2471 s/(\(\s*$Type\s*\))[ \t]+/$1/;
2265 } 2472 }
2266 } 2473 }
2267 2474
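
The space-after-cast test above now keys on a full ($Type) cast rather than the old pointer-only "*)" heuristic, reports against the current line, and declines to fire when the next token is an assignment or arithmetic operator or an open brace, so parenthesized operands and compound literals are not mistaken for casts. With illustrative names:

	len = (size_t) strlen(name);	/* CHK SPACING; --fix rewrites it to (size_t)strlen(name) */
	pt  = (struct point) { 0, 0 };	/* not flagged: '{' follows, a compound literal */
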
@@ -2291,10 +2498,44 @@ sub process {
2291 "networking block comments put the trailing */ on a separate line\n" . $herecurr); 2498 "networking block comments put the trailing */ on a separate line\n" . $herecurr);
2292 } 2499 }
2293 2500
2501# check for missing blank lines after struct/union declarations
2502# with exceptions for various attributes and macros
2503 if ($prevline =~ /^[\+ ]};?\s*$/ &&
2504 $line =~ /^\+/ &&
2505 !($line =~ /^\+\s*$/ ||
2506 $line =~ /^\+\s*EXPORT_SYMBOL/ ||
2507 $line =~ /^\+\s*MODULE_/i ||
2508 $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
2509 $line =~ /^\+[a-z_]*init/ ||
2510 $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
2511 $line =~ /^\+\s*DECLARE/ ||
2512 $line =~ /^\+\s*__setup/)) {
2513 if (CHK("LINE_SPACING",
2514 "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
2515 $fix) {
2516 fix_insert_line($fixlinenr, "\+");
2517 }
2518 }
2519
2520# check for multiple consecutive blank lines
2521 if ($prevline =~ /^[\+ ]\s*$/ &&
2522 $line =~ /^\+\s*$/ &&
2523 $last_blank_line != ($linenr - 1)) {
2524 if (CHK("LINE_SPACING",
2525 "Please don't use multiple blank lines\n" . $hereprev) &&
2526 $fix) {
2527 fix_delete_line($fixlinenr, $rawline);
2528 }
2529
2530 $last_blank_line = $linenr;
2531 }
2532
2294# check for missing blank lines after declarations 2533# check for missing blank lines after declarations
2295 if ($sline =~ /^\+\s+\S/ && #Not at char 1 2534 if ($sline =~ /^\+\s+\S/ && #Not at char 1
2296 # actual declarations 2535 # actual declarations
2297 ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2536 ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
2537 # function pointer declarations
2538 $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
2298 # foo bar; where foo is some local typedef or #define 2539 # foo bar; where foo is some local typedef or #define
2299 $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2540 $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
2300 # known declaration macros 2541 # known declaration macros
@@ -2307,6 +2548,8 @@ sub process {
2307 $prevline =~ /(?:\{\s*|\\)$/) && 2548 $prevline =~ /(?:\{\s*|\\)$/) &&
2308 # looks like a declaration 2549 # looks like a declaration
2309 !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2550 !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
2551 # function pointer declarations
2552 $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
2310 # foo bar; where foo is some local typedef or #define 2553 # foo bar; where foo is some local typedef or #define
2311 $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2554 $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
2312 # known declaration macros 2555 # known declaration macros
@@ -2321,8 +2564,11 @@ sub process {
2321 $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) && 2564 $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
2322 # indentation of previous and current line are the same 2565 # indentation of previous and current line are the same
2323 (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) { 2566 (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
2324 WARN("SPACING", 2567 if (WARN("LINE_SPACING",
2325 "Missing a blank line after declarations\n" . $hereprev); 2568 "Missing a blank line after declarations\n" . $hereprev) &&
2569 $fix) {
2570 fix_insert_line($fixlinenr, "\+");
2571 }
2326 } 2572 }
2327 2573
2328# check for spaces at the beginning of a line. 2574# check for spaces at the beginning of a line.
@@ -2335,13 +2581,33 @@ sub process {
2335 if (WARN("LEADING_SPACE", 2581 if (WARN("LEADING_SPACE",
2336 "please, no spaces at the start of a line\n" . $herevet) && 2582 "please, no spaces at the start of a line\n" . $herevet) &&
2337 $fix) { 2583 $fix) {
2338 $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2584 $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
2339 } 2585 }
2340 } 2586 }
2341 2587
2342# check we are in a valid C source file if not then ignore this hunk 2588# check we are in a valid C source file if not then ignore this hunk
2343 next if ($realfile !~ /\.(h|c)$/); 2589 next if ($realfile !~ /\.(h|c)$/);
2344 2590
2591# check indentation of any line with a bare else
2592# if the previous line is a break or return and is indented 1 tab more...
2593 if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
2594 my $tabs = length($1) + 1;
2595 if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
2596 WARN("UNNECESSARY_ELSE",
2597 "else is not generally useful after a break or return\n" . $hereprev);
2598 }
2599 }
2600
2601# check indentation of a line with a break;
2602# if the previous line is a goto or return and is indented the same # of tabs
2603 if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
2604 my $tabs = $1;
2605 if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
2606 WARN("UNNECESSARY_BREAK",
2607 "break is not useful after a goto or return\n" . $hereprev);
2608 }
2609 }
2610
2345# discourage the addition of CONFIG_EXPERIMENTAL in #if(def). 2611# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
2346 if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) { 2612 if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
2347 WARN("CONFIG_EXPERIMENTAL", 2613 WARN("CONFIG_EXPERIMENTAL",
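
The two new indentation checks above flag control flow that serves no purpose: an else whose preceding branch ends in break or return one tab deeper, and a break that directly follows a goto or return at the same tab indent. Tab-indented illustration with hypothetical names:

	if (err)
		return err;
	else				/* WARN UNNECESSARY_ELSE */
		err = do_fallback();

	switch (cmd) {
	case CMD_RESET:
		return do_reset(dev);
		break;			/* WARN UNNECESSARY_BREAK */
	}
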
@@ -2477,7 +2743,7 @@ sub process {
2477 2743
2478# if/while/etc brace do not go on next line, unless defining a do while loop, 2744# if/while/etc brace do not go on next line, unless defining a do while loop,
2479# or if that brace on the next line is for something else 2745# or if that brace on the next line is for something else
2480 if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { 2746 if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
2481 my $pre_ctx = "$1$2"; 2747 my $pre_ctx = "$1$2";
2482 2748
2483 my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0); 2749 my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2770,7 @@ sub process {
2504 #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; 2770 #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
2505 #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; 2771 #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
2506 2772
2507 if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { 2773 if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
2508 ERROR("OPEN_BRACE", 2774 ERROR("OPEN_BRACE",
2509 "that open brace { should be on the previous line\n" . 2775 "that open brace { should be on the previous line\n" .
2510 "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); 2776 "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2789,7 @@ sub process {
2523 } 2789 }
2524 2790
2525# Check relative indent for conditionals and blocks. 2791# Check relative indent for conditionals and blocks.
2526 if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { 2792 if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
2527 ($stat, $cond, $line_nr_next, $remain_next, $off_next) = 2793 ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
2528 ctx_statement_block($linenr, $realcnt, 0) 2794 ctx_statement_block($linenr, $realcnt, 0)
2529 if (!defined $stat); 2795 if (!defined $stat);
@@ -2654,8 +2920,18 @@ sub process {
2654# check for initialisation to aggregates open brace on the next line 2920# check for initialisation to aggregates open brace on the next line
2655 if ($line =~ /^.\s*{/ && 2921 if ($line =~ /^.\s*{/ &&
2656 $prevline =~ /(?:^|[^=])=\s*$/) { 2922 $prevline =~ /(?:^|[^=])=\s*$/) {
2657 ERROR("OPEN_BRACE", 2923 if (ERROR("OPEN_BRACE",
2658 "that open brace { should be on the previous line\n" . $hereprev); 2924 "that open brace { should be on the previous line\n" . $hereprev) &&
2925 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
2926 fix_delete_line($fixlinenr - 1, $prevrawline);
2927 fix_delete_line($fixlinenr, $rawline);
2928 my $fixedline = $prevrawline;
2929 $fixedline =~ s/\s*=\s*$/ = {/;
2930 fix_insert_line($fixlinenr, $fixedline);
2931 $fixedline = $line;
2932 $fixedline =~ s/^(.\s*){\s*/$1/;
2933 fix_insert_line($fixlinenr, $fixedline);
2934 }
2659 } 2935 }
2660 2936
2661# 2937#
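
The aggregate-initialiser OPEN_BRACE error above becomes fixable: when both lines are additions, --fix folds the brace back onto the "=" line. Roughly, with a hypothetical structure:

	/* before */
	static const struct foo_ops default_ops =
	{
		.start	= foo_start,
	};

	/* after --fix */
	static const struct foo_ops default_ops = {
		.start	= foo_start,
	};
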
@@ -2680,10 +2956,10 @@ sub process {
2680 if (ERROR("C99_COMMENTS", 2956 if (ERROR("C99_COMMENTS",
2681 "do not use C99 // comments\n" . $herecurr) && 2957 "do not use C99 // comments\n" . $herecurr) &&
2682 $fix) { 2958 $fix) {
2683 my $line = $fixed[$linenr - 1]; 2959 my $line = $fixed[$fixlinenr];
2684 if ($line =~ /\/\/(.*)$/) { 2960 if ($line =~ /\/\/(.*)$/) {
2685 my $comment = trim($1); 2961 my $comment = trim($1);
2686 $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@; 2962 $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
2687 } 2963 }
2688 } 2964 }
2689 } 2965 }
@@ -2742,7 +3018,7 @@ sub process {
2742 "do not initialise globals to 0 or NULL\n" . 3018 "do not initialise globals to 0 or NULL\n" .
2743 $herecurr) && 3019 $herecurr) &&
2744 $fix) { 3020 $fix) {
2745 $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; 3021 $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
2746 } 3022 }
2747 } 3023 }
2748# check for static initialisers. 3024# check for static initialisers.
@@ -2751,10 +3027,17 @@ sub process {
2751 "do not initialise statics to 0 or NULL\n" . 3027 "do not initialise statics to 0 or NULL\n" .
2752 $herecurr) && 3028 $herecurr) &&
2753 $fix) { 3029 $fix) {
2754 $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 3030 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
2755 } 3031 }
2756 } 3032 }
2757 3033
3034# check for misordered declarations of char/short/int/long with signed/unsigned
3035 while ($sline =~ m{(\b$TypeMisordered\b)}g) {
3036 my $tmp = trim($1);
3037 WARN("MISORDERED_TYPE",
3038 "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
3039 }
3040
2758# check for static const char * arrays. 3041# check for static const char * arrays.
2759 if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) { 3042 if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
2760 WARN("STATIC_CONST_CHAR_ARRAY", 3043 WARN("STATIC_CONST_CHAR_ARRAY",
@@ -2781,7 +3064,7 @@ sub process {
2781 if (ERROR("FUNCTION_WITHOUT_ARGS", 3064 if (ERROR("FUNCTION_WITHOUT_ARGS",
2782 "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) && 3065 "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
2783 $fix) { 3066 $fix) {
2784 $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/; 3067 $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
2785 } 3068 }
2786 } 3069 }
2787 3070
@@ -2790,7 +3073,7 @@ sub process {
2790 if (WARN("DEFINE_PCI_DEVICE_TABLE", 3073 if (WARN("DEFINE_PCI_DEVICE_TABLE",
2791 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) && 3074 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
2792 $fix) { 3075 $fix) {
2793 $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /; 3076 $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
2794 } 3077 }
2795 } 3078 }
2796 3079
@@ -2827,7 +3110,7 @@ sub process {
2827 my $sub_from = $ident; 3110 my $sub_from = $ident;
2828 my $sub_to = $ident; 3111 my $sub_to = $ident;
2829 $sub_to =~ s/\Q$from\E/$to/; 3112 $sub_to =~ s/\Q$from\E/$to/;
2830 $fixed[$linenr - 1] =~ 3113 $fixed[$fixlinenr] =~
2831 s@\Q$sub_from\E@$sub_to@; 3114 s@\Q$sub_from\E@$sub_to@;
2832 } 3115 }
2833 } 3116 }
@@ -2855,7 +3138,7 @@ sub process {
2855 my $sub_from = $match; 3138 my $sub_from = $match;
2856 my $sub_to = $match; 3139 my $sub_to = $match;
2857 $sub_to =~ s/\Q$from\E/$to/; 3140 $sub_to =~ s/\Q$from\E/$to/;
2858 $fixed[$linenr - 1] =~ 3141 $fixed[$fixlinenr] =~
2859 s@\Q$sub_from\E@$sub_to@; 3142 s@\Q$sub_from\E@$sub_to@;
2860 } 3143 }
2861 } 3144 }
@@ -2917,7 +3200,7 @@ sub process {
2917 if (WARN("PREFER_PR_LEVEL", 3200 if (WARN("PREFER_PR_LEVEL",
2918 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) && 3201 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
2919 $fix) { 3202 $fix) {
2920 $fixed[$linenr - 1] =~ 3203 $fixed[$fixlinenr] =~
2921 s/\bpr_warning\b/pr_warn/; 3204 s/\bpr_warning\b/pr_warn/;
2922 } 3205 }
2923 } 3206 }
@@ -2933,17 +3216,40 @@ sub process {
2933 3216
2934# function brace can't be on same line, except for #defines of do while, 3217# function brace can't be on same line, except for #defines of do while,
2935# or if closed on same line 3218# or if closed on same line
2936 if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and 3219 if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
2937 !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) { 3220 !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
2938 ERROR("OPEN_BRACE", 3221 if (ERROR("OPEN_BRACE",
2939 "open brace '{' following function declarations go on the next line\n" . $herecurr); 3222 "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
3223 $fix) {
3224 fix_delete_line($fixlinenr, $rawline);
3225 my $fixed_line = $rawline;
3226 $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
3227 my $line1 = $1;
3228 my $line2 = $2;
3229 fix_insert_line($fixlinenr, ltrim($line1));
3230 fix_insert_line($fixlinenr, "\+{");
3231 if ($line2 !~ /^\s*$/) {
3232 fix_insert_line($fixlinenr, "\+\t" . trim($line2));
3233 }
3234 }
2940 } 3235 }
2941 3236
2942# open braces for enum, union and struct go on the same line. 3237# open braces for enum, union and struct go on the same line.
2943 if ($line =~ /^.\s*{/ && 3238 if ($line =~ /^.\s*{/ &&
2944 $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { 3239 $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
2945 ERROR("OPEN_BRACE", 3240 if (ERROR("OPEN_BRACE",
2946 "open brace '{' following $1 go on the same line\n" . $hereprev); 3241 "open brace '{' following $1 go on the same line\n" . $hereprev) &&
3242 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3243 fix_delete_line($fixlinenr - 1, $prevrawline);
3244 fix_delete_line($fixlinenr, $rawline);
3245 my $fixedline = rtrim($prevrawline) . " {";
3246 fix_insert_line($fixlinenr, $fixedline);
3247 $fixedline = $rawline;
3248 $fixedline =~ s/^(.\s*){\s*/$1\t/;
3249 if ($fixedline !~ /^\+\s*$/) {
3250 fix_insert_line($fixlinenr, $fixedline);
3251 }
3252 }
2947 } 3253 }
2948 3254
2949# missing space after union, struct or enum definition 3255# missing space after union, struct or enum definition
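
For function definitions, the OPEN_BRACE error above now also matches a brace glued to the parameter list (the pattern went from "\s{" to "\s*{"), and with --fix the line is split so the brace lands on its own line, with any trailing statement indented below it. Illustrative:

	/* before */
	static int foo_probe(struct platform_device *pdev){
		return foo_init(pdev);
	}

	/* after --fix */
	static int foo_probe(struct platform_device *pdev)
	{
		return foo_init(pdev);
	}
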
@@ -2951,7 +3257,7 @@ sub process {
2951 if (WARN("SPACING", 3257 if (WARN("SPACING",
2952 "missing space after $1 definition\n" . $herecurr) && 3258 "missing space after $1 definition\n" . $herecurr) &&
2953 $fix) { 3259 $fix) {
2954 $fixed[$linenr - 1] =~ 3260 $fixed[$fixlinenr] =~
2955 s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/; 3261 s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
2956 } 3262 }
2957 } 3263 }
@@ -3021,7 +3327,7 @@ sub process {
3021 } 3327 }
3022 3328
3023 if (show_type("SPACING") && $fix) { 3329 if (show_type("SPACING") && $fix) {
3024 $fixed[$linenr - 1] =~ 3330 $fixed[$fixlinenr] =~
3025 s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex; 3331 s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
3026 } 3332 }
3027 } 3333 }
@@ -3038,7 +3344,7 @@ sub process {
3038 if (ERROR("BRACKET_SPACE", 3344 if (ERROR("BRACKET_SPACE",
3039 "space prohibited before open square bracket '['\n" . $herecurr) && 3345 "space prohibited before open square bracket '['\n" . $herecurr) &&
3040 $fix) { 3346 $fix) {
3041 $fixed[$linenr - 1] =~ 3347 $fixed[$fixlinenr] =~
3042 s/^(\+.*?)\s+\[/$1\[/; 3348 s/^(\+.*?)\s+\[/$1\[/;
3043 } 3349 }
3044 } 3350 }
@@ -3073,7 +3379,7 @@ sub process {
3073 if (WARN("SPACING", 3379 if (WARN("SPACING",
3074 "space prohibited between function name and open parenthesis '('\n" . $herecurr) && 3380 "space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
3075 $fix) { 3381 $fix) {
3076 $fixed[$linenr - 1] =~ 3382 $fixed[$fixlinenr] =~
3077 s/\b$name\s+\(/$name\(/; 3383 s/\b$name\s+\(/$name\(/;
3078 } 3384 }
3079 } 3385 }
@@ -3341,8 +3647,8 @@ sub process {
3341 $fixed_line = $fixed_line . $fix_elements[$#elements]; 3647 $fixed_line = $fixed_line . $fix_elements[$#elements];
3342 } 3648 }
3343 3649
3344 if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) { 3650 if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
3345 $fixed[$linenr - 1] = $fixed_line; 3651 $fixed[$fixlinenr] = $fixed_line;
3346 } 3652 }
3347 3653
3348 3654
@@ -3353,7 +3659,7 @@ sub process {
3353 if (WARN("SPACING", 3659 if (WARN("SPACING",
3354 "space prohibited before semicolon\n" . $herecurr) && 3660 "space prohibited before semicolon\n" . $herecurr) &&
3355 $fix) { 3661 $fix) {
3356 1 while $fixed[$linenr - 1] =~ 3662 1 while $fixed[$fixlinenr] =~
3357 s/^(\+.*\S)\s+;/$1;/; 3663 s/^(\+.*\S)\s+;/$1;/;
3358 } 3664 }
3359 } 3665 }
@@ -3386,7 +3692,7 @@ sub process {
3386 if (ERROR("SPACING", 3692 if (ERROR("SPACING",
3387 "space required before the open brace '{'\n" . $herecurr) && 3693 "space required before the open brace '{'\n" . $herecurr) &&
3388 $fix) { 3694 $fix) {
3389 $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/; 3695 $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
3390 } 3696 }
3391 } 3697 }
3392 3698
@@ -3404,7 +3710,7 @@ sub process {
3404 if (ERROR("SPACING", 3710 if (ERROR("SPACING",
3405 "space required after that close brace '}'\n" . $herecurr) && 3711 "space required after that close brace '}'\n" . $herecurr) &&
3406 $fix) { 3712 $fix) {
3407 $fixed[$linenr - 1] =~ 3713 $fixed[$fixlinenr] =~
3408 s/}((?!(?:,|;|\)))\S)/} $1/; 3714 s/}((?!(?:,|;|\)))\S)/} $1/;
3409 } 3715 }
3410 } 3716 }
@@ -3414,7 +3720,7 @@ sub process {
3414 if (ERROR("SPACING", 3720 if (ERROR("SPACING",
3415 "space prohibited after that open square bracket '['\n" . $herecurr) && 3721 "space prohibited after that open square bracket '['\n" . $herecurr) &&
3416 $fix) { 3722 $fix) {
3417 $fixed[$linenr - 1] =~ 3723 $fixed[$fixlinenr] =~
3418 s/\[\s+/\[/; 3724 s/\[\s+/\[/;
3419 } 3725 }
3420 } 3726 }
@@ -3422,7 +3728,7 @@ sub process {
3422 if (ERROR("SPACING", 3728 if (ERROR("SPACING",
3423 "space prohibited before that close square bracket ']'\n" . $herecurr) && 3729 "space prohibited before that close square bracket ']'\n" . $herecurr) &&
3424 $fix) { 3730 $fix) {
3425 $fixed[$linenr - 1] =~ 3731 $fixed[$fixlinenr] =~
3426 s/\s+\]/\]/; 3732 s/\s+\]/\]/;
3427 } 3733 }
3428 } 3734 }
@@ -3433,7 +3739,7 @@ sub process {
3433 if (ERROR("SPACING", 3739 if (ERROR("SPACING",
3434 "space prohibited after that open parenthesis '('\n" . $herecurr) && 3740 "space prohibited after that open parenthesis '('\n" . $herecurr) &&
3435 $fix) { 3741 $fix) {
3436 $fixed[$linenr - 1] =~ 3742 $fixed[$fixlinenr] =~
3437 s/\(\s+/\(/; 3743 s/\(\s+/\(/;
3438 } 3744 }
3439 } 3745 }
@@ -3443,18 +3749,27 @@ sub process {
3443 if (ERROR("SPACING", 3749 if (ERROR("SPACING",
3444 "space prohibited before that close parenthesis ')'\n" . $herecurr) && 3750 "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
3445 $fix) { 3751 $fix) {
3446 $fixed[$linenr - 1] =~ 3752 print("fixlinenr: <$fixlinenr> fixed[fixlinenr]: <$fixed[$fixlinenr]>\n");
3753 $fixed[$fixlinenr] =~
3447 s/\s+\)/\)/; 3754 s/\s+\)/\)/;
3448 } 3755 }
3449 } 3756 }
3450 3757
3758# check unnecessary parentheses around addressof/dereference single $Lvals
3759# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
3760
3761 while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
3762 CHK("UNNECESSARY_PARENTHESES",
3763 "Unnecessary parentheses around $1\n" . $herecurr);
3764 }
3765
3451#goto labels aren't indented, allow a single space however 3766#goto labels aren't indented, allow a single space however
3452 if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and 3767 if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
3453 !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { 3768 !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
3454 if (WARN("INDENTED_LABEL", 3769 if (WARN("INDENTED_LABEL",
3455 "labels should not be indented\n" . $herecurr) && 3770 "labels should not be indented\n" . $herecurr) &&
3456 $fix) { 3771 $fix) {
3457 $fixed[$linenr - 1] =~ 3772 $fixed[$fixlinenr] =~
3458 s/^(.)\s+/$1/; 3773 s/^(.)\s+/$1/;
3459 } 3774 }
3460 } 3775 }
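
The new UNNECESSARY_PARENTHESES --strict check above covers taking the address of, or dereferencing, a single member expression. With hypothetical fields:

	stats = &(dev->stats);		/* CHK: write &dev->stats */
	count = *(info->counter);	/* CHK: write *info->counter */
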
@@ -3516,7 +3831,7 @@ sub process {
3516 if (ERROR("SPACING", 3831 if (ERROR("SPACING",
3517 "space required before the open parenthesis '('\n" . $herecurr) && 3832 "space required before the open parenthesis '('\n" . $herecurr) &&
3518 $fix) { 3833 $fix) {
3519 $fixed[$linenr - 1] =~ 3834 $fixed[$fixlinenr] =~
3520 s/\b(if|while|for|switch)\(/$1 \(/; 3835 s/\b(if|while|for|switch)\(/$1 \(/;
3521 } 3836 }
3522 } 3837 }
@@ -3606,7 +3921,7 @@ sub process {
3606# if should not continue a brace 3921# if should not continue a brace
3607 if ($line =~ /}\s*if\b/) { 3922 if ($line =~ /}\s*if\b/) {
3608 ERROR("TRAILING_STATEMENTS", 3923 ERROR("TRAILING_STATEMENTS",
3609 "trailing statements should be on next line\n" . 3924 "trailing statements should be on next line (or did you mean 'else if'?)\n" .
3610 $herecurr); 3925 $herecurr);
3611 } 3926 }
3612# case and default should not have general statements after them 3927# case and default should not have general statements after them
@@ -3622,14 +3937,26 @@ sub process {
3622 3937
3623 # Check for }<nl>else {, these must be at the same 3938 # Check for }<nl>else {, these must be at the same
3624 # indent level to be relevant to each other. 3939 # indent level to be relevant to each other.
3625 if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and 3940 if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
3626 $previndent == $indent) { 3941 $previndent == $indent) {
3627 ERROR("ELSE_AFTER_BRACE", 3942 if (ERROR("ELSE_AFTER_BRACE",
3628 "else should follow close brace '}'\n" . $hereprev); 3943 "else should follow close brace '}'\n" . $hereprev) &&
3944 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3945 fix_delete_line($fixlinenr - 1, $prevrawline);
3946 fix_delete_line($fixlinenr, $rawline);
3947 my $fixedline = $prevrawline;
3948 $fixedline =~ s/}\s*$//;
3949 if ($fixedline !~ /^\+\s*$/) {
3950 fix_insert_line($fixlinenr, $fixedline);
3951 }
3952 $fixedline = $rawline;
3953 $fixedline =~ s/^(.\s*)else/$1} else/;
3954 fix_insert_line($fixlinenr, $fixedline);
3955 }
3629 } 3956 }
3630 3957
3631 if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and 3958 if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
3632 $previndent == $indent) { 3959 $previndent == $indent) {
3633 my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); 3960 my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
3634 3961
3635 # Find out what is on the end of the line after the 3962 # Find out what is on the end of the line after the
@@ -3638,8 +3965,18 @@ sub process {
3638 $s =~ s/\n.*//g; 3965 $s =~ s/\n.*//g;
3639 3966
3640 if ($s =~ /^\s*;/) { 3967 if ($s =~ /^\s*;/) {
3641 ERROR("WHILE_AFTER_BRACE", 3968 if (ERROR("WHILE_AFTER_BRACE",
3642 "while should follow close brace '}'\n" . $hereprev); 3969 "while should follow close brace '}'\n" . $hereprev) &&
3970 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3971 fix_delete_line($fixlinenr - 1, $prevrawline);
3972 fix_delete_line($fixlinenr, $rawline);
3973 my $fixedline = $prevrawline;
3974 my $trailing = $rawline;
3975 $trailing =~ s/^\+//;
3976 $trailing = trim($trailing);
3977 $fixedline =~ s/}\s*$/} $trailing/;
3978 fix_insert_line($fixlinenr, $fixedline);
3979 }
3643 } 3980 }
3644 } 3981 }
3645 3982
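
Both close-brace placement errors above (else and do/while) gain --fix support when the two lines are additions: the dangling "}" and the following keyword end up on one line. For the else case, roughly:

	/* before */
	if (ret < 0) {
		goto unwind;
	}
	else {
		ret = 0;
	}

	/* after --fix */
	if (ret < 0) {
		goto unwind;
	} else {
		ret = 0;
	}
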
@@ -3653,7 +3990,7 @@ sub process {
3653 "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) && 3990 "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
3654 $fix) { 3991 $fix) {
3655 my $hexval = sprintf("0x%x", oct($var)); 3992 my $hexval = sprintf("0x%x", oct($var));
3656 $fixed[$linenr - 1] =~ 3993 $fixed[$fixlinenr] =~
3657 s/\b$var\b/$hexval/; 3994 s/\b$var\b/$hexval/;
3658 } 3995 }
3659 } 3996 }
@@ -3689,7 +4026,7 @@ sub process {
3689 if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION", 4026 if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
3690 "Whitespace after \\ makes next lines useless\n" . $herecurr) && 4027 "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
3691 $fix) { 4028 $fix) {
3692 $fixed[$linenr - 1] =~ s/\s+$//; 4029 $fixed[$fixlinenr] =~ s/\s+$//;
3693 } 4030 }
3694 } 4031 }
3695 4032
@@ -3762,7 +4099,7 @@ sub process {
3762 $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(), 4099 $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
3763 $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo(); 4100 $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
3764 $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz 4101 $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
3765 $dstat !~ /^'X'$/ && # character constants 4102 $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants
3766 $dstat !~ /$exceptions/ && 4103 $dstat !~ /$exceptions/ &&
3767 $dstat !~ /^\.$Ident\s*=/ && # .foo = 4104 $dstat !~ /^\.$Ident\s*=/ && # .foo =
3768 $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo 4105 $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
@@ -4014,6 +4351,23 @@ sub process {
4014 } 4351 }
4015 } 4352 }
4016 4353
4354# check for unnecessary "Out of Memory" messages
4355 if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
4356 $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
4357 (defined $1 || defined $3) &&
4358 $linenr > 3) {
4359 my $testval = $2;
4360 my $testline = $lines[$linenr - 3];
4361
4362 my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
4363# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
4364
4365 if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
4366 WARN("OOM_MESSAGE",
4367 "Possible unnecessary 'out of memory' message\n" . $hereprev);
4368 }
4369 }
4370
4017# check for bad placement of section $InitAttribute (e.g.: __initdata) 4371# check for bad placement of section $InitAttribute (e.g.: __initdata)
4018 if ($line =~ /(\b$InitAttribute\b)/) { 4372 if ($line =~ /(\b$InitAttribute\b)/) {
4019 my $attr = $1; 4373 my $attr = $1;
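
The OOM_MESSAGE check above walks back from a print call to the preceding NULL test and, a couple of lines further, to the allocation that produced the pointer; when that allocation is one of the k*alloc/kstrdup/alloc_skb family the message is reported as redundant, since those allocators already log failures unless __GFP_NOWARN is passed. Illustrative shape of what it warns about:

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf) {
		pr_err("failed to allocate buffer\n");	/* WARN OOM_MESSAGE */
		return -ENOMEM;
	}
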
@@ -4027,7 +4381,7 @@ sub process {
4027 WARN("MISPLACED_INIT", 4381 WARN("MISPLACED_INIT",
4028 "$attr should be placed after $var\n" . $herecurr))) && 4382 "$attr should be placed after $var\n" . $herecurr))) &&
4029 $fix) { 4383 $fix) {
4030 $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e; 4384 $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
4031 } 4385 }
4032 } 4386 }
4033 } 4387 }
@@ -4041,7 +4395,7 @@ sub process {
4041 if (ERROR("INIT_ATTRIBUTE", 4395 if (ERROR("INIT_ATTRIBUTE",
4042 "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) && 4396 "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
4043 $fix) { 4397 $fix) {
4044 $fixed[$linenr - 1] =~ 4398 $fixed[$fixlinenr] =~
4045 s/$InitAttributeData/${attr_prefix}initconst/; 4399 s/$InitAttributeData/${attr_prefix}initconst/;
4046 } 4400 }
4047 } 4401 }
@@ -4052,12 +4406,12 @@ sub process {
4052 if (ERROR("INIT_ATTRIBUTE", 4406 if (ERROR("INIT_ATTRIBUTE",
4053 "Use of $attr requires a separate use of const\n" . $herecurr) && 4407 "Use of $attr requires a separate use of const\n" . $herecurr) &&
4054 $fix) { 4408 $fix) {
4055 my $lead = $fixed[$linenr - 1] =~ 4409 my $lead = $fixed[$fixlinenr] =~
4056 /(^\+\s*(?:static\s+))/; 4410 /(^\+\s*(?:static\s+))/;
4057 $lead = rtrim($1); 4411 $lead = rtrim($1);
4058 $lead = "$lead " if ($lead !~ /^\+$/); 4412 $lead = "$lead " if ($lead !~ /^\+$/);
4059 $lead = "${lead}const "; 4413 $lead = "${lead}const ";
4060 $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/; 4414 $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
4061 } 4415 }
4062 } 4416 }
4063 4417
@@ -4070,7 +4424,7 @@ sub process {
4070 if (WARN("CONSTANT_CONVERSION", 4424 if (WARN("CONSTANT_CONVERSION",
4071 "$constant_func should be $func\n" . $herecurr) && 4425 "$constant_func should be $func\n" . $herecurr) &&
4072 $fix) { 4426 $fix) {
4073 $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g; 4427 $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
4074 } 4428 }
4075 } 4429 }
4076 4430
@@ -4120,7 +4474,7 @@ sub process {
4120 if (ERROR("SPACING", 4474 if (ERROR("SPACING",
4121 "exactly one space required after that #$1\n" . $herecurr) && 4475 "exactly one space required after that #$1\n" . $herecurr) &&
4122 $fix) { 4476 $fix) {
4123 $fixed[$linenr - 1] =~ 4477 $fixed[$fixlinenr] =~
4124 s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /; 4478 s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
4125 } 4479 }
4126 4480
@@ -4168,7 +4522,7 @@ sub process {
4168 if (WARN("INLINE", 4522 if (WARN("INLINE",
4169 "plain inline is preferred over $1\n" . $herecurr) && 4523 "plain inline is preferred over $1\n" . $herecurr) &&
4170 $fix) { 4524 $fix) {
4171 $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/; 4525 $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
4172 4526
4173 } 4527 }
4174 } 4528 }
@@ -4193,7 +4547,7 @@ sub process {
4193 if (WARN("PREFER_PRINTF", 4547 if (WARN("PREFER_PRINTF",
4194 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) && 4548 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
4195 $fix) { 4549 $fix) {
4196 $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex; 4550 $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
4197 4551
4198 } 4552 }
4199 } 4553 }
@@ -4204,7 +4558,7 @@ sub process {
4204 if (WARN("PREFER_SCANF", 4558 if (WARN("PREFER_SCANF",
4205 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) && 4559 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
4206 $fix) { 4560 $fix) {
4207 $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; 4561 $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
4208 } 4562 }
4209 } 4563 }
4210 4564
@@ -4219,7 +4573,7 @@ sub process {
4219 if (WARN("SIZEOF_PARENTHESIS", 4573 if (WARN("SIZEOF_PARENTHESIS",
4220 "sizeof $1 should be sizeof($1)\n" . $herecurr) && 4574 "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
4221 $fix) { 4575 $fix) {
4222 $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; 4576 $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
4223 } 4577 }
4224 } 4578 }
4225 4579
@@ -4242,7 +4596,7 @@ sub process {
4242 if (WARN("PREFER_SEQ_PUTS", 4596 if (WARN("PREFER_SEQ_PUTS",
4243 "Prefer seq_puts to seq_printf\n" . $herecurr) && 4597 "Prefer seq_puts to seq_printf\n" . $herecurr) &&
4244 $fix) { 4598 $fix) {
4245 $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/; 4599 $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
4246 } 4600 }
4247 } 4601 }
4248 } 4602 }
@@ -4271,7 +4625,7 @@ sub process {
4271 if (WARN("PREFER_ETHER_ADDR_COPY", 4625 if (WARN("PREFER_ETHER_ADDR_COPY",
4272 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) && 4626 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
4273 $fix) { 4627 $fix) {
4274 $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/; 4628 $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
4275 } 4629 }
4276 } 4630 }
4277 4631
@@ -4359,7 +4713,7 @@ sub process {
4359 if (CHK("AVOID_EXTERNS", 4713 if (CHK("AVOID_EXTERNS",
4360 "extern prototypes should be avoided in .h files\n" . $herecurr) && 4714 "extern prototypes should be avoided in .h files\n" . $herecurr) &&
4361 $fix) { 4715 $fix) {
4362 $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; 4716 $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
4363 } 4717 }
4364 } 4718 }
4365 4719
@@ -4419,23 +4773,24 @@ sub process {
4419 4773
4420# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc 4774# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
4421 if ($^V && $^V ge 5.10.0 && 4775 if ($^V && $^V ge 5.10.0 &&
4422 $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) { 4776 $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
4423 my $oldfunc = $3; 4777 my $oldfunc = $3;
4424 my $a1 = $4; 4778 my $a1 = $4;
4425 my $a2 = $10; 4779 my $a2 = $10;
4426 my $newfunc = "kmalloc_array"; 4780 my $newfunc = "kmalloc_array";
4427 $newfunc = "kcalloc" if ($oldfunc eq "kzalloc"); 4781 $newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
4428 if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) { 4782 my $r1 = $a1;
4783 my $r2 = $a2;
4784 if ($a1 =~ /^sizeof\s*\S/) {
4785 $r1 = $a2;
4786 $r2 = $a1;
4787 }
4788 if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
4789 !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
4429 if (WARN("ALLOC_WITH_MULTIPLY", 4790 if (WARN("ALLOC_WITH_MULTIPLY",
4430 "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) && 4791 "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
4431 $fix) { 4792 $fix) {
4432 my $r1 = $a1; 4793 $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
4433 my $r2 = $a2;
4434 if ($a1 =~ /^sizeof\s*\S/) {
4435 $r1 = $a2;
4436 $r2 = $a1;
4437 }
4438 $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
4439 4794
4440 } 4795 }
4441 } 4796 }
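This hunk both narrows the ALLOC_WITH_MULTIPLY warning and hoists the operand reordering out of the --fix branch: the warning now fires only when exactly one factor is a sizeof and the other factor is neither a numeric constant nor an ALL_CAPS macro, the sizeof operand always becomes the second argument of kmalloc_array()/kcalloc(), and the extra \s*, appended to the match requires the product to run right up to the next-argument comma. A minimal standalone sketch of the new reorder-and-filter logic (variable contents are hypothetical, and /^\d+$/ is a simplified stand-in for checkpatch's $Constant):

    my ($oldfunc, $a1, $a2) = ('kzalloc', 'sizeof(*tbl)', 'nr_entries');
    my $newfunc = $oldfunc eq 'kzalloc' ? 'kcalloc' : 'kmalloc_array';
    my ($r1, $r2) = ($a1, $a2);
    ($r1, $r2) = ($a2, $a1) if ($a1 =~ /^sizeof\s*\S/);      # sizeof goes second
    if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
        !($r1 =~ /^\d+$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {  # skip constants and macros
        print "suggest: $newfunc($r1, $r2, ...)\n";
    }
    # -> suggest: kcalloc(nr_entries, sizeof(*tbl), ...)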
@@ -4459,17 +4814,17 @@ sub process {
4459 if (WARN("ONE_SEMICOLON", 4814 if (WARN("ONE_SEMICOLON",
4460 "Statements terminations use 1 semicolon\n" . $herecurr) && 4815 "Statements terminations use 1 semicolon\n" . $herecurr) &&
4461 $fix) { 4816 $fix) {
4462 $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g; 4817 $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
4463 } 4818 }
4464 } 4819 }
4465 4820
4466 # check for case / default statements not preceeded by break/fallthrough/switch 4821 # check for case / default statements not preceded by break/fallthrough/switch
4467 if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { 4822 if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
4468 my $has_break = 0; 4823 my $has_break = 0;
4469 my $has_statement = 0; 4824 my $has_statement = 0;
4470 my $count = 0; 4825 my $count = 0;
4471 my $prevline = $linenr; 4826 my $prevline = $linenr;
4472 while ($prevline > 1 && $count < 3 && !$has_break) { 4827 while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
4473 $prevline--; 4828 $prevline--;
4474 my $rline = $rawlines[$prevline - 1]; 4829 my $rline = $rawlines[$prevline - 1];
4475 my $fline = $lines[$prevline - 1]; 4830 my $fline = $lines[$prevline - 1];
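Besides the ONE_SEMICOLON fixup switching to $fixlinenr, the functional change in this hunk is the look-back bound in the case/default check: when checkpatch is run with --file, the scan for a preceding break/fallthrough/switch is no longer capped at three lines. A trivial sketch of the bound only (values hypothetical; the real loop also classifies each earlier line before giving up):

    my $file = 1;                     # set when checkpatch is run with --file
    my ($count, $prevline, $has_break) = (0, 20, 0);
    while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
        $prevline--;
        $count++;                     # stand-in for the per-line bookkeeping
    }
    print "scanned back to line $prevline\n";   # reaches line 1 here; stops after 3 lines in patch mode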
@@ -4507,7 +4862,7 @@ sub process {
4507 if (WARN("USE_FUNC", 4862 if (WARN("USE_FUNC",
4508 "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) && 4863 "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) &&
4509 $fix) { 4864 $fix) {
4510 $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g; 4865 $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
4511 } 4866 }
4512 } 4867 }
4513 4868
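Another straight token replacement, sketched on a hypothetical line:

    my $stmt = 'pr_err("%s: timeout\n", __FUNCTION__);';
    $stmt =~ s/\b__FUNCTION__\b/__func__/g;
    print "$stmt\n";   # -> pr_err("%s: timeout\n", __func__);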
@@ -4750,12 +5105,16 @@ sub process {
4750 hash_show_words(\%use_type, "Used"); 5105 hash_show_words(\%use_type, "Used");
4751 hash_show_words(\%ignore_type, "Ignored"); 5106 hash_show_words(\%ignore_type, "Ignored");
4752 5107
4753 if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { 5108 if ($clean == 0 && $fix &&
5109 ("@rawlines" ne "@fixed" ||
5110 $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
4754 my $newfile = $filename; 5111 my $newfile = $filename;
4755 $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace); 5112 $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
4756 my $linecount = 0; 5113 my $linecount = 0;
4757 my $f; 5114 my $f;
4758 5115
5116 @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
5117
4759 open($f, '>', $newfile) 5118 open($f, '>', $newfile)
4760 or die "$P: Can't open $newfile for write\n"; 5119 or die "$P: Can't open $newfile for write\n";
4761 foreach my $fixed_line (@fixed) { 5120 foreach my $fixed_line (@fixed) {
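Two things change in this hunk: the fixed copy of the input is now written out even when the only pending changes are whole-line insertions or deletions, and @fixed is first passed through fix_inserted_deleted_lines() so those lines are spliced in before the file is emitted. A standalone sketch of the widened trigger condition (array contents are hypothetical placeholders; the real inserted/deleted entries carry position information):

    my @rawlines       = ('line one', 'line two');
    my @fixed          = @rawlines;                      # no in-place edits this run
    my @fixed_inserted = ('a pending inserted line');
    my @fixed_deleted  = ();
    if ("@rawlines" ne "@fixed" ||
        $#fixed_inserted >= 0 || $#fixed_deleted >= 0) {
        print "would write the fixed file\n";            # previously skipped in this case
    }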
@@ -4763,7 +5122,7 @@ sub process {
4763 if ($file) { 5122 if ($file) {
4764 if ($linecount > 3) { 5123 if ($linecount > 3) {
4765 $fixed_line =~ s/^\+//; 5124 $fixed_line =~ s/^\+//;
4766 print $f $fixed_line. "\n"; 5125 print $f $fixed_line . "\n";
4767 } 5126 }
4768 } else { 5127 } else {
4769 print $f $fixed_line . "\n"; 5128 print $f $fixed_line . "\n";