-rw-r--r--Documentation/ABI/testing/sysfs-block-zram39
-rw-r--r--Documentation/DocBook/kernel-hacking.tmpl2
-rw-r--r--Documentation/arm64/memory.txt4
-rw-r--r--Documentation/blockdev/zram.txt54
-rw-r--r--Documentation/cgroups/memcg_test.txt4
-rw-r--r--Documentation/cgroups/resource_counter.txt12
-rw-r--r--Documentation/filesystems/Locking10
-rw-r--r--Documentation/filesystems/affs.txt9
-rw-r--r--Documentation/filesystems/proc.txt17
-rw-r--r--Documentation/irqflags-tracing.txt7
-rw-r--r--Documentation/kbuild/kconfig-language.txt4
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/rapidio/sysfs.txt66
-rw-r--r--Documentation/scheduler/sched-arch.txt2
-rw-r--r--Documentation/sysctl/kernel.txt1
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/arc/Kconfig2
-rw-r--r--arch/arm/Kconfig12
-rw-r--r--arch/arm/mach-picoxcell/Kconfig2
-rw-r--r--arch/arm/mach-prima2/Kconfig2
-rw-r--r--arch/arm/mach-s3c24xx/Kconfig2
-rw-r--r--arch/arm/mach-shmobile/Kconfig2
-rw-r--r--arch/arm/mach-vexpress/Kconfig2
-rw-r--r--arch/arm/plat-samsung/Kconfig4
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/fixmap.h67
-rw-r--r--arch/arm64/include/asm/io.h1
-rw-r--r--arch/arm64/include/asm/memory.h2
-rw-r--r--arch/arm64/include/asm/mmu.h1
-rw-r--r--arch/arm64/kernel/early_printk.c8
-rw-r--r--arch/arm64/kernel/head.S9
-rw-r--r--arch/arm64/kernel/setup.c4
-rw-r--r--arch/arm64/mm/ioremap.c85
-rw-r--r--arch/arm64/mm/mmu.c44
-rw-r--r--arch/cris/Kconfig3
-rw-r--r--arch/cris/kernel/setup.c2
-rw-r--r--arch/hexagon/Kconfig2
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/m32r/Kconfig2
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/metag/Kconfig2
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/openrisc/Kconfig2
-rw-r--r--arch/powerpc/include/asm/fadump.h1
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype1
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c1
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/mm/pgtable.c3
-rw-r--r--arch/sh/Kconfig4
-rw-r--r--arch/sh/boards/Kconfig8
-rw-r--r--arch/sh/include/asm/io.h4
-rw-r--r--arch/sh/include/asm/io_trapped.h2
-rw-r--r--arch/sh/include/asm/machvec.h2
-rw-r--r--arch/sh/kernel/Makefile2
-rw-r--r--arch/sh/kernel/io_trapped.c4
-rw-r--r--arch/tile/Kconfig2
-rw-r--r--arch/um/kernel/process.c2
-rw-r--r--arch/unicore32/Kconfig2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/bug.h3
-rw-r--r--arch/x86/include/asm/fixmap.h6
-rw-r--r--arch/x86/include/asm/io.h14
-rw-r--r--arch/x86/include/asm/percpu.h98
-rw-r--r--arch/x86/include/asm/preempt.h16
-rw-r--r--arch/x86/mm/ioremap.c224
-rw-r--r--arch/x86/mm/pgtable_32.c2
-rw-r--r--arch/xtensa/Kconfig4
-rw-r--r--arch/xtensa/configs/iss_defconfig2
-rw-r--r--arch/xtensa/configs/s6105_defconfig2
-rw-r--r--drivers/block/zram/Kconfig10
-rw-r--r--drivers/block/zram/Makefile4
-rw-r--r--drivers/block/zram/zcomp.c353
-rw-r--r--drivers/block/zram/zcomp.h68
-rw-r--r--drivers/block/zram/zcomp_lz4.c47
-rw-r--r--drivers/block/zram/zcomp_lz4.h17
-rw-r--r--drivers/block/zram/zcomp_lzo.c47
-rw-r--r--drivers/block/zram/zcomp_lzo.h17
-rw-r--r--drivers/block/zram/zram_drv.c383
-rw-r--r--drivers/block/zram/zram_drv.h21
-rw-r--r--drivers/char/tpm/Kconfig2
-rw-r--r--drivers/i2c/busses/Kconfig2
-rw-r--r--drivers/lguest/page_tables.c6
-rw-r--r--drivers/misc/sgi-gru/grukdump.c6
-rw-r--r--drivers/net/can/sja1000/Kconfig2
-rw-r--r--drivers/net/ethernet/3com/Kconfig2
-rw-r--r--drivers/net/rionet.c1
-rw-r--r--drivers/rapidio/devices/tsi721.c1
-rw-r--r--drivers/rapidio/devices/tsi721.h4
-rw-r--r--drivers/rapidio/devices/tsi721_dma.c111
-rw-r--r--drivers/rapidio/rio-driver.c22
-rw-r--r--drivers/rapidio/rio-scan.c1
-rw-r--r--drivers/rapidio/rio-sysfs.c40
-rw-r--r--drivers/rapidio/rio.c11
-rw-r--r--drivers/rapidio/rio.h1
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h20
-rw-r--r--fs/affs/amigaffs.c23
-rw-r--r--fs/affs/dir.c28
-rw-r--r--fs/affs/namei.c32
-rw-r--r--fs/affs/super.c8
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/cifs/file.c1
-rw-r--r--fs/exec.c26
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/f2fs/file.c1
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/ntfs/debug.c58
-rw-r--r--fs/ntfs/debug.h7
-rw-r--r--fs/ntfs/super.c28
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/proc/fd.c6
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/proc/vmcore.c3
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ufs/balloc.c12
-rw-r--r--fs/ufs/ialloc.c4
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xfs/xfs_file.c1
-rw-r--r--include/asm-generic/bug.h60
-rw-r--r--include/asm-generic/early_ioremap.h42
-rw-r--r--include/asm-generic/io.h4
-rw-r--r--include/asm-generic/iomap.h2
-rw-r--r--include/asm-generic/percpu.h13
-rw-r--r--include/linux/binfmts.h1
-rw-r--r--include/linux/crash_dump.h1
-rw-r--r--include/linux/idr.h63
-rw-r--r--include/linux/io.h2
-rw-r--r--include/linux/lglock.h16
-rw-r--r--include/linux/memcontrol.h23
-rw-r--r--include/linux/mempolicy.h3
-rw-r--r--include/linux/mm.h17
-rw-r--r--include/linux/mm_types.h4
-rw-r--r--include/linux/mmdebug.h4
-rw-r--r--include/linux/percpu.h350
-rw-r--r--include/linux/res_counter.h6
-rw-r--r--include/linux/rio.h5
-rw-r--r--include/linux/sched.h15
-rw-r--r--include/linux/slab.h6
-rw-r--r--include/linux/slub_def.h3
-rw-r--r--include/linux/topology.h4
-rw-r--r--include/linux/vmacache.h38
-rw-r--r--include/linux/vmstat.h8
-rw-r--r--include/linux/writeback.h2
-rw-r--r--include/trace/events/task.h2
-rw-r--r--include/uapi/asm-generic/mman-common.h2
-rw-r--r--include/uapi/linux/prctl.h3
-rw-r--r--init/Kconfig1
-rw-r--r--init/initramfs.c1
-rw-r--r--ipc/compat.c3
-rw-r--r--ipc/ipc_sysctl.c2
-rw-r--r--ipc/mqueue.c2
-rw-r--r--ipc/util.c2
-rw-r--r--kernel/debug/debug_core.c14
-rw-r--r--kernel/exit.c110
-rw-r--r--kernel/fork.c34
-rw-r--r--kernel/kallsyms.c11
-rw-r--r--kernel/kexec.c5
-rw-r--r--kernel/ksysfs.c5
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/panic.c13
-rw-r--r--kernel/power/power.h3
-rw-r--r--kernel/power/snapshot.c3
-rw-r--r--kernel/power/suspend.c5
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/res_counter.c23
-rw-r--r--kernel/sched/clock.c3
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sys.c15
-rw-r--r--kernel/sysctl.c6
-rw-r--r--kernel/time/timekeeping.c5
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--lib/Kconfig4
-rw-r--r--lib/decompress.c3
-rw-r--r--lib/devres.c4
-rw-r--r--lib/idr.c24
-rw-r--r--lib/iomap.c4
-rw-r--r--lib/smp_processor_id.c18
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/Makefile3
-rw-r--r--mm/compaction.c84
-rw-r--r--mm/early_ioremap.c245
-rw-r--r--mm/filemap.c86
-rw-r--r--mm/huge_memory.c21
-rw-r--r--mm/hugetlb.c14
-rw-r--r--mm/internal.h16
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c453
-rw-r--r--mm/memory.c147
-rw-r--r--mm/mempolicy.c46
-rw-r--r--mm/mempool.c4
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c55
-rw-r--r--mm/mprotect.c56
-rw-r--r--mm/nommu.c49
-rw-r--r--mm/page-writeback.c4
-rw-r--r--mm/page_alloc.c118
-rw-r--r--mm/readahead.c21
-rw-r--r--mm/rmap.c14
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/slab.c8
-rw-r--r--mm/slab.h21
-rw-r--r--mm/slab_common.c250
-rw-r--r--mm/slub.c87
-rw-r--r--mm/sparse.c4
-rw-r--r--mm/util.c5
-rw-r--r--mm/vmacache.c112
-rw-r--r--mm/vmalloc.c10
-rw-r--r--mm/vmscan.c12
-rw-r--r--mm/zswap.c78
-rw-r--r--net/ipv4/route.c2
-rw-r--r--scripts/kconfig/confdata.c5
-rw-r--r--scripts/kconfig/expr.h3
-rw-r--r--scripts/kconfig/lkc.h1
-rw-r--r--scripts/kconfig/menu.c3
-rw-r--r--scripts/kconfig/zconf.gperf1
-rw-r--r--scripts/kconfig/zconf.hash.c_shipped13
-rw-r--r--sound/isa/Kconfig2
-rw-r--r--sound/pci/Kconfig2
-rw-r--r--tools/vm/page-types.c170
233 files changed, 3714 insertions, 2012 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 3f0b9ae61d8c..70ec992514d0 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -43,6 +43,36 @@ Description:
 		The invalid_io file is read-only and specifies the number of
 		non-page-size-aligned I/O requests issued to this device.
 
+What:		/sys/block/zram<id>/failed_reads
+Date:		February 2014
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The failed_reads file is read-only and specifies the number of
+		failed reads happened on this device.
+
+What:		/sys/block/zram<id>/failed_writes
+Date:		February 2014
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The failed_writes file is read-only and specifies the number of
+		failed writes happened on this device.
+
+What:		/sys/block/zram<id>/max_comp_streams
+Date:		February 2014
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The max_comp_streams file is read-write and specifies the
+		number of backend's zcomp_strm compression streams (number of
+		concurrent compress operations).
+
+What:		/sys/block/zram<id>/comp_algorithm
+Date:		February 2014
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The comp_algorithm file is read-write and lets to show
+		available and selected compression algorithms, change
+		compression algorithm selection.
+
 What:		/sys/block/zram<id>/notify_free
 Date:		August 2010
 Contact:	Nitin Gupta <ngupta@vflare.org>
@@ -53,15 +83,6 @@ Description:
 		is freed. This statistic is applicable only when this disk is
 		being used as a swap disk.
 
-What:		/sys/block/zram<id>/discard
-Date:		August 2010
-Contact:	Nitin Gupta <ngupta@vflare.org>
-Description:
-		The discard file is read-only and specifies the number of
-		discard requests received by this device. These requests
-		provide information to block device regarding blocks which are
-		no longer used by filesystem.
-
 What:		/sys/block/zram<id>/zero_pages
 Date:		August 2010
 Contact:	Nitin Gupta <ngupta@vflare.org>
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index bd9015d10cff..e84f09467cd7 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -671,7 +671,7 @@ printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
 
  <sect1 id="routines-local-irqs">
   <title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
-   <filename class="headerfile">include/asm/system.h</filename>
+   <filename class="headerfile">include/linux/irqflags.h</filename>
   </title>
 
  <para>
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 85e24c4f215c..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -39,7 +39,7 @@ ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space
 
 ffffffbffb000000	ffffffbffbbfffff	  12MB		[guard]
 
-ffffffbffbc00000	ffffffbffbdfffff	   2MB		earlyprintk device
+ffffffbffbc00000	ffffffbffbdfffff	   2MB		fixed mappings
 
 ffffffbffbe00000	ffffffbffbffffff	   2MB		[guard]
 
@@ -66,7 +66,7 @@ fffffdfffa000000	fffffdfffaffffff	  16MB		PCI I/O space
 
 fffffdfffb000000	fffffdfffbbfffff	  12MB		[guard]
 
-fffffdfffbc00000	fffffdfffbdfffff	   2MB		earlyprintk device
+fffffdfffbc00000	fffffdfffbdfffff	   2MB		fixed mappings
 
 fffffdfffbe00000	fffffdfffbffffff	   2MB		[guard]
 
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 2eccddffa6c8..0595c3f56ccf 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -21,7 +21,43 @@ Following shows a typical sequence of steps for using zram.
 	This creates 4 devices: /dev/zram{0,1,2,3}
 	(num_devices parameter is optional. Default: 1)
 
-2) Set Disksize
+2) Set max number of compression streams
+	Compression backend may use up to max_comp_streams compression streams,
+	thus allowing up to max_comp_streams concurrent compression operations.
+	By default, compression backend uses single compression stream.
+
+	Examples:
+	#show max compression streams number
+	cat /sys/block/zram0/max_comp_streams
+
+	#set max compression streams number to 3
+	echo 3 > /sys/block/zram0/max_comp_streams
+
+Note:
+In order to enable compression backend's multi stream support max_comp_streams
+must be initially set to desired concurrency level before ZRAM device
+initialisation. Once the device initialised as a single stream compression
+backend (max_comp_streams equals to 1), you will see error if you try to change
+the value of max_comp_streams because single stream compression backend
+implemented as a special case by lock overhead issue and does not support
+dynamic max_comp_streams. Only multi stream backend supports dynamic
+max_comp_streams adjustment.
+
+3) Select compression algorithm
+	Using comp_algorithm device attribute one can see available and
+	currently selected (shown in square brackets) compression algortithms,
+	change selected compression algorithm (once the device is initialised
+	there is no way to change compression algorithm).
+
+	Examples:
+	#show supported compression algorithms
+	cat /sys/block/zram0/comp_algorithm
+	lzo [lz4]
+
+	#select lzo compression algorithm
+	echo lzo > /sys/block/zram0/comp_algorithm
+
+4) Set Disksize
 	Set disk size by writing the value to sysfs node 'disksize'.
 	The value can be either in bytes or you can use mem suffixes.
 	Examples:
@@ -33,32 +69,38 @@ Following shows a typical sequence of steps for using zram.
 	echo 512M > /sys/block/zram0/disksize
 	echo 1G > /sys/block/zram0/disksize
 
-3) Activate:
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Activate:
 	mkswap /dev/zram0
 	swapon /dev/zram0
 
 	mkfs.ext4 /dev/zram1
 	mount /dev/zram1 /tmp
 
-4) Stats:
+6) Stats:
 	Per-device statistics are exported as various nodes under
 	/sys/block/zram<id>/
 		disksize
 		num_reads
 		num_writes
+		failed_reads
+		failed_writes
 		invalid_io
 		notify_free
-		discard
 		zero_pages
 		orig_data_size
 		compr_data_size
 		mem_used_total
 
-5) Deactivate:
+7) Deactivate:
 	swapoff /dev/zram0
 	umount /dev/zram1
 
-6) Reset:
+8) Reset:
 	Write any positive value to 'reset' sysfs node
 	echo 1 > /sys/block/zram0/reset
 	echo 1 > /sys/block/zram1/reset
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index ce94a83a7d9a..80ac454704b8 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,7 +24,7 @@ Please note that implementation details can be changed.
 
 	a page/swp_entry may be charged (usage += PAGE_SIZE) at
 
-	mem_cgroup_newpage_charge()
+	mem_cgroup_charge_anon()
 	  Called at new page fault and Copy-On-Write.
 
 	mem_cgroup_try_charge_swapin()
@@ -32,7 +32,7 @@ Please note that implementation details can be changed.
 	  Followed by charge-commit-cancel protocol. (With swap accounting)
 	  At commit, a charge recorded in swap_cgroup is removed.
 
-	mem_cgroup_cache_charge()
+	mem_cgroup_charge_file()
 	  Called at add_to_page_cache()
 
 	mem_cgroup_cache_charge_swapin()
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 5108afb3645c..762ca54eb929 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -76,15 +76,7 @@ to work with it.
 	limit_fail_at parameter is set to the particular res_counter element
 	where the charging failed.
 
- d. int res_counter_charge_locked
-			(struct res_counter *rc, unsigned long val, bool force)
-
-	The same as res_counter_charge(), but it must not acquire/release the
-	res_counter->lock internally (it must be called with res_counter->lock
-	held). The force parameter indicates whether we can bypass the limit.
-
- e. u64 res_counter_uncharge[_locked]
-			(struct res_counter *rc, unsigned long val)
+ d. u64 res_counter_uncharge(struct res_counter *rc, unsigned long val)
 
 	When a resource is released (freed) it should be de-accounted
 	from the resource counter it was accounted to.  This is called
@@ -93,7 +85,7 @@ to work with it.
 
 	The _locked routines imply that the res_counter->lock is taken.
 
- f. u64 res_counter_uncharge_until
+ e. u64 res_counter_uncharge_until
 		(struct res_counter *rc, struct res_counter *top,
 		 unsigned long val)
 
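[Editor's note: a minimal hedged sketch, not part of this patch, of how a controller typically pairs the charge/uncharge calls documented above; the counter name and error policy are illustrative assumptions.]

#include <linux/res_counter.h>
#include <linux/errno.h>

static struct res_counter my_counter;	/* illustrative counter */

static int my_account(unsigned long bytes)
{
	struct res_counter *fail_at;

	/* May fail when a limit would be exceeded; fail_at reports where. */
	if (res_counter_charge(&my_counter, bytes, &fail_at))
		return -ENOMEM;
	return 0;
}

static void my_unaccount(unsigned long bytes)
{
	/* Give the charge back; uncharging never fails. */
	res_counter_uncharge(&my_counter, bytes);
}
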
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f424e0e5b46b..efca5c1bbb10 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -529,6 +529,7 @@ locking rules:
 open:		yes
 close:		yes
 fault:		yes		can return with page locked
+map_pages:	yes
 page_mkwrite:	yes		can return with page locked
 access:		yes
 
@@ -540,6 +541,15 @@ the page, then ensure it is not already truncated (the page lock will block
 subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
 locked. The VM will unlock the page.
 
+	->map_pages() is called when VM asks to map easy accessible pages.
+Filesystem should find and map pages associated with offsets from "pgoff"
+till "max_pgoff". ->map_pages() is called with page table locked and must
+not block. If it's not possible to reach a page without blocking,
+filesystem should skip it. Filesystem should use do_set_pte() to setup
+page table entry. Pointer to entry associated with offset "pgoff" is
+passed in "pte" field in vm_fault structure. Pointers to entries for other
+offsets should be calculated relative to "pte".
+
 	->page_mkwrite() is called when a previously read-only pte is
 about to become writeable. The filesystem again must ensure that there are
 no truncate/invalidate races, and then return with the page locked. If
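[Editor's note: the one-line fs/*/file.c changes in this series wire filesystems up to the new ->map_pages() hook by reusing the generic helper. The sketch below shows that shape; the "myfs_" names are illustrative, the helpers (filemap_fault, filemap_map_pages, filemap_page_mkwrite) are the real generic ones.]

#include <linux/fs.h>
#include <linux/mm.h>

static const struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,	/* single-page fault path */
	.map_pages	= filemap_map_pages,	/* map nearby cached pages too */
	.page_mkwrite	= filemap_page_mkwrite,
};

static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &myfs_file_vm_ops;
	return 0;
}
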
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt
index 81ac488e3758..71b63c2b9841 100644
--- a/Documentation/filesystems/affs.txt
+++ b/Documentation/filesystems/affs.txt
@@ -49,6 +49,10 @@ mode=mode Sets the mode flags to the given (octal) value, regardless
 		This is useful since most of the plain AmigaOS files
 		will map to 600.
 
+nofilenametruncate
+		The file system will return an error when filename exceeds
+		standard maximum filename length (30 characters).
+
 reserved=num	Sets the number of reserved blocks at the start of the
 		partition to num. You should never need this option.
 		Default is 2.
@@ -181,9 +185,8 @@ tested, though several hundred MB have been read and written using
 this fs. For a most up-to-date list of bugs please consult
 fs/affs/Changes.
 
-Filenames are truncated to 30 characters without warning (this
-can be changed by setting the compile-time option AFFS_NO_TRUNCATE
-in include/linux/amigaffs.h).
+By default, filenames are truncated to 30 characters without warning.
+'nofilenametruncate' mount option can change that behavior.
 
 Case is ignored by the affs in filename matching, but Linux shells
 do care about the case. Example (with /wb being an affs mounted fs):
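[Editor's note: a hedged sketch of passing the new option programmatically via mount(2); device and mount point below are illustrative.]

#include <sys/mount.h>

int mount_affs_strict(void)
{
	/* "nofilenametruncate" goes in the filesystem data string. */
	return mount("/dev/sdb1", "/mnt/amiga", "affs", 0,
		     "nofilenametruncate");
}
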
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f00bee144add..8b9cd8eb3f91 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1648,18 +1648,21 @@ pids, so one need to either stop or freeze processes being inspected
 if precise results are needed.
 
 
-3.7	/proc/<pid>/fdinfo/<fd> - Information about opened file
+3.8	/proc/<pid>/fdinfo/<fd> - Information about opened file
 ---------------------------------------------------------------
 This file provides information associated with an opened file. The regular
-files have at least two fields -- 'pos' and 'flags'. The 'pos' represents
-the current offset of the opened file in decimal form [see lseek(2) for
-details] and 'flags' denotes the octal O_xxx mask the file has been
-created with [see open(2) for details].
+files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos'
+represents the current offset of the opened file in decimal form [see lseek(2)
+for details], 'flags' denotes the octal O_xxx mask the file has been
+created with [see open(2) for details] and 'mnt_id' represents mount ID of
+the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
+for details].
 
 A typical output is
 
 	pos:	0
 	flags:	0100002
+	mnt_id:	19
 
 The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
 pair provide additional information particular to the objects they represent.
@@ -1668,6 +1671,7 @@ pair provide additional information particular to the objects they represent.
 	~~~~~~~~~~~~~
 	pos:	0
 	flags:	04002
+	mnt_id:	9
 	eventfd-count:	5a
 
 	where 'eventfd-count' is hex value of a counter.
@@ -1676,6 +1680,7 @@ pair provide additional information particular to the objects they represent.
 	~~~~~~~~~~~~~~
 	pos:	0
 	flags:	04002
+	mnt_id:	9
 	sigmask:	0000000000000200
 
 	where 'sigmask' is hex value of the signal mask associated
@@ -1685,6 +1690,7 @@ pair provide additional information particular to the objects they represent.
 	~~~~~~~~~~~
 	pos:	0
 	flags:	02
+	mnt_id:	9
 	tfd:        5 events:       1d data: ffffffffffffffff
 
 	where 'tfd' is a target file descriptor number in decimal form,
@@ -1718,6 +1724,7 @@ pair provide additional information particular to the objects they represent.
 
 	pos:	0
 	flags:	02
+	mnt_id:	9
 	fanotify flags:10 event-flags:0
 	fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
 	fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
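[Editor's note: a small hedged userspace sketch, not from the patch, that dumps the fdinfo record (pos, flags and the new mnt_id field) for a file this process has open; the example file path is illustrative.]

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/etc/hostname", "r");
	char path[64], line[256];
	FILE *info;

	if (!f)
		return 1;
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fileno(f));
	info = fopen(path, "r");
	if (!info)
		return 1;
	while (fgets(line, sizeof(line), info))
		fputs(line, stdout);	/* prints pos:, flags:, mnt_id: */
	fclose(info);
	fclose(f);
	return 0;
}
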
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
index 67aa71e73035..f6da05670e16 100644
--- a/Documentation/irqflags-tracing.txt
+++ b/Documentation/irqflags-tracing.txt
@@ -22,13 +22,6 @@ rather straightforward and risk-free manner.
 Architectures that want to support this need to do a couple of
 code-organizational changes first:
 
-- move their irq-flags manipulation code from their asm/system.h header
-  to asm/irqflags.h
-
-- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
-  the linux/irqflags.h code can inject callbacks and can construct the
-  real local_irq_disable()/etc APIs.
-
 - add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
 
 and then a couple of functional changes are needed as well to implement
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index c420676c6fe3..350f733bf2c7 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -157,6 +157,10 @@ applicable everywhere (see syntax).
   to the build environment (if this is desired, it can be done via
   another symbol).
 
+  - "allnoconfig_y"
+    This declares the symbol as one that should have the value y when
+    using "allnoconfig". Used for symbols that hide other symbols.
+
 Menu dependencies
 -----------------
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bc3478581f67..b6c67d592be5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -884,6 +884,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Enable debug messages at boot time.  See
 			Documentation/dynamic-debug-howto.txt for details.
 
+	early_ioremap_debug [KNL]
+			Enable debug messages in early_ioremap support. This
+			is useful for tracking down temporary early mappings
+			which are not unmapped.
+
 	earlycon=	[KNL] Output early console device and options.
 			uart[8250],io,<addr>[,options]
 			uart[8250],mmio,<addr>[,options]
diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt
index 271438c0617f..47ce9a5336e1 100644
--- a/Documentation/rapidio/sysfs.txt
+++ b/Documentation/rapidio/sysfs.txt
@@ -2,8 +2,8 @@
 
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-1. Device Subdirectories
-------------------------
+1. RapidIO Device Subdirectories
+--------------------------------
 
 For each RapidIO device, the RapidIO subsystem creates files in an individual
 subdirectory with the following name, /sys/bus/rapidio/devices/<device_name>.
@@ -25,8 +25,8 @@ seen by the enumerating host (destID = 1):
 NOTE: An enumerating or discovering endpoint does not create a sysfs entry for
 itself, this is why an endpoint with destID=1 is not shown in the list.
 
-2. Attributes Common for All Devices
-------------------------------------
+2. Attributes Common for All RapidIO Devices
+--------------------------------------------
 
 Each device subdirectory contains the following informational read-only files:
 
@@ -52,16 +52,16 @@ This attribute is similar in behavior to the "config" attribute of PCI devices
 and provides an access to the RapidIO device registers using standard file read
 and write operations.
 
-3. Endpoint Device Attributes
------------------------------
+3. RapidIO Endpoint Device Attributes
+-------------------------------------
 
 Currently Linux RapidIO subsystem does not create any endpoint specific sysfs
 attributes. It is possible that RapidIO master port drivers and endpoint device
 drivers will add their device-specific sysfs attributes but such attributes are
 outside the scope of this document.
 
-4. Switch Device Attributes
----------------------------
+4. RapidIO Switch Device Attributes
+-----------------------------------
 
 RapidIO switches have additional attributes in sysfs. RapidIO subsystem supports
 common and device-specific sysfs attributes for switches. Because switches are
@@ -106,3 +106,53 @@ attribute:
 	    for that controller always will be 0.
 	    To initiate RapidIO enumeration/discovery on all available mports
 	    a user must write '-1' (or RIO_MPORT_ANY) into this attribute file.
+
+
+6. RapidIO Bus Controllers/Ports
+--------------------------------
+
+On-chip RapidIO controllers and PCIe-to-RapidIO bridges (referenced as
+"Master Port" or "mport") are presented in sysfs as the special class of
+devices: "rapidio_port".
+
+The /sys/class/rapidio_port subdirectory contains individual subdirectories
+named as "rapidioN" where N = mport ID registered with RapidIO subsystem.
+
+NOTE: An mport ID is not a RapidIO destination ID assigned to a given local
+mport device.
+
+Each mport device subdirectory in addition to standard entries contains the
+following device-specific attributes:
+
+	port_destid - reports RapidIO destination ID assigned to the given RapidIO
+		      mport device. If value 0xFFFFFFFF is returned this means that
+		      no valid destination ID have been assigned to the mport (yet).
+		      Normally, before enumeration/discovery have been executed only
+		      fabric enumerating mports have a valid destination ID assigned
+		      to them using "hdid=..." rapidio module parameter.
+	sys_size -    reports RapidIO common transport system size:
+		      0 = small (8-bit destination ID, max. 256 devices),
+		      1 = large (16-bit destination ID, max. 65536 devices).
+
+After enumeration or discovery was performed for a given mport device,
+the corresponding subdirectory will also contain subdirectories for each
+child RapidIO device connected to the mport. Naming conventions for RapidIO
+devices are described in Section 1 above.
+
+The example below shows mport device subdirectory with several child RapidIO
+devices attached to it.
+
+[rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l
+total 0
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:e:0001
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:e:0004
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:e:0007
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:s:0002
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:s:0003
+drwxr-xr-x 3 root root    0 Feb 11 15:10 00:s:0005
+lrwxrwxrwx 1 root root    0 Feb 11 15:11 device -> ../../../0000:01:00.0
+-r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid
+drwxr-xr-x 2 root root    0 Feb 11 15:11 power
+lrwxrwxrwx 1 root root    0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port
+-r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size
+-rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 9290de703450..a2f27bbf2cba 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@ Context switch
 By default, the switch_to arch function is called with the runqueue
 locked. This is usually not a problem unless switch_to may need to
 take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See arch/ia64/include/asm/system.h for an example.
+the context switch. See arch/ia64/include/asm/switch_to.h for an example.
 
 To request the scheduler call switch_to with the runqueue unlocked,
 you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 271a09db6629..9886c3d57fc2 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -317,6 +317,7 @@ for more than this value report a warning.
 This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
 
 0: means infinite timeout - no checking done.
+Possible values to set are in range {0..LONG_MAX/HZ}.
 
 ==============================================================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index bfff89690c3a..d0b8afe26123 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4542,8 +4542,7 @@ K: \b(ABS|SYN)_MT_
 
 INTEL C600 SERIES SAS CONTROLLER DRIVER
 M:	Intel SCU Linux support <intel-linux-scu@intel.com>
-M:	Lukasz Dorau <lukasz.dorau@intel.com>
-M:	Maciej Patelczyk <maciej.patelczyk@intel.com>
+M:	Artur Paszkiewicz <artur.paszkiewicz@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-scsi@vger.kernel.org
 T:	git git://git.code.sf.net/p/intel-sas/isci
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 75de197a2fef..9596b0ab108d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -57,7 +57,7 @@ config ARCH_FLATMEM_ENABLE
 config MMU
 	def_bool y
 
-config NO_IOPORT
+config NO_IOPORT_MAP
 	def_bool y
 
 config GENERIC_CALIBRATE_DELAY
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d7a71e3ef55f..5db05f6a0412 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -126,7 +126,7 @@ config HAVE_TCM
 config HAVE_PROC_CPU
 	bool
 
-config NO_IOPORT
+config NO_IOPORT_MAP
 	bool
 
 config EISA
@@ -410,7 +410,7 @@ config ARCH_EBSA110
 	select ISA
 	select NEED_MACH_IO_H
 	select NEED_MACH_MEMORY_H
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	help
 	  This is an evaluation board for the StrongARM processor available
 	  from Digital. It has limited hardware on-board, including an
@@ -428,7 +428,7 @@ config ARCH_EFM32
 	select CPU_V7M
 	select GENERIC_CLOCKEVENTS
 	select NO_DMA
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select SPARSE_IRQ
 	select USE_OF
 	help
@@ -677,7 +677,7 @@ config ARCH_SHMOBILE_LEGACY
 	select HAVE_SMP
 	select MIGHT_HAVE_CACHE_L2X0
 	select MULTI_IRQ_HANDLER
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PINCTRL
 	select PM_GENERIC_DOMAINS if PM
 	select SPARSE_IRQ
@@ -699,7 +699,7 @@ config ARCH_RPC
 	select ISA_DMA_API
 	select NEED_MACH_IO_H
 	select NEED_MACH_MEMORY_H
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select VIRT_TO_BUS
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
@@ -760,7 +760,7 @@ config ARCH_S3C64XX
 	select HAVE_S3C2410_I2C if I2C
 	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	select HAVE_TCM
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PLAT_SAMSUNG
 	select PM_GENERIC_DOMAINS if PM
 	select S3C_DEV_NAND
diff --git a/arch/arm/mach-picoxcell/Kconfig b/arch/arm/mach-picoxcell/Kconfig
index eca9eb1c5931..62240f69b4ee 100644
--- a/arch/arm/mach-picoxcell/Kconfig
+++ b/arch/arm/mach-picoxcell/Kconfig
@@ -4,4 +4,4 @@ config ARCH_PICOXCELL
 	select ARM_VIC
 	select DW_APB_TIMER_OF
 	select HAVE_TCM
-	select NO_IOPORT
+	select NO_IOPORT_MAP
diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index 3e8189186a5b..e4e505f52ba0 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -3,7 +3,7 @@ config ARCH_SIRF
 	select ARCH_HAS_RESET_CONTROLLER
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_IRQ_CHIP
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PINCTRL
 	select PINCTRL_SIRF
 	help
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index ba1cc6246778..40cf50b9940c 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -12,7 +12,7 @@ if ARCH_S3C24XX
 config PLAT_S3C24XX
 	def_bool y
 	select ARCH_REQUIRE_GPIOLIB
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select S3C_DEV_NAND
 	select IRQ_DOMAIN
 	help
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index a182008e3aeb..0f92ba8e7884 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -10,7 +10,7 @@ config ARCH_SHMOBILE_MULTI
 	select ARM_GIC
 	select MIGHT_HAVE_PCI
 	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PINCTRL
 	select ARCH_REQUIRE_GPIOLIB
 
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 80b4be36f10a..657d52d0391f 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -10,7 +10,7 @@ config ARCH_VEXPRESS
 	select HAVE_ARM_TWD if SMP
 	select HAVE_PATA_PLATFORM
 	select ICST
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PLAT_VERSATILE
 	select PLAT_VERSATILE_CLCD
 	select POWER_RESET
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index b57e922f1614..243dfcb2ca0e 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -9,7 +9,7 @@ config PLAT_SAMSUNG
 	depends on PLAT_S3C24XX || ARCH_S3C64XX || PLAT_S5P || ARCH_EXYNOS
 	default y
 	select GENERIC_IRQ_CHIP
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	help
 	  Base platform code for all Samsung SoC based systems
 
@@ -19,7 +19,7 @@ config PLAT_S5P
 	default y
 	select ARCH_REQUIRE_GPIOLIB
 	select ARM_VIC
-	select NO_IOPORT
+	select NO_IOPORT_MAP
 	select PLAT_SAMSUNG
 	select S3C_GPIO_TRACK
 	select S5P_GPIO_DRVSTR
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9711a5fd948d..e6e4d3749a6e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@ config ARM64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
@@ -66,7 +67,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 config MMU
 	def_bool y
 
-config NO_IOPORT
+config NO_IOPORT_MAP
 	def_bool y
 
 config STACKTRACE_SUPPORT
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 4bca4923fc0b..83f71b3004a8 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -10,6 +10,7 @@ generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
+generic-y += early_ioremap.h
 generic-y += errno.h
 generic-y += ftrace.h
 generic-y += hash.h
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 000000000000..5f7bfe6df723
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,67 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86_64 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized. Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ */
+enum fixed_addresses {
+	FIX_EARLYCON_MEM_BASE,
+	__end_of_permanent_fixed_addresses,
+
+	/*
+	 * Temporary boot-time mappings, used by early_ioremap(),
+	 * before ioremap() is functional.
+	 */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define NR_FIX_BTMAPS		4
+#else
+#define NR_FIX_BTMAPS		64
+#endif
+#define FIX_BTMAPS_SLOTS	7
+#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+	__end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
+
+#define FIXMAP_PAGE_IO	__pgprot(PROT_DEVICE_nGnRE)
+
+extern void __early_set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags);
+
+#define __set_fixmap __early_set_fixmap
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
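[Editor's note: a hedged sketch of consuming one of the fixmap slots declared above. It mirrors the early console change made later in this series (set_fixmap_io() comes from asm-generic/fixmap.h); the UART address parameter is illustrative.]

#include <asm/fixmap.h>
#include <asm/io.h>

static void __iomem *map_early_uart(phys_addr_t uart_phys)
{
	/* Bind the fixed virtual slot to the device's physical page... */
	set_fixmap_io(FIX_EARLYCON_MEM_BASE, uart_phys);

	/* ...then use the compile-time-known virtual address. */
	return (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
}
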
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 7846a6bb0833..a1bef78f0303 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -27,6 +27,7 @@
 #include <asm/byteorder.h>
 #include <asm/barrier.h>
 #include <asm/pgtable.h>
+#include <asm/early_ioremap.h>
 
 #include <xen/xen.h>
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 9dc5dc39fded..e94f9458aa6f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -49,7 +49,7 @@
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
 #define MODULES_END		(PAGE_OFFSET)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
-#define EARLYCON_IOBASE		(MODULES_VADDR - SZ_4M)
+#define FIXADDR_TOP		(MODULES_VADDR - SZ_2M - PAGE_SIZE)
 #define TASK_SIZE_64		(UL(1) << VA_BITS)
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2494fc01896a..f600d400c07d 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -27,5 +27,6 @@ typedef struct {
 extern void paging_init(void);
 extern void setup_mm_for_reboot(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void init_mem_pgprot(void);
 
 #endif
diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c
index fbb6e1843659..ffbbdde7aba1 100644
--- a/arch/arm64/kernel/early_printk.c
+++ b/arch/arm64/kernel/early_printk.c
@@ -26,6 +26,8 @@
 #include <linux/amba/serial.h>
 #include <linux/serial_reg.h>
 
+#include <asm/fixmap.h>
+
 static void __iomem *early_base;
 static void (*printch)(char ch);
 
@@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf)
 	}
 	/* no options parsing yet */
 
-	if (paddr)
-		early_base = early_io_map(paddr, EARLYCON_IOBASE);
+	if (paddr) {
+		set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr);
+		early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
+	}
 
 	printch = match->printch;
 	early_console = &early_console_dev;
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 61035d6814cb..1fe5d8d2bdfd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -404,7 +404,7 @@ ENDPROC(__calc_phys_offset)
  * - identity mapping to enable the MMU (low address, TTBR0)
  * - first few MB of the kernel linear mapping to jump to once the MMU has
  *   been enabled, including the FDT blob (TTBR1)
- * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
  */
 __create_page_tables:
 	pgtbl	x25, x26, x24			// idmap_pg_dir and swapper_pg_dir addresses
@@ -461,15 +461,12 @@ __create_page_tables:
 	sub	x6, x6, #1			// inclusive range
 	create_block_map x0, x7, x3, x5, x6
 1:
-#ifdef CONFIG_EARLY_PRINTK
 	/*
-	 * Create the pgd entry for the UART mapping. The full mapping is done
-	 * later based earlyprintk kernel parameter.
+	 * Create the pgd entry for the fixed mappings.
 	 */
-	ldr	x5, =EARLYCON_IOBASE		// UART virtual address
+	ldr	x5, =FIXADDR_TOP		// Fixed mapping virtual address
 	add	x0, x26, #2 * PAGE_SIZE		// section table address
 	create_pgd_entry x26, x0, x5, x6, x7
-#endif
 	ret
 ENDPROC(__create_page_tables)
 	.ltorg
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 67da30741a1b..720853f70b6b 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -42,6 +42,7 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
+#include <asm/fixmap.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
 #include <asm/cputable.h>
@@ -360,6 +361,9 @@ void __init setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
+	init_mem_pgprot();
+	early_ioremap_init();
+
 	parse_early_param();
 
 	arm64_memblock_init();
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 2bb1d586664c..7ec328392ae0 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -25,6 +25,10 @@
 #include <linux/vmalloc.h>
 #include <linux/io.h>
 
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
 static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
 				      pgprot_t prot, void *caller)
 {
@@ -98,3 +102,84 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
+
+#ifndef CONFIG_ARM64_64K_PAGES
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#endif
+
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+
+	pgd = pgd_offset_k(addr);
+	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+	pud = pud_offset(pgd, addr);
+	BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+	return pmd_offset(pud, addr);
+}
+
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+{
+	pmd_t *pmd = early_ioremap_pmd(addr);
+
+	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_ioremap_init(void)
+{
+	pmd_t *pmd;
+
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+#ifndef CONFIG_ARM64_64K_PAGES
+	/* need to populate pmd for 4k pagesize only */
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+#endif
+	/*
+	 * The boot-ioremap range spans multiple pmds, for which
+	 * we are not prepared:
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		pr_warn("pmd %p != %p\n",
+			pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n",
+			FIX_BTMAP_BEGIN);
+	}
+
+	early_ioremap_setup();
+}
+
+void __init __early_set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	pte = early_ioremap_pte(addr);
+
+	if (pgprot_val(flags))
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+	else {
+		pte_clear(&init_mm, addr, pte);
+		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+	}
+}
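[Editor's note: a hedged sketch of a boot-time consumer of the early_ioremap() interface that the generic mm/early_ioremap.c added by this series (and the arm64 glue above) provides. The device address and register offset are illustrative.]

#include <linux/io.h>
#include <linux/init.h>
#include <linux/sizes.h>

static u32 __init peek_boot_device(phys_addr_t phys)
{
	void __iomem *regs;
	u32 val;

	/* Temporary fixmap-backed mapping, usable before ioremap() works. */
	regs = early_ioremap(phys, SZ_4K);
	if (!regs)
		return 0;

	val = readl(regs + 0x04);	/* illustrative register offset */

	/* Early mappings must be torn down again; leftovers can be traced
	 * with the early_ioremap_debug command-line option documented above. */
	early_iounmap(regs, SZ_4K);
	return val;
}
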
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f8dc7e8fce6f..6b7e89569a3a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -125,7 +125,7 @@ early_param("cachepolicy", early_cachepolicy);
125/* 125/*
126 * Adjust the PMD section entries according to the CPU in use. 126 * Adjust the PMD section entries according to the CPU in use.
127 */ 127 */
128static void __init init_mem_pgprot(void) 128void __init init_mem_pgprot(void)
129{ 129{
130 pteval_t default_pgprot; 130 pteval_t default_pgprot;
131 int i; 131 int i;
@@ -260,47 +260,6 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
260 } while (pgd++, addr = next, addr != end); 260 } while (pgd++, addr = next, addr != end);
261} 261}
262 262
263#ifdef CONFIG_EARLY_PRINTK
264/*
265 * Create an early I/O mapping using the pgd/pmd entries already populated
266 * in head.S as this function is called too early to allocated any memory. The
267 * mapping size is 2MB with 4KB pages or 64KB or 64KB pages.
268 */
269void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
270{
271 unsigned long size, mask;
272 bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
273 pgd_t *pgd;
274 pud_t *pud;
275 pmd_t *pmd;
276 pte_t *pte;
277
278 /*
279 * There are no early pte entries with the !ARM64_64K_PAGES configuration,
280 * so use sections (pmd) instead.
281 */
282 size = page64k ? PAGE_SIZE : SECTION_SIZE;
283 mask = ~(size - 1);
284
285 pgd = pgd_offset_k(virt);
286 pud = pud_offset(pgd, virt);
287 if (pud_none(*pud))
288 return NULL;
289 pmd = pmd_offset(pud, virt);
290
291 if (page64k) {
292 if (pmd_none(*pmd))
293 return NULL;
294 pte = pte_offset_kernel(pmd, virt);
295 set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
296 } else {
297 set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
298 }
299
300 return (void __iomem *)((virt & mask) + (phys & ~mask));
301}
302#endif
303
304static void __init map_mem(void) 263static void __init map_mem(void)
305{ 264{
306 struct memblock_region *reg; 265 struct memblock_region *reg;
@@ -357,7 +316,6 @@ void __init paging_init(void)
357{ 316{
358 void *zero_page; 317 void *zero_page;
359 318
360 init_mem_pgprot();
361 map_mem(); 319 map_mem();
362 320
363 /* 321 /*
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index ed0fcdf7e990..52731e221851 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -29,7 +29,7 @@ config GENERIC_CALIBRATE_DELAY
29 bool 29 bool
30 default y 30 default y
31 31
32config NO_IOPORT 32config NO_IOPORT_MAP
33 def_bool y 33 def_bool y
34 34
35config FORCE_MAX_ZONEORDER 35config FORCE_MAX_ZONEORDER
@@ -138,6 +138,7 @@ config ETRAX_ARCH_V10
138 bool 138 bool
139 default y if ETRAX100LX || ETRAX100LX_V2 139 default y if ETRAX100LX || ETRAX100LX_V2
140 default n if !(ETRAX100LX || ETRAX100LX_V2) 140 default n if !(ETRAX100LX || ETRAX100LX_V2)
141 select TTY
141 142
142config ETRAX_ARCH_V32 143config ETRAX_ARCH_V32
143 bool 144 bool
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 32c3d248868e..905b70ea9939 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -165,6 +165,7 @@ void __init setup_arch(char **cmdline_p)
165 strcpy(init_utsname()->machine, cris_machine_name); 165 strcpy(init_utsname()->machine, cris_machine_name);
166} 166}
167 167
168#ifdef CONFIG_PROC_FS
168static void *c_start(struct seq_file *m, loff_t *pos) 169static void *c_start(struct seq_file *m, loff_t *pos)
169{ 170{
170 return *pos < nr_cpu_ids ? (void *)(int)(*pos + 1) : NULL; 171 return *pos < nr_cpu_ids ? (void *)(int)(*pos + 1) : NULL;
@@ -188,6 +189,7 @@ const struct seq_operations cpuinfo_op = {
188 .stop = c_stop, 189 .stop = c_stop,
189 .show = show_cpuinfo, 190 .show = show_cpuinfo,
190}; 191};
192#endif /* CONFIG_PROC_FS */
191 193
192static int __init topology_init(void) 194static int __init topology_init(void)
193{ 195{
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index fbc5c78c9ac7..0fd6138f6203 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -19,7 +19,7 @@ config HEXAGON
19 select GENERIC_IRQ_SHOW 19 select GENERIC_IRQ_SHOW
20 select HAVE_ARCH_KGDB 20 select HAVE_ARCH_KGDB
21 select HAVE_ARCH_TRACEHOOK 21 select HAVE_ARCH_TRACEHOOK
22 select NO_IOPORT 22 select NO_IOPORT_MAP
23 select GENERIC_IOMAP 23 select GENERIC_IOMAP
24 select GENERIC_SMP_IDLE_THREAD 24 select GENERIC_SMP_IDLE_THREAD
25 select STACKTRACE_SUPPORT 25 select STACKTRACE_SUPPORT
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0c8e553e0b9f..1325c3bc58e1 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -21,6 +21,7 @@ config IA64
21 select HAVE_FUNCTION_TRACER 21 select HAVE_FUNCTION_TRACER
22 select HAVE_DMA_ATTRS 22 select HAVE_DMA_ATTRS
23 select HAVE_KVM 23 select HAVE_KVM
24 select TTY
24 select HAVE_ARCH_TRACEHOOK 25 select HAVE_ARCH_TRACEHOOK
25 select HAVE_DMA_API_DEBUG 26 select HAVE_DMA_API_DEBUG
26 select HAVE_MEMBLOCK 27 select HAVE_MEMBLOCK
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index ca4504424dae..9e44bbd8051e 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -28,7 +28,7 @@ config ZONE_DMA
28 bool 28 bool
29 default y 29 default y
30 30
31config NO_IOPORT 31config NO_IOPORT_MAP
32 def_bool y 32 def_bool y
33 33
34config NO_DMA 34config NO_DMA
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b2e322939256..87b7c7581b1d 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -52,7 +52,7 @@ config TIME_LOW_RES
52 bool 52 bool
53 default y 53 default y
54 54
55config NO_IOPORT 55config NO_IOPORT_MAP
56 def_bool y 56 def_bool y
57 57
58config NO_DMA 58config NO_DMA
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index b1d3c9c0eff8..499b7610eaaf 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -52,7 +52,7 @@ config GENERIC_HWEIGHT
52config GENERIC_CALIBRATE_DELAY 52config GENERIC_CALIBRATE_DELAY
53 def_bool y 53 def_bool y
54 54
55config NO_IOPORT 55config NO_IOPORT_MAP
56 def_bool y 56 def_bool y
57 57
58source "init/Kconfig" 58source "init/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 16d5ab1615b1..5cd695f905a1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -175,7 +175,7 @@ config MACH_DECSTATION
175 select CPU_R4000_WORKAROUNDS if 64BIT 175 select CPU_R4000_WORKAROUNDS if 64BIT
176 select CPU_R4400_WORKAROUNDS if 64BIT 176 select CPU_R4400_WORKAROUNDS if 64BIT
177 select DMA_NONCOHERENT 177 select DMA_NONCOHERENT
178 select NO_IOPORT 178 select NO_IOPORT_MAP
179 select IRQ_CPU 179 select IRQ_CPU
180 select SYS_HAS_CPU_R3000 180 select SYS_HAS_CPU_R3000
181 select SYS_HAS_CPU_R4X00 181 select SYS_HAS_CPU_R4X00
@@ -947,7 +947,7 @@ config SYNC_R4K
947config MIPS_MACHINE 947config MIPS_MACHINE
948 def_bool n 948 def_bool n
949 949
950config NO_IOPORT 950config NO_IOPORT_MAP
951 def_bool n 951 def_bool n
952 952
953config GENERIC_ISA_DMA 953config GENERIC_ISA_DMA
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 9488209a5253..e71d712afb79 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -41,7 +41,7 @@ config RWSEM_XCHGADD_ALGORITHM
41config GENERIC_HWEIGHT 41config GENERIC_HWEIGHT
42 def_bool y 42 def_bool y
43 43
44config NO_IOPORT 44config NO_IOPORT_MAP
45 def_bool y 45 def_bool y
46 46
47config TRACE_IRQFLAGS_SUPPORT 47config TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 88dbf9659185..a6774560afe3 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -210,7 +210,6 @@ extern int is_fadump_active(void);
210extern void crash_fadump(struct pt_regs *, const char *); 210extern void crash_fadump(struct pt_regs *, const char *);
211extern void fadump_cleanup(void); 211extern void fadump_cleanup(void);
212 212
213extern void vmcore_cleanup(void);
214#else /* CONFIG_FA_DUMP */ 213#else /* CONFIG_FA_DUMP */
215static inline int is_fadump_active(void) { return 0; } 214static inline int is_fadump_active(void) { return 0; }
216static inline void crash_fadump(struct pt_regs *regs, const char *str) { } 215static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 434fda39bf8b..d9e2b19b7c8d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@ config PPC_BOOK3S_64
73 select SYS_SUPPORTS_HUGETLBFS 73 select SYS_SUPPORTS_HUGETLBFS
74 select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES 74 select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
75 select ARCH_SUPPORTS_NUMA_BALANCING 75 select ARCH_SUPPORTS_NUMA_BALANCING
76 select IRQ_WORK
76 77
77config PPC_BOOK3E_64 78config PPC_BOOK3E_64
78 bool "Embedded processors" 79 bool "Embedded processors"
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 95dd892e9904..cf2b0840a672 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -531,6 +531,7 @@ int fsl_rio_setup(struct platform_device *dev)
531 sprintf(port->name, "RIO mport %d", i); 531 sprintf(port->name, "RIO mport %d", i);
532 532
533 priv->dev = &dev->dev; 533 priv->dev = &dev->dev;
534 port->dev.parent = &dev->dev;
534 port->ops = ops; 535 port->ops = ops;
535 port->priv = priv; 536 port->priv = priv;
536 port->phys_efptr = 0x100; 537 port->phys_efptr = 0x100;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 953f17c8d17c..346d21678ffd 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,7 +52,7 @@ config KEXEC
52config AUDIT_ARCH 52config AUDIT_ARCH
53 def_bool y 53 def_bool y
54 54
55config NO_IOPORT 55config NO_IOPORT_MAP
56 def_bool y 56 def_bool y
57 57
58config PCI_QUIRKS 58config PCI_QUIRKS
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 796c9320c709..5d8324cd866b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -505,6 +505,9 @@ static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
505 if (!pmd_present(*pmd) && 505 if (!pmd_present(*pmd) &&
506 __pte_alloc(mm, vma, pmd, vmaddr)) 506 __pte_alloc(mm, vma, pmd, vmaddr))
507 return -ENOMEM; 507 return -ENOMEM;
508 /* large pmds cannot yet be handled */
509 if (pmd_large(*pmd))
510 return -EFAULT;
508 /* pmd now points to a valid segment table entry. */ 511 /* pmd now points to a valid segment table entry. */
509 rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); 512 rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
510 if (!rmap) 513 if (!rmap)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1399383315a3..ba55e939a820 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -3,7 +3,7 @@ config SUPERH
3 select ARCH_MIGHT_HAVE_PC_PARPORT 3 select ARCH_MIGHT_HAVE_PC_PARPORT
4 select EXPERT 4 select EXPERT
5 select CLKDEV_LOOKUP 5 select CLKDEV_LOOKUP
6 select HAVE_IDE if HAS_IOPORT 6 select HAVE_IDE if HAS_IOPORT_MAP
7 select HAVE_MEMBLOCK 7 select HAVE_MEMBLOCK
8 select HAVE_MEMBLOCK_NODE_MAP 8 select HAVE_MEMBLOCK_NODE_MAP
9 select ARCH_DISCARD_MEMBLOCK 9 select ARCH_DISCARD_MEMBLOCK
@@ -138,7 +138,7 @@ config ARCH_HAS_ILOG2_U32
138config ARCH_HAS_ILOG2_U64 138config ARCH_HAS_ILOG2_U64
139 def_bool n 139 def_bool n
140 140
141config NO_IOPORT 141config NO_IOPORT_MAP
142 def_bool !PCI 142 def_bool !PCI
143 depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \ 143 depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \
144 !SH_HP6XX && !SH_SOLUTION_ENGINE 144 !SH_HP6XX && !SH_SOLUTION_ENGINE
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index eb1cf84231a2..e331e5373b8e 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -158,7 +158,7 @@ config SH_SDK7786
158 bool "SDK7786" 158 bool "SDK7786"
159 depends on CPU_SUBTYPE_SH7786 159 depends on CPU_SUBTYPE_SH7786
160 select SYS_SUPPORTS_PCI 160 select SYS_SUPPORTS_PCI
161 select NO_IOPORT if !PCI 161 select NO_IOPORT_MAP if !PCI
162 select ARCH_WANT_OPTIONAL_GPIOLIB 162 select ARCH_WANT_OPTIONAL_GPIOLIB
163 select HAVE_SRAM_POOL 163 select HAVE_SRAM_POOL
164 select REGULATOR_FIXED_VOLTAGE if REGULATOR 164 select REGULATOR_FIXED_VOLTAGE if REGULATOR
@@ -204,7 +204,7 @@ config SH_URQUELL
204 depends on CPU_SUBTYPE_SH7786 204 depends on CPU_SUBTYPE_SH7786
205 select ARCH_REQUIRE_GPIOLIB 205 select ARCH_REQUIRE_GPIOLIB
206 select SYS_SUPPORTS_PCI 206 select SYS_SUPPORTS_PCI
207 select NO_IOPORT if !PCI 207 select NO_IOPORT_MAP if !PCI
208 208
209config SH_MIGOR 209config SH_MIGOR
210 bool "Migo-R" 210 bool "Migo-R"
@@ -306,7 +306,7 @@ config SH_LBOX_RE2
306config SH_X3PROTO 306config SH_X3PROTO
307 bool "SH-X3 Prototype board" 307 bool "SH-X3 Prototype board"
308 depends on CPU_SUBTYPE_SHX3 308 depends on CPU_SUBTYPE_SHX3
309 select NO_IOPORT if !PCI 309 select NO_IOPORT_MAP if !PCI
310 select IRQ_DOMAIN 310 select IRQ_DOMAIN
311 311
312config SH_MAGIC_PANEL_R2 312config SH_MAGIC_PANEL_R2
@@ -333,7 +333,7 @@ config SH_POLARIS
333 333
334config SH_SH2007 334config SH_SH2007
335 bool "SH-2007 board" 335 bool "SH-2007 board"
336 select NO_IOPORT 336 select NO_IOPORT_MAP
337 select REGULATOR_FIXED_VOLTAGE if REGULATOR 337 select REGULATOR_FIXED_VOLTAGE if REGULATOR
338 depends on CPU_SUBTYPE_SH7780 338 depends on CPU_SUBTYPE_SH7780
339 help 339 help
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 629db2ad7916..728c4c571f40 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -122,7 +122,7 @@ __BUILD_MEMORY_STRING(__raw_, l, u32)
122 122
123__BUILD_MEMORY_STRING(__raw_, q, u64) 123__BUILD_MEMORY_STRING(__raw_, q, u64)
124 124
125#ifdef CONFIG_HAS_IOPORT 125#ifdef CONFIG_HAS_IOPORT_MAP
126 126
127/* 127/*
128 * Slowdown I/O port space accesses for antique hardware. 128 * Slowdown I/O port space accesses for antique hardware.
@@ -218,7 +218,7 @@ __BUILD_IOPORT_STRING(w, u16)
218__BUILD_IOPORT_STRING(l, u32) 218__BUILD_IOPORT_STRING(l, u32)
219__BUILD_IOPORT_STRING(q, u64) 219__BUILD_IOPORT_STRING(q, u64)
220 220
221#else /* !CONFIG_HAS_IOPORT */ 221#else /* !CONFIG_HAS_IOPORT_MAP */
222 222
223#include <asm/io_noioport.h> 223#include <asm/io_noioport.h>
224 224
diff --git a/arch/sh/include/asm/io_trapped.h b/arch/sh/include/asm/io_trapped.h
index f1251d4f0ba9..4ab94ef51071 100644
--- a/arch/sh/include/asm/io_trapped.h
+++ b/arch/sh/include/asm/io_trapped.h
@@ -36,7 +36,7 @@ __ioremap_trapped(unsigned long offset, unsigned long size)
36#define __ioremap_trapped(offset, size) NULL 36#define __ioremap_trapped(offset, size) NULL
37#endif 37#endif
38 38
39#ifdef CONFIG_HAS_IOPORT 39#ifdef CONFIG_HAS_IOPORT_MAP
40extern struct list_head trapped_io; 40extern struct list_head trapped_io;
41 41
42static inline void __iomem * 42static inline void __iomem *
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index eb9c20d971dd..d3324e4f372e 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -21,7 +21,7 @@ struct sh_machine_vector {
21 int (*mv_irq_demux)(int irq); 21 int (*mv_irq_demux)(int irq);
22 void (*mv_init_irq)(void); 22 void (*mv_init_irq)(void);
23 23
24#ifdef CONFIG_HAS_IOPORT 24#ifdef CONFIG_HAS_IOPORT_MAP
25 void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size); 25 void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size);
26 void (*mv_ioport_unmap)(void __iomem *); 26 void (*mv_ioport_unmap)(void __iomem *);
27#endif 27#endif
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 261c8bfd75ce..2ccf36c824c6 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -22,7 +22,7 @@ obj-y := debugtraps.o dma-nommu.o dumpstack.o \
22 22
23ifndef CONFIG_GENERIC_IOMAP 23ifndef CONFIG_GENERIC_IOMAP
24obj-y += iomap.o 24obj-y += iomap.o
25obj-$(CONFIG_HAS_IOPORT) += ioport.o 25obj-$(CONFIG_HAS_IOPORT_MAP) += ioport.o
26endif 26endif
27 27
28obj-$(CONFIG_SUPERH32) += sys_sh32.o 28obj-$(CONFIG_SUPERH32) += sys_sh32.o
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index c0a9761f2f8a..f8ce36286cea 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -22,7 +22,7 @@
22 22
23#define TRAPPED_PAGES_MAX 16 23#define TRAPPED_PAGES_MAX 16
24 24
25#ifdef CONFIG_HAS_IOPORT 25#ifdef CONFIG_HAS_IOPORT_MAP
26LIST_HEAD(trapped_io); 26LIST_HEAD(trapped_io);
27EXPORT_SYMBOL_GPL(trapped_io); 27EXPORT_SYMBOL_GPL(trapped_io);
28#endif 28#endif
@@ -90,7 +90,7 @@ int register_trapped_io(struct trapped_io *tiop)
90 tiop->magic = IO_TRAPPED_MAGIC; 90 tiop->magic = IO_TRAPPED_MAGIC;
91 INIT_LIST_HEAD(&tiop->list); 91 INIT_LIST_HEAD(&tiop->list);
92 spin_lock_irq(&trapped_lock); 92 spin_lock_irq(&trapped_lock);
93#ifdef CONFIG_HAS_IOPORT 93#ifdef CONFIG_HAS_IOPORT_MAP
94 if (flags & IORESOURCE_IO) 94 if (flags & IORESOURCE_IO)
95 list_add(&tiop->list, &trapped_io); 95 list_add(&tiop->list, &trapped_io);
96#endif 96#endif
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 31c8c6223995..85258ca43ff5 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -411,7 +411,7 @@ config PCI_DOMAINS
411config NO_IOMEM 411config NO_IOMEM
412 def_bool !PCI 412 def_bool !PCI
413 413
414config NO_IOPORT 414config NO_IOPORT_MAP
415 def_bool !PCI 415 def_bool !PCI
416 416
417config TILE_PCI_IO 417config TILE_PCI_IO
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index eecc4142764c..f17bca8ed2ce 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -359,7 +359,7 @@ int singlestepping(void * t)
359/* 359/*
360 * Only x86 and x86_64 have an arch_align_stack(). 360 * Only x86 and x86_64 have an arch_align_stack().
361 * All other arches have "#define arch_align_stack(x) (x)" 361 * All other arches have "#define arch_align_stack(x) (x)"
362 * in their asm/system.h 362 * in their asm/exec.h
363 * As this is included in UML from asm-um/system-generic.h, 363 * As this is included in UML from asm-um/system-generic.h,
364 * we can use it to behave as the subarch does. 364 * we can use it to behave as the subarch does.
365 */ 365 */
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 25c0dba508cc..aafad6fa1667 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -27,7 +27,7 @@ config UNICORE32
27config GENERIC_CSUM 27config GENERIC_CSUM
28 def_bool y 28 def_bool y
29 29
30config NO_IOPORT 30config NO_IOPORT_MAP
31 bool 31 bool
32 32
33config STACKTRACE_SUPPORT 33config STACKTRACE_SUPPORT
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c658f7a..ef470a7a3d0f 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -14,6 +14,8 @@
14 14
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/mm.h>
18#include <linux/vmacache.h>
17#include <linux/io.h> 19#include <linux/io.h>
18 20
19#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
@@ -73,7 +75,7 @@ do { \
73 else \ 75 else \
74 mm->mmap = NULL; \ 76 mm->mmap = NULL; \
75 rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ 77 rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
76 mm->mmap_cache = NULL; \ 78 vmacache_invalidate(mm); \
77 mm->map_count--; \ 79 mm->map_count--; \
78 remove_vma(high_vma); \ 80 remove_vma(high_vma); \
79 } \ 81 } \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f73071742975..5b8ec0f53b57 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -43,6 +43,7 @@ config X86
43 select HAVE_DMA_ATTRS 43 select HAVE_DMA_ATTRS
44 select HAVE_DMA_CONTIGUOUS if !SWIOTLB 44 select HAVE_DMA_CONTIGUOUS if !SWIOTLB
45 select HAVE_KRETPROBES 45 select HAVE_KRETPROBES
46 select GENERIC_EARLY_IOREMAP
46 select HAVE_OPTPROBES 47 select HAVE_OPTPROBES
47 select HAVE_KPROBES_ON_FTRACE 48 select HAVE_KPROBES_ON_FTRACE
48 select HAVE_FTRACE_MCOUNT_RECORD 49 select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4acddc43ee0c..3ca9762e1649 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,5 +5,6 @@ genhdr-y += unistd_64.h
5genhdr-y += unistd_x32.h 5genhdr-y += unistd_x32.h
6 6
7generic-y += clkdev.h 7generic-y += clkdev.h
8generic-y += early_ioremap.h
8generic-y += cputime.h 9generic-y += cputime.h
9generic-y += mcs_spinlock.h 10generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 2f03ff018d36..ba38ebbaced3 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,7 +1,6 @@
1#ifndef _ASM_X86_BUG_H 1#ifndef _ASM_X86_BUG_H
2#define _ASM_X86_BUG_H 2#define _ASM_X86_BUG_H
3 3
4#ifdef CONFIG_BUG
5#define HAVE_ARCH_BUG 4#define HAVE_ARCH_BUG
6 5
7#ifdef CONFIG_DEBUG_BUGVERBOSE 6#ifdef CONFIG_DEBUG_BUGVERBOSE
@@ -33,8 +32,6 @@ do { \
33} while (0) 32} while (0)
34#endif 33#endif
35 34
36#endif /* !CONFIG_BUG */
37
38#include <asm-generic/bug.h> 35#include <asm-generic/bug.h>
39 36
40#endif /* _ASM_X86_BUG_H */ 37#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8dcd35c4c787..43f482a0db37 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -163,5 +163,11 @@ static inline void __set_fixmap(enum fixed_addresses idx,
163 163
164#include <asm-generic/fixmap.h> 164#include <asm-generic/fixmap.h>
165 165
166#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
167#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0))
168
169void __early_set_fixmap(enum fixed_addresses idx,
170 phys_addr_t phys, pgprot_t flags);
171
166#endif /* !__ASSEMBLY__ */ 172#endif /* !__ASSEMBLY__ */
167#endif /* _ASM_X86_FIXMAP_H */ 173#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 91d9c69a629e..b8237d8a1e0c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -39,6 +39,7 @@
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/compiler.h> 40#include <linux/compiler.h>
41#include <asm/page.h> 41#include <asm/page.h>
42#include <asm/early_ioremap.h>
42 43
43#define build_mmio_read(name, size, type, reg, barrier) \ 44#define build_mmio_read(name, size, type, reg, barrier) \
44static inline type name(const volatile void __iomem *addr) \ 45static inline type name(const volatile void __iomem *addr) \
@@ -316,19 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
316 unsigned long prot_val); 317 unsigned long prot_val);
317extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); 318extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
318 319
319/*
320 * early_ioremap() and early_iounmap() are for temporary early boot-time
321 * mappings, before the real ioremap() is functional.
322 * A boot-time mapping is currently limited to at most 16 pages.
323 */
324extern void early_ioremap_init(void);
325extern void early_ioremap_reset(void);
326extern void __iomem *early_ioremap(resource_size_t phys_addr,
327 unsigned long size);
328extern void __iomem *early_memremap(resource_size_t phys_addr,
329 unsigned long size);
330extern void early_iounmap(void __iomem *addr, unsigned long size);
331extern void fixup_early_ioremap(void);
332extern bool is_early_ioremap_ptep(pte_t *ptep); 320extern bool is_early_ioremap_ptep(pte_t *ptep);
333 321
334#ifdef CONFIG_XEN 322#ifdef CONFIG_XEN
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 94220d14d5cc..851bcdc5db04 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -52,7 +52,7 @@
52 * Compared to the generic __my_cpu_offset version, the following 52 * Compared to the generic __my_cpu_offset version, the following
53 * saves one instruction and avoids clobbering a temp register. 53 * saves one instruction and avoids clobbering a temp register.
54 */ 54 */
55#define __this_cpu_ptr(ptr) \ 55#define raw_cpu_ptr(ptr) \
56({ \ 56({ \
57 unsigned long tcp_ptr__; \ 57 unsigned long tcp_ptr__; \
58 __verify_pcpu_ptr(ptr); \ 58 __verify_pcpu_ptr(ptr); \
@@ -362,25 +362,25 @@ do { \
362 */ 362 */
363#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) 363#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
364 364
365#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 365#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
366#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 366#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
367#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 367#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
368 368
369#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 369#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
370#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 370#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
371#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 371#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
372#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 372#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
373#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 373#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
374#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) 374#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
375#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 375#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
376#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 376#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
377#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 377#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
378#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) 378#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
379#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) 379#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
380#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) 380#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
381#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) 381#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
382#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) 382#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
383#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) 383#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
384 384
385#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 385#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
386#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 386#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -401,16 +401,16 @@ do { \
401#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) 401#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
402#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) 402#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
403 403
404#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 404#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
405#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 405#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
406#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 406#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
407#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 407#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
408#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 408#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
409#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 409#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
410 410
411#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 411#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
412#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 412#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
413#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 413#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
414#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 414#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
415#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 415#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
416#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 416#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
@@ -427,7 +427,7 @@ do { \
427 __ret; \ 427 __ret; \
428}) 428})
429 429
430#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 430#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
431#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 431#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
432#endif /* CONFIG_X86_CMPXCHG64 */ 432#endif /* CONFIG_X86_CMPXCHG64 */
433 433
@@ -436,22 +436,22 @@ do { \
436 * 32 bit must fall back to generic operations. 436 * 32 bit must fall back to generic operations.
437 */ 437 */
438#ifdef CONFIG_X86_64 438#ifdef CONFIG_X86_64
439#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 439#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
440#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 440#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
441#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 441#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
442#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 442#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
443#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 443#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
444#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 444#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
445#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 445#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
446#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 446#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
447 447
448#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 448#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
449#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 449#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
450#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 450#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
451#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 451#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
452#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 452#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
453#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 453#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
454#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 454#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
455#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 455#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
456 456
457/* 457/*
@@ -474,7 +474,7 @@ do { \
474 __ret; \ 474 __ret; \
475}) 475})
476 476
477#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 477#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
478#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 478#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
479 479
480#endif 480#endif
@@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
495 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; 495 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
496 496
497#ifdef CONFIG_X86_64 497#ifdef CONFIG_X86_64
498 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; 498 return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
499#else 499#else
500 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; 500 return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
501#endif 501#endif
502} 502}
503 503
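
The mechanical __this_cpu_* to raw_cpu_* rename above follows the generic per-cpu accessor split: raw_cpu_*() performs the operation without any preemption or debug checking, while this_cpu_*() stays safe to call from preemptible context. A hedged sketch of the intended usage (example_counter is a hypothetical per-cpu variable, not part of this patch):

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned int, example_counter);

static void count_event(void)
{
	/* this_cpu_*() may be called with preemption enabled; on x86 it
	 * compiles down to a single %gs-relative instruction. */
	this_cpu_inc(example_counter);
}

static void count_event_locked(void)
{
	/* raw_cpu_*() (formerly __this_cpu_*()) skips the checks entirely;
	 * the caller must already have ruled out migration, e.g. by
	 * disabling preemption. */
	preempt_disable();
	raw_cpu_add(example_counter, 2);
	preempt_enable();
}
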
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index c8b051933b1b..7024c12f7bfe 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count);
19 */ 19 */
20static __always_inline int preempt_count(void) 20static __always_inline int preempt_count(void)
21{ 21{
22 return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; 22 return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
23} 23}
24 24
25static __always_inline void preempt_count_set(int pc) 25static __always_inline void preempt_count_set(int pc)
26{ 26{
27 __this_cpu_write_4(__preempt_count, pc); 27 raw_cpu_write_4(__preempt_count, pc);
28} 28}
29 29
30/* 30/*
@@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc)
53 53
54static __always_inline void set_preempt_need_resched(void) 54static __always_inline void set_preempt_need_resched(void)
55{ 55{
56 __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); 56 raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
57} 57}
58 58
59static __always_inline void clear_preempt_need_resched(void) 59static __always_inline void clear_preempt_need_resched(void)
60{ 60{
61 __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); 61 raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
62} 62}
63 63
64static __always_inline bool test_preempt_need_resched(void) 64static __always_inline bool test_preempt_need_resched(void)
65{ 65{
66 return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); 66 return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
67} 67}
68 68
69/* 69/*
@@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void)
72 72
73static __always_inline void __preempt_count_add(int val) 73static __always_inline void __preempt_count_add(int val)
74{ 74{
75 __this_cpu_add_4(__preempt_count, val); 75 raw_cpu_add_4(__preempt_count, val);
76} 76}
77 77
78static __always_inline void __preempt_count_sub(int val) 78static __always_inline void __preempt_count_sub(int val)
79{ 79{
80 __this_cpu_add_4(__preempt_count, -val); 80 raw_cpu_add_4(__preempt_count, -val);
81} 81}
82 82
83/* 83/*
@@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
95 */ 95 */
96static __always_inline bool should_resched(void) 96static __always_inline bool should_resched(void)
97{ 97{
98 return unlikely(!__this_cpu_read_4(__preempt_count)); 98 return unlikely(!raw_cpu_read_4(__preempt_count));
99} 99}
100 100
101#ifdef CONFIG_PREEMPT 101#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 799580cabc78..597ac155c91c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -328,17 +328,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
328 return; 328 return;
329} 329}
330 330
331static int __initdata early_ioremap_debug;
332
333static int __init early_ioremap_debug_setup(char *str)
334{
335 early_ioremap_debug = 1;
336
337 return 0;
338}
339early_param("early_ioremap_debug", early_ioremap_debug_setup);
340
341static __initdata int after_paging_init;
342static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; 331static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
343 332
344static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) 333static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
@@ -362,18 +351,11 @@ bool __init is_early_ioremap_ptep(pte_t *ptep)
362 return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; 351 return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
363} 352}
364 353
365static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
366
367void __init early_ioremap_init(void) 354void __init early_ioremap_init(void)
368{ 355{
369 pmd_t *pmd; 356 pmd_t *pmd;
370 int i;
371 357
372 if (early_ioremap_debug) 358 early_ioremap_setup();
373 printk(KERN_INFO "early_ioremap_init()\n");
374
375 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
376 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
377 359
378 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 360 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
379 memset(bm_pte, 0, sizeof(bm_pte)); 361 memset(bm_pte, 0, sizeof(bm_pte));
@@ -402,13 +384,8 @@ void __init early_ioremap_init(void)
402 } 384 }
403} 385}
404 386
405void __init early_ioremap_reset(void) 387void __init __early_set_fixmap(enum fixed_addresses idx,
406{ 388 phys_addr_t phys, pgprot_t flags)
407 after_paging_init = 1;
408}
409
410static void __init __early_set_fixmap(enum fixed_addresses idx,
411 phys_addr_t phys, pgprot_t flags)
412{ 389{
413 unsigned long addr = __fix_to_virt(idx); 390 unsigned long addr = __fix_to_virt(idx);
414 pte_t *pte; 391 pte_t *pte;
@@ -425,198 +402,3 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
425 pte_clear(&init_mm, addr, pte); 402 pte_clear(&init_mm, addr, pte);
426 __flush_tlb_one(addr); 403 __flush_tlb_one(addr);
427} 404}
428
429static inline void __init early_set_fixmap(enum fixed_addresses idx,
430 phys_addr_t phys, pgprot_t prot)
431{
432 if (after_paging_init)
433 __set_fixmap(idx, phys, prot);
434 else
435 __early_set_fixmap(idx, phys, prot);
436}
437
438static inline void __init early_clear_fixmap(enum fixed_addresses idx)
439{
440 if (after_paging_init)
441 clear_fixmap(idx);
442 else
443 __early_set_fixmap(idx, 0, __pgprot(0));
444}
445
446static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
447static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
448
449void __init fixup_early_ioremap(void)
450{
451 int i;
452
453 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
454 if (prev_map[i]) {
455 WARN_ON(1);
456 break;
457 }
458 }
459
460 early_ioremap_init();
461}
462
463static int __init check_early_ioremap_leak(void)
464{
465 int count = 0;
466 int i;
467
468 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
469 if (prev_map[i])
470 count++;
471
472 if (!count)
473 return 0;
474 WARN(1, KERN_WARNING
475 "Debug warning: early ioremap leak of %d areas detected.\n",
476 count);
477 printk(KERN_WARNING
478 "please boot with early_ioremap_debug and report the dmesg.\n");
479
480 return 1;
481}
482late_initcall(check_early_ioremap_leak);
483
484static void __init __iomem *
485__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
486{
487 unsigned long offset;
488 resource_size_t last_addr;
489 unsigned int nrpages;
490 enum fixed_addresses idx;
491 int i, slot;
492
493 WARN_ON(system_state != SYSTEM_BOOTING);
494
495 slot = -1;
496 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
497 if (!prev_map[i]) {
498 slot = i;
499 break;
500 }
501 }
502
503 if (slot < 0) {
504 printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
505 __func__, (u64)phys_addr, size);
506 WARN_ON(1);
507 return NULL;
508 }
509
510 if (early_ioremap_debug) {
511 printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
512 __func__, (u64)phys_addr, size, slot);
513 dump_stack();
514 }
515
516 /* Don't allow wraparound or zero size */
517 last_addr = phys_addr + size - 1;
518 if (!size || last_addr < phys_addr) {
519 WARN_ON(1);
520 return NULL;
521 }
522
523 prev_size[slot] = size;
524 /*
525 * Mappings have to be page-aligned
526 */
527 offset = phys_addr & ~PAGE_MASK;
528 phys_addr &= PAGE_MASK;
529 size = PAGE_ALIGN(last_addr + 1) - phys_addr;
530
531 /*
532 * Mappings have to fit in the FIX_BTMAP area.
533 */
534 nrpages = size >> PAGE_SHIFT;
535 if (nrpages > NR_FIX_BTMAPS) {
536 WARN_ON(1);
537 return NULL;
538 }
539
540 /*
541 * Ok, go for it..
542 */
543 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
544 while (nrpages > 0) {
545 early_set_fixmap(idx, phys_addr, prot);
546 phys_addr += PAGE_SIZE;
547 --idx;
548 --nrpages;
549 }
550 if (early_ioremap_debug)
551 printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
552
553 prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
554 return prev_map[slot];
555}
556
557/* Remap an IO device */
558void __init __iomem *
559early_ioremap(resource_size_t phys_addr, unsigned long size)
560{
561 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
562}
563
564/* Remap memory */
565void __init __iomem *
566early_memremap(resource_size_t phys_addr, unsigned long size)
567{
568 return __early_ioremap(phys_addr, size, PAGE_KERNEL);
569}
570
571void __init early_iounmap(void __iomem *addr, unsigned long size)
572{
573 unsigned long virt_addr;
574 unsigned long offset;
575 unsigned int nrpages;
576 enum fixed_addresses idx;
577 int i, slot;
578
579 slot = -1;
580 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
581 if (prev_map[i] == addr) {
582 slot = i;
583 break;
584 }
585 }
586
587 if (slot < 0) {
588 printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n",
589 addr, size);
590 WARN_ON(1);
591 return;
592 }
593
594 if (prev_size[slot] != size) {
595 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
596 addr, size, slot, prev_size[slot]);
597 WARN_ON(1);
598 return;
599 }
600
601 if (early_ioremap_debug) {
602 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
603 size, slot);
604 dump_stack();
605 }
606
607 virt_addr = (unsigned long)addr;
608 if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
609 WARN_ON(1);
610 return;
611 }
612 offset = virt_addr & ~PAGE_MASK;
613 nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
614
615 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
616 while (nrpages > 0) {
617 early_clear_fixmap(idx);
618 --idx;
619 --nrpages;
620 }
621 prev_map[slot] = NULL;
622}
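
The block removed above is the x86-private implementation of the boot-time mapping slots; the generic early_ioremap code that replaces it (selected through GENERIC_EARLY_IOREMAP) keeps the same FIX_BTMAP slot scheme. As a reading aid, a small sketch of the slot arithmetic the removed code relied on (example_slot_base() is illustrative, not part of the patch):

#include <asm/fixmap.h>

/* first virtual address of boot-ioremap slot 'slot'; mirrors the removed
 * slot_virt[] initialisation and the idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot
 * computation in __early_ioremap() */
static inline unsigned long example_slot_base(int slot)
{
	return __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS * slot);
}

Each slot covers NR_FIX_BTMAPS pages; the mapping loop walks the fixmap indices downwards (--idx) while the physical address advances, so the pages of one mapping end up virtually contiguous.
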
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index a69bcb8c7621..4dd8cf652579 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -127,7 +127,7 @@ static int __init parse_reservetop(char *arg)
127 127
128 address = memparse(arg, &arg); 128 address = memparse(arg, &arg);
129 reserve_top_address(address); 129 reserve_top_address(address);
130 fixup_early_ioremap(); 130 early_ioremap_init();
131 return 0; 131 return 0;
132} 132}
133early_param("reservetop", parse_reservetop); 133early_param("reservetop", parse_reservetop);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index c87ae7c6e5f9..02d6d29a63c1 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -41,7 +41,7 @@ config ARCH_HAS_ILOG2_U32
41config ARCH_HAS_ILOG2_U64 41config ARCH_HAS_ILOG2_U64
42 def_bool n 42 def_bool n
43 43
44config NO_IOPORT 44config NO_IOPORT_MAP
45 def_bool n 45 def_bool n
46 46
47config HZ 47config HZ
@@ -239,7 +239,7 @@ config XTENSA_PLATFORM_XT2000
239config XTENSA_PLATFORM_S6105 239config XTENSA_PLATFORM_S6105
240 bool "S6105" 240 bool "S6105"
241 select SERIAL_CONSOLE 241 select SERIAL_CONSOLE
242 select NO_IOPORT 242 select NO_IOPORT_MAP
243 243
244config XTENSA_PLATFORM_XTFPGA 244config XTENSA_PLATFORM_XTFPGA
245 bool "XTFPGA" 245 bool "XTFPGA"
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index 4f233204faf9..d57d917ff240 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -11,7 +11,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y
11CONFIG_GENERIC_HWEIGHT=y 11CONFIG_GENERIC_HWEIGHT=y
12# CONFIG_ARCH_HAS_ILOG2_U32 is not set 12# CONFIG_ARCH_HAS_ILOG2_U32 is not set
13# CONFIG_ARCH_HAS_ILOG2_U64 is not set 13# CONFIG_ARCH_HAS_ILOG2_U64 is not set
14CONFIG_NO_IOPORT=y 14CONFIG_NO_IOPORT_MAP=y
15CONFIG_HZ=100 15CONFIG_HZ=100
16CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 16CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
17CONFIG_CONSTRUCTORS=y 17CONFIG_CONSTRUCTORS=y
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
index d929f77a0360..583c2b0974ca 100644
--- a/arch/xtensa/configs/s6105_defconfig
+++ b/arch/xtensa/configs/s6105_defconfig
@@ -11,7 +11,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y
11CONFIG_GENERIC_HWEIGHT=y 11CONFIG_GENERIC_HWEIGHT=y
12# CONFIG_ARCH_HAS_ILOG2_U32 is not set 12# CONFIG_ARCH_HAS_ILOG2_U32 is not set
13# CONFIG_ARCH_HAS_ILOG2_U64 is not set 13# CONFIG_ARCH_HAS_ILOG2_U64 is not set
14CONFIG_NO_IOPORT=y 14CONFIG_NO_IOPORT_MAP=y
15CONFIG_HZ=100 15CONFIG_HZ=100
16CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 16CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
17 17
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 3450be850399..6489c0fd0ea6 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -15,6 +15,16 @@ config ZRAM
15 15
16 See zram.txt for more information. 16 See zram.txt for more information.
17 17
18config ZRAM_LZ4_COMPRESS
19 bool "Enable LZ4 algorithm support"
20 depends on ZRAM
21 select LZ4_COMPRESS
22 select LZ4_DECOMPRESS
23 default n
24 help
25 This option enables support for the LZ4 compression algorithm. The
26 compression algorithm can be changed via the 'comp_algorithm' device attribute.
27
18config ZRAM_DEBUG 28config ZRAM_DEBUG
19 bool "Compressed RAM block device debug support" 29 bool "Compressed RAM block device debug support"
20 depends on ZRAM 30 depends on ZRAM
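
The 'comp_algorithm' attribute mentioned in the help text is a per-device sysfs file. A hedged user-space sketch of switching a device to LZ4, assuming a device named zram0 that has not been initialised yet (the algorithm can only be changed before the disk size is set):

#include <stdio.h>

int main(void)
{
	/* path for the first zram device */
	FILE *f = fopen("/sys/block/zram0/comp_algorithm", "w");

	if (!f) {
		perror("comp_algorithm");
		return 1;
	}
	/* select the LZ4 backend built by CONFIG_ZRAM_LZ4_COMPRESS */
	fputs("lz4\n", f);
	return fclose(f) ? 1 : 0;
}
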
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
index cb0f9ced6a93..be0763ff57a2 100644
--- a/drivers/block/zram/Makefile
+++ b/drivers/block/zram/Makefile
@@ -1,3 +1,5 @@
1zram-y := zram_drv.o 1zram-y := zcomp_lzo.o zcomp.o zram_drv.o
2
3zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o
2 4
3obj-$(CONFIG_ZRAM) += zram.o 5obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
new file mode 100644
index 000000000000..f1ff39a3d1c1
--- /dev/null
+++ b/drivers/block/zram/zcomp.c
@@ -0,0 +1,353 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/kernel.h>
11#include <linux/string.h>
12#include <linux/err.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16
17#include "zcomp.h"
18#include "zcomp_lzo.h"
19#ifdef CONFIG_ZRAM_LZ4_COMPRESS
20#include "zcomp_lz4.h"
21#endif
22
23/*
24 * single zcomp_strm backend
25 */
26struct zcomp_strm_single {
27 struct mutex strm_lock;
28 struct zcomp_strm *zstrm;
29};
30
31/*
32 * multi zcomp_strm backend
33 */
34struct zcomp_strm_multi {
35 /* protect strm list */
36 spinlock_t strm_lock;
37 /* max possible number of zstrm streams */
38 int max_strm;
39 /* number of available zstrm streams */
40 int avail_strm;
41 /* list of available strms */
42 struct list_head idle_strm;
43 wait_queue_head_t strm_wait;
44};
45
46static struct zcomp_backend *backends[] = {
47 &zcomp_lzo,
48#ifdef CONFIG_ZRAM_LZ4_COMPRESS
49 &zcomp_lz4,
50#endif
51 NULL
52};
53
54static struct zcomp_backend *find_backend(const char *compress)
55{
56 int i = 0;
57 while (backends[i]) {
58 if (sysfs_streq(compress, backends[i]->name))
59 break;
60 i++;
61 }
62 return backends[i];
63}
64
65static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm)
66{
67 if (zstrm->private)
68 comp->backend->destroy(zstrm->private);
69 free_pages((unsigned long)zstrm->buffer, 1);
70 kfree(zstrm);
71}
72
73/*
74 * allocate new zcomp_strm structure with ->private initialized by
75 * backend, return NULL on error
76 */
77static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
78{
79 struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
80 if (!zstrm)
81 return NULL;
82
83 zstrm->private = comp->backend->create();
84 /*
85 * allocate 2 pages. 1 for compressed data, plus 1 extra for the
86 * case when compressed size is larger than the original one
87 */
88 zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
89 if (!zstrm->private || !zstrm->buffer) {
90 zcomp_strm_free(comp, zstrm);
91 zstrm = NULL;
92 }
93 return zstrm;
94}
95
96/*
97 * get idle zcomp_strm or wait until other process release
98 * (zcomp_strm_release()) one for us
99 */
100static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
101{
102 struct zcomp_strm_multi *zs = comp->stream;
103 struct zcomp_strm *zstrm;
104
105 while (1) {
106 spin_lock(&zs->strm_lock);
107 if (!list_empty(&zs->idle_strm)) {
108 zstrm = list_entry(zs->idle_strm.next,
109 struct zcomp_strm, list);
110 list_del(&zstrm->list);
111 spin_unlock(&zs->strm_lock);
112 return zstrm;
113 }
114 /* zstrm streams limit reached, wait for idle stream */
115 if (zs->avail_strm >= zs->max_strm) {
116 spin_unlock(&zs->strm_lock);
117 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
118 continue;
119 }
120 /* allocate new zstrm stream */
121 zs->avail_strm++;
122 spin_unlock(&zs->strm_lock);
123
124 zstrm = zcomp_strm_alloc(comp);
125 if (!zstrm) {
126 spin_lock(&zs->strm_lock);
127 zs->avail_strm--;
128 spin_unlock(&zs->strm_lock);
129 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
130 continue;
131 }
132 break;
133 }
134 return zstrm;
135}
136
137/* add stream back to idle list and wake up waiter or free the stream */
138static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
139{
140 struct zcomp_strm_multi *zs = comp->stream;
141
142 spin_lock(&zs->strm_lock);
143 if (zs->avail_strm <= zs->max_strm) {
144 list_add(&zstrm->list, &zs->idle_strm);
145 spin_unlock(&zs->strm_lock);
146 wake_up(&zs->strm_wait);
147 return;
148 }
149
150 zs->avail_strm--;
151 spin_unlock(&zs->strm_lock);
152 zcomp_strm_free(comp, zstrm);
153}
154
155/* change max_strm limit */
156static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
157{
158 struct zcomp_strm_multi *zs = comp->stream;
159 struct zcomp_strm *zstrm;
160
161 spin_lock(&zs->strm_lock);
162 zs->max_strm = num_strm;
163 /*
164 * if the user has lowered the limit and there are idle streams,
165 * immediately free as many streams (and memory) as we can.
166 */
167 while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
168 zstrm = list_entry(zs->idle_strm.next,
169 struct zcomp_strm, list);
170 list_del(&zstrm->list);
171 zcomp_strm_free(comp, zstrm);
172 zs->avail_strm--;
173 }
174 spin_unlock(&zs->strm_lock);
175 return true;
176}
177
178static void zcomp_strm_multi_destroy(struct zcomp *comp)
179{
180 struct zcomp_strm_multi *zs = comp->stream;
181 struct zcomp_strm *zstrm;
182
183 while (!list_empty(&zs->idle_strm)) {
184 zstrm = list_entry(zs->idle_strm.next,
185 struct zcomp_strm, list);
186 list_del(&zstrm->list);
187 zcomp_strm_free(comp, zstrm);
188 }
189 kfree(zs);
190}
191
192static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
193{
194 struct zcomp_strm *zstrm;
195 struct zcomp_strm_multi *zs;
196
197 comp->destroy = zcomp_strm_multi_destroy;
198 comp->strm_find = zcomp_strm_multi_find;
199 comp->strm_release = zcomp_strm_multi_release;
200 comp->set_max_streams = zcomp_strm_multi_set_max_streams;
201 zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
202 if (!zs)
203 return -ENOMEM;
204
205 comp->stream = zs;
206 spin_lock_init(&zs->strm_lock);
207 INIT_LIST_HEAD(&zs->idle_strm);
208 init_waitqueue_head(&zs->strm_wait);
209 zs->max_strm = max_strm;
210 zs->avail_strm = 1;
211
212 zstrm = zcomp_strm_alloc(comp);
213 if (!zstrm) {
214 kfree(zs);
215 return -ENOMEM;
216 }
217 list_add(&zstrm->list, &zs->idle_strm);
218 return 0;
219}
220
221static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
222{
223 struct zcomp_strm_single *zs = comp->stream;
224 mutex_lock(&zs->strm_lock);
225 return zs->zstrm;
226}
227
228static void zcomp_strm_single_release(struct zcomp *comp,
229 struct zcomp_strm *zstrm)
230{
231 struct zcomp_strm_single *zs = comp->stream;
232 mutex_unlock(&zs->strm_lock);
233}
234
235static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
236{
237 /* zcomp_strm_single supports only max_comp_streams == 1 */
238 return false;
239}
240
241static void zcomp_strm_single_destroy(struct zcomp *comp)
242{
243 struct zcomp_strm_single *zs = comp->stream;
244 zcomp_strm_free(comp, zs->zstrm);
245 kfree(zs);
246}
247
248static int zcomp_strm_single_create(struct zcomp *comp)
249{
250 struct zcomp_strm_single *zs;
251
252 comp->destroy = zcomp_strm_single_destroy;
253 comp->strm_find = zcomp_strm_single_find;
254 comp->strm_release = zcomp_strm_single_release;
255 comp->set_max_streams = zcomp_strm_single_set_max_streams;
256 zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
257 if (!zs)
258 return -ENOMEM;
259
260 comp->stream = zs;
261 mutex_init(&zs->strm_lock);
262 zs->zstrm = zcomp_strm_alloc(comp);
263 if (!zs->zstrm) {
264 kfree(zs);
265 return -ENOMEM;
266 }
267 return 0;
268}
269
270/* show available compressors */
271ssize_t zcomp_available_show(const char *comp, char *buf)
272{
273 ssize_t sz = 0;
274 int i = 0;
275
276 while (backends[i]) {
277 if (sysfs_streq(comp, backends[i]->name))
278 sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
279 "[%s] ", backends[i]->name);
280 else
281 sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
282 "%s ", backends[i]->name);
283 i++;
284 }
285 sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
286 return sz;
287}
288
289bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
290{
291 return comp->set_max_streams(comp, num_strm);
292}
293
294struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
295{
296 return comp->strm_find(comp);
297}
298
299void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
300{
301 comp->strm_release(comp, zstrm);
302}
303
304int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
305 const unsigned char *src, size_t *dst_len)
306{
307 return comp->backend->compress(src, zstrm->buffer, dst_len,
308 zstrm->private);
309}
310
311int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
312 size_t src_len, unsigned char *dst)
313{
314 return comp->backend->decompress(src, src_len, dst);
315}
316
317void zcomp_destroy(struct zcomp *comp)
318{
319 comp->destroy(comp);
320 kfree(comp);
321}
322
323/*
324 * search the available compressors for the requested algorithm,
325 * allocate a new zcomp and initialize it. Returns the new zcomp on
326 * success or an ERR_PTR on failure: ERR_PTR(-EINVAL) if the
327 * requested algorithm is not supported, ERR_PTR(-ENOMEM) in
328 * case of allocation error.
329 */
330struct zcomp *zcomp_create(const char *compress, int max_strm)
331{
332 struct zcomp *comp;
333 struct zcomp_backend *backend;
334
335 backend = find_backend(compress);
336 if (!backend)
337 return ERR_PTR(-EINVAL);
338
339 comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
340 if (!comp)
341 return ERR_PTR(-ENOMEM);
342
343 comp->backend = backend;
344 if (max_strm > 1)
345 zcomp_strm_multi_create(comp, max_strm);
346 else
347 zcomp_strm_single_create(comp);
348 if (!comp->stream) {
349 kfree(comp);
350 return ERR_PTR(-ENOMEM);
351 }
352 return comp;
353}
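
Taken together, the functions above form the per-device compression frontend that zram_drv.c drives. A hedged sketch of the intended call sequence, with illustrative names and minimal error handling (example_roundtrip() is not part of the patch; page_in and page_out are PAGE_SIZE buffers):

#include <linux/err.h>
#include <linux/mm.h>

#include "zcomp.h"

static int example_roundtrip(const char *algo, const unsigned char *page_in,
			     unsigned char *page_out)
{
	struct zcomp *comp;
	struct zcomp_strm *zstrm;
	size_t clen = PAGE_SIZE * 2;
	int ret;

	comp = zcomp_create(algo, 4);	/* e.g. "lzo", up to 4 concurrent streams */
	if (IS_ERR(comp))
		return PTR_ERR(comp);

	zstrm = zcomp_strm_find(comp);	/* may sleep until a stream is idle */
	ret = zcomp_compress(comp, zstrm, page_in, &clen);
	if (!ret)
		/* compressed data lives in the stream's two-page buffer */
		ret = zcomp_decompress(comp, zstrm->buffer, clen, page_out);
	zcomp_strm_release(comp, zstrm);

	zcomp_destroy(comp);
	return ret;
}
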
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
new file mode 100644
index 000000000000..c59d1fca72c0
--- /dev/null
+++ b/drivers/block/zram/zcomp.h
@@ -0,0 +1,68 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#ifndef _ZCOMP_H_
11#define _ZCOMP_H_
12
13#include <linux/mutex.h>
14
15struct zcomp_strm {
16 /* compression/decompression buffer */
17 void *buffer;
18 /*
19 * The private data of the compression stream, only compression
20 * stream backend can touch this (e.g. compression algorithm
21 * working memory)
22 */
23 void *private;
24 /* used in multi stream backend, protected by backend strm_lock */
25 struct list_head list;
26};
27
28/* static compression backend */
29struct zcomp_backend {
30 int (*compress)(const unsigned char *src, unsigned char *dst,
31 size_t *dst_len, void *private);
32
33 int (*decompress)(const unsigned char *src, size_t src_len,
34 unsigned char *dst);
35
36 void *(*create)(void);
37 void (*destroy)(void *private);
38
39 const char *name;
40};
41
42/* dynamic per-device compression frontend */
43struct zcomp {
44 void *stream;
45 struct zcomp_backend *backend;
46
47 struct zcomp_strm *(*strm_find)(struct zcomp *comp);
48 void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
49 bool (*set_max_streams)(struct zcomp *comp, int num_strm);
50 void (*destroy)(struct zcomp *comp);
51};
52
53ssize_t zcomp_available_show(const char *comp, char *buf);
54
55struct zcomp *zcomp_create(const char *comp, int max_strm);
56void zcomp_destroy(struct zcomp *comp);
57
58struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
59void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm);
60
61int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
62 const unsigned char *src, size_t *dst_len);
63
64int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
65 size_t src_len, unsigned char *dst);
66
67bool zcomp_set_max_streams(struct zcomp *comp, int num_strm);
68#endif /* _ZCOMP_H_ */
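The header above implies a simple per-page sequence: find an idle stream, compress into its buffer, copy the result out, release the stream. A minimal sketch, simplified from the zram_bvec_write() changes later in this patch (the function name is hypothetical):

	static int example_compress_page(struct zcomp *comp,
					 const unsigned char *page_data)
	{
		struct zcomp_strm *zstrm;
		size_t clen = PAGE_SIZE;
		int ret;

		zstrm = zcomp_strm_find(comp);	/* may wait for an idle stream */
		ret = zcomp_compress(comp, zstrm, page_data, &clen);
		if (!ret) {
			/* zstrm->buffer now holds clen bytes of compressed data */
		}
		zcomp_strm_release(comp, zstrm);
		return ret;
	}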
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
new file mode 100644
index 000000000000..f2afb7e988c3
--- /dev/null
+++ b/drivers/block/zram/zcomp_lz4.c
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/kernel.h>
11#include <linux/slab.h>
12#include <linux/lz4.h>
13
14#include "zcomp_lz4.h"
15
16static void *zcomp_lz4_create(void)
17{
18 return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
19}
20
21static void zcomp_lz4_destroy(void *private)
22{
23 kfree(private);
24}
25
26static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst,
27 size_t *dst_len, void *private)
28{
29	/* returns 0 on success */
30 return lz4_compress(src, PAGE_SIZE, dst, dst_len, private);
31}
32
33static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len,
34 unsigned char *dst)
35{
36 size_t dst_len = PAGE_SIZE;
37	/* returns 0 on success */
38 return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len);
39}
40
41struct zcomp_backend zcomp_lz4 = {
42 .compress = zcomp_lz4_compress,
43 .decompress = zcomp_lz4_decompress,
44 .create = zcomp_lz4_create,
45 .destroy = zcomp_lz4_destroy,
46 .name = "lz4",
47};
diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h
new file mode 100644
index 000000000000..60613fb29dd8
--- /dev/null
+++ b/drivers/block/zram/zcomp_lz4.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#ifndef _ZCOMP_LZ4_H_
11#define _ZCOMP_LZ4_H_
12
13#include "zcomp.h"
14
15extern struct zcomp_backend zcomp_lz4;
16
17#endif /* _ZCOMP_LZ4_H_ */
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
new file mode 100644
index 000000000000..da1bc47d588e
--- /dev/null
+++ b/drivers/block/zram/zcomp_lzo.c
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/kernel.h>
11#include <linux/slab.h>
12#include <linux/lzo.h>
13
14#include "zcomp_lzo.h"
15
16static void *lzo_create(void)
17{
18 return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
19}
20
21static void lzo_destroy(void *private)
22{
23 kfree(private);
24}
25
26static int lzo_compress(const unsigned char *src, unsigned char *dst,
27 size_t *dst_len, void *private)
28{
29 int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private);
30 return ret == LZO_E_OK ? 0 : ret;
31}
32
33static int lzo_decompress(const unsigned char *src, size_t src_len,
34 unsigned char *dst)
35{
36 size_t dst_len = PAGE_SIZE;
37 int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len);
38 return ret == LZO_E_OK ? 0 : ret;
39}
40
41struct zcomp_backend zcomp_lzo = {
42 .compress = lzo_compress,
43 .decompress = lzo_decompress,
44 .create = lzo_create,
45 .destroy = lzo_destroy,
46 .name = "lzo",
47};
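The lzo and lz4 files above follow the same shape: four hooks plus a name. Purely as an illustration (this is not part of the patch, and the backend table inside zcomp.c that would have to reference it is not shown in this excerpt), a hypothetical backend that stores pages uncompressed could look like:

	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	#include "zcomp.h"

	static void *zcomp_copy_create(void)
	{
		/* no real working memory needed; return a token allocation,
		 * assuming a NULL return would be treated as an allocation
		 * failure by the stream setup code (not shown here) */
		return kzalloc(sizeof(long), GFP_KERNEL);
	}

	static void zcomp_copy_destroy(void *private)
	{
		kfree(private);
	}

	static int zcomp_copy_compress(const unsigned char *src, unsigned char *dst,
				       size_t *dst_len, void *private)
	{
		memcpy(dst, src, PAGE_SIZE);
		*dst_len = PAGE_SIZE;
		return 0;
	}

	static int zcomp_copy_decompress(const unsigned char *src, size_t src_len,
					 unsigned char *dst)
	{
		memcpy(dst, src, src_len);
		return 0;
	}

	struct zcomp_backend zcomp_copy = {
		.compress	= zcomp_copy_compress,
		.decompress	= zcomp_copy_decompress,
		.create		= zcomp_copy_create,
		.destroy	= zcomp_copy_destroy,
		.name		= "copy",
	};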
diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h
new file mode 100644
index 000000000000..128c5807fa14
--- /dev/null
+++ b/drivers/block/zram/zcomp_lzo.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) 2014 Sergey Senozhatsky.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#ifndef _ZCOMP_LZO_H_
11#define _ZCOMP_LZO_H_
12
13#include "zcomp.h"
14
15extern struct zcomp_backend zcomp_lzo;
16
17#endif /* _ZCOMP_LZO_H_ */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 51c557cfd92b..9849b5233bf4 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -29,19 +29,36 @@
29#include <linux/genhd.h> 29#include <linux/genhd.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/lzo.h>
33#include <linux/string.h> 32#include <linux/string.h>
34#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/err.h>
35 35
36#include "zram_drv.h" 36#include "zram_drv.h"
37 37
38/* Globals */ 38/* Globals */
39static int zram_major; 39static int zram_major;
40static struct zram *zram_devices; 40static struct zram *zram_devices;
41static const char *default_compressor = "lzo";
41 42
42/* Module params (documentation at end) */ 43/* Module params (documentation at end) */
43static unsigned int num_devices = 1; 44static unsigned int num_devices = 1;
44 45
46#define ZRAM_ATTR_RO(name) \
47static ssize_t zram_attr_##name##_show(struct device *d, \
48 struct device_attribute *attr, char *b) \
49{ \
50 struct zram *zram = dev_to_zram(d); \
51 return scnprintf(b, PAGE_SIZE, "%llu\n", \
52 (u64)atomic64_read(&zram->stats.name)); \
53} \
54static struct device_attribute dev_attr_##name = \
55 __ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL);
56
57static inline int init_done(struct zram *zram)
58{
59 return zram->meta != NULL;
60}
61
45static inline struct zram *dev_to_zram(struct device *dev) 62static inline struct zram *dev_to_zram(struct device *dev)
46{ 63{
47 return (struct zram *)dev_to_disk(dev)->private_data; 64 return (struct zram *)dev_to_disk(dev)->private_data;
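For readability, the ZRAM_ATTR_RO() macro added above expands, for the ZRAM_ATTR_RO(num_reads) invocation further down in this patch, to roughly the following (written out here only as an illustration):

	static ssize_t zram_attr_num_reads_show(struct device *d,
			struct device_attribute *attr, char *b)
	{
		struct zram *zram = dev_to_zram(d);

		return scnprintf(b, PAGE_SIZE, "%llu\n",
				(u64)atomic64_read(&zram->stats.num_reads));
	}
	static struct device_attribute dev_attr_num_reads =
		__ATTR(num_reads, S_IRUGO, zram_attr_num_reads_show, NULL);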
@@ -52,92 +69,114 @@ static ssize_t disksize_show(struct device *dev,
52{ 69{
53 struct zram *zram = dev_to_zram(dev); 70 struct zram *zram = dev_to_zram(dev);
54 71
55 return sprintf(buf, "%llu\n", zram->disksize); 72 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
56} 73}
57 74
58static ssize_t initstate_show(struct device *dev, 75static ssize_t initstate_show(struct device *dev,
59 struct device_attribute *attr, char *buf) 76 struct device_attribute *attr, char *buf)
60{ 77{
78 u32 val;
61 struct zram *zram = dev_to_zram(dev); 79 struct zram *zram = dev_to_zram(dev);
62 80
63 return sprintf(buf, "%u\n", zram->init_done); 81 down_read(&zram->init_lock);
64} 82 val = init_done(zram);
65 83 up_read(&zram->init_lock);
66static ssize_t num_reads_show(struct device *dev,
67 struct device_attribute *attr, char *buf)
68{
69 struct zram *zram = dev_to_zram(dev);
70 84
71 return sprintf(buf, "%llu\n", 85 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
72 (u64)atomic64_read(&zram->stats.num_reads));
73} 86}
74 87
75static ssize_t num_writes_show(struct device *dev, 88static ssize_t orig_data_size_show(struct device *dev,
76 struct device_attribute *attr, char *buf) 89 struct device_attribute *attr, char *buf)
77{ 90{
78 struct zram *zram = dev_to_zram(dev); 91 struct zram *zram = dev_to_zram(dev);
79 92
80 return sprintf(buf, "%llu\n", 93 return scnprintf(buf, PAGE_SIZE, "%llu\n",
81 (u64)atomic64_read(&zram->stats.num_writes)); 94 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
82} 95}
83 96
84static ssize_t invalid_io_show(struct device *dev, 97static ssize_t mem_used_total_show(struct device *dev,
85 struct device_attribute *attr, char *buf) 98 struct device_attribute *attr, char *buf)
86{ 99{
100 u64 val = 0;
87 struct zram *zram = dev_to_zram(dev); 101 struct zram *zram = dev_to_zram(dev);
102 struct zram_meta *meta = zram->meta;
88 103
89 return sprintf(buf, "%llu\n", 104 down_read(&zram->init_lock);
90 (u64)atomic64_read(&zram->stats.invalid_io)); 105 if (init_done(zram))
91} 106 val = zs_get_total_size_bytes(meta->mem_pool);
92 107 up_read(&zram->init_lock);
93static ssize_t notify_free_show(struct device *dev,
94 struct device_attribute *attr, char *buf)
95{
96 struct zram *zram = dev_to_zram(dev);
97 108
98 return sprintf(buf, "%llu\n", 109 return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
99 (u64)atomic64_read(&zram->stats.notify_free));
100} 110}
101 111
102static ssize_t zero_pages_show(struct device *dev, 112static ssize_t max_comp_streams_show(struct device *dev,
103 struct device_attribute *attr, char *buf) 113 struct device_attribute *attr, char *buf)
104{ 114{
115 int val;
105 struct zram *zram = dev_to_zram(dev); 116 struct zram *zram = dev_to_zram(dev);
106 117
107 return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); 118 down_read(&zram->init_lock);
119 val = zram->max_comp_streams;
120 up_read(&zram->init_lock);
121
122 return scnprintf(buf, PAGE_SIZE, "%d\n", val);
108} 123}
109 124
110static ssize_t orig_data_size_show(struct device *dev, 125static ssize_t max_comp_streams_store(struct device *dev,
111 struct device_attribute *attr, char *buf) 126 struct device_attribute *attr, const char *buf, size_t len)
112{ 127{
128 int num;
113 struct zram *zram = dev_to_zram(dev); 129 struct zram *zram = dev_to_zram(dev);
130 int ret;
114 131
115 return sprintf(buf, "%llu\n", 132 ret = kstrtoint(buf, 0, &num);
116 (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); 133 if (ret < 0)
117} 134 return ret;
135 if (num < 1)
136 return -EINVAL;
118 137
119static ssize_t compr_data_size_show(struct device *dev, 138 down_write(&zram->init_lock);
120 struct device_attribute *attr, char *buf) 139 if (init_done(zram)) {
121{ 140 if (!zcomp_set_max_streams(zram->comp, num)) {
122 struct zram *zram = dev_to_zram(dev); 141 pr_info("Cannot change max compression streams\n");
142 ret = -EINVAL;
143 goto out;
144 }
145 }
123 146
124 return sprintf(buf, "%llu\n", 147 zram->max_comp_streams = num;
125 (u64)atomic64_read(&zram->stats.compr_size)); 148 ret = len;
149out:
150 up_write(&zram->init_lock);
151 return ret;
126} 152}
127 153
128static ssize_t mem_used_total_show(struct device *dev, 154static ssize_t comp_algorithm_show(struct device *dev,
129 struct device_attribute *attr, char *buf) 155 struct device_attribute *attr, char *buf)
130{ 156{
131 u64 val = 0; 157 size_t sz;
132 struct zram *zram = dev_to_zram(dev); 158 struct zram *zram = dev_to_zram(dev);
133 struct zram_meta *meta = zram->meta;
134 159
135 down_read(&zram->init_lock); 160 down_read(&zram->init_lock);
136 if (zram->init_done) 161 sz = zcomp_available_show(zram->compressor, buf);
137 val = zs_get_total_size_bytes(meta->mem_pool);
138 up_read(&zram->init_lock); 162 up_read(&zram->init_lock);
139 163
140 return sprintf(buf, "%llu\n", val); 164 return sz;
165}
166
167static ssize_t comp_algorithm_store(struct device *dev,
168 struct device_attribute *attr, const char *buf, size_t len)
169{
170 struct zram *zram = dev_to_zram(dev);
171 down_write(&zram->init_lock);
172 if (init_done(zram)) {
173 up_write(&zram->init_lock);
174 pr_info("Can't change algorithm for initialized device\n");
175 return -EBUSY;
176 }
177 strlcpy(zram->compressor, buf, sizeof(zram->compressor));
178 up_write(&zram->init_lock);
179 return len;
141} 180}
142 181
143/* flag operations needs meta->tb_lock */ 182/* flag operations needs meta->tb_lock */
@@ -192,8 +231,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
192static void zram_meta_free(struct zram_meta *meta) 231static void zram_meta_free(struct zram_meta *meta)
193{ 232{
194 zs_destroy_pool(meta->mem_pool); 233 zs_destroy_pool(meta->mem_pool);
195 kfree(meta->compress_workmem);
196 free_pages((unsigned long)meta->compress_buffer, 1);
197 vfree(meta->table); 234 vfree(meta->table);
198 kfree(meta); 235 kfree(meta);
199} 236}
@@ -205,22 +242,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
205 if (!meta) 242 if (!meta)
206 goto out; 243 goto out;
207 244
208 meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
209 if (!meta->compress_workmem)
210 goto free_meta;
211
212 meta->compress_buffer =
213 (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
214 if (!meta->compress_buffer) {
215 pr_err("Error allocating compressor buffer space\n");
216 goto free_workmem;
217 }
218
219 num_pages = disksize >> PAGE_SHIFT; 245 num_pages = disksize >> PAGE_SHIFT;
220 meta->table = vzalloc(num_pages * sizeof(*meta->table)); 246 meta->table = vzalloc(num_pages * sizeof(*meta->table));
221 if (!meta->table) { 247 if (!meta->table) {
222 pr_err("Error allocating zram address table\n"); 248 pr_err("Error allocating zram address table\n");
223 goto free_buffer; 249 goto free_meta;
224 } 250 }
225 251
226 meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); 252 meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
@@ -230,15 +256,10 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
230 } 256 }
231 257
232 rwlock_init(&meta->tb_lock); 258 rwlock_init(&meta->tb_lock);
233 mutex_init(&meta->buffer_lock);
234 return meta; 259 return meta;
235 260
236free_table: 261free_table:
237 vfree(meta->table); 262 vfree(meta->table);
238free_buffer:
239 free_pages((unsigned long)meta->compress_buffer, 1);
240free_workmem:
241 kfree(meta->compress_workmem);
242free_meta: 263free_meta:
243 kfree(meta); 264 kfree(meta);
244 meta = NULL; 265 meta = NULL;
@@ -288,7 +309,6 @@ static void zram_free_page(struct zram *zram, size_t index)
288{ 309{
289 struct zram_meta *meta = zram->meta; 310 struct zram_meta *meta = zram->meta;
290 unsigned long handle = meta->table[index].handle; 311 unsigned long handle = meta->table[index].handle;
291 u16 size = meta->table[index].size;
292 312
293 if (unlikely(!handle)) { 313 if (unlikely(!handle)) {
294 /* 314 /*
@@ -297,21 +317,15 @@ static void zram_free_page(struct zram *zram, size_t index)
297 */ 317 */
298 if (zram_test_flag(meta, index, ZRAM_ZERO)) { 318 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
299 zram_clear_flag(meta, index, ZRAM_ZERO); 319 zram_clear_flag(meta, index, ZRAM_ZERO);
300 atomic_dec(&zram->stats.pages_zero); 320 atomic64_dec(&zram->stats.zero_pages);
301 } 321 }
302 return; 322 return;
303 } 323 }
304 324
305 if (unlikely(size > max_zpage_size))
306 atomic_dec(&zram->stats.bad_compress);
307
308 zs_free(meta->mem_pool, handle); 325 zs_free(meta->mem_pool, handle);
309 326
310 if (size <= PAGE_SIZE / 2) 327 atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
311 atomic_dec(&zram->stats.good_compress); 328 atomic64_dec(&zram->stats.pages_stored);
312
313 atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
314 atomic_dec(&zram->stats.pages_stored);
315 329
316 meta->table[index].handle = 0; 330 meta->table[index].handle = 0;
317 meta->table[index].size = 0; 331 meta->table[index].size = 0;
@@ -319,8 +333,7 @@ static void zram_free_page(struct zram *zram, size_t index)
319 333
320static int zram_decompress_page(struct zram *zram, char *mem, u32 index) 334static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
321{ 335{
322 int ret = LZO_E_OK; 336 int ret = 0;
323 size_t clen = PAGE_SIZE;
324 unsigned char *cmem; 337 unsigned char *cmem;
325 struct zram_meta *meta = zram->meta; 338 struct zram_meta *meta = zram->meta;
326 unsigned long handle; 339 unsigned long handle;
@@ -340,12 +353,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
340 if (size == PAGE_SIZE) 353 if (size == PAGE_SIZE)
341 copy_page(mem, cmem); 354 copy_page(mem, cmem);
342 else 355 else
343 ret = lzo1x_decompress_safe(cmem, size, mem, &clen); 356 ret = zcomp_decompress(zram->comp, cmem, size, mem);
344 zs_unmap_object(meta->mem_pool, handle); 357 zs_unmap_object(meta->mem_pool, handle);
345 read_unlock(&meta->tb_lock); 358 read_unlock(&meta->tb_lock);
346 359
347 /* Should NEVER happen. Return bio error if it does. */ 360 /* Should NEVER happen. Return bio error if it does. */
348 if (unlikely(ret != LZO_E_OK)) { 361 if (unlikely(ret)) {
349 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 362 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
350 atomic64_inc(&zram->stats.failed_reads); 363 atomic64_inc(&zram->stats.failed_reads);
351 return ret; 364 return ret;
@@ -388,7 +401,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
388 401
389 ret = zram_decompress_page(zram, uncmem, index); 402 ret = zram_decompress_page(zram, uncmem, index);
390 /* Should NEVER happen. Return bio error if it does. */ 403 /* Should NEVER happen. Return bio error if it does. */
391 if (unlikely(ret != LZO_E_OK)) 404 if (unlikely(ret))
392 goto out_cleanup; 405 goto out_cleanup;
393 406
394 if (is_partial_io(bvec)) 407 if (is_partial_io(bvec))
@@ -413,11 +426,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
413 struct page *page; 426 struct page *page;
414 unsigned char *user_mem, *cmem, *src, *uncmem = NULL; 427 unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
415 struct zram_meta *meta = zram->meta; 428 struct zram_meta *meta = zram->meta;
429 struct zcomp_strm *zstrm;
416 bool locked = false; 430 bool locked = false;
417 431
418 page = bvec->bv_page; 432 page = bvec->bv_page;
419 src = meta->compress_buffer;
420
421 if (is_partial_io(bvec)) { 433 if (is_partial_io(bvec)) {
422 /* 434 /*
423 * This is a partial IO. We need to read the full page 435 * This is a partial IO. We need to read the full page
@@ -433,7 +445,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
433 goto out; 445 goto out;
434 } 446 }
435 447
436 mutex_lock(&meta->buffer_lock); 448 zstrm = zcomp_strm_find(zram->comp);
437 locked = true; 449 locked = true;
438 user_mem = kmap_atomic(page); 450 user_mem = kmap_atomic(page);
439 451
@@ -454,28 +466,25 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
454 zram_set_flag(meta, index, ZRAM_ZERO); 466 zram_set_flag(meta, index, ZRAM_ZERO);
455 write_unlock(&zram->meta->tb_lock); 467 write_unlock(&zram->meta->tb_lock);
456 468
457 atomic_inc(&zram->stats.pages_zero); 469 atomic64_inc(&zram->stats.zero_pages);
458 ret = 0; 470 ret = 0;
459 goto out; 471 goto out;
460 } 472 }
461 473
462 ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, 474 ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
463 meta->compress_workmem);
464 if (!is_partial_io(bvec)) { 475 if (!is_partial_io(bvec)) {
465 kunmap_atomic(user_mem); 476 kunmap_atomic(user_mem);
466 user_mem = NULL; 477 user_mem = NULL;
467 uncmem = NULL; 478 uncmem = NULL;
468 } 479 }
469 480
470 if (unlikely(ret != LZO_E_OK)) { 481 if (unlikely(ret)) {
471 pr_err("Compression failed! err=%d\n", ret); 482 pr_err("Compression failed! err=%d\n", ret);
472 goto out; 483 goto out;
473 } 484 }
474 485 src = zstrm->buffer;
475 if (unlikely(clen > max_zpage_size)) { 486 if (unlikely(clen > max_zpage_size)) {
476 atomic_inc(&zram->stats.bad_compress);
477 clen = PAGE_SIZE; 487 clen = PAGE_SIZE;
478 src = NULL;
479 if (is_partial_io(bvec)) 488 if (is_partial_io(bvec))
480 src = uncmem; 489 src = uncmem;
481 } 490 }
@@ -497,6 +506,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
497 memcpy(cmem, src, clen); 506 memcpy(cmem, src, clen);
498 } 507 }
499 508
509 zcomp_strm_release(zram->comp, zstrm);
510 locked = false;
500 zs_unmap_object(meta->mem_pool, handle); 511 zs_unmap_object(meta->mem_pool, handle);
501 512
502 /* 513 /*
@@ -511,49 +522,88 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
511 write_unlock(&zram->meta->tb_lock); 522 write_unlock(&zram->meta->tb_lock);
512 523
513 /* Update stats */ 524 /* Update stats */
514 atomic64_add(clen, &zram->stats.compr_size); 525 atomic64_add(clen, &zram->stats.compr_data_size);
515 atomic_inc(&zram->stats.pages_stored); 526 atomic64_inc(&zram->stats.pages_stored);
516 if (clen <= PAGE_SIZE / 2)
517 atomic_inc(&zram->stats.good_compress);
518
519out: 527out:
520 if (locked) 528 if (locked)
521 mutex_unlock(&meta->buffer_lock); 529 zcomp_strm_release(zram->comp, zstrm);
522 if (is_partial_io(bvec)) 530 if (is_partial_io(bvec))
523 kfree(uncmem); 531 kfree(uncmem);
524
525 if (ret) 532 if (ret)
526 atomic64_inc(&zram->stats.failed_writes); 533 atomic64_inc(&zram->stats.failed_writes);
527 return ret; 534 return ret;
528} 535}
529 536
530static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 537static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
531 int offset, struct bio *bio, int rw) 538 int offset, struct bio *bio)
532{ 539{
533 int ret; 540 int ret;
541 int rw = bio_data_dir(bio);
534 542
535 if (rw == READ) 543 if (rw == READ) {
544 atomic64_inc(&zram->stats.num_reads);
536 ret = zram_bvec_read(zram, bvec, index, offset, bio); 545 ret = zram_bvec_read(zram, bvec, index, offset, bio);
537 else 546 } else {
547 atomic64_inc(&zram->stats.num_writes);
538 ret = zram_bvec_write(zram, bvec, index, offset); 548 ret = zram_bvec_write(zram, bvec, index, offset);
549 }
539 550
540 return ret; 551 return ret;
541} 552}
542 553
554/*
 555 * zram_bio_discard - handler for discard requests
556 * @index: physical block index in PAGE_SIZE units
557 * @offset: byte offset within physical block
558 */
559static void zram_bio_discard(struct zram *zram, u32 index,
560 int offset, struct bio *bio)
561{
562 size_t n = bio->bi_iter.bi_size;
563
564 /*
565 * zram manages data in physical block size units. Because logical block
 566 * size isn't identical to the physical block size on some architectures,
 567 * we could get a discard request pointing to a specific offset within a
 568 * certain physical block. Although we could handle such a request by
 569 * reading that physical block, decompressing it, partially zeroing it,
 570 * re-compressing it and storing it back, this isn't reasonable
571 * because our intent with a discard request is to save memory. So
572 * skipping this logical block is appropriate here.
573 */
574 if (offset) {
575 if (n < offset)
576 return;
577
578 n -= offset;
579 index++;
580 }
581
582 while (n >= PAGE_SIZE) {
583 /*
 584 * A discard request can be large, so the lock hold times could be
 585 * lengthy; take the lock once per page.
586 */
587 write_lock(&zram->meta->tb_lock);
588 zram_free_page(zram, index);
589 write_unlock(&zram->meta->tb_lock);
590 index++;
591 n -= PAGE_SIZE;
592 }
593}
594
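A worked example of the offset handling above (assuming PAGE_SIZE is 4096): a discard of bi_size = 12288 bytes starting 2048 bytes into page index 10 first skips the partial tail of page 10 (n becomes 10240, index becomes 11), then frees pages 11 and 12, and finally ignores the remaining 2048 bytes because they only partially cover page 13.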
543static void zram_reset_device(struct zram *zram, bool reset_capacity) 595static void zram_reset_device(struct zram *zram, bool reset_capacity)
544{ 596{
545 size_t index; 597 size_t index;
546 struct zram_meta *meta; 598 struct zram_meta *meta;
547 599
548 down_write(&zram->init_lock); 600 down_write(&zram->init_lock);
549 if (!zram->init_done) { 601 if (!init_done(zram)) {
550 up_write(&zram->init_lock); 602 up_write(&zram->init_lock);
551 return; 603 return;
552 } 604 }
553 605
554 meta = zram->meta; 606 meta = zram->meta;
555 zram->init_done = 0;
556
557 /* Free all pages that are still in this zram device */ 607 /* Free all pages that are still in this zram device */
558 for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { 608 for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
559 unsigned long handle = meta->table[index].handle; 609 unsigned long handle = meta->table[index].handle;
@@ -563,6 +613,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
563 zs_free(meta->mem_pool, handle); 613 zs_free(meta->mem_pool, handle);
564 } 614 }
565 615
616 zcomp_destroy(zram->comp);
617 zram->max_comp_streams = 1;
618
566 zram_meta_free(zram->meta); 619 zram_meta_free(zram->meta);
567 zram->meta = NULL; 620 zram->meta = NULL;
568 /* Reset stats */ 621 /* Reset stats */
@@ -574,37 +627,14 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
574 up_write(&zram->init_lock); 627 up_write(&zram->init_lock);
575} 628}
576 629
577static void zram_init_device(struct zram *zram, struct zram_meta *meta)
578{
579 if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
580 pr_info(
581 "There is little point creating a zram of greater than "
582 "twice the size of memory since we expect a 2:1 compression "
583 "ratio. Note that zram uses about 0.1%% of the size of "
584 "the disk when not in use so a huge zram is "
585 "wasteful.\n"
586 "\tMemory Size: %lu kB\n"
587 "\tSize you selected: %llu kB\n"
588 "Continuing anyway ...\n",
589 (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
590 );
591 }
592
593 /* zram devices sort of resembles non-rotational disks */
594 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
595
596 zram->meta = meta;
597 zram->init_done = 1;
598
599 pr_debug("Initialization done!\n");
600}
601
602static ssize_t disksize_store(struct device *dev, 630static ssize_t disksize_store(struct device *dev,
603 struct device_attribute *attr, const char *buf, size_t len) 631 struct device_attribute *attr, const char *buf, size_t len)
604{ 632{
605 u64 disksize; 633 u64 disksize;
634 struct zcomp *comp;
606 struct zram_meta *meta; 635 struct zram_meta *meta;
607 struct zram *zram = dev_to_zram(dev); 636 struct zram *zram = dev_to_zram(dev);
637 int err;
608 638
609 disksize = memparse(buf, NULL); 639 disksize = memparse(buf, NULL);
610 if (!disksize) 640 if (!disksize)
@@ -614,20 +644,35 @@ static ssize_t disksize_store(struct device *dev,
614 meta = zram_meta_alloc(disksize); 644 meta = zram_meta_alloc(disksize);
615 if (!meta) 645 if (!meta)
616 return -ENOMEM; 646 return -ENOMEM;
647
648 comp = zcomp_create(zram->compressor, zram->max_comp_streams);
649 if (IS_ERR(comp)) {
650 pr_info("Cannot initialise %s compressing backend\n",
651 zram->compressor);
652 err = PTR_ERR(comp);
653 goto out_free_meta;
654 }
655
617 down_write(&zram->init_lock); 656 down_write(&zram->init_lock);
618 if (zram->init_done) { 657 if (init_done(zram)) {
619 up_write(&zram->init_lock);
620 zram_meta_free(meta);
621 pr_info("Cannot change disksize for initialized device\n"); 658 pr_info("Cannot change disksize for initialized device\n");
622 return -EBUSY; 659 err = -EBUSY;
660 goto out_destroy_comp;
623 } 661 }
624 662
663 zram->meta = meta;
664 zram->comp = comp;
625 zram->disksize = disksize; 665 zram->disksize = disksize;
626 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); 666 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
627 zram_init_device(zram, meta);
628 up_write(&zram->init_lock); 667 up_write(&zram->init_lock);
629
630 return len; 668 return len;
669
670out_destroy_comp:
671 up_write(&zram->init_lock);
672 zcomp_destroy(comp);
673out_free_meta:
674 zram_meta_free(meta);
675 return err;
631} 676}
632 677
633static ssize_t reset_store(struct device *dev, 678static ssize_t reset_store(struct device *dev,
@@ -671,26 +716,23 @@ out:
671 return ret; 716 return ret;
672} 717}
673 718
674static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) 719static void __zram_make_request(struct zram *zram, struct bio *bio)
675{ 720{
676 int offset; 721 int offset;
677 u32 index; 722 u32 index;
678 struct bio_vec bvec; 723 struct bio_vec bvec;
679 struct bvec_iter iter; 724 struct bvec_iter iter;
680 725
681 switch (rw) {
682 case READ:
683 atomic64_inc(&zram->stats.num_reads);
684 break;
685 case WRITE:
686 atomic64_inc(&zram->stats.num_writes);
687 break;
688 }
689
690 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 726 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
691 offset = (bio->bi_iter.bi_sector & 727 offset = (bio->bi_iter.bi_sector &
692 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 728 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
693 729
730 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
731 zram_bio_discard(zram, index, offset, bio);
732 bio_endio(bio, 0);
733 return;
734 }
735
694 bio_for_each_segment(bvec, bio, iter) { 736 bio_for_each_segment(bvec, bio, iter) {
695 int max_transfer_size = PAGE_SIZE - offset; 737 int max_transfer_size = PAGE_SIZE - offset;
696 738
@@ -705,16 +747,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
705 bv.bv_len = max_transfer_size; 747 bv.bv_len = max_transfer_size;
706 bv.bv_offset = bvec.bv_offset; 748 bv.bv_offset = bvec.bv_offset;
707 749
708 if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) 750 if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0)
709 goto out; 751 goto out;
710 752
711 bv.bv_len = bvec.bv_len - max_transfer_size; 753 bv.bv_len = bvec.bv_len - max_transfer_size;
712 bv.bv_offset += max_transfer_size; 754 bv.bv_offset += max_transfer_size;
713 if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) 755 if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0)
714 goto out; 756 goto out;
715 } else 757 } else
716 if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw) 758 if (zram_bvec_rw(zram, &bvec, index, offset, bio) < 0)
717 < 0)
718 goto out; 759 goto out;
719 760
720 update_position(&index, &offset, &bvec); 761 update_position(&index, &offset, &bvec);
@@ -736,7 +777,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
736 struct zram *zram = queue->queuedata; 777 struct zram *zram = queue->queuedata;
737 778
738 down_read(&zram->init_lock); 779 down_read(&zram->init_lock);
739 if (unlikely(!zram->init_done)) 780 if (unlikely(!init_done(zram)))
740 goto error; 781 goto error;
741 782
742 if (!valid_io_request(zram, bio)) { 783 if (!valid_io_request(zram, bio)) {
@@ -744,7 +785,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
744 goto error; 785 goto error;
745 } 786 }
746 787
747 __zram_make_request(zram, bio, bio_data_dir(bio)); 788 __zram_make_request(zram, bio);
748 up_read(&zram->init_lock); 789 up_read(&zram->init_lock);
749 790
750 return; 791 return;
@@ -778,14 +819,21 @@ static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
778 disksize_show, disksize_store); 819 disksize_show, disksize_store);
779static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); 820static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
780static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); 821static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
781static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
782static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
783static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
784static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
785static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
786static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); 822static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
787static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
788static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); 823static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
824static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
825 max_comp_streams_show, max_comp_streams_store);
826static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR,
827 comp_algorithm_show, comp_algorithm_store);
828
829ZRAM_ATTR_RO(num_reads);
830ZRAM_ATTR_RO(num_writes);
831ZRAM_ATTR_RO(failed_reads);
832ZRAM_ATTR_RO(failed_writes);
833ZRAM_ATTR_RO(invalid_io);
834ZRAM_ATTR_RO(notify_free);
835ZRAM_ATTR_RO(zero_pages);
836ZRAM_ATTR_RO(compr_data_size);
789 837
790static struct attribute *zram_disk_attrs[] = { 838static struct attribute *zram_disk_attrs[] = {
791 &dev_attr_disksize.attr, 839 &dev_attr_disksize.attr,
@@ -793,12 +841,16 @@ static struct attribute *zram_disk_attrs[] = {
793 &dev_attr_reset.attr, 841 &dev_attr_reset.attr,
794 &dev_attr_num_reads.attr, 842 &dev_attr_num_reads.attr,
795 &dev_attr_num_writes.attr, 843 &dev_attr_num_writes.attr,
844 &dev_attr_failed_reads.attr,
845 &dev_attr_failed_writes.attr,
796 &dev_attr_invalid_io.attr, 846 &dev_attr_invalid_io.attr,
797 &dev_attr_notify_free.attr, 847 &dev_attr_notify_free.attr,
798 &dev_attr_zero_pages.attr, 848 &dev_attr_zero_pages.attr,
799 &dev_attr_orig_data_size.attr, 849 &dev_attr_orig_data_size.attr,
800 &dev_attr_compr_data_size.attr, 850 &dev_attr_compr_data_size.attr,
801 &dev_attr_mem_used_total.attr, 851 &dev_attr_mem_used_total.attr,
852 &dev_attr_max_comp_streams.attr,
853 &dev_attr_comp_algorithm.attr,
802 NULL, 854 NULL,
803}; 855};
804 856
@@ -839,7 +891,8 @@ static int create_device(struct zram *zram, int device_id)
839 891
 840 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ 892 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
841 set_capacity(zram->disk, 0); 893 set_capacity(zram->disk, 0);
842 894 /* zram devices sort of resembles non-rotational disks */
895 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
843 /* 896 /*
844 * To ensure that we always get PAGE_SIZE aligned 897 * To ensure that we always get PAGE_SIZE aligned
845 * and n*PAGE_SIZED sized I/O requests. 898 * and n*PAGE_SIZED sized I/O requests.
@@ -849,6 +902,21 @@ static int create_device(struct zram *zram, int device_id)
849 ZRAM_LOGICAL_BLOCK_SIZE); 902 ZRAM_LOGICAL_BLOCK_SIZE);
850 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 903 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
851 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 904 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
905 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
906 zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
907 /*
908 * zram_bio_discard() will clear all logical blocks if logical block
909 * size is identical with physical block size(PAGE_SIZE). But if it is
910 * different, we will skip discarding some parts of logical blocks in
911 * the part of the request range which isn't aligned to physical block
912 * size. So we can't ensure that all discarded logical blocks are
913 * zeroed.
914 */
915 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
916 zram->disk->queue->limits.discard_zeroes_data = 1;
917 else
918 zram->disk->queue->limits.discard_zeroes_data = 0;
919 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
852 920
853 add_disk(zram->disk); 921 add_disk(zram->disk);
854 922
@@ -858,8 +926,9 @@ static int create_device(struct zram *zram, int device_id)
858 pr_warn("Error creating sysfs group"); 926 pr_warn("Error creating sysfs group");
859 goto out_free_disk; 927 goto out_free_disk;
860 } 928 }
861 929 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
862 zram->init_done = 0; 930 zram->meta = NULL;
931 zram->max_comp_streams = 1;
863 return 0; 932 return 0;
864 933
865out_free_disk: 934out_free_disk:
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index ad8aa35bae00..7f21c145e317 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -16,9 +16,10 @@
16#define _ZRAM_DRV_H_ 16#define _ZRAM_DRV_H_
17 17
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/zsmalloc.h> 19#include <linux/zsmalloc.h>
21 20
21#include "zcomp.h"
22
22/* 23/*
23 * Some arbitrary value. This is just to catch 24 * Some arbitrary value. This is just to catch
24 * invalid value for num_devices module parameter. 25 * invalid value for num_devices module parameter.
@@ -64,38 +65,33 @@ enum zram_pageflags {
64struct table { 65struct table {
65 unsigned long handle; 66 unsigned long handle;
66 u16 size; /* object size (excluding header) */ 67 u16 size; /* object size (excluding header) */
67 u8 count; /* object ref count (not yet used) */
68 u8 flags; 68 u8 flags;
69} __aligned(4); 69} __aligned(4);
70 70
71struct zram_stats { 71struct zram_stats {
72 atomic64_t compr_size; /* compressed size of pages stored */ 72 atomic64_t compr_data_size; /* compressed size of pages stored */
73 atomic64_t num_reads; /* failed + successful */ 73 atomic64_t num_reads; /* failed + successful */
74 atomic64_t num_writes; /* --do-- */ 74 atomic64_t num_writes; /* --do-- */
75 atomic64_t failed_reads; /* should NEVER! happen */ 75 atomic64_t failed_reads; /* should NEVER! happen */
76 atomic64_t failed_writes; /* can happen when memory is too low */ 76 atomic64_t failed_writes; /* can happen when memory is too low */
77 atomic64_t invalid_io; /* non-page-aligned I/O requests */ 77 atomic64_t invalid_io; /* non-page-aligned I/O requests */
78 atomic64_t notify_free; /* no. of swap slot free notifications */ 78 atomic64_t notify_free; /* no. of swap slot free notifications */
79 atomic_t pages_zero; /* no. of zero filled pages */ 79 atomic64_t zero_pages; /* no. of zero filled pages */
80 atomic_t pages_stored; /* no. of pages currently stored */ 80 atomic64_t pages_stored; /* no. of pages currently stored */
81 atomic_t good_compress; /* % of pages with compression ratio<=50% */
82 atomic_t bad_compress; /* % of pages with compression ratio>=75% */
83}; 81};
84 82
85struct zram_meta { 83struct zram_meta {
86 rwlock_t tb_lock; /* protect table */ 84 rwlock_t tb_lock; /* protect table */
87 void *compress_workmem;
88 void *compress_buffer;
89 struct table *table; 85 struct table *table;
90 struct zs_pool *mem_pool; 86 struct zs_pool *mem_pool;
91 struct mutex buffer_lock; /* protect compress buffers */
92}; 87};
93 88
94struct zram { 89struct zram {
95 struct zram_meta *meta; 90 struct zram_meta *meta;
96 struct request_queue *queue; 91 struct request_queue *queue;
97 struct gendisk *disk; 92 struct gendisk *disk;
98 int init_done; 93 struct zcomp *comp;
94
99 /* Prevent concurrent execution of device init, reset and R/W request */ 95 /* Prevent concurrent execution of device init, reset and R/W request */
100 struct rw_semaphore init_lock; 96 struct rw_semaphore init_lock;
101 /* 97 /*
@@ -103,7 +99,8 @@ struct zram {
103 * we can store in a disk. 99 * we can store in a disk.
104 */ 100 */
105 u64 disksize; /* bytes */ 101 u64 disksize; /* bytes */
106 102 int max_comp_streams;
107 struct zram_stats stats; 103 struct zram_stats stats;
104 char compressor[10];
108}; 105};
109#endif 106#endif
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 1a65838888cd..c54cac3f8bc8 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -74,7 +74,7 @@ config TCG_NSC
74 74
75config TCG_ATMEL 75config TCG_ATMEL
76 tristate "Atmel TPM Interface" 76 tristate "Atmel TPM Interface"
77 depends on PPC64 || HAS_IOPORT 77 depends on PPC64 || HAS_IOPORT_MAP
78 ---help--- 78 ---help---
79 If you have a TPM security chip from Atmel say Yes and it 79 If you have a TPM security chip from Atmel say Yes and it
80 will be accessible from within Linux. To compile this driver 80 will be accessible from within Linux. To compile this driver
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index de17c5593d97..014afab1d551 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -936,7 +936,7 @@ config I2C_ACORN
936 936
937config I2C_ELEKTOR 937config I2C_ELEKTOR
938 tristate "Elektor ISA card" 938 tristate "Elektor ISA card"
939 depends on ISA && HAS_IOPORT && BROKEN_ON_SMP 939 depends on ISA && HAS_IOPORT_MAP && BROKEN_ON_SMP
940 select I2C_ALGOPCF 940 select I2C_ALGOPCF
941 help 941 help
942 This supports the PCF8584 ISA bus I2C adapter. Say Y if you own 942 This supports the PCF8584 ISA bus I2C adapter. Say Y if you own
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index bfb39bb56ef1..e8b55c3a6170 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -887,7 +887,7 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
887 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if 887 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
888 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. 888 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
889 */ 889 */
890static void do_set_pte(struct lg_cpu *cpu, int idx, 890static void __guest_set_pte(struct lg_cpu *cpu, int idx,
891 unsigned long vaddr, pte_t gpte) 891 unsigned long vaddr, pte_t gpte)
892{ 892{
893 /* Look up the matching shadow page directory entry. */ 893 /* Look up the matching shadow page directory entry. */
@@ -960,13 +960,13 @@ void guest_set_pte(struct lg_cpu *cpu,
960 unsigned int i; 960 unsigned int i;
961 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) 961 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
962 if (cpu->lg->pgdirs[i].pgdir) 962 if (cpu->lg->pgdirs[i].pgdir)
963 do_set_pte(cpu, i, vaddr, gpte); 963 __guest_set_pte(cpu, i, vaddr, gpte);
964 } else { 964 } else {
965 /* Is this page table one we have a shadow for? */ 965 /* Is this page table one we have a shadow for? */
966 int pgdir = find_pgdir(cpu->lg, gpgdir); 966 int pgdir = find_pgdir(cpu->lg, gpgdir);
967 if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs)) 967 if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
968 /* If so, do the update. */ 968 /* If so, do the update. */
969 do_set_pte(cpu, pgdir, vaddr, gpte); 969 __guest_set_pte(cpu, pgdir, vaddr, gpte);
970 } 970 }
971} 971}
972 972
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
index 2bef3f76032a..a3700a56b8ff 100644
--- a/drivers/misc/sgi-gru/grukdump.c
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -178,10 +178,10 @@ static int gru_dump_context(struct gru_state *gru, int ctxnum,
178 hdr.cbrcnt = cbrcnt; 178 hdr.cbrcnt = cbrcnt;
179 hdr.dsrcnt = dsrcnt; 179 hdr.dsrcnt = dsrcnt;
180 hdr.cch_locked = cch_locked; 180 hdr.cch_locked = cch_locked;
181 if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr))) 181 if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
182 ret = -EFAULT; 182 return -EFAULT;
183 183
184 return ret ? ret : bytes; 184 return bytes;
185} 185}
186 186
187int gru_dump_chiplet_request(unsigned long arg) 187int gru_dump_chiplet_request(unsigned long arg)
diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig
index 4b18b8765523..1e65cb6c2591 100644
--- a/drivers/net/can/sja1000/Kconfig
+++ b/drivers/net/can/sja1000/Kconfig
@@ -39,7 +39,7 @@ config CAN_EMS_PCI
39config CAN_PEAK_PCMCIA 39config CAN_PEAK_PCMCIA
40 tristate "PEAK PCAN-PC Card" 40 tristate "PEAK PCAN-PC Card"
41 depends on PCMCIA 41 depends on PCMCIA
42 depends on HAS_IOPORT 42 depends on HAS_IOPORT_MAP
43 ---help--- 43 ---help---
44 This driver is for the PCAN-PC Card PCMCIA adapter (1 or 2 channels) 44 This driver is for the PCAN-PC Card PCMCIA adapter (1 or 2 channels)
45 from PEAK-System (http://www.peak-system.com). To compile this 45 from PEAK-System (http://www.peak-system.com). To compile this
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 65b735d4a6ad..afaab4b2333f 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -66,7 +66,7 @@ config PCMCIA_3C589
66 66
67config VORTEX 67config VORTEX
68 tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support" 68 tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support"
69 depends on (PCI || EISA) && HAS_IOPORT 69 depends on (PCI || EISA) && HAS_IOPORT_MAP
70 select MII 70 select MII
71 ---help--- 71 ---help---
72 This option enables driver support for a large number of 10Mbps and 72 This option enables driver support for a large number of 10Mbps and
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 6d1f6ed3113f..a8497183ff8b 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -493,6 +493,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
493 ndev->netdev_ops = &rionet_netdev_ops; 493 ndev->netdev_ops = &rionet_netdev_ops;
494 ndev->mtu = RIO_MAX_MSG_SIZE - 14; 494 ndev->mtu = RIO_MAX_MSG_SIZE - 14;
495 ndev->features = NETIF_F_LLTX; 495 ndev->features = NETIF_F_LLTX;
496 SET_NETDEV_DEV(ndev, &mport->dev);
496 SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops); 497 SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops);
497 498
498 spin_lock_init(&rnet->lock); 499 spin_lock_init(&rnet->lock);
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index ff7cbf2d28e3..1753dc693c15 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -2256,6 +2256,7 @@ static int tsi721_setup_mport(struct tsi721_device *priv)
2256 mport->phy_type = RIO_PHY_SERIAL; 2256 mport->phy_type = RIO_PHY_SERIAL;
2257 mport->priv = (void *)priv; 2257 mport->priv = (void *)priv;
2258 mport->phys_efptr = 0x100; 2258 mport->phys_efptr = 0x100;
2259 mport->dev.parent = &pdev->dev;
2259 priv->mport = mport; 2260 priv->mport = mport;
2260 2261
2261 INIT_LIST_HEAD(&mport->dbells); 2262 INIT_LIST_HEAD(&mport->dbells);
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 7061ac0ad428..0305675270ee 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -644,6 +644,9 @@ enum tsi721_smsg_int_flag {
644 644
645#ifdef CONFIG_RAPIDIO_DMA_ENGINE 645#ifdef CONFIG_RAPIDIO_DMA_ENGINE
646 646
647#define TSI721_BDMA_BD_RING_SZ 128
648#define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1)
649
647struct tsi721_tx_desc { 650struct tsi721_tx_desc {
648 struct dma_async_tx_descriptor txd; 651 struct dma_async_tx_descriptor txd;
649 struct tsi721_dma_desc *hw_desc; 652 struct tsi721_dma_desc *hw_desc;
@@ -652,6 +655,7 @@ struct tsi721_tx_desc {
652 u64 rio_addr; 655 u64 rio_addr;
653 /* upper 2-bits of 66-bit RIO address */ 656 /* upper 2-bits of 66-bit RIO address */
654 u8 rio_addr_u; 657 u8 rio_addr_u;
658 u32 bcount;
655 bool interrupt; 659 bool interrupt;
656 struct list_head desc_node; 660 struct list_head desc_node;
657 struct list_head tx_list; 661 struct list_head tx_list;
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 91245f5dbe81..9b60b1f3261c 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -304,35 +304,17 @@ struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
304} 304}
305 305
306static int 306static int
307tsi721_fill_desc(struct tsi721_bdma_chan *bdma_chan, 307tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
308 struct tsi721_tx_desc *desc, struct scatterlist *sg,
309 enum dma_rtype rtype, u32 sys_size) 308 enum dma_rtype rtype, u32 sys_size)
310{ 309{
311 struct tsi721_dma_desc *bd_ptr = desc->hw_desc; 310 struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
312 u64 rio_addr; 311 u64 rio_addr;
313 312
314 if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
315 dev_err(bdma_chan->dchan.device->dev,
316 "SG element is too large\n");
317 return -EINVAL;
318 }
319
320 dev_dbg(bdma_chan->dchan.device->dev,
321 "desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
322 (u64)desc->txd.phys, (unsigned long long)sg_dma_address(sg),
323 sg_dma_len(sg));
324
325 dev_dbg(bdma_chan->dchan.device->dev,
326 "bd_ptr = %p did=%d raddr=0x%llx\n",
327 bd_ptr, desc->destid, desc->rio_addr);
328
329 /* Initialize DMA descriptor */ 313 /* Initialize DMA descriptor */
330 bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) | 314 bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
331 (rtype << 19) | desc->destid); 315 (rtype << 19) | desc->destid);
332 if (desc->interrupt)
333 bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
334 bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) | 316 bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
335 (sys_size << 26) | sg_dma_len(sg)); 317 (sys_size << 26));
336 rio_addr = (desc->rio_addr >> 2) | 318 rio_addr = (desc->rio_addr >> 2) |
337 ((u64)(desc->rio_addr_u & 0x3) << 62); 319 ((u64)(desc->rio_addr_u & 0x3) << 62);
338 bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff); 320 bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
@@ -346,6 +328,20 @@ tsi721_fill_desc(struct tsi721_bdma_chan *bdma_chan,
346 return 0; 328 return 0;
347} 329}
348 330
331static int
332tsi721_desc_fill_end(struct tsi721_tx_desc *desc)
333{
334 struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
335
336 /* Update DMA descriptor */
337 if (desc->interrupt)
338 bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
339 bd_ptr->bcount |= cpu_to_le32(desc->bcount & TSI721_DMAD_BCOUNT1);
340
341 return 0;
342}
343
344
349static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan, 345static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan,
350 struct tsi721_tx_desc *desc) 346 struct tsi721_tx_desc *desc)
351{ 347{
@@ -674,6 +670,7 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
674 unsigned int i; 670 unsigned int i;
675 u32 sys_size = dma_to_mport(dchan->device)->sys_size; 671 u32 sys_size = dma_to_mport(dchan->device)->sys_size;
676 enum dma_rtype rtype; 672 enum dma_rtype rtype;
673 dma_addr_t next_addr = -1;
677 674
678 if (!sgl || !sg_len) { 675 if (!sgl || !sg_len) {
679 dev_err(dchan->device->dev, "%s: No SG list\n", __func__); 676 dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
@@ -704,36 +701,84 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
704 for_each_sg(sgl, sg, sg_len, i) { 701 for_each_sg(sgl, sg, sg_len, i) {
705 int err; 702 int err;
706 703
707 dev_dbg(dchan->device->dev, "%s: sg #%d\n", __func__, i); 704 if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
705 dev_err(dchan->device->dev,
706 "%s: SG entry %d is too large\n", __func__, i);
707 goto err_desc_put;
708 }
709
710 /*
 711 * If this sg entry forms a contiguous block with the previous one,
 712 * try to merge it into the existing DMA descriptor.
713 */
714 if (desc) {
715 if (next_addr == sg_dma_address(sg) &&
716 desc->bcount + sg_dma_len(sg) <=
717 TSI721_BDMA_MAX_BCOUNT) {
718 /* Adjust byte count of the descriptor */
719 desc->bcount += sg_dma_len(sg);
720 goto entry_done;
721 }
722
723 /*
724 * Finalize this descriptor using total
725 * byte count value.
726 */
727 tsi721_desc_fill_end(desc);
728 dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
729 __func__, desc->bcount);
730 }
731
732 /*
733 * Obtain and initialize a new descriptor
734 */
708 desc = tsi721_desc_get(bdma_chan); 735 desc = tsi721_desc_get(bdma_chan);
709 if (!desc) { 736 if (!desc) {
710 dev_err(dchan->device->dev, 737 dev_err(dchan->device->dev,
711 "Not enough descriptors available\n"); 738 "%s: Failed to get new descriptor for SG %d\n",
712 goto err_desc_get; 739 __func__, i);
740 goto err_desc_put;
713 } 741 }
714 742
715 if (sg_is_last(sg))
716 desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
717 else
718 desc->interrupt = false;
719
720 desc->destid = rext->destid; 743 desc->destid = rext->destid;
721 desc->rio_addr = rio_addr; 744 desc->rio_addr = rio_addr;
722 desc->rio_addr_u = 0; 745 desc->rio_addr_u = 0;
746 desc->bcount = sg_dma_len(sg);
747
748 dev_dbg(dchan->device->dev,
749 "sg%d desc: 0x%llx, addr: 0x%llx len: %d\n",
750 i, (u64)desc->txd.phys,
751 (unsigned long long)sg_dma_address(sg),
752 sg_dma_len(sg));
753
754 dev_dbg(dchan->device->dev,
755 "bd_ptr = %p did=%d raddr=0x%llx\n",
756 desc->hw_desc, desc->destid, desc->rio_addr);
723 757
724 err = tsi721_fill_desc(bdma_chan, desc, sg, rtype, sys_size); 758 err = tsi721_desc_fill_init(desc, sg, rtype, sys_size);
725 if (err) { 759 if (err) {
726 dev_err(dchan->device->dev, 760 dev_err(dchan->device->dev,
727 "Failed to build desc: %d\n", err); 761 "Failed to build desc: %d\n", err);
728 goto err_desc_get; 762 goto err_desc_put;
729 } 763 }
730 764
731 rio_addr += sg_dma_len(sg); 765 next_addr = sg_dma_address(sg);
732 766
733 if (!first) 767 if (!first)
734 first = desc; 768 first = desc;
735 else 769 else
736 list_add_tail(&desc->desc_node, &first->tx_list); 770 list_add_tail(&desc->desc_node, &first->tx_list);
771
772entry_done:
773 if (sg_is_last(sg)) {
774 desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
775 tsi721_desc_fill_end(desc);
776 dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
777 __func__, desc->bcount);
778 } else {
779 rio_addr += sg_dma_len(sg);
780 next_addr += sg_dma_len(sg);
781 }
737 } 782 }
738 783
739 first->txd.cookie = -EBUSY; 784 first->txd.cookie = -EBUSY;
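The reworked loop above coalesces scatterlist entries into hardware descriptors: while each new entry starts exactly at next_addr and the running desc->bcount stays within TSI721_BDMA_MAX_BCOUNT, the entry is folded into the current descriptor; otherwise the descriptor is closed with tsi721_desc_fill_end() and a fresh one is taken from the ring (now TSI721_BDMA_BD_RING_SZ = 128 entries). As an illustrative, made-up case, three physically contiguous 4 KiB entries collapse into a single descriptor with a 12288-byte bcount instead of occupying three ring slots as before.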
@@ -741,7 +786,7 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
741 786
742 return &first->txd; 787 return &first->txd;
743 788
744err_desc_get: 789err_desc_put:
745 tsi721_desc_put(bdma_chan, first); 790 tsi721_desc_put(bdma_chan, first);
746 return NULL; 791 return NULL;
747} 792}
@@ -792,7 +837,7 @@ int tsi721_register_dma(struct tsi721_device *priv)
792 if (i == TSI721_DMACH_MAINT) 837 if (i == TSI721_DMACH_MAINT)
793 continue; 838 continue;
794 839
795 bdma_chan->bd_num = 64; 840 bdma_chan->bd_num = TSI721_BDMA_BD_RING_SZ;
796 bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i); 841 bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
797 842
798 bdma_chan->dchan.device = &mport->dma; 843 bdma_chan->dchan.device = &mport->dma;
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index c9ae692d3451..f301f059bb85 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -167,7 +167,6 @@ void rio_unregister_driver(struct rio_driver *rdrv)
167void rio_attach_device(struct rio_dev *rdev) 167void rio_attach_device(struct rio_dev *rdev)
168{ 168{
169 rdev->dev.bus = &rio_bus_type; 169 rdev->dev.bus = &rio_bus_type;
170 rdev->dev.parent = &rio_bus;
171} 170}
172EXPORT_SYMBOL_GPL(rio_attach_device); 171EXPORT_SYMBOL_GPL(rio_attach_device);
173 172
@@ -216,9 +215,12 @@ static int rio_uevent(struct device *dev, struct kobj_uevent_env *env)
216 return 0; 215 return 0;
217} 216}
218 217
219struct device rio_bus = { 218struct class rio_mport_class = {
220 .init_name = "rapidio", 219 .name = "rapidio_port",
220 .owner = THIS_MODULE,
221 .dev_groups = rio_mport_groups,
221}; 222};
223EXPORT_SYMBOL_GPL(rio_mport_class);
222 224
223struct bus_type rio_bus_type = { 225struct bus_type rio_bus_type = {
224 .name = "rapidio", 226 .name = "rapidio",
@@ -233,14 +235,20 @@ struct bus_type rio_bus_type = {
233/** 235/**
234 * rio_bus_init - Register the RapidIO bus with the device model 236 * rio_bus_init - Register the RapidIO bus with the device model
235 * 237 *
236 * Registers the RIO bus device and RIO bus type with the Linux 238 * Registers the RIO mport device class and RIO bus type with the Linux
237 * device model. 239 * device model.
238 */ 240 */
239static int __init rio_bus_init(void) 241static int __init rio_bus_init(void)
240{ 242{
241 if (device_register(&rio_bus) < 0) 243 int ret;
242 printk("RIO: failed to register RIO bus device\n"); 244
243 return bus_register(&rio_bus_type); 245 ret = class_register(&rio_mport_class);
246 if (!ret) {
247 ret = bus_register(&rio_bus_type);
248 if (ret)
249 class_unregister(&rio_mport_class);
250 }
251 return ret;
244} 252}
245 253
246postcore_initcall(rio_bus_init); 254postcore_initcall(rio_bus_init);
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index d3a6539a77cc..47a1b2ea76c4 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -461,6 +461,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
461 rdev->comp_tag & RIO_CTAG_UDEVID); 461 rdev->comp_tag & RIO_CTAG_UDEVID);
462 } 462 }
463 463
464 rdev->dev.parent = &port->dev;
464 rio_attach_device(rdev); 465 rio_attach_device(rdev);
465 466
466 device_initialize(&rdev->dev); 467 device_initialize(&rdev->dev);
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index e0221c6d0cc2..cdb005c0094d 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -341,3 +341,43 @@ const struct attribute_group *rio_bus_groups[] = {
341 &rio_bus_group, 341 &rio_bus_group,
342 NULL, 342 NULL,
343}; 343};
344
345static ssize_t
346port_destid_show(struct device *dev, struct device_attribute *attr,
347 char *buf)
348{
349 struct rio_mport *mport = to_rio_mport(dev);
350
351 if (mport)
352 return sprintf(buf, "0x%04x\n", mport->host_deviceid);
353 else
354 return -ENODEV;
355}
356static DEVICE_ATTR_RO(port_destid);
357
358static ssize_t sys_size_show(struct device *dev, struct device_attribute *attr,
359 char *buf)
360{
361 struct rio_mport *mport = to_rio_mport(dev);
362
363 if (mport)
364 return sprintf(buf, "%u\n", mport->sys_size);
365 else
366 return -ENODEV;
367}
368static DEVICE_ATTR_RO(sys_size);
369
370static struct attribute *rio_mport_attrs[] = {
371 &dev_attr_port_destid.attr,
372 &dev_attr_sys_size.attr,
373 NULL,
374};
375
376static const struct attribute_group rio_mport_group = {
377 .attrs = rio_mport_attrs,
378};
379
380const struct attribute_group *rio_mport_groups[] = {
381 &rio_mport_group,
382 NULL,
383};
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 2e8a20cac588..a54ba0494dd3 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1884,6 +1884,7 @@ static int rio_get_hdid(int index)
1884int rio_register_mport(struct rio_mport *port) 1884int rio_register_mport(struct rio_mport *port)
1885{ 1885{
1886 struct rio_scan_node *scan = NULL; 1886 struct rio_scan_node *scan = NULL;
1887 int res = 0;
1887 1888
1888 if (next_portid >= RIO_MAX_MPORTS) { 1889 if (next_portid >= RIO_MAX_MPORTS) {
1889 pr_err("RIO: reached specified max number of mports\n"); 1890 pr_err("RIO: reached specified max number of mports\n");
@@ -1894,6 +1895,16 @@ int rio_register_mport(struct rio_mport *port)
1894 port->host_deviceid = rio_get_hdid(port->id); 1895 port->host_deviceid = rio_get_hdid(port->id);
1895 port->nscan = NULL; 1896 port->nscan = NULL;
1896 1897
1898 dev_set_name(&port->dev, "rapidio%d", port->id);
1899 port->dev.class = &rio_mport_class;
1900
1901 res = device_register(&port->dev);
1902 if (res)
1903 dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
1904 port->id, res);
1905 else
1906 dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id);
1907
1897 mutex_lock(&rio_mport_list_lock); 1908 mutex_lock(&rio_mport_list_lock);
1898 list_add_tail(&port->node, &rio_mports); 1909 list_add_tail(&port->node, &rio_mports);
1899 1910
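
[Note: rio_register_mport() above now names the embedded struct device and registers it under the new class before the port is added to the global list. A small sketch of that pattern, assuming the usual container_of() accessor for getting back from the class device to its container (the example_* names are hypothetical):]

#include <linux/kernel.h>
#include <linux/device.h>

struct example_port {
	int id;
	struct device dev;	/* embedded class device */
};

/* Assumed helper, mirroring the usual container_of() accessor */
#define to_example_port(d) container_of(d, struct example_port, dev)

static int example_register_port(struct example_port *port,
				 struct class *port_class)
{
	int ret;

	dev_set_name(&port->dev, "example%d", port->id);
	port->dev.class = port_class;

	ret = device_register(&port->dev);
	if (ret)
		dev_err(&port->dev, "registration failed, err=%d\n", ret);

	return ret;
}
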
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 5f99d22ad0b0..2d0550e08ea2 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -50,6 +50,7 @@ extern int rio_mport_scan(int mport_id);
50/* Structures internal to the RIO core code */ 50/* Structures internal to the RIO core code */
51extern const struct attribute_group *rio_dev_groups[]; 51extern const struct attribute_group *rio_dev_groups[];
52extern const struct attribute_group *rio_bus_groups[]; 52extern const struct attribute_group *rio_bus_groups[];
53extern const struct attribute_group *rio_mport_groups[];
53 54
54#define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) 55#define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16))
55#define RIO_SET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x000000ff) << 16)) 56#define RIO_SET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x000000ff) << 16))
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a16b0ff497ca..d8223209d4b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -832,6 +832,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
832 832
833static const struct vm_operations_struct v9fs_file_vm_ops = { 833static const struct vm_operations_struct v9fs_file_vm_ops = {
834 .fault = filemap_fault, 834 .fault = filemap_fault,
835 .map_pages = filemap_map_pages,
835 .page_mkwrite = v9fs_vm_page_mkwrite, 836 .page_mkwrite = v9fs_vm_page_mkwrite,
836 .remap_pages = generic_file_remap_pages, 837 .remap_pages = generic_file_remap_pages,
837}; 838};
@@ -839,6 +840,7 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
839static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { 840static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
840 .close = v9fs_mmap_vm_close, 841 .close = v9fs_mmap_vm_close,
841 .fault = filemap_fault, 842 .fault = filemap_fault,
843 .map_pages = filemap_map_pages,
842 .page_mkwrite = v9fs_vm_page_mkwrite, 844 .page_mkwrite = v9fs_vm_page_mkwrite,
843 .remap_pages = generic_file_remap_pages, 845 .remap_pages = generic_file_remap_pages,
844}; 846};
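
[Note: this hunk, and the btrfs, cifs, ext4, f2fs, fuse, gfs2, nfs, nilfs2 and ubifs hunks later in the commit, all add the same .map_pages = filemap_map_pages entry so the fault path can map a batch of already-uptodate pagecache pages around the faulting address. A minimal sketch of a page-cache-backed vm_operations_struct of this era follows; my_page_mkwrite is a placeholder for the filesystem's own write-notify handler.]

#include <linux/fs.h>
#include <linux/mm.h>

static int my_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* placeholder: a real filesystem reserves space / dirties the page here */
	return 0;
}

static const struct vm_operations_struct my_file_vm_ops = {
	.fault		= filemap_fault,		/* read faults from the page cache */
	.map_pages	= filemap_map_pages,		/* batch-map surrounding uptodate pages */
	.page_mkwrite	= my_page_mkwrite,		/* per-filesystem write-notify hook */
	.remap_pages	= generic_file_remap_pages,
};

static int my_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &my_file_vm_ops;
	return 0;
}
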
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 952aeb048349..9852bdf34d76 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
266 inode_init_once(&ei->vfs_inode); 266 inode_init_once(&ei->vfs_inode);
267} 267}
268 268
269static int init_inodecache(void) 269static int __init init_inodecache(void)
270{ 270{
271 adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", 271 adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
272 sizeof(struct adfs_inode_info), 272 sizeof(struct adfs_inode_info),
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3952121f2f28..25b23b1e7f22 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -5,14 +5,6 @@
5#include <linux/mutex.h> 5#include <linux/mutex.h>
6#include <linux/workqueue.h> 6#include <linux/workqueue.h>
7 7
8/* AmigaOS allows file names with up to 30 characters length.
9 * Names longer than that will be silently truncated. If you
10 * want to disallow this, comment out the following #define.
11 * Creating filesystem objects with longer names will then
12 * result in an error (ENAMETOOLONG).
13 */
14/*#define AFFS_NO_TRUNCATE */
15
16/* Ugly macros make the code more pretty. */ 8/* Ugly macros make the code more pretty. */
17 9
18#define GET_END_PTR(st,p,sz) ((st *)((char *)(p)+((sz)-sizeof(st)))) 10#define GET_END_PTR(st,p,sz) ((st *)((char *)(p)+((sz)-sizeof(st))))
@@ -28,7 +20,6 @@
28 20
29#define AFFS_CACHE_SIZE PAGE_SIZE 21#define AFFS_CACHE_SIZE PAGE_SIZE
30 22
31#define AFFS_MAX_PREALLOC 32
32#define AFFS_LC_SIZE (AFFS_CACHE_SIZE/sizeof(u32)/2) 23#define AFFS_LC_SIZE (AFFS_CACHE_SIZE/sizeof(u32)/2)
33#define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2) 24#define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2)
34#define AFFS_AC_MASK (AFFS_AC_SIZE-1) 25#define AFFS_AC_MASK (AFFS_AC_SIZE-1)
@@ -118,6 +109,7 @@ struct affs_sb_info {
118#define SF_OFS 0x0200 /* Old filesystem */ 109#define SF_OFS 0x0200 /* Old filesystem */
119#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */ 110#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
120#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */ 111#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */
112#define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
121 113
122/* short cut to get to the affs specific sb data */ 114/* short cut to get to the affs specific sb data */
123static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) 115static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
@@ -137,9 +129,13 @@ extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
137extern void secs_to_datestamp(time_t secs, struct affs_date *ds); 129extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
138extern umode_t prot_to_mode(u32 prot); 130extern umode_t prot_to_mode(u32 prot);
139extern void mode_to_prot(struct inode *inode); 131extern void mode_to_prot(struct inode *inode);
140extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); 132extern void affs_error(struct super_block *sb, const char *function,
141extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...); 133 const char *fmt, ...);
142extern int affs_check_name(const unsigned char *name, int len); 134extern void affs_warning(struct super_block *sb, const char *function,
135 const char *fmt, ...);
136extern bool affs_nofilenametruncate(const struct dentry *dentry);
137extern int affs_check_name(const unsigned char *name, int len,
138 bool notruncate);
143extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry); 139extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry);
144 140
145/* bitmap. c */ 141/* bitmap. c */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index d9a43674cb94..533a322c41c0 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -471,20 +471,27 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
471 function,ErrorBuffer); 471 function,ErrorBuffer);
472} 472}
473 473
474bool
475affs_nofilenametruncate(const struct dentry *dentry)
476{
477 struct inode *inode = dentry->d_inode;
478 return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
479
480}
481
474/* Check if the name is valid for a affs object. */ 482/* Check if the name is valid for a affs object. */
475 483
476int 484int
477affs_check_name(const unsigned char *name, int len) 485affs_check_name(const unsigned char *name, int len, bool notruncate)
478{ 486{
479 int i; 487 int i;
480 488
481 if (len > 30) 489 if (len > 30) {
482#ifdef AFFS_NO_TRUNCATE 490 if (notruncate)
483 return -ENAMETOOLONG; 491 return -ENAMETOOLONG;
484#else 492 else
485 len = 30; 493 len = 30;
486#endif 494 }
487
488 for (i = 0; i < len; i++) { 495 for (i = 0; i < len; i++) {
489 if (name[i] < ' ' || name[i] == ':' 496 if (name[i] < ' ' || name[i] == ':'
490 || (name[i] > 0x7e && name[i] < 0xa0)) 497 || (name[i] > 0x7e && name[i] < 0xa0))
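
[Note: the hunk above replaces the compile-time AFFS_NO_TRUNCATE #ifdef with a per-mount decision: affs_nofilenametruncate() reads the new SF_NO_TRUNCATE superblock flag, and affs_check_name() now takes it as a parameter. A short, illustrative caller using the declarations from fs/affs/affs.h shown earlier (example_affs_create_check itself is hypothetical):]

static int example_affs_create_check(struct dentry *dentry)
{
	/* notruncate is true only when the fs was mounted with nofilenametruncate */
	bool notruncate = affs_nofilenametruncate(dentry);

	/* returns -ENAMETOOLONG for >30 chars when notruncate, else 0 */
	return affs_check_name(dentry->d_name.name,
			       dentry->d_name.len,
			       notruncate);
}
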
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index f1eba8c3644e..cbbda476a805 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -52,8 +52,10 @@ affs_readdir(struct file *file, struct dir_context *ctx)
52 int hash_pos; 52 int hash_pos;
53 int chain_pos; 53 int chain_pos;
54 u32 ino; 54 u32 ino;
55 int error = 0;
55 56
56 pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos); 57 pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
58 inode->i_ino, (unsigned long)ctx->pos);
57 59
58 if (ctx->pos < 2) { 60 if (ctx->pos < 2) {
59 file->private_data = (void *)0; 61 file->private_data = (void *)0;
@@ -72,7 +74,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
72 } 74 }
73 dir_bh = affs_bread(sb, inode->i_ino); 75 dir_bh = affs_bread(sb, inode->i_ino);
74 if (!dir_bh) 76 if (!dir_bh)
75 goto readdir_out; 77 goto out_unlock_dir;
76 78
77 /* If the directory hasn't changed since the last call to readdir(), 79 /* If the directory hasn't changed since the last call to readdir(),
78 * we can jump directly to where we left off. 80 * we can jump directly to where we left off.
@@ -88,7 +90,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
88 fh_bh = affs_bread(sb, ino); 90 fh_bh = affs_bread(sb, ino);
89 if (!fh_bh) { 91 if (!fh_bh) {
90 affs_error(sb, "readdir","Cannot read block %d", i); 92 affs_error(sb, "readdir","Cannot read block %d", i);
91 return -EIO; 93 error = -EIO;
94 goto out_brelse_dir;
92 } 95 }
93 ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); 96 ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
94 affs_brelse(fh_bh); 97 affs_brelse(fh_bh);
@@ -107,29 +110,34 @@ inside:
107 do { 110 do {
108 fh_bh = affs_bread(sb, ino); 111 fh_bh = affs_bread(sb, ino);
109 if (!fh_bh) { 112 if (!fh_bh) {
110 affs_error(sb, "readdir","Cannot read block %d", ino); 113 affs_error(sb, "readdir",
114 "Cannot read block %d", ino);
111 break; 115 break;
112 } 116 }
113 117
114 namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); 118 namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
115 name = AFFS_TAIL(sb, fh_bh)->name + 1; 119 name = AFFS_TAIL(sb, fh_bh)->name + 1;
116 pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", 120 pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
121 "ino=%u), hash=%d, f_pos=%x\n",
117 namelen, name, ino, hash_pos, (u32)ctx->pos); 122 namelen, name, ino, hash_pos, (u32)ctx->pos);
123
118 if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) 124 if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN))
119 goto readdir_done; 125 goto done;
120 ctx->pos++; 126 ctx->pos++;
121 ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); 127 ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
122 affs_brelse(fh_bh); 128 affs_brelse(fh_bh);
123 fh_bh = NULL; 129 fh_bh = NULL;
124 } while (ino); 130 } while (ino);
125 } 131 }
126readdir_done: 132done:
127 file->f_version = inode->i_version; 133 file->f_version = inode->i_version;
128 file->private_data = (void *)(long)ino; 134 file->private_data = (void *)(long)ino;
135 affs_brelse(fh_bh);
129 136
130readdir_out: 137out_brelse_dir:
131 affs_brelse(dir_bh); 138 affs_brelse(dir_bh);
132 affs_brelse(fh_bh); 139
140out_unlock_dir:
133 affs_unlock_dir(inode); 141 affs_unlock_dir(inode);
134 return 0; 142 return error;
135} 143}
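
[Note: the affs_readdir() rework above converts the early "return -EIO" into the usual kernel unwind pattern: an error variable plus goto labels ordered inversely to the acquisitions, so both buffer heads and the directory lock are always released. A self-contained sketch of that pattern with generic resources (the example_* names and the kmalloc stand-ins are illustrative only):]

#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_MUTEX(example_dir_lock);

static int example_readdir_like(void)
{
	int error = 0;
	void *dir_buf, *entry_buf = NULL;

	mutex_lock(&example_dir_lock);

	dir_buf = kmalloc(64, GFP_KERNEL);
	if (!dir_buf) {
		error = -ENOMEM;
		goto out_unlock;	/* nothing else to release yet */
	}

	entry_buf = kmalloc(64, GFP_KERNEL);
	if (!entry_buf) {
		error = -EIO;
		goto out_free_dir;	/* release in reverse acquisition order */
	}

	/* ... emit directory entries; failures jump to the labels below ... */

	kfree(entry_buf);
out_free_dir:
	kfree(dir_buf);
out_unlock:
	mutex_unlock(&example_dir_lock);
	return error;
}
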
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index c36cbb4537a2..6dae1ccd176d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -60,13 +60,13 @@ affs_get_toupper(struct super_block *sb)
60 * Note: the dentry argument is the parent dentry. 60 * Note: the dentry argument is the parent dentry.
61 */ 61 */
62static inline int 62static inline int
63__affs_hash_dentry(struct qstr *qstr, toupper_t toupper) 63__affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate)
64{ 64{
65 const u8 *name = qstr->name; 65 const u8 *name = qstr->name;
66 unsigned long hash; 66 unsigned long hash;
67 int i; 67 int i;
68 68
69 i = affs_check_name(qstr->name, qstr->len); 69 i = affs_check_name(qstr->name, qstr->len, notruncate);
70 if (i) 70 if (i)
71 return i; 71 return i;
72 72
@@ -82,16 +82,22 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
82static int 82static int
83affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) 83affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
84{ 84{
85 return __affs_hash_dentry(qstr, affs_toupper); 85 return __affs_hash_dentry(qstr, affs_toupper,
86 affs_nofilenametruncate(dentry));
87
86} 88}
89
87static int 90static int
88affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr) 91affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
89{ 92{
90 return __affs_hash_dentry(qstr, affs_intl_toupper); 93 return __affs_hash_dentry(qstr, affs_intl_toupper,
94 affs_nofilenametruncate(dentry));
95
91} 96}
92 97
93static inline int __affs_compare_dentry(unsigned int len, 98static inline int __affs_compare_dentry(unsigned int len,
94 const char *str, const struct qstr *name, toupper_t toupper) 99 const char *str, const struct qstr *name, toupper_t toupper,
100 bool notruncate)
95{ 101{
96 const u8 *aname = str; 102 const u8 *aname = str;
97 const u8 *bname = name->name; 103 const u8 *bname = name->name;
@@ -101,7 +107,7 @@ static inline int __affs_compare_dentry(unsigned int len,
101 * must be valid. 'name' must be validated first. 107 * must be valid. 'name' must be validated first.
102 */ 108 */
103 109
104 if (affs_check_name(name->name, name->len)) 110 if (affs_check_name(name->name, name->len, notruncate))
105 return 1; 111 return 1;
106 112
107 /* 113 /*
@@ -126,13 +132,18 @@ static int
126affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, 132affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
127 unsigned int len, const char *str, const struct qstr *name) 133 unsigned int len, const char *str, const struct qstr *name)
128{ 134{
129 return __affs_compare_dentry(len, str, name, affs_toupper); 135
136 return __affs_compare_dentry(len, str, name, affs_toupper,
137 affs_nofilenametruncate(parent));
130} 138}
139
131static int 140static int
132affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, 141affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
133 unsigned int len, const char *str, const struct qstr *name) 142 unsigned int len, const char *str, const struct qstr *name)
134{ 143{
135 return __affs_compare_dentry(len, str, name, affs_intl_toupper); 144 return __affs_compare_dentry(len, str, name, affs_intl_toupper,
145 affs_nofilenametruncate(parent));
146
136} 147}
137 148
138/* 149/*
@@ -411,7 +422,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, 422 (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
412 (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); 423 (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
413 424
414 retval = affs_check_name(new_dentry->d_name.name,new_dentry->d_name.len); 425 retval = affs_check_name(new_dentry->d_name.name,
426 new_dentry->d_name.len,
427 affs_nofilenametruncate(old_dentry));
428
415 if (retval) 429 if (retval)
416 return retval; 430 return retval;
417 431
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 307453086c3f..6d589f28bf9b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -128,7 +128,7 @@ static void init_once(void *foo)
128 inode_init_once(&ei->vfs_inode); 128 inode_init_once(&ei->vfs_inode);
129} 129}
130 130
131static int init_inodecache(void) 131static int __init init_inodecache(void)
132{ 132{
133 affs_inode_cachep = kmem_cache_create("affs_inode_cache", 133 affs_inode_cachep = kmem_cache_create("affs_inode_cache",
134 sizeof(struct affs_inode_info), 134 sizeof(struct affs_inode_info),
@@ -163,7 +163,7 @@ static const struct super_operations affs_sops = {
163}; 163};
164 164
165enum { 165enum {
166 Opt_bs, Opt_mode, Opt_mufs, Opt_prefix, Opt_protect, 166 Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect,
167 Opt_reserved, Opt_root, Opt_setgid, Opt_setuid, 167 Opt_reserved, Opt_root, Opt_setgid, Opt_setuid,
168 Opt_verbose, Opt_volume, Opt_ignore, Opt_err, 168 Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
169}; 169};
@@ -172,6 +172,7 @@ static const match_table_t tokens = {
172 {Opt_bs, "bs=%u"}, 172 {Opt_bs, "bs=%u"},
173 {Opt_mode, "mode=%o"}, 173 {Opt_mode, "mode=%o"},
174 {Opt_mufs, "mufs"}, 174 {Opt_mufs, "mufs"},
175 {Opt_notruncate, "nofilenametruncate"},
175 {Opt_prefix, "prefix=%s"}, 176 {Opt_prefix, "prefix=%s"},
176 {Opt_protect, "protect"}, 177 {Opt_protect, "protect"},
177 {Opt_reserved, "reserved=%u"}, 178 {Opt_reserved, "reserved=%u"},
@@ -233,6 +234,9 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
233 case Opt_mufs: 234 case Opt_mufs:
234 *mount_opts |= SF_MUFS; 235 *mount_opts |= SF_MUFS;
235 break; 236 break;
237 case Opt_notruncate:
238 *mount_opts |= SF_NO_TRUNCATE;
239 break;
236 case Opt_prefix: 240 case Opt_prefix:
237 *prefix = match_strdup(&args[0]); 241 *prefix = match_strdup(&args[0]);
238 if (!*prefix) 242 if (!*prefix)
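
[Note: the new "nofilenametruncate" mount option above is wired through the standard match_table_t / match_token() parser. A hedged sketch of that idiom, reduced to the single new token; the example_* names are illustrative and 0x1000 mirrors the SF_NO_TRUNCATE value defined in the affs.h hunk.]

#include <linux/parser.h>
#include <linux/string.h>

enum { Opt_notruncate, Opt_err };

static const match_table_t example_tokens = {
	{Opt_notruncate, "nofilenametruncate"},
	{Opt_err, NULL},
};

static int example_parse_options(char *options, unsigned long *mount_opts)
{
	substring_t args[MAX_OPT_ARGS];
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		switch (match_token(p, example_tokens, args)) {
		case Opt_notruncate:
			*mount_opts |= 0x1000;	/* SF_NO_TRUNCATE in the patch */
			break;
		default:
			return 0;		/* unknown option: fail the mount */
		}
	}
	return 1;
}
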
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 29aa5cf6639b..7041ac35ace8 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
266 inode_init_once(&bi->vfs_inode); 266 inode_init_once(&bi->vfs_inode);
267} 267}
268 268
269static int init_inodecache(void) 269static int __init init_inodecache(void)
270{ 270{
271 bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", 271 bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
272 sizeof(struct bfs_inode_info), 272 sizeof(struct bfs_inode_info),
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0f59799fa105..aa3cb626671e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -584,7 +584,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
584 unsigned long start_code, end_code, start_data, end_data; 584 unsigned long start_code, end_code, start_data, end_data;
585 unsigned long reloc_func_desc __maybe_unused = 0; 585 unsigned long reloc_func_desc __maybe_unused = 0;
586 int executable_stack = EXSTACK_DEFAULT; 586 int executable_stack = EXSTACK_DEFAULT;
587 unsigned long def_flags = 0;
588 struct pt_regs *regs = current_pt_regs(); 587 struct pt_regs *regs = current_pt_regs();
589 struct { 588 struct {
590 struct elfhdr elf_ex; 589 struct elfhdr elf_ex;
@@ -724,9 +723,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
724 if (retval) 723 if (retval)
725 goto out_free_dentry; 724 goto out_free_dentry;
726 725
727 /* OK, This is the point of no return */
728 current->mm->def_flags = def_flags;
729
730 /* Do this immediately, since STACK_TOP as used in setup_arg_pages 726 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
731 may depend on the personality. */ 727 may depend on the personality. */
732 SET_PERSONALITY(loc->elf_ex); 728 SET_PERSONALITY(loc->elf_ex);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e1ffb1e22898..c660527af838 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2025,6 +2025,7 @@ out:
2025 2025
2026static const struct vm_operations_struct btrfs_file_vm_ops = { 2026static const struct vm_operations_struct btrfs_file_vm_ops = {
2027 .fault = filemap_fault, 2027 .fault = filemap_fault,
2028 .map_pages = filemap_map_pages,
2028 .page_mkwrite = btrfs_page_mkwrite, 2029 .page_mkwrite = btrfs_page_mkwrite,
2029 .remap_pages = generic_file_remap_pages, 2030 .remap_pages = generic_file_remap_pages,
2030}; 2031};
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 834fce759d80..216d7e99f921 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3113,6 +3113,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3113 3113
3114static struct vm_operations_struct cifs_file_vm_ops = { 3114static struct vm_operations_struct cifs_file_vm_ops = {
3115 .fault = filemap_fault, 3115 .fault = filemap_fault,
3116 .map_pages = filemap_map_pages,
3116 .page_mkwrite = cifs_page_mkwrite, 3117 .page_mkwrite = cifs_page_mkwrite,
3117 .remap_pages = generic_file_remap_pages, 3118 .remap_pages = generic_file_remap_pages,
3118}; 3119};
diff --git a/fs/exec.c b/fs/exec.c
index 25dfeba6d55f..9e81c630dfa7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/vmacache.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
30#include <linux/fcntl.h> 31#include <linux/fcntl.h>
31#include <linux/swap.h> 32#include <linux/swap.h>
@@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code);
822static int exec_mmap(struct mm_struct *mm) 823static int exec_mmap(struct mm_struct *mm)
823{ 824{
824 struct task_struct *tsk; 825 struct task_struct *tsk;
825 struct mm_struct * old_mm, *active_mm; 826 struct mm_struct *old_mm, *active_mm;
826 827
827 /* Notify parent that we're no longer interested in the old VM */ 828 /* Notify parent that we're no longer interested in the old VM */
828 tsk = current; 829 tsk = current;
@@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm)
848 tsk->mm = mm; 849 tsk->mm = mm;
849 tsk->active_mm = mm; 850 tsk->active_mm = mm;
850 activate_mm(active_mm, mm); 851 activate_mm(active_mm, mm);
852 tsk->mm->vmacache_seqnum = 0;
853 vmacache_flush(tsk);
851 task_unlock(tsk); 854 task_unlock(tsk);
852 if (old_mm) { 855 if (old_mm) {
853 up_read(&old_mm->mmap_sem); 856 up_read(&old_mm->mmap_sem);
@@ -1043,7 +1046,7 @@ EXPORT_SYMBOL_GPL(get_task_comm);
1043 * so that a new one can be started 1046 * so that a new one can be started
1044 */ 1047 */
1045 1048
1046void set_task_comm(struct task_struct *tsk, char *buf) 1049void set_task_comm(struct task_struct *tsk, const char *buf)
1047{ 1050{
1048 task_lock(tsk); 1051 task_lock(tsk);
1049 trace_task_rename(tsk, buf); 1052 trace_task_rename(tsk, buf);
@@ -1052,21 +1055,6 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1052 perf_event_comm(tsk); 1055 perf_event_comm(tsk);
1053} 1056}
1054 1057
1055static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
1056{
1057 int i, ch;
1058
1059 /* Copies the binary name from after last slash */
1060 for (i = 0; (ch = *(fn++)) != '\0';) {
1061 if (ch == '/')
1062 i = 0; /* overwrite what we wrote */
1063 else
1064 if (i < len - 1)
1065 tcomm[i++] = ch;
1066 }
1067 tcomm[i] = '\0';
1068}
1069
1070int flush_old_exec(struct linux_binprm * bprm) 1058int flush_old_exec(struct linux_binprm * bprm)
1071{ 1059{
1072 int retval; 1060 int retval;
@@ -1080,8 +1068,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1080 goto out; 1068 goto out;
1081 1069
1082 set_mm_exe_file(bprm->mm, bprm->file); 1070 set_mm_exe_file(bprm->mm, bprm->file);
1083
1084 filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
1085 /* 1071 /*
1086 * Release all of the old mmap stuff 1072 * Release all of the old mmap stuff
1087 */ 1073 */
@@ -1124,7 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1124 else 1110 else
1125 set_dumpable(current->mm, suid_dumpable); 1111 set_dumpable(current->mm, suid_dumpable);
1126 1112
1127 set_task_comm(current, bprm->tcomm); 1113 set_task_comm(current, kbasename(bprm->filename));
1128 1114
1129 /* Set the new mm task size. We have to do that late because it may 1115 /* Set the new mm task size. We have to do that late because it may
1130 * depend on TIF_32BIT which is only updated in flush_thread() on 1116 * depend on TIF_32BIT which is only updated in flush_thread() on
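
[Note: the exec.c hunks above drop the open-coded filename_to_taskname() copy and instead pass kbasename(bprm->filename) to set_task_comm(), which itself truncates to TASK_COMM_LEN; they also flush the new per-task VMA cache when the mm is swapped. A tiny sketch of the kbasename() usage (example_set_comm is hypothetical):]

#include <linux/sched.h>
#include <linux/string.h>

static void example_set_comm(struct task_struct *tsk, const char *path)
{
	/* kbasename() returns the component after the last '/', or the whole
	 * string when there is no slash; e.g. "/usr/bin/cat" -> "cat". */
	set_task_comm(tsk, kbasename(path));
}
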
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6db7f7db7777..4e508fc83dcf 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,6 +200,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
200 200
201static const struct vm_operations_struct ext4_file_vm_ops = { 201static const struct vm_operations_struct ext4_file_vm_ops = {
202 .fault = filemap_fault, 202 .fault = filemap_fault,
203 .map_pages = filemap_map_pages,
203 .page_mkwrite = ext4_page_mkwrite, 204 .page_mkwrite = ext4_page_mkwrite,
204 .remap_pages = generic_file_remap_pages, 205 .remap_pages = generic_file_remap_pages,
205}; 206};
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 302d552afea5..60e7d5448a1d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -84,6 +84,7 @@ out:
84 84
85static const struct vm_operations_struct f2fs_file_vm_ops = { 85static const struct vm_operations_struct f2fs_file_vm_ops = {
86 .fault = filemap_fault, 86 .fault = filemap_fault,
87 .map_pages = filemap_map_pages,
87 .page_mkwrite = f2fs_vm_page_mkwrite, 88 .page_mkwrite = f2fs_vm_page_mkwrite,
88 .remap_pages = generic_file_remap_pages, 89 .remap_pages = generic_file_remap_pages,
89}; 90};
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 65df7d8be4f5..48992cac714b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2117,6 +2117,7 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2117static const struct vm_operations_struct fuse_file_vm_ops = { 2117static const struct vm_operations_struct fuse_file_vm_ops = {
2118 .close = fuse_vma_close, 2118 .close = fuse_vma_close,
2119 .fault = filemap_fault, 2119 .fault = filemap_fault,
2120 .map_pages = filemap_map_pages,
2120 .page_mkwrite = fuse_page_mkwrite, 2121 .page_mkwrite = fuse_page_mkwrite,
2121 .remap_pages = generic_file_remap_pages, 2122 .remap_pages = generic_file_remap_pages,
2122}; 2123};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6c794085abac..80d67253623c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -494,6 +494,7 @@ out:
494 494
495static const struct vm_operations_struct gfs2_vm_ops = { 495static const struct vm_operations_struct gfs2_vm_ops = {
496 .fault = filemap_fault, 496 .fault = filemap_fault,
497 .map_pages = filemap_map_pages,
497 .page_mkwrite = gfs2_page_mkwrite, 498 .page_mkwrite = gfs2_page_mkwrite,
498 .remap_pages = generic_file_remap_pages, 499 .remap_pages = generic_file_remap_pages,
499}; 500};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5bb790a69c71..284ca901fe16 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -617,6 +617,7 @@ out:
617 617
618static const struct vm_operations_struct nfs_file_vm_ops = { 618static const struct vm_operations_struct nfs_file_vm_ops = {
619 .fault = filemap_fault, 619 .fault = filemap_fault,
620 .map_pages = filemap_map_pages,
620 .page_mkwrite = nfs_vm_page_mkwrite, 621 .page_mkwrite = nfs_vm_page_mkwrite,
621 .remap_pages = generic_file_remap_pages, 622 .remap_pages = generic_file_remap_pages,
622}; 623};
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 08fdb77852ac..f3a82fbcae02 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -134,6 +134,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
134 134
135static const struct vm_operations_struct nilfs_file_vm_ops = { 135static const struct vm_operations_struct nilfs_file_vm_ops = {
136 .fault = filemap_fault, 136 .fault = filemap_fault,
137 .map_pages = filemap_map_pages,
137 .page_mkwrite = nilfs_page_mkwrite, 138 .page_mkwrite = nilfs_page_mkwrite,
138 .remap_pages = generic_file_remap_pages, 139 .remap_pages = generic_file_remap_pages,
139}; 140};
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 807150e2c2b9..dd6103cc93c1 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -18,16 +18,9 @@
18 * distribution in the file COPYING); if not, write to the Free Software 18 * distribution in the file COPYING); if not, write to the Free Software
19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22#include "debug.h" 22#include "debug.h"
23 23
24/*
25 * A static buffer to hold the error string being displayed and a spinlock
26 * to protect concurrent accesses to it.
27 */
28static char err_buf[1024];
29static DEFINE_SPINLOCK(err_buf_lock);
30
31/** 24/**
32 * __ntfs_warning - output a warning to the syslog 25 * __ntfs_warning - output a warning to the syslog
33 * @function: name of function outputting the warning 26 * @function: name of function outputting the warning
@@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock);
50void __ntfs_warning(const char *function, const struct super_block *sb, 43void __ntfs_warning(const char *function, const struct super_block *sb,
51 const char *fmt, ...) 44 const char *fmt, ...)
52{ 45{
46 struct va_format vaf;
53 va_list args; 47 va_list args;
54 int flen = 0; 48 int flen = 0;
55 49
@@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
59#endif 53#endif
60 if (function) 54 if (function)
61 flen = strlen(function); 55 flen = strlen(function);
62 spin_lock(&err_buf_lock);
63 va_start(args, fmt); 56 va_start(args, fmt);
64 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 57 vaf.fmt = fmt;
65 va_end(args); 58 vaf.va = &args;
66 if (sb) 59 if (sb)
67 printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", 60 pr_warn("(device %s): %s(): %pV\n",
68 sb->s_id, flen ? function : "", err_buf); 61 sb->s_id, flen ? function : "", &vaf);
69 else 62 else
70 printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", 63 pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
71 flen ? function : "", err_buf); 64 va_end(args);
72 spin_unlock(&err_buf_lock);
73} 65}
74 66
75/** 67/**
@@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
94void __ntfs_error(const char *function, const struct super_block *sb, 86void __ntfs_error(const char *function, const struct super_block *sb,
95 const char *fmt, ...) 87 const char *fmt, ...)
96{ 88{
89 struct va_format vaf;
97 va_list args; 90 va_list args;
98 int flen = 0; 91 int flen = 0;
99 92
@@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb,
103#endif 96#endif
104 if (function) 97 if (function)
105 flen = strlen(function); 98 flen = strlen(function);
106 spin_lock(&err_buf_lock);
107 va_start(args, fmt); 99 va_start(args, fmt);
108 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 100 vaf.fmt = fmt;
109 va_end(args); 101 vaf.va = &args;
110 if (sb) 102 if (sb)
111 printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", 103 pr_err("(device %s): %s(): %pV\n",
112 sb->s_id, flen ? function : "", err_buf); 104 sb->s_id, flen ? function : "", &vaf);
113 else 105 else
114 printk(KERN_ERR "NTFS-fs error: %s(): %s\n", 106 pr_err("%s(): %pV\n", flen ? function : "", &vaf);
115 flen ? function : "", err_buf); 107 va_end(args);
116 spin_unlock(&err_buf_lock);
117} 108}
118 109
119#ifdef DEBUG 110#ifdef DEBUG
@@ -124,6 +115,7 @@ int debug_msgs = 0;
124void __ntfs_debug (const char *file, int line, const char *function, 115void __ntfs_debug (const char *file, int line, const char *function,
125 const char *fmt, ...) 116 const char *fmt, ...)
126{ 117{
118 struct va_format vaf;
127 va_list args; 119 va_list args;
128 int flen = 0; 120 int flen = 0;
129 121
@@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function,
131 return; 123 return;
132 if (function) 124 if (function)
133 flen = strlen(function); 125 flen = strlen(function);
134 spin_lock(&err_buf_lock);
135 va_start(args, fmt); 126 va_start(args, fmt);
136 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 127 vaf.fmt = fmt;
128 vaf.va = &args;
129 pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
137 va_end(args); 130 va_end(args);
138 printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line,
139 flen ? function : "", err_buf);
140 spin_unlock(&err_buf_lock);
141} 131}
142 132
143/* Dump a runlist. Caller has to provide synchronisation for @rl. */ 133/* Dump a runlist. Caller has to provide synchronisation for @rl. */
@@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
149 139
150 if (!debug_msgs) 140 if (!debug_msgs)
151 return; 141 return;
152 printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); 142 pr_debug("Dumping runlist (values in hex):\n");
153 if (!rl) { 143 if (!rl) {
154 printk(KERN_DEBUG "Run list not present.\n"); 144 pr_debug("Run list not present.\n");
155 return; 145 return;
156 } 146 }
157 printk(KERN_DEBUG "VCN LCN Run length\n"); 147 pr_debug("VCN LCN Run length\n");
158 for (i = 0; ; i++) { 148 for (i = 0; ; i++) {
159 LCN lcn = (rl + i)->lcn; 149 LCN lcn = (rl + i)->lcn;
160 150
@@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
163 153
164 if (index > -LCN_ENOENT - 1) 154 if (index > -LCN_ENOENT - 1)
165 index = 3; 155 index = 3;
166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", 156 pr_debug("%-16Lx %s %-16Lx%s\n",
167 (long long)(rl + i)->vcn, lcn_str[index], 157 (long long)(rl + i)->vcn, lcn_str[index],
168 (long long)(rl + i)->length, 158 (long long)(rl + i)->length,
169 (rl + i)->length ? "" : 159 (rl + i)->length ? "" :
170 " (runlist end)"); 160 " (runlist end)");
171 } else 161 } else
172 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", 162 pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
173 (long long)(rl + i)->vcn, 163 (long long)(rl + i)->vcn,
174 (long long)(rl + i)->lcn, 164 (long long)(rl + i)->lcn,
175 (long long)(rl + i)->length, 165 (long long)(rl + i)->length,
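
[Note: the ntfs/debug.c hunks above drop the shared err_buf and its spinlock in favour of struct va_format and the %pV format specifier, which lets a single printk expand a nested (fmt, va_list) pair without an intermediate buffer or locking. A minimal sketch of the idiom (example_warn is illustrative):]

#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/printk.h>

static void example_warn(const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* %pV expands the (fmt, va_list) pair inside one printk call */
	pr_warn("%s: %pV\n", prefix, &vaf);
	va_end(args);
}
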
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 53c27eaf2307..61bf091e32a8 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -48,7 +48,12 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
48 48
49#else /* !DEBUG */ 49#else /* !DEBUG */
50 50
51#define ntfs_debug(f, a...) do {} while (0) 51#define ntfs_debug(fmt, ...) \
52do { \
53 if (0) \
54 no_printk(fmt, ##__VA_ARGS__); \
55} while (0)
56
52#define ntfs_debug_dump_runlist(rl) do {} while (0) 57#define ntfs_debug_dump_runlist(rl) do {} while (0)
53 58
54#endif /* !DEBUG */ 59#endif /* !DEBUG */
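
[Note: the debug.h hunk above replaces the empty do{}while(0) stub with an if(0) no_printk() call, so the format string and arguments are still type-checked (and counted as used) when DEBUG is off, while the compiler removes the call entirely. A hedged sketch of the same pattern (example_debug is illustrative):]

#include <linux/printk.h>

#ifdef DEBUG
#define example_debug(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
#else
/* Compiles away, but still format-checks the arguments */
#define example_debug(fmt, ...)				\
do {							\
	if (0)						\
		no_printk(fmt, ##__VA_ARGS__);		\
} while (0)
#endif
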
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index bd5610d48242..9de2491f2926 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -19,6 +19,7 @@
19 * distribution in the file COPYING); if not, write to the Free Software 19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */ 21 */
22#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 23
23#include <linux/stddef.h> 24#include <linux/stddef.h>
24#include <linux/init.h> 25#include <linux/init.h>
@@ -1896,7 +1897,7 @@ get_ctx_vol_failed:
1896 vol->minor_ver = vi->minor_ver; 1897 vol->minor_ver = vi->minor_ver;
1897 ntfs_attr_put_search_ctx(ctx); 1898 ntfs_attr_put_search_ctx(ctx);
1898 unmap_mft_record(NTFS_I(vol->vol_ino)); 1899 unmap_mft_record(NTFS_I(vol->vol_ino));
1899 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, 1900 pr_info("volume version %i.%i.\n", vol->major_ver,
1900 vol->minor_ver); 1901 vol->minor_ver);
1901 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { 1902 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
1902 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " 1903 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
@@ -3095,7 +3096,7 @@ static int __init init_ntfs_fs(void)
3095 int err = 0; 3096 int err = 0;
3096 3097
3097 /* This may be ugly but it results in pretty output so who cares. (-8 */ 3098 /* This may be ugly but it results in pretty output so who cares. (-8 */
3098 printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" 3099 pr_info("driver " NTFS_VERSION " [Flags: R/"
3099#ifdef NTFS_RW 3100#ifdef NTFS_RW
3100 "W" 3101 "W"
3101#else 3102#else
@@ -3115,16 +3116,15 @@ static int __init init_ntfs_fs(void)
3115 sizeof(ntfs_index_context), 0 /* offset */, 3116 sizeof(ntfs_index_context), 0 /* offset */,
3116 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3117 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3117 if (!ntfs_index_ctx_cache) { 3118 if (!ntfs_index_ctx_cache) {
3118 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3119 pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
3119 ntfs_index_ctx_cache_name);
3120 goto ictx_err_out; 3120 goto ictx_err_out;
3121 } 3121 }
3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, 3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */, 3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */,
3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3125 if (!ntfs_attr_ctx_cache) { 3125 if (!ntfs_attr_ctx_cache) {
3126 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3126 pr_crit("NTFS: Failed to create %s!\n",
3127 ntfs_attr_ctx_cache_name); 3127 ntfs_attr_ctx_cache_name);
3128 goto actx_err_out; 3128 goto actx_err_out;
3129 } 3129 }
3130 3130
@@ -3132,8 +3132,7 @@ static int __init init_ntfs_fs(void)
3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, 3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
3133 SLAB_HWCACHE_ALIGN, NULL); 3133 SLAB_HWCACHE_ALIGN, NULL);
3134 if (!ntfs_name_cache) { 3134 if (!ntfs_name_cache) {
3135 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3135 pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
3136 ntfs_name_cache_name);
3137 goto name_err_out; 3136 goto name_err_out;
3138 } 3137 }
3139 3138
@@ -3141,8 +3140,7 @@ static int __init init_ntfs_fs(void)
3141 sizeof(ntfs_inode), 0, 3140 sizeof(ntfs_inode), 0,
3142 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 3141 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
3143 if (!ntfs_inode_cache) { 3142 if (!ntfs_inode_cache) {
3144 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3143 pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
3145 ntfs_inode_cache_name);
3146 goto inode_err_out; 3144 goto inode_err_out;
3147 } 3145 }
3148 3146
@@ -3151,15 +3149,14 @@ static int __init init_ntfs_fs(void)
3151 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, 3149 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
3152 ntfs_big_inode_init_once); 3150 ntfs_big_inode_init_once);
3153 if (!ntfs_big_inode_cache) { 3151 if (!ntfs_big_inode_cache) {
3154 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3152 pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
3155 ntfs_big_inode_cache_name);
3156 goto big_inode_err_out; 3153 goto big_inode_err_out;
3157 } 3154 }
3158 3155
3159 /* Register the ntfs sysctls. */ 3156 /* Register the ntfs sysctls. */
3160 err = ntfs_sysctl(1); 3157 err = ntfs_sysctl(1);
3161 if (err) { 3158 if (err) {
3162 printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); 3159 pr_crit("Failed to register NTFS sysctls!\n");
3163 goto sysctl_err_out; 3160 goto sysctl_err_out;
3164 } 3161 }
3165 3162
@@ -3168,7 +3165,7 @@ static int __init init_ntfs_fs(void)
3168 ntfs_debug("NTFS driver registered successfully."); 3165 ntfs_debug("NTFS driver registered successfully.");
3169 return 0; /* Success! */ 3166 return 0; /* Success! */
3170 } 3167 }
3171 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3168 pr_crit("Failed to register NTFS filesystem driver!\n");
3172 3169
3173 /* Unregister the ntfs sysctls. */ 3170 /* Unregister the ntfs sysctls. */
3174 ntfs_sysctl(0); 3171 ntfs_sysctl(0);
@@ -3184,8 +3181,7 @@ actx_err_out:
3184 kmem_cache_destroy(ntfs_index_ctx_cache); 3181 kmem_cache_destroy(ntfs_index_ctx_cache);
3185ictx_err_out: 3182ictx_err_out:
3186 if (!err) { 3183 if (!err) {
3187 printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " 3184 pr_crit("Aborting NTFS filesystem driver registration...\n");
3188 "registration...\n");
3189 err = -ENOMEM; 3185 err = -ENOMEM;
3190 } 3186 }
3191 return err; 3187 return err;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 656e401794de..64db2bceac59 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -138,8 +138,8 @@ static const char * const task_state_array[] = {
138 "D (disk sleep)", /* 2 */ 138 "D (disk sleep)", /* 2 */
139 "T (stopped)", /* 4 */ 139 "T (stopped)", /* 4 */
140 "t (tracing stop)", /* 8 */ 140 "t (tracing stop)", /* 8 */
141 "Z (zombie)", /* 16 */ 141 "X (dead)", /* 16 */
142 "X (dead)", /* 32 */ 142 "Z (zombie)", /* 32 */
143}; 143};
144 144
145static inline const char *get_task_state(struct task_struct *tsk) 145static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b9760628e1fd..6b7087e2e8fb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1236,6 +1236,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1237 if (*end) 1237 if (*end)
1238 return -EINVAL; 1238 return -EINVAL;
1239 if (make_it_fail < 0 || make_it_fail > 1)
1240 return -EINVAL;
1241
1239 task = get_proc_task(file_inode(file)); 1242 task = get_proc_task(file_inode(file));
1240 if (!task) 1243 if (!task)
1241 return -ESRCH; 1244 return -ESRCH;
@@ -2588,7 +2591,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2588 REG("environ", S_IRUSR, proc_environ_operations), 2591 REG("environ", S_IRUSR, proc_environ_operations),
2589 INF("auxv", S_IRUSR, proc_pid_auxv), 2592 INF("auxv", S_IRUSR, proc_pid_auxv),
2590 ONE("status", S_IRUGO, proc_pid_status), 2593 ONE("status", S_IRUGO, proc_pid_status),
2591 ONE("personality", S_IRUGO, proc_pid_personality), 2594 ONE("personality", S_IRUSR, proc_pid_personality),
2592 INF("limits", S_IRUGO, proc_pid_limits), 2595 INF("limits", S_IRUGO, proc_pid_limits),
2593#ifdef CONFIG_SCHED_DEBUG 2596#ifdef CONFIG_SCHED_DEBUG
2594 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2597 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2598,7 +2601,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2598#endif 2601#endif
2599 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2602 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2600#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2603#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2601 INF("syscall", S_IRUGO, proc_pid_syscall), 2604 INF("syscall", S_IRUSR, proc_pid_syscall),
2602#endif 2605#endif
2603 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2606 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2604 ONE("stat", S_IRUGO, proc_tgid_stat), 2607 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2617,7 +2620,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2617#ifdef CONFIG_PROC_PAGE_MONITOR 2620#ifdef CONFIG_PROC_PAGE_MONITOR
2618 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2621 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2619 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2622 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
2620 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2623 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2621#endif 2624#endif
2622#ifdef CONFIG_SECURITY 2625#ifdef CONFIG_SECURITY
2623 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2626 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2626,7 +2629,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2626 INF("wchan", S_IRUGO, proc_pid_wchan), 2629 INF("wchan", S_IRUGO, proc_pid_wchan),
2627#endif 2630#endif
2628#ifdef CONFIG_STACKTRACE 2631#ifdef CONFIG_STACKTRACE
2629 ONE("stack", S_IRUGO, proc_pid_stack), 2632 ONE("stack", S_IRUSR, proc_pid_stack),
2630#endif 2633#endif
2631#ifdef CONFIG_SCHEDSTATS 2634#ifdef CONFIG_SCHEDSTATS
2632 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2635 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2927,14 +2930,14 @@ static const struct pid_entry tid_base_stuff[] = {
2927 REG("environ", S_IRUSR, proc_environ_operations), 2930 REG("environ", S_IRUSR, proc_environ_operations),
2928 INF("auxv", S_IRUSR, proc_pid_auxv), 2931 INF("auxv", S_IRUSR, proc_pid_auxv),
2929 ONE("status", S_IRUGO, proc_pid_status), 2932 ONE("status", S_IRUGO, proc_pid_status),
2930 ONE("personality", S_IRUGO, proc_pid_personality), 2933 ONE("personality", S_IRUSR, proc_pid_personality),
2931 INF("limits", S_IRUGO, proc_pid_limits), 2934 INF("limits", S_IRUGO, proc_pid_limits),
2932#ifdef CONFIG_SCHED_DEBUG 2935#ifdef CONFIG_SCHED_DEBUG
2933 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2936 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2934#endif 2937#endif
2935 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2938 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2936#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2939#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2937 INF("syscall", S_IRUGO, proc_pid_syscall), 2940 INF("syscall", S_IRUSR, proc_pid_syscall),
2938#endif 2941#endif
2939 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2942 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2940 ONE("stat", S_IRUGO, proc_tid_stat), 2943 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -2955,7 +2958,7 @@ static const struct pid_entry tid_base_stuff[] = {
2955#ifdef CONFIG_PROC_PAGE_MONITOR 2958#ifdef CONFIG_PROC_PAGE_MONITOR
2956 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2959 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2957 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 2960 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
2958 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2961 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2959#endif 2962#endif
2960#ifdef CONFIG_SECURITY 2963#ifdef CONFIG_SECURITY
2961 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2964 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2964,7 +2967,7 @@ static const struct pid_entry tid_base_stuff[] = {
2964 INF("wchan", S_IRUGO, proc_pid_wchan), 2967 INF("wchan", S_IRUGO, proc_pid_wchan),
2965#endif 2968#endif
2966#ifdef CONFIG_STACKTRACE 2969#ifdef CONFIG_STACKTRACE
2967 ONE("stack", S_IRUGO, proc_pid_stack), 2970 ONE("stack", S_IRUSR, proc_pid_stack),
2968#endif 2971#endif
2969#ifdef CONFIG_SCHEDSTATS 2972#ifdef CONFIG_SCHEDSTATS
2970 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2973 INF("schedstat", S_IRUGO, proc_pid_schedstat),
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 985ea881b5bc..0788d093f5d8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14#include "../mount.h"
14#include "internal.h" 15#include "internal.h"
15#include "fd.h" 16#include "fd.h"
16 17
@@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v)
48 } 49 }
49 50
50 if (!ret) { 51 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", 52 seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
52 (long long)file->f_pos, f_flags); 53 (long long)file->f_pos, f_flags,
54 real_mount(file->f_path.mnt)->mnt_id);
53 if (file->f_op->show_fdinfo) 55 if (file->f_op->show_fdinfo)
54 ret = file->f_op->show_fdinfo(m, file); 56 ret = file->f_op->show_fdinfo(m, file);
55 fput(file); 57 fput(file);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8f20e3404fd2..0adbc02d60e3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode)
47 pde_put(de); 47 pde_put(de);
48 head = PROC_I(inode)->sysctl; 48 head = PROC_I(inode)->sysctl;
49 if (head) { 49 if (head) {
50 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 50 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
51 sysctl_head_put(head); 51 sysctl_head_put(head);
52 } 52 }
53 /* Release any associated namespace */ 53 /* Release any associated namespace */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 136e548d9567..7445af0b1aa3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -73,7 +73,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
73 available += pagecache; 73 available += pagecache;
74 74
75 /* 75 /*
76 * Part of the reclaimable swap consists of items that are in use, 76 * Part of the reclaimable slab consists of items that are in use,
77 * and cannot be freed. Cap this estimate at the low watermark. 77 * and cannot be freed. Cap this estimate at the low watermark.
78 */ 78 */
79 available += global_page_state(NR_SLAB_RECLAIMABLE) - 79 available += global_page_state(NR_SLAB_RECLAIMABLE) -
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b548080d..442177b1119a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/vmacache.h>
2#include <linux/hugetlb.h> 3#include <linux/hugetlb.h>
3#include <linux/huge_mm.h> 4#include <linux/huge_mm.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
152 153
153 /* 154 /*
154 * We remember last_addr rather than next_addr to hit with 155 * We remember last_addr rather than next_addr to hit with
155 * mmap_cache most of the time. We have zero last_addr at 156 * vmacache most of the time. We have zero last_addr at
156 * the beginning and also after lseek. We will have -1 last_addr 157 * the beginning and also after lseek. We will have -1 last_addr
157 * after the end of the vmas. 158 * after the end of the vmas.
158 */ 159 */
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 88d4585b30f1..6a8e785b29da 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -484,7 +484,6 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
484 phdr_ptr->p_memsz = real_sz; 484 phdr_ptr->p_memsz = real_sz;
485 if (real_sz == 0) { 485 if (real_sz == 0) {
486 pr_warn("Warning: Zero PT_NOTE entries found\n"); 486 pr_warn("Warning: Zero PT_NOTE entries found\n");
487 return -EINVAL;
488 } 487 }
489 } 488 }
490 489
@@ -671,7 +670,6 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
671 phdr_ptr->p_memsz = real_sz; 670 phdr_ptr->p_memsz = real_sz;
672 if (real_sz == 0) { 671 if (real_sz == 0) {
673 pr_warn("Warning: Zero PT_NOTE entries found\n"); 672 pr_warn("Warning: Zero PT_NOTE entries found\n");
674 return -EINVAL;
675 } 673 }
676 } 674 }
677 675
@@ -1118,4 +1116,3 @@ void vmcore_cleanup(void)
1118 } 1116 }
1119 free_elfcorebuf(); 1117 free_elfcorebuf();
1120} 1118}
1121EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 123c79b7261e..4f34dbae823d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1538,6 +1538,7 @@ out_unlock:
1538 1538
1539static const struct vm_operations_struct ubifs_file_vm_ops = { 1539static const struct vm_operations_struct ubifs_file_vm_ops = {
1540 .fault = filemap_fault, 1540 .fault = filemap_fault,
1541 .map_pages = filemap_map_pages,
1541 .page_mkwrite = ubifs_vm_page_mkwrite, 1542 .page_mkwrite = ubifs_vm_page_mkwrite,
1542 .remap_pages = generic_file_remap_pages, 1543 .remap_pages = generic_file_remap_pages,
1543}; 1544};
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7ea492ae660..0ab1de4b39a5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -38,7 +38,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
38{ 38{
39 struct super_block * sb; 39 struct super_block * sb;
40 struct ufs_sb_private_info * uspi; 40 struct ufs_sb_private_info * uspi;
41 struct ufs_super_block_first * usb1;
42 struct ufs_cg_private_info * ucpi; 41 struct ufs_cg_private_info * ucpi;
43 struct ufs_cylinder_group * ucg; 42 struct ufs_cylinder_group * ucg;
44 unsigned cgno, bit, end_bit, bbase, blkmap, i; 43 unsigned cgno, bit, end_bit, bbase, blkmap, i;
@@ -46,7 +45,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
46 45
47 sb = inode->i_sb; 46 sb = inode->i_sb;
48 uspi = UFS_SB(sb)->s_uspi; 47 uspi = UFS_SB(sb)->s_uspi;
49 usb1 = ubh_get_usb_first(uspi);
50 48
51 UFSD("ENTER, fragment %llu, count %u\n", 49 UFSD("ENTER, fragment %llu, count %u\n",
52 (unsigned long long)fragment, count); 50 (unsigned long long)fragment, count);
@@ -135,7 +133,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
135{ 133{
136 struct super_block * sb; 134 struct super_block * sb;
137 struct ufs_sb_private_info * uspi; 135 struct ufs_sb_private_info * uspi;
138 struct ufs_super_block_first * usb1;
139 struct ufs_cg_private_info * ucpi; 136 struct ufs_cg_private_info * ucpi;
140 struct ufs_cylinder_group * ucg; 137 struct ufs_cylinder_group * ucg;
141 unsigned overflow, cgno, bit, end_bit, i; 138 unsigned overflow, cgno, bit, end_bit, i;
@@ -143,7 +140,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
143 140
144 sb = inode->i_sb; 141 sb = inode->i_sb;
145 uspi = UFS_SB(sb)->s_uspi; 142 uspi = UFS_SB(sb)->s_uspi;
146 usb1 = ubh_get_usb_first(uspi);
147 143
148 UFSD("ENTER, fragment %llu, count %u\n", 144 UFSD("ENTER, fragment %llu, count %u\n",
149 (unsigned long long)fragment, count); 145 (unsigned long long)fragment, count);
@@ -499,7 +495,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
499{ 495{
500 struct super_block * sb; 496 struct super_block * sb;
501 struct ufs_sb_private_info * uspi; 497 struct ufs_sb_private_info * uspi;
502 struct ufs_super_block_first * usb1;
503 struct ufs_cg_private_info * ucpi; 498 struct ufs_cg_private_info * ucpi;
504 struct ufs_cylinder_group * ucg; 499 struct ufs_cylinder_group * ucg;
505 unsigned cgno, fragno, fragoff, count, fragsize, i; 500 unsigned cgno, fragno, fragoff, count, fragsize, i;
@@ -509,7 +504,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
509 504
510 sb = inode->i_sb; 505 sb = inode->i_sb;
511 uspi = UFS_SB(sb)->s_uspi; 506 uspi = UFS_SB(sb)->s_uspi;
512 usb1 = ubh_get_usb_first (uspi);
513 count = newcount - oldcount; 507 count = newcount - oldcount;
514 508
515 cgno = ufs_dtog(uspi, fragment); 509 cgno = ufs_dtog(uspi, fragment);
@@ -577,7 +571,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
577{ 571{
578 struct super_block * sb; 572 struct super_block * sb;
579 struct ufs_sb_private_info * uspi; 573 struct ufs_sb_private_info * uspi;
580 struct ufs_super_block_first * usb1;
581 struct ufs_cg_private_info * ucpi; 574 struct ufs_cg_private_info * ucpi;
582 struct ufs_cylinder_group * ucg; 575 struct ufs_cylinder_group * ucg;
583 unsigned oldcg, i, j, k, allocsize; 576 unsigned oldcg, i, j, k, allocsize;
@@ -588,7 +581,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
588 581
589 sb = inode->i_sb; 582 sb = inode->i_sb;
590 uspi = UFS_SB(sb)->s_uspi; 583 uspi = UFS_SB(sb)->s_uspi;
591 usb1 = ubh_get_usb_first(uspi);
592 oldcg = cgno; 584 oldcg = cgno;
593 585
594 /* 586 /*
@@ -690,7 +682,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
690{ 682{
691 struct super_block * sb; 683 struct super_block * sb;
692 struct ufs_sb_private_info * uspi; 684 struct ufs_sb_private_info * uspi;
693 struct ufs_super_block_first * usb1;
694 struct ufs_cylinder_group * ucg; 685 struct ufs_cylinder_group * ucg;
695 u64 result, blkno; 686 u64 result, blkno;
696 687
@@ -698,7 +689,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
698 689
699 sb = inode->i_sb; 690 sb = inode->i_sb;
700 uspi = UFS_SB(sb)->s_uspi; 691 uspi = UFS_SB(sb)->s_uspi;
701 usb1 = ubh_get_usb_first(uspi);
702 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 692 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
703 693
704 if (goal == 0) { 694 if (goal == 0) {
@@ -794,7 +784,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
794 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe 784 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
795 }; 785 };
796 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 786 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
797 struct ufs_super_block_first *usb1;
798 struct ufs_cylinder_group *ucg; 787 struct ufs_cylinder_group *ucg;
799 unsigned start, length, loc; 788 unsigned start, length, loc;
800 unsigned pos, want, blockmap, mask, end; 789 unsigned pos, want, blockmap, mask, end;
@@ -803,7 +792,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
803 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx, 792 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
804 (unsigned long long)goal, count); 793 (unsigned long long)goal, count);
805 794
806 usb1 = ubh_get_usb_first (uspi);
807 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 795 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
808 796
809 if (goal) 797 if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index d0426d74817b..98f7211599ff 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -57,7 +57,6 @@ void ufs_free_inode (struct inode * inode)
57{ 57{
58 struct super_block * sb; 58 struct super_block * sb;
59 struct ufs_sb_private_info * uspi; 59 struct ufs_sb_private_info * uspi;
60 struct ufs_super_block_first * usb1;
61 struct ufs_cg_private_info * ucpi; 60 struct ufs_cg_private_info * ucpi;
62 struct ufs_cylinder_group * ucg; 61 struct ufs_cylinder_group * ucg;
63 int is_directory; 62 int is_directory;
@@ -67,7 +66,6 @@ void ufs_free_inode (struct inode * inode)
67 66
68 sb = inode->i_sb; 67 sb = inode->i_sb;
69 uspi = UFS_SB(sb)->s_uspi; 68 uspi = UFS_SB(sb)->s_uspi;
70 usb1 = ubh_get_usb_first(uspi);
71 69
72 ino = inode->i_ino; 70 ino = inode->i_ino;
73 71
@@ -175,7 +173,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
175 struct super_block * sb; 173 struct super_block * sb;
176 struct ufs_sb_info * sbi; 174 struct ufs_sb_info * sbi;
177 struct ufs_sb_private_info * uspi; 175 struct ufs_sb_private_info * uspi;
178 struct ufs_super_block_first * usb1;
179 struct ufs_cg_private_info * ucpi; 176 struct ufs_cg_private_info * ucpi;
180 struct ufs_cylinder_group * ucg; 177 struct ufs_cylinder_group * ucg;
181 struct inode * inode; 178 struct inode * inode;
@@ -195,7 +192,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
195 ufsi = UFS_I(inode); 192 ufsi = UFS_I(inode);
196 sbi = UFS_SB(sb); 193 sbi = UFS_SB(sb);
197 uspi = sbi->s_uspi; 194 uspi = sbi->s_uspi;
198 usb1 = ubh_get_usb_first(uspi);
199 195
200 mutex_lock(&sbi->s_lock); 196 mutex_lock(&sbi->s_lock);
201 197
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b8c6791f046f..c1183f9f69dc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -524,11 +524,9 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
524 struct ufs_buffer_head * ubh; 524 struct ufs_buffer_head * ubh;
525 unsigned char * base, * space; 525 unsigned char * base, * space;
526 unsigned size, blks, i; 526 unsigned size, blks, i;
527 struct ufs_super_block_third *usb3;
528 527
529 UFSD("ENTER\n"); 528 UFSD("ENTER\n");
530 529
531 usb3 = ubh_get_usb_third(uspi);
532 /* 530 /*
533 * Read cs structures from (usually) first data block 531 * Read cs structures from (usually) first data block
534 * on the device. 532 * on the device.
@@ -1390,15 +1388,11 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1390 struct super_block *sb = dentry->d_sb; 1388 struct super_block *sb = dentry->d_sb;
1391 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; 1389 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi;
1392 unsigned flags = UFS_SB(sb)->s_flags; 1390 unsigned flags = UFS_SB(sb)->s_flags;
1393 struct ufs_super_block_first *usb1;
1394 struct ufs_super_block_second *usb2;
1395 struct ufs_super_block_third *usb3; 1391 struct ufs_super_block_third *usb3;
1396 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 1392 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
1397 1393
1398 lock_ufs(sb); 1394 lock_ufs(sb);
1399 1395
1400 usb1 = ubh_get_usb_first(uspi);
1401 usb2 = ubh_get_usb_second(uspi);
1402 usb3 = ubh_get_usb_third(uspi); 1396 usb3 = ubh_get_usb_third(uspi);
1403 1397
1404 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 1398 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
@@ -1454,7 +1448,7 @@ static void init_once(void *foo)
1454 inode_init_once(&ei->vfs_inode); 1448 inode_init_once(&ei->vfs_inode);
1455} 1449}
1456 1450
1457static int init_inodecache(void) 1451static int __init init_inodecache(void)
1458{ 1452{
1459 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", 1453 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
1460 sizeof(struct ufs_inode_info), 1454 sizeof(struct ufs_inode_info),
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7abff8c16ca..003c0051b62f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1483,6 +1483,7 @@ const struct file_operations xfs_dir_file_operations = {
1483 1483
1484static const struct vm_operations_struct xfs_file_vm_ops = { 1484static const struct vm_operations_struct xfs_file_vm_ops = {
1485 .fault = filemap_fault, 1485 .fault = filemap_fault,
1486 .map_pages = filemap_map_pages,
1486 .page_mkwrite = xfs_vm_page_mkwrite, 1487 .page_mkwrite = xfs_vm_page_mkwrite,
1487 .remap_pages = generic_file_remap_pages, 1488 .remap_pages = generic_file_remap_pages,
1488}; 1489};
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 7d10f962aa13..630dd2372238 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -52,7 +52,7 @@ struct bug_entry {
52#endif 52#endif
53 53
54#ifndef HAVE_ARCH_BUG_ON 54#ifndef HAVE_ARCH_BUG_ON
55#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0) 55#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
56#endif 56#endif
57 57
58/* 58/*
@@ -106,33 +106,6 @@ extern void warn_slowpath_null(const char *file, const int line);
106 unlikely(__ret_warn_on); \ 106 unlikely(__ret_warn_on); \
107}) 107})
108 108
109#else /* !CONFIG_BUG */
110#ifndef HAVE_ARCH_BUG
111#define BUG() do {} while(0)
112#endif
113
114#ifndef HAVE_ARCH_BUG_ON
115#define BUG_ON(condition) do { if (condition) ; } while(0)
116#endif
117
118#ifndef HAVE_ARCH_WARN_ON
119#define WARN_ON(condition) ({ \
120 int __ret_warn_on = !!(condition); \
121 unlikely(__ret_warn_on); \
122})
123#endif
124
125#ifndef WARN
126#define WARN(condition, format...) ({ \
127 int __ret_warn_on = !!(condition); \
128 unlikely(__ret_warn_on); \
129})
130#endif
131
132#define WARN_TAINT(condition, taint, format...) WARN_ON(condition)
133
134#endif
135
136#define WARN_ON_ONCE(condition) ({ \ 109#define WARN_ON_ONCE(condition) ({ \
137 static bool __section(.data.unlikely) __warned; \ 110 static bool __section(.data.unlikely) __warned; \
138 int __ret_warn_once = !!(condition); \ 111 int __ret_warn_once = !!(condition); \
@@ -163,6 +136,37 @@ extern void warn_slowpath_null(const char *file, const int line);
163 unlikely(__ret_warn_once); \ 136 unlikely(__ret_warn_once); \
164}) 137})
165 138
139#else /* !CONFIG_BUG */
140#ifndef HAVE_ARCH_BUG
141#define BUG() do {} while (1)
142#endif
143
144#ifndef HAVE_ARCH_BUG_ON
145#define BUG_ON(condition) do { if (condition) ; } while (0)
146#endif
147
148#ifndef HAVE_ARCH_WARN_ON
149#define WARN_ON(condition) ({ \
150 int __ret_warn_on = !!(condition); \
151 unlikely(__ret_warn_on); \
152})
153#endif
154
155#ifndef WARN
156#define WARN(condition, format...) ({ \
157 int __ret_warn_on = !!(condition); \
158 no_printk(format); \
159 unlikely(__ret_warn_on); \
160})
161#endif
162
163#define WARN_ON_ONCE(condition) WARN_ON(condition)
164#define WARN_ONCE(condition, format...) WARN(condition, format)
165#define WARN_TAINT(condition, taint, format...) WARN(condition, format)
166#define WARN_TAINT_ONCE(condition, taint, format...) WARN(condition, format)
167
168#endif
169
166/* 170/*
167 * WARN_ON_SMP() is for cases that the warning is either 171 * WARN_ON_SMP() is for cases that the warning is either
168 * meaningless for !SMP or may even cause failures. 172 * meaningless for !SMP or may even cause failures.
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
new file mode 100644
index 000000000000..a5de55c04fb2
--- /dev/null
+++ b/include/asm-generic/early_ioremap.h
@@ -0,0 +1,42 @@
1#ifndef _ASM_EARLY_IOREMAP_H_
2#define _ASM_EARLY_IOREMAP_H_
3
4#include <linux/types.h>
5
6/*
7 * early_ioremap() and early_iounmap() are for temporary early boot-time
8 * mappings, before the real ioremap() is functional.
9 */
10extern void __iomem *early_ioremap(resource_size_t phys_addr,
11 unsigned long size);
12extern void *early_memremap(resource_size_t phys_addr,
13 unsigned long size);
14extern void early_iounmap(void __iomem *addr, unsigned long size);
15extern void early_memunmap(void *addr, unsigned long size);
16
17/*
18 * Weak function called by early_ioremap_reset(). It does nothing, but
19 * architectures may provide their own version to do any needed cleanups.
20 */
21extern void early_ioremap_shutdown(void);
22
23#if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU)
24/* Arch-specific initialization */
25extern void early_ioremap_init(void);
26
27/* Generic initialization called by architecture code */
28extern void early_ioremap_setup(void);
29
30/*
31 * Called as last step in paging_init() so library can act
32 * accordingly for subsequent map/unmap requests.
33 */
34extern void early_ioremap_reset(void);
35
36#else
37static inline void early_ioremap_init(void) { }
38static inline void early_ioremap_setup(void) { }
39static inline void early_ioremap_reset(void) { }
40#endif
41
42#endif /* _ASM_EARLY_IOREMAP_H_ */
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index d5afe96adba6..975e1cc75edb 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -327,7 +327,7 @@ static inline void iounmap(void __iomem *addr)
327} 327}
328#endif /* CONFIG_MMU */ 328#endif /* CONFIG_MMU */
329 329
330#ifdef CONFIG_HAS_IOPORT 330#ifdef CONFIG_HAS_IOPORT_MAP
331#ifndef CONFIG_GENERIC_IOMAP 331#ifndef CONFIG_GENERIC_IOMAP
332static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) 332static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
333{ 333{
@@ -341,7 +341,7 @@ static inline void ioport_unmap(void __iomem *p)
341extern void __iomem *ioport_map(unsigned long port, unsigned int nr); 341extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
342extern void ioport_unmap(void __iomem *p); 342extern void ioport_unmap(void __iomem *p);
343#endif /* CONFIG_GENERIC_IOMAP */ 343#endif /* CONFIG_GENERIC_IOMAP */
344#endif /* CONFIG_HAS_IOPORT */ 344#endif /* CONFIG_HAS_IOPORT_MAP */
345 345
346#ifndef xlate_dev_kmem_ptr 346#ifndef xlate_dev_kmem_ptr
347#define xlate_dev_kmem_ptr(p) p 347#define xlate_dev_kmem_ptr(p) p
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 6afd7d6a9899..1b41011643a5 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -56,7 +56,7 @@ extern void iowrite8_rep(void __iomem *port, const void *buf, unsigned long coun
56extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count); 56extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count);
57extern void iowrite32_rep(void __iomem *port, const void *buf, unsigned long count); 57extern void iowrite32_rep(void __iomem *port, const void *buf, unsigned long count);
58 58
59#ifdef CONFIG_HAS_IOPORT 59#ifdef CONFIG_HAS_IOPORT_MAP
60/* Create a virtual mapping cookie for an IO port range */ 60/* Create a virtual mapping cookie for an IO port range */
61extern void __iomem *ioport_map(unsigned long port, unsigned int nr); 61extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
62extern void ioport_unmap(void __iomem *); 62extern void ioport_unmap(void __iomem *);
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index d17784ea37ff..0703aa75b5e8 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -56,17 +56,17 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
56#define per_cpu(var, cpu) \ 56#define per_cpu(var, cpu) \
57 (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) 57 (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
58 58
59#ifndef __this_cpu_ptr 59#ifndef raw_cpu_ptr
60#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) 60#define raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
61#endif 61#endif
62#ifdef CONFIG_DEBUG_PREEMPT 62#ifdef CONFIG_DEBUG_PREEMPT
63#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) 63#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
64#else 64#else
65#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) 65#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
66#endif 66#endif
67 67
68#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) 68#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
69#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) 69#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var)))
70 70
71#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA 71#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
72extern void setup_per_cpu_areas(void); 72extern void setup_per_cpu_areas(void);
@@ -83,7 +83,7 @@ extern void setup_per_cpu_areas(void);
83#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) 83#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var)))
84#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) 84#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var)))
85#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) 85#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
86#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) 86#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr)
87 87
88#endif /* SMP */ 88#endif /* SMP */
89 89
@@ -122,4 +122,7 @@ extern void setup_per_cpu_areas(void);
122#define PER_CPU_DEF_ATTRIBUTES 122#define PER_CPU_DEF_ATTRIBUTES
123#endif 123#endif
124 124
125/* Keep until we have removed all uses of __this_cpu_ptr */
126#define __this_cpu_ptr raw_cpu_ptr
127
125#endif /* _ASM_GENERIC_PERCPU_H_ */ 128#endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index b4a745d7d9a9..61f29e5ea840 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -44,7 +44,6 @@ struct linux_binprm {
44 unsigned interp_flags; 44 unsigned interp_flags;
45 unsigned interp_data; 45 unsigned interp_data;
46 unsigned long loader, exec; 46 unsigned long loader, exec;
47 char tcomm[TASK_COMM_LEN];
48}; 47};
49 48
50#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 49#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7032518f8542..72ab536ad3de 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -25,6 +25,7 @@ extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
25 25
26extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, 26extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
27 unsigned long, int); 27 unsigned long, int);
28void vmcore_cleanup(void);
28 29
29/* Architecture code defines this if there are other possible ELF 30/* Architecture code defines this if there are other possible ELF
30 * machine types, e.g. on bi-arch capable hardware. */ 31 * machine types, e.g. on bi-arch capable hardware. */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index f669585c4fc5..6af3400b9b2f 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -133,69 +133,6 @@ static inline void *idr_find(struct idr *idr, int id)
133 for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) 133 for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
134 134
135/* 135/*
136 * Don't use the following functions. These exist only to suppress
137 * deprecated warnings on EXPORT_SYMBOL()s.
138 */
139int __idr_pre_get(struct idr *idp, gfp_t gfp_mask);
140int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
141void __idr_remove_all(struct idr *idp);
142
143/**
144 * idr_pre_get - reserve resources for idr allocation
145 * @idp: idr handle
146 * @gfp_mask: memory allocation flags
147 *
148 * Part of old alloc interface. This is going away. Use
149 * idr_preload[_end]() and idr_alloc() instead.
150 */
151static inline int __deprecated idr_pre_get(struct idr *idp, gfp_t gfp_mask)
152{
153 return __idr_pre_get(idp, gfp_mask);
154}
155
156/**
157 * idr_get_new_above - allocate new idr entry above or equal to a start id
158 * @idp: idr handle
159 * @ptr: pointer you want associated with the id
160 * @starting_id: id to start search at
161 * @id: pointer to the allocated handle
162 *
163 * Part of old alloc interface. This is going away. Use
164 * idr_preload[_end]() and idr_alloc() instead.
165 */
166static inline int __deprecated idr_get_new_above(struct idr *idp, void *ptr,
167 int starting_id, int *id)
168{
169 return __idr_get_new_above(idp, ptr, starting_id, id);
170}
171
172/**
173 * idr_get_new - allocate new idr entry
174 * @idp: idr handle
175 * @ptr: pointer you want associated with the id
176 * @id: pointer to the allocated handle
177 *
178 * Part of old alloc interface. This is going away. Use
179 * idr_preload[_end]() and idr_alloc() instead.
180 */
181static inline int __deprecated idr_get_new(struct idr *idp, void *ptr, int *id)
182{
183 return __idr_get_new_above(idp, ptr, 0, id);
184}
185
186/**
187 * idr_remove_all - remove all ids from the given idr tree
188 * @idp: idr handle
189 *
190 * If you're trying to destroy @idp, calling idr_destroy() is enough.
191 * This is going away. Don't use.
192 */
193static inline void __deprecated idr_remove_all(struct idr *idp)
194{
195 __idr_remove_all(idp);
196}
197
198/*
199 * IDA - IDR based id allocator, use when translation from id to 136 * IDA - IDR based id allocator, use when translation from id to
200 * pointer isn't necessary. 137 * pointer isn't necessary.
201 * 138 *
diff --git a/include/linux/io.h b/include/linux/io.h
index 8a18e75600cc..b76e6e545806 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -41,7 +41,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
41/* 41/*
42 * Managed iomap interface 42 * Managed iomap interface
43 */ 43 */
44#ifdef CONFIG_HAS_IOPORT 44#ifdef CONFIG_HAS_IOPORT_MAP
45void __iomem * devm_ioport_map(struct device *dev, unsigned long port, 45void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
46 unsigned int nr); 46 unsigned int nr);
47void devm_ioport_unmap(struct device *dev, void __iomem *addr); 47void devm_ioport_unmap(struct device *dev, void __iomem *addr);
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 96549abe8842..0081f000e34b 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,6 +25,8 @@
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/notifier.h> 26#include <linux/notifier.h>
27 27
28#ifdef CONFIG_SMP
29
28#ifdef CONFIG_DEBUG_LOCK_ALLOC 30#ifdef CONFIG_DEBUG_LOCK_ALLOC
29#define LOCKDEP_INIT_MAP lockdep_init_map 31#define LOCKDEP_INIT_MAP lockdep_init_map
30#else 32#else
@@ -57,4 +59,18 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu);
57void lg_global_lock(struct lglock *lg); 59void lg_global_lock(struct lglock *lg);
58void lg_global_unlock(struct lglock *lg); 60void lg_global_unlock(struct lglock *lg);
59 61
62#else
63/* When !CONFIG_SMP, map lglock to spinlock */
64#define lglock spinlock
65#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name)
66#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name)
67#define lg_lock_init(lg, name) spin_lock_init(lg)
68#define lg_local_lock spin_lock
69#define lg_local_unlock spin_unlock
70#define lg_local_lock_cpu(lg, cpu) spin_lock(lg)
71#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg)
72#define lg_global_lock spin_lock
73#define lg_global_unlock spin_unlock
74#endif
75
60#endif 76#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eccfb4a4b379..b569b8be5c5a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -65,7 +65,7 @@ struct mem_cgroup_reclaim_cookie {
65 * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) 65 * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
66 */ 66 */
67 67
68extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, 68extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
69 gfp_t gfp_mask); 69 gfp_t gfp_mask);
70/* for swap handling */ 70/* for swap handling */
71extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, 71extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
@@ -74,7 +74,7 @@ extern void mem_cgroup_commit_charge_swapin(struct page *page,
74 struct mem_cgroup *memcg); 74 struct mem_cgroup *memcg);
75extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); 75extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
76 76
77extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 77extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
78 gfp_t gfp_mask); 78 gfp_t gfp_mask);
79 79
80struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); 80struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
@@ -94,7 +94,6 @@ bool task_in_mem_cgroup(struct task_struct *task,
94 94
95extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); 95extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
96extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); 96extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
97extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
98 97
99extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); 98extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
100extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); 99extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
@@ -234,13 +233,13 @@ void mem_cgroup_print_bad_page(struct page *page);
234#else /* CONFIG_MEMCG */ 233#else /* CONFIG_MEMCG */
235struct mem_cgroup; 234struct mem_cgroup;
236 235
237static inline int mem_cgroup_newpage_charge(struct page *page, 236static inline int mem_cgroup_charge_anon(struct page *page,
238 struct mm_struct *mm, gfp_t gfp_mask) 237 struct mm_struct *mm, gfp_t gfp_mask)
239{ 238{
240 return 0; 239 return 0;
241} 240}
242 241
243static inline int mem_cgroup_cache_charge(struct page *page, 242static inline int mem_cgroup_charge_file(struct page *page,
244 struct mm_struct *mm, gfp_t gfp_mask) 243 struct mm_struct *mm, gfp_t gfp_mask)
245{ 244{
246 return 0; 245 return 0;
@@ -294,11 +293,6 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
294 return NULL; 293 return NULL;
295} 294}
296 295
297static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
298{
299 return NULL;
300}
301
302static inline bool mm_match_cgroup(struct mm_struct *mm, 296static inline bool mm_match_cgroup(struct mm_struct *mm,
303 struct mem_cgroup *memcg) 297 struct mem_cgroup *memcg)
304{ 298{
@@ -497,6 +491,9 @@ void __memcg_kmem_commit_charge(struct page *page,
497void __memcg_kmem_uncharge_pages(struct page *page, int order); 491void __memcg_kmem_uncharge_pages(struct page *page, int order);
498 492
499int memcg_cache_id(struct mem_cgroup *memcg); 493int memcg_cache_id(struct mem_cgroup *memcg);
494
495char *memcg_create_cache_name(struct mem_cgroup *memcg,
496 struct kmem_cache *root_cache);
500int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, 497int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
501 struct kmem_cache *root_cache); 498 struct kmem_cache *root_cache);
502void memcg_free_cache_params(struct kmem_cache *s); 499void memcg_free_cache_params(struct kmem_cache *s);
@@ -510,7 +507,7 @@ struct kmem_cache *
510__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); 507__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
511 508
512void mem_cgroup_destroy_cache(struct kmem_cache *cachep); 509void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
513void kmem_cache_destroy_memcg_children(struct kmem_cache *s); 510int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
514 511
515/** 512/**
516 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. 513 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -664,10 +661,6 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
664{ 661{
665 return cachep; 662 return cachep;
666} 663}
667
668static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
669{
670}
671#endif /* CONFIG_MEMCG_KMEM */ 664#endif /* CONFIG_MEMCG_KMEM */
672#endif /* _LINUX_MEMCONTROL_H */ 665#endif /* _LINUX_MEMCONTROL_H */
673 666
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5f1ea756aace..3c1b968da0ca 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -143,7 +143,6 @@ extern void numa_policy_init(void);
143extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, 143extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
144 enum mpol_rebind_step step); 144 enum mpol_rebind_step step);
145extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); 145extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
146extern void mpol_fix_fork_child_flag(struct task_struct *p);
147 146
148extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 147extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
149 unsigned long addr, gfp_t gfp_flags, 148 unsigned long addr, gfp_t gfp_flags,
@@ -151,7 +150,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
151extern bool init_nodemask_of_mempolicy(nodemask_t *mask); 150extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
152extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, 151extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
153 const nodemask_t *mask); 152 const nodemask_t *mask);
154extern unsigned slab_node(void); 153extern unsigned int mempolicy_slab_node(void);
155 154
156extern enum zone_type policy_zone; 155extern enum zone_type policy_zone;
157 156
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 35300f390eb6..abc848412e3c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -177,6 +177,9 @@ extern unsigned int kobjsize(const void *objp);
177 */ 177 */
178#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) 178#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
179 179
 180/* This mask defines which mm->def_flags a process can inherit from its parent */
181#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
182
180/* 183/*
181 * mapping from the currently active vm_flags protection bits (the 184 * mapping from the currently active vm_flags protection bits (the
182 * low four bits) to a page protection mask.. 185 * low four bits) to a page protection mask..
@@ -210,6 +213,10 @@ struct vm_fault {
210 * is set (which is also implied by 213 * is set (which is also implied by
211 * VM_FAULT_ERROR). 214 * VM_FAULT_ERROR).
212 */ 215 */
216 /* for ->map_pages() only */
217 pgoff_t max_pgoff; /* map pages for offset from pgoff till
218 * max_pgoff inclusive */
219 pte_t *pte; /* pte entry associated with ->pgoff */
213}; 220};
214 221
215/* 222/*
@@ -221,6 +228,7 @@ struct vm_operations_struct {
221 void (*open)(struct vm_area_struct * area); 228 void (*open)(struct vm_area_struct * area);
222 void (*close)(struct vm_area_struct * area); 229 void (*close)(struct vm_area_struct * area);
223 int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); 230 int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
231 void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
224 232
225 /* notification that a previously read-only page is about to become 233 /* notification that a previously read-only page is about to become
226 * writable, if an error is returned it will cause a SIGBUS */ 234 * writable, if an error is returned it will cause a SIGBUS */
@@ -581,6 +589,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
581 pte = pte_mkwrite(pte); 589 pte = pte_mkwrite(pte);
582 return pte; 590 return pte;
583} 591}
592
593void do_set_pte(struct vm_area_struct *vma, unsigned long address,
594 struct page *page, pte_t *pte, bool write, bool anon);
584#endif 595#endif
585 596
586/* 597/*
@@ -684,7 +695,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
684#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) 695#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
685#define NODES_MASK ((1UL << NODES_WIDTH) - 1) 696#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
686#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) 697#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
687#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) 698#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1)
688#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) 699#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
689 700
690static inline enum zone_type page_zonenum(const struct page *page) 701static inline enum zone_type page_zonenum(const struct page *page)
@@ -1836,6 +1847,7 @@ extern void truncate_inode_pages_final(struct address_space *);
1836 1847
1837/* generic vm_area_ops exported for stackable file systems */ 1848/* generic vm_area_ops exported for stackable file systems */
1838extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); 1849extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
1850extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
1839extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1851extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1840 1852
1841/* mm/page-writeback.c */ 1853/* mm/page-writeback.c */
@@ -1863,9 +1875,6 @@ void page_cache_async_readahead(struct address_space *mapping,
1863 unsigned long size); 1875 unsigned long size);
1864 1876
1865unsigned long max_sane_readahead(unsigned long nr); 1877unsigned long max_sane_readahead(unsigned long nr);
1866unsigned long ra_submit(struct file_ra_state *ra,
1867 struct address_space *mapping,
1868 struct file *filp);
1869 1878
1870/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ 1879/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
1871extern int expand_stack(struct vm_area_struct *vma, unsigned long address); 1880extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 290901a8c1de..2b58d192ea24 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,9 +342,9 @@ struct mm_rss_stat {
342 342
343struct kioctx_table; 343struct kioctx_table;
344struct mm_struct { 344struct mm_struct {
345 struct vm_area_struct * mmap; /* list of VMAs */ 345 struct vm_area_struct *mmap; /* list of VMAs */
346 struct rb_root mm_rb; 346 struct rb_root mm_rb;
347 struct vm_area_struct * mmap_cache; /* last find_vma result */ 347 u32 vmacache_seqnum; /* per-thread vmacache */
348#ifdef CONFIG_MMU 348#ifdef CONFIG_MMU
349 unsigned long (*get_unmapped_area) (struct file *filp, 349 unsigned long (*get_unmapped_area) (struct file *filp,
350 unsigned long addr, unsigned long len, 350 unsigned long addr, unsigned long len,
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 5042c036dda9..2d57efa64cc1 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -3,8 +3,8 @@
3 3
4struct page; 4struct page;
5 5
6extern void dump_page(struct page *page, char *reason); 6extern void dump_page(struct page *page, const char *reason);
7extern void dump_page_badflags(struct page *page, char *reason, 7extern void dump_page_badflags(struct page *page, const char *reason,
8 unsigned long badflags); 8 unsigned long badflags);
9 9
10#ifdef CONFIG_DEBUG_VM 10#ifdef CONFIG_DEBUG_VM
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e3817d2441b6..e7a0b95ed527 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -173,6 +173,12 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
173 173
174extern void __bad_size_call_parameter(void); 174extern void __bad_size_call_parameter(void);
175 175
176#ifdef CONFIG_DEBUG_PREEMPT
177extern void __this_cpu_preempt_check(const char *op);
178#else
179static inline void __this_cpu_preempt_check(const char *op) { }
180#endif
181
176#define __pcpu_size_call_return(stem, variable) \ 182#define __pcpu_size_call_return(stem, variable) \
177({ typeof(variable) pscr_ret__; \ 183({ typeof(variable) pscr_ret__; \
178 __verify_pcpu_ptr(&(variable)); \ 184 __verify_pcpu_ptr(&(variable)); \
@@ -243,6 +249,8 @@ do { \
243} while (0) 249} while (0)
244 250
245/* 251/*
252 * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com>
253 *
246 * Optimized manipulation for memory allocated through the per cpu 254 * Optimized manipulation for memory allocated through the per cpu
247 * allocator or for addresses of per cpu variables. 255 * allocator or for addresses of per cpu variables.
248 * 256 *
@@ -296,7 +304,7 @@ do { \
296do { \ 304do { \
297 unsigned long flags; \ 305 unsigned long flags; \
298 raw_local_irq_save(flags); \ 306 raw_local_irq_save(flags); \
299 *__this_cpu_ptr(&(pcp)) op val; \ 307 *raw_cpu_ptr(&(pcp)) op val; \
300 raw_local_irq_restore(flags); \ 308 raw_local_irq_restore(flags); \
301} while (0) 309} while (0)
302 310
@@ -381,8 +389,8 @@ do { \
381 typeof(pcp) ret__; \ 389 typeof(pcp) ret__; \
382 unsigned long flags; \ 390 unsigned long flags; \
383 raw_local_irq_save(flags); \ 391 raw_local_irq_save(flags); \
384 __this_cpu_add(pcp, val); \ 392 raw_cpu_add(pcp, val); \
385 ret__ = __this_cpu_read(pcp); \ 393 ret__ = raw_cpu_read(pcp); \
386 raw_local_irq_restore(flags); \ 394 raw_local_irq_restore(flags); \
387 ret__; \ 395 ret__; \
388}) 396})
@@ -411,8 +419,8 @@ do { \
411({ typeof(pcp) ret__; \ 419({ typeof(pcp) ret__; \
412 unsigned long flags; \ 420 unsigned long flags; \
413 raw_local_irq_save(flags); \ 421 raw_local_irq_save(flags); \
414 ret__ = __this_cpu_read(pcp); \ 422 ret__ = raw_cpu_read(pcp); \
415 __this_cpu_write(pcp, nval); \ 423 raw_cpu_write(pcp, nval); \
416 raw_local_irq_restore(flags); \ 424 raw_local_irq_restore(flags); \
417 ret__; \ 425 ret__; \
418}) 426})
@@ -439,9 +447,9 @@ do { \
439 typeof(pcp) ret__; \ 447 typeof(pcp) ret__; \
440 unsigned long flags; \ 448 unsigned long flags; \
441 raw_local_irq_save(flags); \ 449 raw_local_irq_save(flags); \
442 ret__ = __this_cpu_read(pcp); \ 450 ret__ = raw_cpu_read(pcp); \
443 if (ret__ == (oval)) \ 451 if (ret__ == (oval)) \
444 __this_cpu_write(pcp, nval); \ 452 raw_cpu_write(pcp, nval); \
445 raw_local_irq_restore(flags); \ 453 raw_local_irq_restore(flags); \
446 ret__; \ 454 ret__; \
447}) 455})
@@ -476,7 +484,7 @@ do { \
476 int ret__; \ 484 int ret__; \
477 unsigned long flags; \ 485 unsigned long flags; \
478 raw_local_irq_save(flags); \ 486 raw_local_irq_save(flags); \
479 ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ 487 ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \
480 oval1, oval2, nval1, nval2); \ 488 oval1, oval2, nval1, nval2); \
481 raw_local_irq_restore(flags); \ 489 raw_local_irq_restore(flags); \
482 ret__; \ 490 ret__; \
@@ -504,12 +512,8 @@ do { \
504#endif 512#endif
505 513
506/* 514/*
507 * Generic percpu operations for context that are safe from preemption/interrupts. 515 * Generic percpu operations for contexts where we do not want to do
508 * Either we do not care about races or the caller has the 516 * any checks for preemption.
509 * responsibility of handling preemption/interrupt issues. Arch code can still
510 * override these instructions since the arch per cpu code may be more
511 * efficient and may actually get race freeness for free (that is the
512 * case for x86 for example).
513 * 517 *
514 * If there is no other protection through preempt disable and/or 518 * If there is no other protection through preempt disable and/or
515 * disabling interrupts then one of these RMW operations can show unexpected 519
@@ -517,211 +521,285 @@ do { \
517 * or an interrupt occurred and the same percpu variable was modified from 521 * or an interrupt occurred and the same percpu variable was modified from
518 * the interrupt context. 522 * the interrupt context.
519 */ 523 */
520#ifndef __this_cpu_read 524#ifndef raw_cpu_read
521# ifndef __this_cpu_read_1 525# ifndef raw_cpu_read_1
522# define __this_cpu_read_1(pcp) (*__this_cpu_ptr(&(pcp))) 526# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
523# endif 527# endif
524# ifndef __this_cpu_read_2 528# ifndef raw_cpu_read_2
525# define __this_cpu_read_2(pcp) (*__this_cpu_ptr(&(pcp))) 529# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
526# endif 530# endif
527# ifndef __this_cpu_read_4 531# ifndef raw_cpu_read_4
528# define __this_cpu_read_4(pcp) (*__this_cpu_ptr(&(pcp))) 532# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
529# endif 533# endif
530# ifndef __this_cpu_read_8 534# ifndef raw_cpu_read_8
531# define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp))) 535# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
532# endif 536# endif
533# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp)) 537# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp))
534#endif 538#endif
535 539
536#define __this_cpu_generic_to_op(pcp, val, op) \ 540#define raw_cpu_generic_to_op(pcp, val, op) \
537do { \ 541do { \
538 *__this_cpu_ptr(&(pcp)) op val; \ 542 *raw_cpu_ptr(&(pcp)) op val; \
539} while (0) 543} while (0)
540 544
541#ifndef __this_cpu_write 545
542# ifndef __this_cpu_write_1 546#ifndef raw_cpu_write
543# define __this_cpu_write_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) 547# ifndef raw_cpu_write_1
548# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
544# endif 549# endif
545# ifndef __this_cpu_write_2 550# ifndef raw_cpu_write_2
546# define __this_cpu_write_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) 551# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
547# endif 552# endif
548# ifndef __this_cpu_write_4 553# ifndef raw_cpu_write_4
549# define __this_cpu_write_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) 554# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
550# endif 555# endif
551# ifndef __this_cpu_write_8 556# ifndef raw_cpu_write_8
552# define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) 557# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
553# endif 558# endif
554# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val)) 559# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val))
555#endif 560#endif
556 561
557#ifndef __this_cpu_add 562#ifndef raw_cpu_add
558# ifndef __this_cpu_add_1 563# ifndef raw_cpu_add_1
559# define __this_cpu_add_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) 564# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
560# endif 565# endif
561# ifndef __this_cpu_add_2 566# ifndef raw_cpu_add_2
562# define __this_cpu_add_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) 567# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
563# endif 568# endif
564# ifndef __this_cpu_add_4 569# ifndef raw_cpu_add_4
565# define __this_cpu_add_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) 570# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
566# endif 571# endif
567# ifndef __this_cpu_add_8 572# ifndef raw_cpu_add_8
568# define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) 573# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
569# endif 574# endif
570# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val)) 575# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val))
571#endif 576#endif
572 577
573#ifndef __this_cpu_sub 578#ifndef raw_cpu_sub
574# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) 579# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val))
575#endif 580#endif
576 581
577#ifndef __this_cpu_inc 582#ifndef raw_cpu_inc
578# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) 583# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1)
579#endif 584#endif
580 585
581#ifndef __this_cpu_dec 586#ifndef raw_cpu_dec
582# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) 587# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1)
583#endif 588#endif
584 589
585#ifndef __this_cpu_and 590#ifndef raw_cpu_and
586# ifndef __this_cpu_and_1 591# ifndef raw_cpu_and_1
587# define __this_cpu_and_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) 592# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
588# endif 593# endif
589# ifndef __this_cpu_and_2 594# ifndef raw_cpu_and_2
590# define __this_cpu_and_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) 595# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
591# endif 596# endif
592# ifndef __this_cpu_and_4 597# ifndef raw_cpu_and_4
593# define __this_cpu_and_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) 598# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
594# endif 599# endif
595# ifndef __this_cpu_and_8 600# ifndef raw_cpu_and_8
596# define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) 601# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
597# endif 602# endif
598# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val)) 603# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val))
599#endif 604#endif
600 605
601#ifndef __this_cpu_or 606#ifndef raw_cpu_or
602# ifndef __this_cpu_or_1 607# ifndef raw_cpu_or_1
603# define __this_cpu_or_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) 608# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
604# endif 609# endif
605# ifndef __this_cpu_or_2 610# ifndef raw_cpu_or_2
606# define __this_cpu_or_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) 611# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
607# endif 612# endif
608# ifndef __this_cpu_or_4 613# ifndef raw_cpu_or_4
609# define __this_cpu_or_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) 614# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
610# endif 615# endif
611# ifndef __this_cpu_or_8 616# ifndef raw_cpu_or_8
612# define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) 617# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
613# endif 618# endif
614# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) 619# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val))
615#endif 620#endif
616 621
617#define __this_cpu_generic_add_return(pcp, val) \ 622#define raw_cpu_generic_add_return(pcp, val) \
618({ \ 623({ \
619 __this_cpu_add(pcp, val); \ 624 raw_cpu_add(pcp, val); \
620 __this_cpu_read(pcp); \ 625 raw_cpu_read(pcp); \
621}) 626})
622 627
623#ifndef __this_cpu_add_return 628#ifndef raw_cpu_add_return
624# ifndef __this_cpu_add_return_1 629# ifndef raw_cpu_add_return_1
625# define __this_cpu_add_return_1(pcp, val) __this_cpu_generic_add_return(pcp, val) 630# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val)
626# endif 631# endif
627# ifndef __this_cpu_add_return_2 632# ifndef raw_cpu_add_return_2
628# define __this_cpu_add_return_2(pcp, val) __this_cpu_generic_add_return(pcp, val) 633# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val)
629# endif 634# endif
630# ifndef __this_cpu_add_return_4 635# ifndef raw_cpu_add_return_4
631# define __this_cpu_add_return_4(pcp, val) __this_cpu_generic_add_return(pcp, val) 636# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val)
632# endif 637# endif
633# ifndef __this_cpu_add_return_8 638# ifndef raw_cpu_add_return_8
634# define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val) 639# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val)
635# endif 640# endif
636# define __this_cpu_add_return(pcp, val) \ 641# define raw_cpu_add_return(pcp, val) \
637	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) 642	__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)
638#endif 643#endif
639 644
640#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) 645#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val))
641#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) 646#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1)
642#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) 647#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1)
643 648
644#define __this_cpu_generic_xchg(pcp, nval) \ 649#define raw_cpu_generic_xchg(pcp, nval) \
645({ typeof(pcp) ret__; \ 650({ typeof(pcp) ret__; \
646 ret__ = __this_cpu_read(pcp); \ 651 ret__ = raw_cpu_read(pcp); \
647 __this_cpu_write(pcp, nval); \ 652 raw_cpu_write(pcp, nval); \
648 ret__; \ 653 ret__; \
649}) 654})
650 655
651#ifndef __this_cpu_xchg 656#ifndef raw_cpu_xchg
652# ifndef __this_cpu_xchg_1 657# ifndef raw_cpu_xchg_1
653# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval) 658# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
654# endif 659# endif
655# ifndef __this_cpu_xchg_2 660# ifndef raw_cpu_xchg_2
656# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval) 661# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
657# endif 662# endif
658# ifndef __this_cpu_xchg_4 663# ifndef raw_cpu_xchg_4
659# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval) 664# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
660# endif 665# endif
661# ifndef __this_cpu_xchg_8 666# ifndef raw_cpu_xchg_8
662# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval) 667# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
663# endif 668# endif
664# define __this_cpu_xchg(pcp, nval) \ 669# define raw_cpu_xchg(pcp, nval) \
665 __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval) 670 __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)
666#endif 671#endif
667 672
668#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \ 673#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
669({ \ 674({ \
670 typeof(pcp) ret__; \ 675 typeof(pcp) ret__; \
671 ret__ = __this_cpu_read(pcp); \ 676 ret__ = raw_cpu_read(pcp); \
672 if (ret__ == (oval)) \ 677 if (ret__ == (oval)) \
673 __this_cpu_write(pcp, nval); \ 678 raw_cpu_write(pcp, nval); \
674 ret__; \ 679 ret__; \
675}) 680})
676 681
677#ifndef __this_cpu_cmpxchg 682#ifndef raw_cpu_cmpxchg
678# ifndef __this_cpu_cmpxchg_1 683# ifndef raw_cpu_cmpxchg_1
679# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) 684# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
680# endif 685# endif
681# ifndef __this_cpu_cmpxchg_2 686# ifndef raw_cpu_cmpxchg_2
682# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) 687# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
683# endif 688# endif
684# ifndef __this_cpu_cmpxchg_4 689# ifndef raw_cpu_cmpxchg_4
685# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) 690# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
686# endif 691# endif
687# ifndef __this_cpu_cmpxchg_8 692# ifndef raw_cpu_cmpxchg_8
688# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) 693# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
689# endif 694# endif
690# define __this_cpu_cmpxchg(pcp, oval, nval) \ 695# define raw_cpu_cmpxchg(pcp, oval, nval) \
691 __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) 696 __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
692#endif 697#endif
693 698
694#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 699#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
695({ \ 700({ \
696 int __ret = 0; \ 701 int __ret = 0; \
697 if (__this_cpu_read(pcp1) == (oval1) && \ 702 if (raw_cpu_read(pcp1) == (oval1) && \
698 __this_cpu_read(pcp2) == (oval2)) { \ 703 raw_cpu_read(pcp2) == (oval2)) { \
699 __this_cpu_write(pcp1, (nval1)); \ 704 raw_cpu_write(pcp1, (nval1)); \
700 __this_cpu_write(pcp2, (nval2)); \ 705 raw_cpu_write(pcp2, (nval2)); \
701 __ret = 1; \ 706 __ret = 1; \
702 } \ 707 } \
703 (__ret); \ 708 (__ret); \
704}) 709})
705 710
706#ifndef __this_cpu_cmpxchg_double 711#ifndef raw_cpu_cmpxchg_double
707# ifndef __this_cpu_cmpxchg_double_1 712# ifndef raw_cpu_cmpxchg_double_1
708# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 713# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
709 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 714 raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
710# endif 715# endif
711# ifndef __this_cpu_cmpxchg_double_2 716# ifndef raw_cpu_cmpxchg_double_2
712# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 717# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
713 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 718 raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
714# endif 719# endif
715# ifndef __this_cpu_cmpxchg_double_4 720# ifndef raw_cpu_cmpxchg_double_4
716# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 721# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
717 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 722 raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
718# endif 723# endif
719# ifndef __this_cpu_cmpxchg_double_8 724# ifndef raw_cpu_cmpxchg_double_8
720# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 725# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
721 __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 726 raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
722# endif 727# endif
728# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
729 __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
730#endif
731
732/*
733 * Generic percpu operations for contexts that are safe from preemption/interrupts.
734 */
735#ifndef __this_cpu_read
736# define __this_cpu_read(pcp) \
737 (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp)))
738#endif
739
740#ifndef __this_cpu_write
741# define __this_cpu_write(pcp, val) \
742do { __this_cpu_preempt_check("write"); \
743 __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \
744} while (0)
745#endif
746
747#ifndef __this_cpu_add
748# define __this_cpu_add(pcp, val) \
749do { __this_cpu_preempt_check("add"); \
750 __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \
751} while (0)
752#endif
753
754#ifndef __this_cpu_sub
755# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val))
756#endif
757
758#ifndef __this_cpu_inc
759# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1)
760#endif
761
762#ifndef __this_cpu_dec
763# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1)
764#endif
765
766#ifndef __this_cpu_and
767# define __this_cpu_and(pcp, val) \
768do { __this_cpu_preempt_check("and"); \
769 __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \
770} while (0)
771
772#endif
773
774#ifndef __this_cpu_or
775# define __this_cpu_or(pcp, val) \
776do { __this_cpu_preempt_check("or"); \
777 __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \
778} while (0)
779#endif
780
781#ifndef __this_cpu_add_return
782# define __this_cpu_add_return(pcp, val) \
783 (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val))
784#endif
785
786#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val))
787#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1)
788#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1)
789
790#ifndef __this_cpu_xchg
791# define __this_cpu_xchg(pcp, nval) \
792 (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval))
793#endif
794
795#ifndef __this_cpu_cmpxchg
796# define __this_cpu_cmpxchg(pcp, oval, nval) \
797 (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval))
798#endif
799
800#ifndef __this_cpu_cmpxchg_double
723# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 801# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
724 __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) 802 (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)))
725#endif 803#endif
726 804
727#endif /* __LINUX_PERCPU_H */ 805#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 201a69749659..56b7bc32db4f 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -104,15 +104,13 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
104 * units, e.g. numbers, bytes, Kbytes, etc 104 * units, e.g. numbers, bytes, Kbytes, etc
105 * 105 *
106 * returns 0 on success and <0 if the counter->usage will exceed the 106 * returns 0 on success and <0 if the counter->usage will exceed the
107 * counter->limit _locked call expects the counter->lock to be taken 107 * counter->limit
108 * 108 *
109 * charge_nofail works the same, except that it charges the resource 109 * charge_nofail works the same, except that it charges the resource
110 * counter unconditionally, and returns < 0 if the after the current 110 * counter unconditionally, and returns < 0 if the after the current
111 * charge we are over limit. 111 * charge we are over limit.
112 */ 112 */
113 113
114int __must_check res_counter_charge_locked(struct res_counter *counter,
115 unsigned long val, bool force);
116int __must_check res_counter_charge(struct res_counter *counter, 114int __must_check res_counter_charge(struct res_counter *counter,
117 unsigned long val, struct res_counter **limit_fail_at); 115 unsigned long val, struct res_counter **limit_fail_at);
118int res_counter_charge_nofail(struct res_counter *counter, 116int res_counter_charge_nofail(struct res_counter *counter,
@@ -125,12 +123,10 @@ int res_counter_charge_nofail(struct res_counter *counter,
125 * @val: the amount of the resource 123 * @val: the amount of the resource
126 * 124 *
127 * these calls check for usage underflow and show a warning on the console 125 * these calls check for usage underflow and show a warning on the console
128 * _locked call expects the counter->lock to be taken
129 * 126 *
130 * returns the total charges still present in @counter. 127 * returns the total charges still present in @counter.
131 */ 128 */
132 129
133u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
134u64 res_counter_uncharge(struct res_counter *counter, unsigned long val); 130u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
135 131
136u64 res_counter_uncharge_until(struct res_counter *counter, 132u64 res_counter_uncharge_until(struct res_counter *counter,
diff --git a/include/linux/rio.h b/include/linux/rio.h
index b71d5738e683..6bda06f21930 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -83,7 +83,7 @@
83#define RIO_CTAG_UDEVID 0x0001ffff /* Unique device identifier */ 83#define RIO_CTAG_UDEVID 0x0001ffff /* Unique device identifier */
84 84
85extern struct bus_type rio_bus_type; 85extern struct bus_type rio_bus_type;
86extern struct device rio_bus; 86extern struct class rio_mport_class;
87 87
88struct rio_mport; 88struct rio_mport;
89struct rio_dev; 89struct rio_dev;
@@ -201,6 +201,7 @@ struct rio_dev {
201#define rio_dev_f(n) list_entry(n, struct rio_dev, net_list) 201#define rio_dev_f(n) list_entry(n, struct rio_dev, net_list)
202#define to_rio_dev(n) container_of(n, struct rio_dev, dev) 202#define to_rio_dev(n) container_of(n, struct rio_dev, dev)
203#define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0]) 203#define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0])
204#define to_rio_mport(n) container_of(n, struct rio_mport, dev)
204 205
205/** 206/**
206 * struct rio_msg - RIO message event 207 * struct rio_msg - RIO message event
@@ -248,6 +249,7 @@ enum rio_phy_type {
248 * @phy_type: RapidIO phy type 249 * @phy_type: RapidIO phy type
249 * @phys_efptr: RIO port extended features pointer 250 * @phys_efptr: RIO port extended features pointer
250 * @name: Port name string 251 * @name: Port name string
252 * @dev: device structure associated with an mport
251 * @priv: Master port private data 253 * @priv: Master port private data
252 * @dma: DMA device associated with mport 254 * @dma: DMA device associated with mport
253 * @nscan: RapidIO network enumeration/discovery operations 255 * @nscan: RapidIO network enumeration/discovery operations
@@ -272,6 +274,7 @@ struct rio_mport {
272 enum rio_phy_type phy_type; /* RapidIO phy type */ 274 enum rio_phy_type phy_type; /* RapidIO phy type */
273 u32 phys_efptr; 275 u32 phys_efptr;
274 unsigned char name[RIO_MAX_MPORT_NAME]; 276 unsigned char name[RIO_MAX_MPORT_NAME];
277 struct device dev;
275 void *priv; /* Master port private data */ 278 void *priv; /* Master port private data */
276#ifdef CONFIG_RAPIDIO_DMA_ENGINE 279#ifdef CONFIG_RAPIDIO_DMA_ENGINE
277 struct dma_device dma; 280 struct dma_device dma;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7cb07fd26680..075b3056c0c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,10 @@ struct perf_event_context;
132struct blk_plug; 132struct blk_plug;
133struct filename; 133struct filename;
134 134
135#define VMACACHE_BITS 2
136#define VMACACHE_SIZE (1U << VMACACHE_BITS)
137#define VMACACHE_MASK (VMACACHE_SIZE - 1)
138
135/* 139/*
136 * List of flags we want to share for kernel threads, 140 * List of flags we want to share for kernel threads,
137 * if only because they are not used by them anyway. 141 * if only because they are not used by them anyway.
@@ -206,8 +210,9 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
206#define __TASK_STOPPED 4 210#define __TASK_STOPPED 4
207#define __TASK_TRACED 8 211#define __TASK_TRACED 8
208/* in tsk->exit_state */ 212/* in tsk->exit_state */
209#define EXIT_ZOMBIE 16 213#define EXIT_DEAD 16
210#define EXIT_DEAD 32 214#define EXIT_ZOMBIE 32
215#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
211/* in tsk->state again */ 216/* in tsk->state again */
212#define TASK_DEAD 64 217#define TASK_DEAD 64
213#define TASK_WAKEKILL 128 218#define TASK_WAKEKILL 128
@@ -1235,6 +1240,9 @@ struct task_struct {
1235#ifdef CONFIG_COMPAT_BRK 1240#ifdef CONFIG_COMPAT_BRK
1236 unsigned brk_randomized:1; 1241 unsigned brk_randomized:1;
1237#endif 1242#endif
1243 /* per-thread vma caching */
1244 u32 vmacache_seqnum;
1245 struct vm_area_struct *vmacache[VMACACHE_SIZE];
1238#if defined(SPLIT_RSS_COUNTING) 1246#if defined(SPLIT_RSS_COUNTING)
1239 struct task_rss_stat rss_stat; 1247 struct task_rss_stat rss_stat;
1240#endif 1248#endif
@@ -1844,7 +1852,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
1844#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1852#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1845#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ 1853#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
1846#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ 1854#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1847#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1848#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1855#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1849#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ 1856#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
1850#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ 1857#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */
@@ -2351,7 +2358,7 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
2351struct task_struct *fork_idle(int); 2358struct task_struct *fork_idle(int);
2352extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); 2359extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
2353 2360
2354extern void set_task_comm(struct task_struct *tsk, char *from); 2361extern void set_task_comm(struct task_struct *tsk, const char *from);
2355extern char *get_task_comm(char *to, struct task_struct *tsk); 2362extern char *get_task_comm(char *to, struct task_struct *tsk);
2356 2363
2357#ifdef CONFIG_SMP 2364#ifdef CONFIG_SMP
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5b2df60299e..3dd389aa91c7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -115,9 +115,9 @@ int slab_is_available(void);
115struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, 115struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
116 unsigned long, 116 unsigned long,
117 void (*)(void *)); 117 void (*)(void *));
118struct kmem_cache * 118#ifdef CONFIG_MEMCG_KMEM
119kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, 119void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
120 unsigned long, void (*)(void *), struct kmem_cache *); 120#endif
121void kmem_cache_destroy(struct kmem_cache *); 121void kmem_cache_destroy(struct kmem_cache *);
122int kmem_cache_shrink(struct kmem_cache *); 122int kmem_cache_shrink(struct kmem_cache *);
123void kmem_cache_free(struct kmem_cache *, void *); 123void kmem_cache_free(struct kmem_cache *, void *);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f56bfa9e4526..f2f7398848cf 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -87,6 +87,9 @@ struct kmem_cache {
87#ifdef CONFIG_MEMCG_KMEM 87#ifdef CONFIG_MEMCG_KMEM
88 struct memcg_cache_params *memcg_params; 88 struct memcg_cache_params *memcg_params;
89 int max_attr_size; /* for propagation, maximum size of a stored attr */ 89 int max_attr_size; /* for propagation, maximum size of a stored attr */
90#ifdef CONFIG_SYSFS
91 struct kset *memcg_kset;
92#endif
90#endif 93#endif
91 94
92#ifdef CONFIG_NUMA 95#ifdef CONFIG_NUMA
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 12ae6ce997d6..7062330a1329 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -188,7 +188,7 @@ DECLARE_PER_CPU(int, numa_node);
188/* Returns the number of the current Node. */ 188/* Returns the number of the current Node. */
189static inline int numa_node_id(void) 189static inline int numa_node_id(void)
190{ 190{
191 return __this_cpu_read(numa_node); 191 return raw_cpu_read(numa_node);
192} 192}
193#endif 193#endif
194 194
@@ -245,7 +245,7 @@ static inline void set_numa_mem(int node)
245/* Returns the number of the nearest Node with memory */ 245/* Returns the number of the nearest Node with memory */
246static inline int numa_mem_id(void) 246static inline int numa_mem_id(void)
247{ 247{
248 return __this_cpu_read(_numa_mem_); 248 return raw_cpu_read(_numa_mem_);
249} 249}
250#endif 250#endif
251 251
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
new file mode 100644
index 000000000000..c3fa0fd43949
--- /dev/null
+++ b/include/linux/vmacache.h
@@ -0,0 +1,38 @@
1#ifndef __LINUX_VMACACHE_H
2#define __LINUX_VMACACHE_H
3
4#include <linux/sched.h>
5#include <linux/mm.h>
6
7/*
8 * Hash based on the page number. Provides a good hit rate for
9 * workloads with good locality and those with random accesses as well.
10 */
11#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
12
13static inline void vmacache_flush(struct task_struct *tsk)
14{
15 memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
16}
17
18extern void vmacache_flush_all(struct mm_struct *mm);
19extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
20extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
21 unsigned long addr);
22
23#ifndef CONFIG_MMU
24extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
25 unsigned long start,
26 unsigned long end);
27#endif
28
29static inline void vmacache_invalidate(struct mm_struct *mm)
30{
31 mm->vmacache_seqnum++;
32
33 /* deal with overflows */
34 if (unlikely(mm->vmacache_seqnum == 0))
35 vmacache_flush_all(mm);
36}
37
38#endif /* __LINUX_VMACACHE_H */
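
Illustrative note on the interface above: the per-thread cache is meant to be consulted before the usual rbtree walk and refreshed on a miss. A minimal sketch of such a caller follows; __find_vma_rbtree() is a hypothetical stand-in for the existing slow path, not something added by this patch.

	struct vm_area_struct *find_vma_sketch(struct mm_struct *mm,
					       unsigned long addr)
	{
		struct vm_area_struct *vma;

		/* O(1) lookup in the per-thread cache declared above */
		vma = vmacache_find(mm, addr);
		if (likely(vma))
			return vma;

		/* assumed slow path: walk mm->mm_rb as find_vma() does today */
		vma = __find_vma_rbtree(mm, addr);
		if (vma)
			vmacache_update(addr, vma);	/* remember the hit */
		return vma;
	}
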
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ea4476157e00..45c9cd1daf7a 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -27,9 +27,13 @@ struct vm_event_state {
27 27
28DECLARE_PER_CPU(struct vm_event_state, vm_event_states); 28DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
29 29
30/*
31 * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
32 * local_irq_disable overhead.
33 */
30static inline void __count_vm_event(enum vm_event_item item) 34static inline void __count_vm_event(enum vm_event_item item)
31{ 35{
32 __this_cpu_inc(vm_event_states.event[item]); 36 raw_cpu_inc(vm_event_states.event[item]);
33} 37}
34 38
35static inline void count_vm_event(enum vm_event_item item) 39static inline void count_vm_event(enum vm_event_item item)
@@ -39,7 +43,7 @@ static inline void count_vm_event(enum vm_event_item item)
39 43
40static inline void __count_vm_events(enum vm_event_item item, long delta) 44static inline void __count_vm_events(enum vm_event_item item, long delta)
41{ 45{
42 __this_cpu_add(vm_event_states.event[item], delta); 46 raw_cpu_add(vm_event_states.event[item], delta);
43} 47}
44 48
45static inline void count_vm_events(enum vm_event_item item, long delta) 49static inline void count_vm_events(enum vm_event_item item, long delta)
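
Note on the conversion above: the raw_cpu_*() operations are assumed to behave like the __this_cpu_*() forms minus the CONFIG_DEBUG_PREEMPT preemption check, which is acceptable here because the vm event counters tolerate an occasional lost update. A short sketch of the contrast:

	/* both bump the same per-cpu counter; only the first form is
	 * expected to warn (with CONFIG_DEBUG_PREEMPT) when the caller
	 * is preemptible.
	 */
	__this_cpu_inc(vm_event_states.event[PGFAULT]);	/* checked   */
	raw_cpu_inc(vm_event_states.event[PGFAULT]);	/* unchecked */
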
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 021b8a319b9e..5777c13849ba 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -178,7 +178,7 @@ int write_cache_pages(struct address_space *mapping,
178 struct writeback_control *wbc, writepage_t writepage, 178 struct writeback_control *wbc, writepage_t writepage,
179 void *data); 179 void *data);
180int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 180int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
181void set_page_dirty_balance(struct page *page, int page_mkwrite); 181void set_page_dirty_balance(struct page *page);
182void writeback_set_ratelimit(void); 182void writeback_set_ratelimit(void);
183void tag_pages_for_writeback(struct address_space *mapping, 183void tag_pages_for_writeback(struct address_space *mapping,
184 pgoff_t start, pgoff_t end); 184 pgoff_t start, pgoff_t end);
diff --git a/include/trace/events/task.h b/include/trace/events/task.h
index 102a646e1996..dee3bb1d5a6b 100644
--- a/include/trace/events/task.h
+++ b/include/trace/events/task.h
@@ -32,7 +32,7 @@ TRACE_EVENT(task_newtask,
32 32
33TRACE_EVENT(task_rename, 33TRACE_EVENT(task_rename,
34 34
35 TP_PROTO(struct task_struct *task, char *comm), 35 TP_PROTO(struct task_struct *task, const char *comm),
36 36
37 TP_ARGS(task, comm), 37 TP_ARGS(task, comm),
38 38
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 4164529a94f9..ddc3b36f1046 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -50,7 +50,7 @@
50 50
51#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, 51#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
52 overrides the coredump filter bits */ 52 overrides the coredump filter bits */
53#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ 53#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
54 54
55/* compatibility flags */ 55/* compatibility flags */
56#define MAP_FILE 0 56#define MAP_FILE 0
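
The corrected comment matches how the flag pair is used from userspace. An illustrative sequence, where buf and len are assumed to describe an existing mapping and error handling is omitted:

	#include <sys/mman.h>

	/* keep a buffer holding secrets out of core dumps, then allow
	 * it to be dumped again once it has been wiped.
	 */
	madvise(buf, len, MADV_DONTDUMP);
	/* ... use and scrub the buffer ... */
	madvise(buf, len, MADV_DODUMP);
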
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 289760f424aa..58afc04c107e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -149,4 +149,7 @@
149 149
150#define PR_GET_TID_ADDRESS 40 150#define PR_GET_TID_ADDRESS 40
151 151
152#define PR_SET_THP_DISABLE 41
153#define PR_GET_THP_DISABLE 42
154
152#endif /* _LINUX_PRCTL_H */ 155#endif /* _LINUX_PRCTL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 8851c6417880..427ba60d638f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1483,6 +1483,7 @@ config PCI_QUIRKS
1483 1483
1484config EMBEDDED 1484config EMBEDDED
1485 bool "Embedded system" 1485 bool "Embedded system"
1486 option allnoconfig_y
1486 select EXPERT 1487 select EXPERT
1487 help 1488 help
1488 This option should be enabled if compiling the kernel for 1489 This option should be enabled if compiling the kernel for
diff --git a/init/initramfs.c b/init/initramfs.c
index 93b61396756b..a8497fab1c3d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -455,6 +455,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
455 } 455 }
456 this_header = 0; 456 this_header = 0;
457 decompress = decompress_method(buf, len, &compress_name); 457 decompress = decompress_method(buf, len, &compress_name);
458 pr_debug("Detected %s compressed data\n", compress_name);
458 if (decompress) { 459 if (decompress) {
459 res = decompress(buf, len, NULL, flush_buffer, NULL, 460 res = decompress(buf, len, NULL, flush_buffer, NULL,
460 &my_inptr, error); 461 &my_inptr, error);
diff --git a/ipc/compat.c b/ipc/compat.c
index a4695ada3275..45d035d4cedc 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -113,9 +113,6 @@ struct compat_shm_info {
113 compat_ulong_t swap_attempts, swap_successes; 113 compat_ulong_t swap_attempts, swap_successes;
114}; 114};
115 115
116extern int sem_ctls[];
117#define sc_semopm (sem_ctls[2])
118
119static inline int compat_ipc_parse_version(int *cmd) 116static inline int compat_ipc_parse_version(int *cmd)
120{ 117{
121#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 118#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 17028648cfeb..998d31b230f1 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -281,4 +281,4 @@ static int __init ipc_sysctl_init(void)
281 return 0; 281 return 0;
282} 282}
283 283
284__initcall(ipc_sysctl_init); 284device_initcall(ipc_sysctl_init);
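
These initcall conversions (here and in the ipc files below) are cosmetic: __initcall() is the legacy alias for device_initcall() in include/linux/init.h, so the explicit spelling only documents the init level. The pattern, with a hypothetical foo_init():

	static int __init foo_init(void)
	{
		/* registration work happens here */
		return 0;
	}
	device_initcall(foo_init);	/* previously: __initcall(foo_init) */
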
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c3b31179122c..4fcf39af1776 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1459,4 +1459,4 @@ out_sysctl:
1459 return error; 1459 return error;
1460} 1460}
1461 1461
1462__initcall(init_mqueue_fs); 1462device_initcall(init_mqueue_fs);
diff --git a/ipc/util.c b/ipc/util.c
index e1b4c6db8aa0..2eb0d1eaa312 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -128,7 +128,7 @@ static int __init ipc_init(void)
128 register_ipcns_notifier(&init_ipc_ns); 128 register_ipcns_notifier(&init_ipc_ns);
129 return 0; 129 return 0;
130} 130}
131__initcall(ipc_init); 131device_initcall(ipc_init);
132 132
133/** 133/**
134 * ipc_init_ids - initialise ipc identifiers 134 * ipc_init_ids - initialise ipc identifiers
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 99982a70ddad..2956c8da1605 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -49,6 +49,7 @@
49#include <linux/pid.h> 49#include <linux/pid.h>
50#include <linux/smp.h> 50#include <linux/smp.h>
51#include <linux/mm.h> 51#include <linux/mm.h>
52#include <linux/vmacache.h>
52#include <linux/rcupdate.h> 53#include <linux/rcupdate.h>
53 54
54#include <asm/cacheflush.h> 55#include <asm/cacheflush.h>
@@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
224 if (!CACHE_FLUSH_IS_SAFE) 225 if (!CACHE_FLUSH_IS_SAFE)
225 return; 226 return;
226 227
227 if (current->mm && current->mm->mmap_cache) { 228 if (current->mm) {
228 flush_cache_range(current->mm->mmap_cache, 229 int i;
229 addr, addr + BREAK_INSTR_SIZE); 230
231 for (i = 0; i < VMACACHE_SIZE; i++) {
232 if (!current->vmacache[i])
233 continue;
234 flush_cache_range(current->vmacache[i],
235 addr, addr + BREAK_INSTR_SIZE);
236 }
230 } 237 }
238
231 /* Force flush instruction cache if it was outside the mm */ 239 /* Force flush instruction cache if it was outside the mm */
232 flush_icache_range(addr, addr + BREAK_INSTR_SIZE); 240 flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
233} 241}
diff --git a/kernel/exit.c b/kernel/exit.c
index 6480d1c85d7a..6ed6a1d552b5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -570,7 +570,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
570 if (same_thread_group(p->real_parent, father)) 570 if (same_thread_group(p->real_parent, father))
571 return; 571 return;
572 572
573 /* We don't want people slaying init. */ 573 /* We don't want people slaying init. */
574 p->exit_signal = SIGCHLD; 574 p->exit_signal = SIGCHLD;
575 575
576 /* If it has exited notify the new parent about this child's death. */ 576 /* If it has exited notify the new parent about this child's death. */
@@ -784,9 +784,10 @@ void do_exit(long code)
784 exit_shm(tsk); 784 exit_shm(tsk);
785 exit_files(tsk); 785 exit_files(tsk);
786 exit_fs(tsk); 786 exit_fs(tsk);
787 if (group_dead)
788 disassociate_ctty(1);
787 exit_task_namespaces(tsk); 789 exit_task_namespaces(tsk);
788 exit_task_work(tsk); 790 exit_task_work(tsk);
789 check_stack_usage();
790 exit_thread(); 791 exit_thread();
791 792
792 /* 793 /*
@@ -799,19 +800,15 @@ void do_exit(long code)
799 800
800 cgroup_exit(tsk); 801 cgroup_exit(tsk);
801 802
802 if (group_dead)
803 disassociate_ctty(1);
804
805 module_put(task_thread_info(tsk)->exec_domain->module); 803 module_put(task_thread_info(tsk)->exec_domain->module);
806 804
807 proc_exit_connector(tsk);
808
809 /* 805 /*
810 * FIXME: do that only when needed, using sched_exit tracepoint 806 * FIXME: do that only when needed, using sched_exit tracepoint
811 */ 807 */
812 flush_ptrace_hw_breakpoint(tsk); 808 flush_ptrace_hw_breakpoint(tsk);
813 809
814 exit_notify(tsk, group_dead); 810 exit_notify(tsk, group_dead);
811 proc_exit_connector(tsk);
815#ifdef CONFIG_NUMA 812#ifdef CONFIG_NUMA
816 task_lock(tsk); 813 task_lock(tsk);
817 mpol_put(tsk->mempolicy); 814 mpol_put(tsk->mempolicy);
@@ -844,6 +841,7 @@ void do_exit(long code)
844 841
845 validate_creds_for_do_exit(tsk); 842 validate_creds_for_do_exit(tsk);
846 843
844 check_stack_usage();
847 preempt_disable(); 845 preempt_disable();
848 if (tsk->nr_dirtied) 846 if (tsk->nr_dirtied)
849 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 847 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
@@ -1038,17 +1036,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1038 return wait_noreap_copyout(wo, p, pid, uid, why, status); 1036 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1039 } 1037 }
1040 1038
1039 traced = ptrace_reparented(p);
1041 /* 1040 /*
1042 * Try to move the task's state to DEAD 1041 * Move the task's state to DEAD/TRACE, only one thread can do this.
1043 * only one thread is allowed to do this:
1044 */ 1042 */
1045 state = xchg(&p->exit_state, EXIT_DEAD); 1043 state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD;
1046 if (state != EXIT_ZOMBIE) { 1044 if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
1047 BUG_ON(state != EXIT_DEAD);
1048 return 0; 1045 return 0;
1049 }
1050
1051 traced = ptrace_reparented(p);
1052 /* 1046 /*
1053 * It can be ptraced but not reparented, check 1047 * It can be ptraced but not reparented, check
1054 * thread_group_leader() to filter out sub-threads. 1048 * thread_group_leader() to filter out sub-threads.
@@ -1109,7 +1103,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1109 1103
1110 /* 1104 /*
1111 * Now we are sure this task is interesting, and no other 1105 * Now we are sure this task is interesting, and no other
1112 * thread can reap it because we set its state to EXIT_DEAD. 1106 * thread can reap it because we its state == DEAD/TRACE.
1113 */ 1107 */
1114 read_unlock(&tasklist_lock); 1108 read_unlock(&tasklist_lock);
1115 1109
@@ -1146,22 +1140,19 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1146 if (!retval) 1140 if (!retval)
1147 retval = pid; 1141 retval = pid;
1148 1142
1149 if (traced) { 1143 if (state == EXIT_TRACE) {
1150 write_lock_irq(&tasklist_lock); 1144 write_lock_irq(&tasklist_lock);
1151 /* We dropped tasklist, ptracer could die and untrace */ 1145 /* We dropped tasklist, ptracer could die and untrace */
1152 ptrace_unlink(p); 1146 ptrace_unlink(p);
1153 /* 1147
1154 * If this is not a sub-thread, notify the parent. 1148 /* If parent wants a zombie, don't release it now */
1155 * If parent wants a zombie, don't release it now. 1149 state = EXIT_ZOMBIE;
1156 */ 1150 if (do_notify_parent(p, p->exit_signal))
1157 if (thread_group_leader(p) && 1151 state = EXIT_DEAD;
1158 !do_notify_parent(p, p->exit_signal)) { 1152 p->exit_state = state;
1159 p->exit_state = EXIT_ZOMBIE;
1160 p = NULL;
1161 }
1162 write_unlock_irq(&tasklist_lock); 1153 write_unlock_irq(&tasklist_lock);
1163 } 1154 }
1164 if (p != NULL) 1155 if (state == EXIT_DEAD)
1165 release_task(p); 1156 release_task(p);
1166 1157
1167 return retval; 1158 return retval;
@@ -1338,7 +1329,12 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1338static int wait_consider_task(struct wait_opts *wo, int ptrace, 1329static int wait_consider_task(struct wait_opts *wo, int ptrace,
1339 struct task_struct *p) 1330 struct task_struct *p)
1340{ 1331{
1341 int ret = eligible_child(wo, p); 1332 int ret;
1333
1334 if (unlikely(p->exit_state == EXIT_DEAD))
1335 return 0;
1336
1337 ret = eligible_child(wo, p);
1342 if (!ret) 1338 if (!ret)
1343 return ret; 1339 return ret;
1344 1340
@@ -1356,33 +1352,44 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
1356 return 0; 1352 return 0;
1357 } 1353 }
1358 1354
1359 /* dead body doesn't have much to contribute */ 1355 if (unlikely(p->exit_state == EXIT_TRACE)) {
1360 if (unlikely(p->exit_state == EXIT_DEAD)) {
1361 /* 1356 /*
1362 * But do not ignore this task until the tracer does 1357 * ptrace == 0 means we are the natural parent. In this case
1363 * wait_task_zombie()->do_notify_parent(). 1358 * we should clear notask_error, debugger will notify us.
1364 */ 1359 */
1365 if (likely(!ptrace) && unlikely(ptrace_reparented(p))) 1360 if (likely(!ptrace))
1366 wo->notask_error = 0; 1361 wo->notask_error = 0;
1367 return 0; 1362 return 0;
1368 } 1363 }
1369 1364
1370 /* slay zombie? */ 1365 if (likely(!ptrace) && unlikely(p->ptrace)) {
1371 if (p->exit_state == EXIT_ZOMBIE) {
1372 /* 1366 /*
1373 * A zombie ptracee is only visible to its ptracer. 1367 * If it is traced by its real parent's group, just pretend
1374 * Notification and reaping will be cascaded to the real 1368 * the caller is ptrace_do_wait() and reap this child if it
1375 * parent when the ptracer detaches. 1369 * is zombie.
1370 *
1371 * This also hides group stop state from real parent; otherwise
1372 * a single stop can be reported twice as group and ptrace stop.
1373 * If a ptracer wants to distinguish these two events for its
1374 * own children it should create a separate process which takes
1375 * the role of real parent.
1376 */ 1376 */
1377 if (likely(!ptrace) && unlikely(p->ptrace)) { 1377 if (!ptrace_reparented(p))
1378 /* it will become visible, clear notask_error */ 1378 ptrace = 1;
1379 wo->notask_error = 0; 1379 }
1380 return 0;
1381 }
1382 1380
1381 /* slay zombie? */
1382 if (p->exit_state == EXIT_ZOMBIE) {
1383 /* we don't reap group leaders with subthreads */ 1383 /* we don't reap group leaders with subthreads */
1384 if (!delay_group_leader(p)) 1384 if (!delay_group_leader(p)) {
1385 return wait_task_zombie(wo, p); 1385 /*
1386 * A zombie ptracee is only visible to its ptracer.
1387 * Notification and reaping will be cascaded to the
1388 * real parent when the ptracer detaches.
1389 */
1390 if (unlikely(ptrace) || likely(!p->ptrace))
1391 return wait_task_zombie(wo, p);
1392 }
1386 1393
1387 /* 1394 /*
1388 * Allow access to stopped/continued state via zombie by 1395 * Allow access to stopped/continued state via zombie by
@@ -1408,19 +1415,6 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
1408 wo->notask_error = 0; 1415 wo->notask_error = 0;
1409 } else { 1416 } else {
1410 /* 1417 /*
1411 * If @p is ptraced by a task in its real parent's group,
1412 * hide group stop/continued state when looking at @p as
1413 * the real parent; otherwise, a single stop can be
1414 * reported twice as group and ptrace stops.
1415 *
1416 * If a ptracer wants to distinguish the two events for its
1417 * own children, it should create a separate process which
1418 * takes the role of real parent.
1419 */
1420 if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))
1421 return 0;
1422
1423 /*
1424 * @p is alive and it's gonna stop, continue or exit, so 1418 * @p is alive and it's gonna stop, continue or exit, so
1425 * there always is something to wait for. 1419 * there always is something to wait for.
1426 */ 1420 */
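
Condensing the wait_task_zombie() changes above: reaping is now a claim-by-cmpxchg protocol on p->exit_state. A sketch using the same names as the hunks, with tasklist_lock handling and status copy-out elided:

	/* only one waiter may move a zombie forward */
	state = (ptrace_reparented(p) && thread_group_leader(p)) ? EXIT_TRACE
								 : EXIT_DEAD;
	if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
		return 0;		/* another thread claimed it */

	/* ... copy exit status to the caller ... */

	if (state == EXIT_TRACE) {
		ptrace_unlink(p);
		/* if the real parent still wants a zombie, leave one behind */
		state = do_notify_parent(p, p->exit_signal) ? EXIT_DEAD
							    : EXIT_ZOMBIE;
		p->exit_state = state;
	}
	if (state == EXIT_DEAD)
		release_task(p);
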
diff --git a/kernel/fork.c b/kernel/fork.c
index abc45890f0a5..54a8d26f612f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -28,6 +28,8 @@
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/mmu_notifier.h> 29#include <linux/mmu_notifier.h>
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/mm.h>
32#include <linux/vmacache.h>
31#include <linux/nsproxy.h> 33#include <linux/nsproxy.h>
32#include <linux/capability.h> 34#include <linux/capability.h>
33#include <linux/cpu.h> 35#include <linux/cpu.h>
@@ -71,6 +73,7 @@
71#include <linux/signalfd.h> 73#include <linux/signalfd.h>
72#include <linux/uprobes.h> 74#include <linux/uprobes.h>
73#include <linux/aio.h> 75#include <linux/aio.h>
76#include <linux/compiler.h>
74 77
75#include <asm/pgtable.h> 78#include <asm/pgtable.h>
76#include <asm/pgalloc.h> 79#include <asm/pgalloc.h>
@@ -284,7 +287,7 @@ void __init fork_init(unsigned long mempages)
284 init_task.signal->rlim[RLIMIT_NPROC]; 287 init_task.signal->rlim[RLIMIT_NPROC];
285} 288}
286 289
287int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, 290int __weak arch_dup_task_struct(struct task_struct *dst,
288 struct task_struct *src) 291 struct task_struct *src)
289{ 292{
290 *dst = *src; 293 *dst = *src;
@@ -364,7 +367,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
364 367
365 mm->locked_vm = 0; 368 mm->locked_vm = 0;
366 mm->mmap = NULL; 369 mm->mmap = NULL;
367 mm->mmap_cache = NULL; 370 mm->vmacache_seqnum = 0;
368 mm->map_count = 0; 371 mm->map_count = 0;
369 cpumask_clear(mm_cpumask(mm)); 372 cpumask_clear(mm_cpumask(mm));
370 mm->mm_rb = RB_ROOT; 373 mm->mm_rb = RB_ROOT;
@@ -530,8 +533,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
530 atomic_set(&mm->mm_count, 1); 533 atomic_set(&mm->mm_count, 1);
531 init_rwsem(&mm->mmap_sem); 534 init_rwsem(&mm->mmap_sem);
532 INIT_LIST_HEAD(&mm->mmlist); 535 INIT_LIST_HEAD(&mm->mmlist);
533 mm->flags = (current->mm) ?
534 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
535 mm->core_state = NULL; 536 mm->core_state = NULL;
536 atomic_long_set(&mm->nr_ptes, 0); 537 atomic_long_set(&mm->nr_ptes, 0);
537 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 538 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
@@ -540,8 +541,15 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
540 mm_init_owner(mm, p); 541 mm_init_owner(mm, p);
541 clear_tlb_flush_pending(mm); 542 clear_tlb_flush_pending(mm);
542 543
543 if (likely(!mm_alloc_pgd(mm))) { 544 if (current->mm) {
545 mm->flags = current->mm->flags & MMF_INIT_MASK;
546 mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
547 } else {
548 mm->flags = default_dump_filter;
544 mm->def_flags = 0; 549 mm->def_flags = 0;
550 }
551
552 if (likely(!mm_alloc_pgd(mm))) {
545 mmu_notifier_mm_init(mm); 553 mmu_notifier_mm_init(mm);
546 return mm; 554 return mm;
547 } 555 }
@@ -877,6 +885,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
877 if (!oldmm) 885 if (!oldmm)
878 return 0; 886 return 0;
879 887
888 /* initialize the new vmacache entries */
889 vmacache_flush(tsk);
890
880 if (clone_flags & CLONE_VM) { 891 if (clone_flags & CLONE_VM) {
881 atomic_inc(&oldmm->mm_users); 892 atomic_inc(&oldmm->mm_users);
882 mm = oldmm; 893 mm = oldmm;
@@ -1070,15 +1081,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1070 return 0; 1081 return 0;
1071} 1082}
1072 1083
1073static void copy_flags(unsigned long clone_flags, struct task_struct *p)
1074{
1075 unsigned long new_flags = p->flags;
1076
1077 new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
1078 new_flags |= PF_FORKNOEXEC;
1079 p->flags = new_flags;
1080}
1081
1082SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) 1084SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
1083{ 1085{
1084 current->clear_child_tid = tidptr; 1086 current->clear_child_tid = tidptr;
@@ -1228,7 +1230,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1228 goto bad_fork_cleanup_count; 1230 goto bad_fork_cleanup_count;
1229 1231
1230 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1232 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1231 copy_flags(clone_flags, p); 1233 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
1234 p->flags |= PF_FORKNOEXEC;
1232 INIT_LIST_HEAD(&p->children); 1235 INIT_LIST_HEAD(&p->children);
1233 INIT_LIST_HEAD(&p->sibling); 1236 INIT_LIST_HEAD(&p->sibling);
1234 rcu_copy_process(p); 1237 rcu_copy_process(p);
@@ -1274,7 +1277,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1274 p->mempolicy = NULL; 1277 p->mempolicy = NULL;
1275 goto bad_fork_cleanup_threadgroup_lock; 1278 goto bad_fork_cleanup_threadgroup_lock;
1276 } 1279 }
1277 mpol_fix_fork_child_flag(p);
1278#endif 1280#endif
1279#ifdef CONFIG_CPUSETS 1281#ifdef CONFIG_CPUSETS
1280 p->cpuset_mem_spread_rotor = NUMA_NO_NODE; 1282 p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 3127ad52cdb2..cb0cf37dac3a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -23,6 +23,7 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/ctype.h> 24#include <linux/ctype.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/compiler.h>
26 27
27#include <asm/sections.h> 28#include <asm/sections.h>
28 29
@@ -36,8 +37,8 @@
36 * These will be re-linked against their real values 37 * These will be re-linked against their real values
37 * during the second link stage. 38 * during the second link stage.
38 */ 39 */
39extern const unsigned long kallsyms_addresses[] __attribute__((weak)); 40extern const unsigned long kallsyms_addresses[] __weak;
40extern const u8 kallsyms_names[] __attribute__((weak)); 41extern const u8 kallsyms_names[] __weak;
41 42
42/* 43/*
43 * Tell the compiler that the count isn't in the small data section if the arch 44 * Tell the compiler that the count isn't in the small data section if the arch
@@ -46,10 +47,10 @@ extern const u8 kallsyms_names[] __attribute__((weak));
46extern const unsigned long kallsyms_num_syms 47extern const unsigned long kallsyms_num_syms
47__attribute__((weak, section(".rodata"))); 48__attribute__((weak, section(".rodata")));
48 49
49extern const u8 kallsyms_token_table[] __attribute__((weak)); 50extern const u8 kallsyms_token_table[] __weak;
50extern const u16 kallsyms_token_index[] __attribute__((weak)); 51extern const u16 kallsyms_token_index[] __weak;
51 52
52extern const unsigned long kallsyms_markers[] __attribute__((weak)); 53extern const unsigned long kallsyms_markers[] __weak;
53 54
54static inline int is_kernel_inittext(unsigned long addr) 55static inline int is_kernel_inittext(unsigned long addr)
55{ 56{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c0d261c7db7b..c8380ad203bc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
32#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/swap.h> 33#include <linux/swap.h>
34#include <linux/syscore_ops.h> 34#include <linux/syscore_ops.h>
35#include <linux/compiler.h>
35 36
36#include <asm/page.h> 37#include <asm/page.h>
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
@@ -1551,10 +1552,10 @@ void vmcoreinfo_append_str(const char *fmt, ...)
1551 * provide an empty default implementation here -- architecture 1552 * provide an empty default implementation here -- architecture
1552 * code may override this 1553 * code may override this
1553 */ 1554 */
1554void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void) 1555void __weak arch_crash_save_vmcoreinfo(void)
1555{} 1556{}
1556 1557
1557unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) 1558unsigned long __weak paddr_vmcoreinfo_note(void)
1558{ 1559{
1559 return __pa((unsigned long)(char *)&vmcoreinfo_note); 1560 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1560} 1561}
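
The __attribute__((weak)) to __weak conversions in this and the neighbouring files rely on the existing shorthand from the compiler headers (roughly #define __weak __attribute__((weak))), so they do not change behaviour. The pattern, with a hypothetical hook name:

	#include <linux/compiler.h>

	/* weak generic fallback; an architecture overrides it simply by
	 * providing a strong (non-weak) definition of the same symbol.
	 */
	int __weak arch_example_hook(void)
	{
		return 0;
	}
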
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e660964086e2..2495a9b14ac8 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -18,6 +18,7 @@
18#include <linux/stat.h> 18#include <linux/stat.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/capability.h> 20#include <linux/capability.h>
21#include <linux/compiler.h>
21 22
22#include <linux/rcupdate.h> /* rcu_expedited */ 23#include <linux/rcupdate.h> /* rcu_expedited */
23 24
@@ -162,8 +163,8 @@ KERNEL_ATTR_RW(rcu_expedited);
162/* 163/*
163 * Make /sys/kernel/notes give the raw contents of our kernel .notes section. 164 * Make /sys/kernel/notes give the raw contents of our kernel .notes section.
164 */ 165 */
165extern const void __start_notes __attribute__((weak)); 166extern const void __start_notes __weak;
166extern const void __stop_notes __attribute__((weak)); 167extern const void __stop_notes __weak;
167#define notes_size (&__stop_notes - &__start_notes) 168#define notes_size (&__stop_notes - &__start_notes)
168 169
169static ssize_t notes_read(struct file *filp, struct kobject *kobj, 170static ssize_t notes_read(struct file *filp, struct kobject *kobj,
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 306a76b51e0f..b8bdcd4785b7 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
1 1
2obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o 2obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
3 3
4ifdef CONFIG_FUNCTION_TRACER 4ifdef CONFIG_FUNCTION_TRACER
5CFLAGS_REMOVE_lockdep.o = -pg 5CFLAGS_REMOVE_lockdep.o = -pg
@@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
14obj-$(CONFIG_LOCKDEP) += lockdep_proc.o 14obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
15endif 15endif
16obj-$(CONFIG_SMP) += spinlock.o 16obj-$(CONFIG_SMP) += spinlock.o
17obj-$(CONFIG_SMP) += lglock.o
17obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 18obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
18obj-$(CONFIG_RT_MUTEXES) += rtmutex.o 19obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
19obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 20obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
diff --git a/kernel/module.c b/kernel/module.c
index 29f7790eaa14..11869408f79b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -640,7 +640,7 @@ static int module_unload_init(struct module *mod)
640 INIT_LIST_HEAD(&mod->target_list); 640 INIT_LIST_HEAD(&mod->target_list);
641 641
642 /* Hold reference count during initialization. */ 642 /* Hold reference count during initialization. */
643 __this_cpu_write(mod->refptr->incs, 1); 643 raw_cpu_write(mod->refptr->incs, 1);
644 644
645 return 0; 645 return 0;
646} 646}
diff --git a/kernel/panic.c b/kernel/panic.c
index 79fd820bb5e8..d02fa9fef46a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -100,7 +100,7 @@ void panic(const char *fmt, ...)
100 va_start(args, fmt); 100 va_start(args, fmt);
101 vsnprintf(buf, sizeof(buf), fmt, args); 101 vsnprintf(buf, sizeof(buf), fmt, args);
102 va_end(args); 102 va_end(args);
103 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); 103 pr_emerg("Kernel panic - not syncing: %s\n", buf);
104#ifdef CONFIG_DEBUG_BUGVERBOSE 104#ifdef CONFIG_DEBUG_BUGVERBOSE
105 /* 105 /*
106 * Avoid nested stack-dumping if a panic occurs during oops processing 106 * Avoid nested stack-dumping if a panic occurs during oops processing
@@ -141,7 +141,7 @@ void panic(const char *fmt, ...)
141 * Delay timeout seconds before rebooting the machine. 141 * Delay timeout seconds before rebooting the machine.
142 * We can't use the "normal" timers since we just panicked. 142 * We can't use the "normal" timers since we just panicked.
143 */ 143 */
144 printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout); 144 pr_emerg("Rebooting in %d seconds..", panic_timeout);
145 145
146 for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { 146 for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
147 touch_nmi_watchdog(); 147 touch_nmi_watchdog();
@@ -165,7 +165,7 @@ void panic(const char *fmt, ...)
165 extern int stop_a_enabled; 165 extern int stop_a_enabled;
166 /* Make sure the user can actually press Stop-A (L1-A) */ 166 /* Make sure the user can actually press Stop-A (L1-A) */
167 stop_a_enabled = 1; 167 stop_a_enabled = 1;
168 printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n"); 168 pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
169 } 169 }
170#endif 170#endif
171#if defined(CONFIG_S390) 171#if defined(CONFIG_S390)
@@ -176,6 +176,7 @@ void panic(const char *fmt, ...)
176 disabled_wait(caller); 176 disabled_wait(caller);
177 } 177 }
178#endif 178#endif
179 pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
179 local_irq_enable(); 180 local_irq_enable();
180 for (i = 0; ; i += PANIC_TIMER_STEP) { 181 for (i = 0; ; i += PANIC_TIMER_STEP) {
181 touch_softlockup_watchdog(); 182 touch_softlockup_watchdog();
@@ -276,8 +277,7 @@ unsigned long get_taint(void)
276void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) 277void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
277{ 278{
278 if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) 279 if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
279 printk(KERN_WARNING 280 pr_warn("Disabling lock debugging due to kernel taint\n");
280 "Disabling lock debugging due to kernel taint\n");
281 281
282 set_bit(flag, &tainted_mask); 282 set_bit(flag, &tainted_mask);
283} 283}
@@ -382,8 +382,7 @@ late_initcall(init_oops_id);
382void print_oops_end_marker(void) 382void print_oops_end_marker(void)
383{ 383{
384 init_oops_id(); 384 init_oops_id();
385 printk(KERN_WARNING "---[ end trace %016llx ]---\n", 385 pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
386 (unsigned long long)oops_id);
387} 386}
388 387
389/* 388/*
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 1ca753106557..15f37ea08719 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -2,6 +2,7 @@
2#include <linux/suspend_ioctls.h> 2#include <linux/suspend_ioctls.h>
3#include <linux/utsname.h> 3#include <linux/utsname.h>
4#include <linux/freezer.h> 4#include <linux/freezer.h>
5#include <linux/compiler.h>
5 6
6struct swsusp_info { 7struct swsusp_info {
7 struct new_utsname uts; 8 struct new_utsname uts;
@@ -11,7 +12,7 @@ struct swsusp_info {
11 unsigned long image_pages; 12 unsigned long image_pages;
12 unsigned long pages; 13 unsigned long pages;
13 unsigned long size; 14 unsigned long size;
14} __attribute__((aligned(PAGE_SIZE))); 15} __aligned(PAGE_SIZE);
15 16
16#ifdef CONFIG_HIBERNATION 17#ifdef CONFIG_HIBERNATION
17/* kernel/power/snapshot.c */ 18/* kernel/power/snapshot.c */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 149e745eaa52..18fb7a2fb14b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -27,6 +27,7 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/list.h> 28#include <linux/list.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/compiler.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32#include <asm/mmu_context.h> 33#include <asm/mmu_context.h>
@@ -155,7 +156,7 @@ static inline void free_image_page(void *addr, int clear_nosave_free)
155struct linked_page { 156struct linked_page {
156 struct linked_page *next; 157 struct linked_page *next;
157 char data[LINKED_PAGE_DATA_SIZE]; 158 char data[LINKED_PAGE_DATA_SIZE];
158} __attribute__((packed)); 159} __packed;
159 160
160static inline void 161static inline void
161free_list_of_pages(struct linked_page *list, int clear_page_nosave) 162free_list_of_pages(struct linked_page *list, int clear_page_nosave)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 90b3d9366d1a..c3ad9cafe930 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -26,6 +26,7 @@
26#include <linux/syscore_ops.h> 26#include <linux/syscore_ops.h>
27#include <linux/ftrace.h> 27#include <linux/ftrace.h>
28#include <trace/events/power.h> 28#include <trace/events/power.h>
29#include <linux/compiler.h>
29 30
30#include "power.h" 31#include "power.h"
31 32
@@ -156,13 +157,13 @@ static int suspend_prepare(suspend_state_t state)
156} 157}
157 158
158/* default implementation */ 159/* default implementation */
159void __attribute__ ((weak)) arch_suspend_disable_irqs(void) 160void __weak arch_suspend_disable_irqs(void)
160{ 161{
161 local_irq_disable(); 162 local_irq_disable();
162} 163}
163 164
164/* default implementation */ 165/* default implementation */
165void __attribute__ ((weak)) arch_suspend_enable_irqs(void) 166void __weak arch_suspend_enable_irqs(void)
166{ 167{
167 local_irq_enable(); 168 local_irq_enable();
168} 169}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c33ed200410..8c9a4819f798 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -101,7 +101,7 @@ struct swsusp_header {
101 unsigned int flags; /* Flags to pass to the "boot" kernel */ 101 unsigned int flags; /* Flags to pass to the "boot" kernel */
102 char orig_sig[10]; 102 char orig_sig[10];
103 char sig[10]; 103 char sig[10];
104} __attribute__((packed)); 104} __packed;
105 105
106static struct swsusp_header *swsusp_header; 106static struct swsusp_header *swsusp_header;
107 107
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 4aa8a305aede..51dbac6a3633 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -22,8 +22,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
22 counter->parent = parent; 22 counter->parent = parent;
23} 23}
24 24
25int res_counter_charge_locked(struct res_counter *counter, unsigned long val, 25static u64 res_counter_uncharge_locked(struct res_counter *counter,
26 bool force) 26 unsigned long val)
27{
28 if (WARN_ON(counter->usage < val))
29 val = counter->usage;
30
31 counter->usage -= val;
32 return counter->usage;
33}
34
35static int res_counter_charge_locked(struct res_counter *counter,
36 unsigned long val, bool force)
27{ 37{
28 int ret = 0; 38 int ret = 0;
29 39
@@ -86,15 +96,6 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
86 return __res_counter_charge(counter, val, limit_fail_at, true); 96 return __res_counter_charge(counter, val, limit_fail_at, true);
87} 97}
88 98
89u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
90{
91 if (WARN_ON(counter->usage < val))
92 val = counter->usage;
93
94 counter->usage -= val;
95 return counter->usage;
96}
97
98u64 res_counter_uncharge_until(struct res_counter *counter, 99u64 res_counter_uncharge_until(struct res_counter *counter,
99 struct res_counter *top, 100 struct res_counter *top,
100 unsigned long val) 101 unsigned long val)
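
With the _locked variants now private to res_counter.c, callers go through the lock-taking wrappers whose prototypes appear in the res_counter.h hunk earlier in this patch. A hedged usage sketch, where rc is an already-initialized counter:

	struct res_counter *fail_at;

	if (res_counter_charge(&rc, PAGE_SIZE, &fail_at)) {
		/* over limit; fail_at points at the level that refused */
		return -ENOMEM;
	}
	/* ... the resource stays accounted while it is in use ... */
	res_counter_uncharge(&rc, PAGE_SIZE);
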
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index b30a2924ef14..3ef6451e972e 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -60,13 +60,14 @@
60#include <linux/sched.h> 60#include <linux/sched.h>
61#include <linux/static_key.h> 61#include <linux/static_key.h>
62#include <linux/workqueue.h> 62#include <linux/workqueue.h>
63#include <linux/compiler.h>
63 64
64/* 65/*
65 * Scheduler clock - returns current time in nanosec units. 66 * Scheduler clock - returns current time in nanosec units.
66 * This is default implementation. 67 * This is default implementation.
67 * Architectures and sub-architectures can override this. 68 * Architectures and sub-architectures can override this.
68 */ 69 */
69unsigned long long __attribute__((weak)) sched_clock(void) 70unsigned long long __weak sched_clock(void)
70{ 71{
71 return (unsigned long long)(jiffies - INITIAL_JIFFIES) 72 return (unsigned long long)(jiffies - INITIAL_JIFFIES)
72 * (NSEC_PER_SEC / HZ); 73 * (NSEC_PER_SEC / HZ);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ff3f34bc7e3..268a45ea238c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
73#include <linux/init_task.h> 73#include <linux/init_task.h>
74#include <linux/binfmts.h> 74#include <linux/binfmts.h>
75#include <linux/context_tracking.h> 75#include <linux/context_tracking.h>
76#include <linux/compiler.h>
76 77
77#include <asm/switch_to.h> 78#include <asm/switch_to.h>
78#include <asm/tlb.h> 79#include <asm/tlb.h>
@@ -6452,7 +6453,7 @@ static cpumask_var_t fallback_doms;
6452 * cpu core maps. It is supposed to return 1 if the topology changed 6453 * cpu core maps. It is supposed to return 1 if the topology changed
6453 * or 0 if it stayed the same. 6454 * or 0 if it stayed the same.
6454 */ 6455 */
6455int __attribute__((weak)) arch_update_cpu_topology(void) 6456int __weak arch_update_cpu_topology(void)
6456{ 6457{
6457 return 0; 6458 return 0;
6458} 6459}
diff --git a/kernel/signal.c b/kernel/signal.c
index 5d4b05a229a6..6ea13c09ae56 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -33,6 +33,8 @@
33#include <linux/uprobes.h> 33#include <linux/uprobes.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/cn_proc.h> 35#include <linux/cn_proc.h>
36#include <linux/compiler.h>
37
36#define CREATE_TRACE_POINTS 38#define CREATE_TRACE_POINTS
37#include <trace/events/signal.h> 39#include <trace/events/signal.h>
38 40
@@ -3618,7 +3620,7 @@ SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask)
3618} 3620}
3619#endif 3621#endif
3620 3622
3621__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) 3623__weak const char *arch_vma_name(struct vm_area_struct *vma)
3622{ 3624{
3623 return NULL; 3625 return NULL;
3624} 3626}
diff --git a/kernel/sys.c b/kernel/sys.c
index adaeab6f7a87..fba0f29401ea 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1996,6 +1996,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1996 if (arg2 || arg3 || arg4 || arg5) 1996 if (arg2 || arg3 || arg4 || arg5)
1997 return -EINVAL; 1997 return -EINVAL;
1998 return current->no_new_privs ? 1 : 0; 1998 return current->no_new_privs ? 1 : 0;
1999 case PR_GET_THP_DISABLE:
2000 if (arg2 || arg3 || arg4 || arg5)
2001 return -EINVAL;
2002 error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
2003 break;
2004 case PR_SET_THP_DISABLE:
2005 if (arg3 || arg4 || arg5)
2006 return -EINVAL;
2007 down_write(&me->mm->mmap_sem);
2008 if (arg2)
2009 me->mm->def_flags |= VM_NOHUGEPAGE;
2010 else
2011 me->mm->def_flags &= ~VM_NOHUGEPAGE;
2012 up_write(&me->mm->mmap_sem);
2013 break;
1999 default: 2014 default:
2000 error = -EINVAL; 2015 error = -EINVAL;
2001 break; 2016 break;
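
From userspace the new pair behaves like the other get/set prctls; an illustrative call sequence, error handling omitted:

	#include <sys/prctl.h>

	/* set VM_NOHUGEPAGE in mm->def_flags so future mappings of this
	 * process avoid transparent huge pages, then read the flag back.
	 */
	prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
	int thp_disabled = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);	/* 1 */
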
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5c14b547882e..74f5b580fe34 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -141,6 +141,11 @@ static int min_percpu_pagelist_fract = 8;
141static int ngroups_max = NGROUPS_MAX; 141static int ngroups_max = NGROUPS_MAX;
142static const int cap_last_cap = CAP_LAST_CAP; 142static const int cap_last_cap = CAP_LAST_CAP;
143 143
144/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
145#ifdef CONFIG_DETECT_HUNG_TASK
146static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
147#endif
148
144#ifdef CONFIG_INOTIFY_USER 149#ifdef CONFIG_INOTIFY_USER
145#include <linux/inotify.h> 150#include <linux/inotify.h>
146#endif 151#endif
@@ -985,6 +990,7 @@ static struct ctl_table kern_table[] = {
985 .maxlen = sizeof(unsigned long), 990 .maxlen = sizeof(unsigned long),
986 .mode = 0644, 991 .mode = 0644,
987 .proc_handler = proc_dohung_task_timeout_secs, 992 .proc_handler = proc_dohung_task_timeout_secs,
993 .extra2 = &hung_task_timeout_max,
988 }, 994 },
989 { 995 {
990 .procname = "hung_task_warnings", 996 .procname = "hung_task_warnings",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5b40279ecd71..f7df8ea21707 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,6 +22,7 @@
22#include <linux/tick.h> 22#include <linux/tick.h>
23#include <linux/stop_machine.h> 23#include <linux/stop_machine.h>
24#include <linux/pvclock_gtod.h> 24#include <linux/pvclock_gtod.h>
25#include <linux/compiler.h>
25 26
26#include "tick-internal.h" 27#include "tick-internal.h"
27#include "ntp_internal.h" 28#include "ntp_internal.h"
@@ -760,7 +761,7 @@ u64 timekeeping_max_deferment(void)
760 * 761 *
761 * XXX - Do be sure to remove it once all arches implement it. 762 * XXX - Do be sure to remove it once all arches implement it.
762 */ 763 */
763void __attribute__((weak)) read_persistent_clock(struct timespec *ts) 764void __weak read_persistent_clock(struct timespec *ts)
764{ 765{
765 ts->tv_sec = 0; 766 ts->tv_sec = 0;
766 ts->tv_nsec = 0; 767 ts->tv_nsec = 0;
@@ -775,7 +776,7 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
775 * 776 *
776 * XXX - Do be sure to remove it once all arches implement it. 777 * XXX - Do be sure to remove it once all arches implement it.
777 */ 778 */
778void __attribute__((weak)) read_boot_clock(struct timespec *ts) 779void __weak read_boot_clock(struct timespec *ts)
779{ 780{
780 ts->tv_sec = 0; 781 ts->tv_sec = 0;
781 ts->tv_nsec = 0; 782 ts->tv_nsec = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ffc314b7e92b..2e29d7ba5a52 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,6 +13,7 @@
13#include <linux/hw_breakpoint.h> 13#include <linux/hw_breakpoint.h>
14#include <linux/trace_seq.h> 14#include <linux/trace_seq.h>
15#include <linux/ftrace_event.h> 15#include <linux/ftrace_event.h>
16#include <linux/compiler.h>
16 17
17#ifdef CONFIG_FTRACE_SYSCALLS 18#ifdef CONFIG_FTRACE_SYSCALLS
18#include <asm/unistd.h> /* For NR_SYSCALLS */ 19#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -1279,7 +1280,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
1279#undef FTRACE_ENTRY 1280#undef FTRACE_ENTRY
1280#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 1281#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
1281 extern struct ftrace_event_call \ 1282 extern struct ftrace_event_call \
1282 __attribute__((__aligned__(4))) event_##call; 1283 __aligned(4) event_##call;
1283#undef FTRACE_ENTRY_DUP 1284#undef FTRACE_ENTRY_DUP
1284#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ 1285#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
1285 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 1286 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98bc4a3f..5d4984c505f8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -342,9 +342,9 @@ config HAS_IOMEM
342 select GENERIC_IO 342 select GENERIC_IO
343 default y 343 default y
344 344
345config HAS_IOPORT 345config HAS_IOPORT_MAP
346 boolean 346 boolean
347 depends on HAS_IOMEM && !NO_IOPORT 347 depends on HAS_IOMEM && !NO_IOPORT_MAP
348 default y 348 default y
349 349
350config HAS_DMA 350config HAS_DMA
diff --git a/lib/decompress.c b/lib/decompress.c
index 4d1cd0397aab..86069d74c062 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -16,6 +16,7 @@
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/printk.h>
19 20
20#ifndef CONFIG_DECOMPRESS_GZIP 21#ifndef CONFIG_DECOMPRESS_GZIP
21# define gunzip NULL 22# define gunzip NULL
@@ -61,6 +62,8 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
61 if (len < 2) 62 if (len < 2)
62 return NULL; /* Need at least this much... */ 63 return NULL; /* Need at least this much... */
63 64
65 pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]);
66
64 for (cf = compressed_formats; cf->name; cf++) { 67 for (cf = compressed_formats; cf->name; cf++) {
65 if (!memcmp(inbuf, cf->magic, 2)) 68 if (!memcmp(inbuf, cf->magic, 2))
66 break; 69 break;
diff --git a/lib/devres.c b/lib/devres.c
index 48cb3c7bd7de..2f16c133fd36 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -170,7 +170,7 @@ void __iomem *devm_request_and_ioremap(struct device *device,
170} 170}
171EXPORT_SYMBOL(devm_request_and_ioremap); 171EXPORT_SYMBOL(devm_request_and_ioremap);
172 172
173#ifdef CONFIG_HAS_IOPORT 173#ifdef CONFIG_HAS_IOPORT_MAP
174/* 174/*
175 * Generic iomap devres 175 * Generic iomap devres
176 */ 176 */
@@ -229,7 +229,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr)
229 devm_ioport_map_match, (__force void *)addr)); 229 devm_ioport_map_match, (__force void *)addr));
230} 230}
231EXPORT_SYMBOL(devm_ioport_unmap); 231EXPORT_SYMBOL(devm_ioport_unmap);
232#endif /* CONFIG_HAS_IOPORT */ 232#endif /* CONFIG_HAS_IOPORT_MAP */
233 233
234#ifdef CONFIG_PCI 234#ifdef CONFIG_PCI
235/* 235/*
diff --git a/lib/idr.c b/lib/idr.c
index 1ba4956bfbff..2642fa8e424d 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -196,7 +196,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
196 } 196 }
197} 197}
198 198
199int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) 199static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
200{ 200{
201 while (idp->id_free_cnt < MAX_IDR_FREE) { 201 while (idp->id_free_cnt < MAX_IDR_FREE) {
202 struct idr_layer *new; 202 struct idr_layer *new;
@@ -207,7 +207,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
207 } 207 }
208 return 1; 208 return 1;
209} 209}
210EXPORT_SYMBOL(__idr_pre_get);
211 210
212/** 211/**
213 * sub_alloc - try to allocate an id without growing the tree depth 212 * sub_alloc - try to allocate an id without growing the tree depth
@@ -374,20 +373,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id,
374 idr_mark_full(pa, id); 373 idr_mark_full(pa, id);
375} 374}
376 375
377int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
378{
379 struct idr_layer *pa[MAX_IDR_LEVEL + 1];
380 int rv;
381
382 rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp);
383 if (rv < 0)
384 return rv == -ENOMEM ? -EAGAIN : rv;
385
386 idr_fill_slot(idp, ptr, rv, pa);
387 *id = rv;
388 return 0;
389}
390EXPORT_SYMBOL(__idr_get_new_above);
391 376
392/** 377/**
393 * idr_preload - preload for idr_alloc() 378 * idr_preload - preload for idr_alloc()
@@ -548,7 +533,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
548 n = id & IDR_MASK; 533 n = id & IDR_MASK;
549 if (likely(p != NULL && test_bit(n, p->bitmap))) { 534 if (likely(p != NULL && test_bit(n, p->bitmap))) {
550 __clear_bit(n, p->bitmap); 535 __clear_bit(n, p->bitmap);
551 rcu_assign_pointer(p->ary[n], NULL); 536 RCU_INIT_POINTER(p->ary[n], NULL);
552 to_free = NULL; 537 to_free = NULL;
553 while(*paa && ! --((**paa)->count)){ 538 while(*paa && ! --((**paa)->count)){
554 if (to_free) 539 if (to_free)
@@ -607,7 +592,7 @@ void idr_remove(struct idr *idp, int id)
607} 592}
608EXPORT_SYMBOL(idr_remove); 593EXPORT_SYMBOL(idr_remove);
609 594
610void __idr_remove_all(struct idr *idp) 595static void __idr_remove_all(struct idr *idp)
611{ 596{
612 int n, id, max; 597 int n, id, max;
613 int bt_mask; 598 int bt_mask;
@@ -617,7 +602,7 @@ void __idr_remove_all(struct idr *idp)
617 602
618 n = idp->layers * IDR_BITS; 603 n = idp->layers * IDR_BITS;
619 p = idp->top; 604 p = idp->top;
620 rcu_assign_pointer(idp->top, NULL); 605 RCU_INIT_POINTER(idp->top, NULL);
621 max = idr_max(idp->layers); 606 max = idr_max(idp->layers);
622 607
623 id = 0; 608 id = 0;
@@ -640,7 +625,6 @@ void __idr_remove_all(struct idr *idp)
640 } 625 }
641 idp->layers = 0; 626 idp->layers = 0;
642} 627}
643EXPORT_SYMBOL(__idr_remove_all);
644 628
645/** 629/**
646 * idr_destroy - release all cached layers within an idr tree 630 * idr_destroy - release all cached layers within an idr tree
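
The helpers made static above were only reachable through the preload/alloc interface, which remains the supported way to obtain an id. A minimal sketch of that pattern; my_idr, my_lock and ptr are hypothetical:

	int id;

	idr_preload(GFP_KERNEL);		/* may sleep; preallocates layers */
	spin_lock(&my_lock);
	id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
	spin_unlock(&my_lock);
	idr_preload_end();

	if (id < 0)
		return id;			/* -ENOMEM or -ENOSPC */
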
diff --git a/lib/iomap.c b/lib/iomap.c
index 2c08f36862eb..fc3dcb4b238e 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep);
224EXPORT_SYMBOL(iowrite16_rep); 224EXPORT_SYMBOL(iowrite16_rep);
225EXPORT_SYMBOL(iowrite32_rep); 225EXPORT_SYMBOL(iowrite32_rep);
226 226
227#ifdef CONFIG_HAS_IOPORT 227#ifdef CONFIG_HAS_IOPORT_MAP
228/* Create a virtual mapping cookie for an IO port range */ 228/* Create a virtual mapping cookie for an IO port range */
229void __iomem *ioport_map(unsigned long port, unsigned int nr) 229void __iomem *ioport_map(unsigned long port, unsigned int nr)
230{ 230{
@@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr)
239} 239}
240EXPORT_SYMBOL(ioport_map); 240EXPORT_SYMBOL(ioport_map);
241EXPORT_SYMBOL(ioport_unmap); 241EXPORT_SYMBOL(ioport_unmap);
242#endif /* CONFIG_HAS_IOPORT */ 242#endif /* CONFIG_HAS_IOPORT_MAP */
243 243
244#ifdef CONFIG_PCI 244#ifdef CONFIG_PCI
245/* Hide the details if this is a MMIO or PIO address space and just do what 245/* Hide the details if this is a MMIO or PIO address space and just do what
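
The iomap.c change is purely a rename of the guard symbol: ioport_map()/ioport_unmap() are now compiled in when CONFIG_HAS_IOPORT_MAP is set. A hedged sketch of the driver-side usage that this option gates -- the register offset and value are made up:

#include <linux/errno.h>
#include <linux/io.h>

static int probe_ports(unsigned long port, unsigned int nr)
{
#ifdef CONFIG_HAS_IOPORT_MAP
	void __iomem *base = ioport_map(port, nr);

	if (!base)
		return -ENOMEM;

	iowrite8(0x01, base);	/* hypothetical enable register at offset 0 */
	ioport_unmap(base);
	return 0;
#else
	return -ENODEV;		/* port I/O mapping not available in this config */
#endif
}
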
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 04abe53f12a1..1afec32de6f2 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,8 @@
7#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9 9
10notrace unsigned int debug_smp_processor_id(void) 10notrace static unsigned int check_preemption_disabled(const char *what1,
11 const char *what2)
11{ 12{
12 int this_cpu = raw_smp_processor_id(); 13 int this_cpu = raw_smp_processor_id();
13 14
@@ -38,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void)
38 if (!printk_ratelimit()) 39 if (!printk_ratelimit())
39 goto out_enable; 40 goto out_enable;
40 41
41 printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " 42 printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
42 "code: %s/%d\n", 43 what1, what2, preempt_count() - 1, current->comm, current->pid);
43 preempt_count() - 1, current->comm, current->pid); 44
44 print_symbol("caller is %s\n", (long)__builtin_return_address(0)); 45 print_symbol("caller is %s\n", (long)__builtin_return_address(0));
45 dump_stack(); 46 dump_stack();
46 47
@@ -50,5 +51,14 @@ out:
50 return this_cpu; 51 return this_cpu;
51} 52}
52 53
54notrace unsigned int debug_smp_processor_id(void)
55{
56 return check_preemption_disabled("smp_processor_id", "");
57}
53EXPORT_SYMBOL(debug_smp_processor_id); 58EXPORT_SYMBOL(debug_smp_processor_id);
54 59
60notrace void __this_cpu_preempt_check(const char *op)
61{
62 check_preemption_disabled("__this_cpu_", op);
63}
64EXPORT_SYMBOL(__this_cpu_preempt_check);
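
With this change, check_preemption_disabled() backs both debug_smp_processor_id() and the new __this_cpu_preempt_check() hook used by the __this_cpu_* operations, so both report the same class of bug: touching CPU-local state while preemption is enabled. A small sketch of the bug and the usual fix (illustrative only, not from this patch):

#include <linux/smp.h>
#include <linux/preempt.h>

static void touch_per_cpu_state(void)
{
	int cpu;

	/*
	 * Wrong in preemptible context: the task may migrate right after the
	 * read, and with CONFIG_DEBUG_PREEMPT this is exactly what the check
	 * above warns about.
	 */
	cpu = smp_processor_id();
	(void)cpu;

	/* Correct: pin the task to the CPU for the duration of the access. */
	cpu = get_cpu();
	/* ... use per-CPU data belonging to 'cpu' ... */
	put_cpu();
}
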
diff --git a/mm/Kconfig b/mm/Kconfig
index 2888024e0b0a..ebe5880c29d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -216,6 +216,7 @@ config PAGEFLAGS_EXTENDED
216# 216#
217config SPLIT_PTLOCK_CPUS 217config SPLIT_PTLOCK_CPUS
218 int 218 int
219 default "999999" if !MMU
219 default "999999" if ARM && !CPU_CACHE_VIPT 220 default "999999" if ARM && !CPU_CACHE_VIPT
220 default "999999" if PARISC && !PA20 221 default "999999" if PARISC && !PA20
221 default "4" 222 default "4"
@@ -577,3 +578,6 @@ config PGTABLE_MAPPING
577 578
578 You can check speed with zsmalloc benchmark: 579 You can check speed with zsmalloc benchmark:
579 https://github.com/spartacus06/zsmapbench 580 https://github.com/spartacus06/zsmapbench
581
582config GENERIC_EARLY_IOREMAP
583 bool
diff --git a/mm/Makefile b/mm/Makefile
index cdd741519ee0..9e5aaf92197d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
16 readahead.o swap.o truncate.o vmscan.o shmem.o \ 16 readahead.o swap.o truncate.o vmscan.o shmem.o \
17 util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o balloon_compaction.o \ 19 compaction.o balloon_compaction.o vmacache.o \
20 interval_tree.o list_lru.o workingset.o $(mmu-y) 20 interval_tree.o list_lru.o workingset.o $(mmu-y)
21 21
22obj-y += init-mm.o 22obj-y += init-mm.o
@@ -61,3 +61,4 @@ obj-$(CONFIG_CLEANCACHE) += cleancache.o
61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
62obj-$(CONFIG_ZBUD) += zbud.o 62obj-$(CONFIG_ZBUD) += zbud.o
63obj-$(CONFIG_ZSMALLOC) += zsmalloc.o 63obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
64obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
diff --git a/mm/compaction.c b/mm/compaction.c
index b6ab77160068..37f976287068 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -217,21 +217,12 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
217/* Returns true if the page is within a block suitable for migration to */ 217/* Returns true if the page is within a block suitable for migration to */
218static bool suitable_migration_target(struct page *page) 218static bool suitable_migration_target(struct page *page)
219{ 219{
220 int migratetype = get_pageblock_migratetype(page); 220 /* If the page is a large free page, then disallow migration */
221
222 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
223 if (migratetype == MIGRATE_RESERVE)
224 return false;
225
226 if (is_migrate_isolate(migratetype))
227 return false;
228
229 /* If the page is a large free page, then allow migration */
230 if (PageBuddy(page) && page_order(page) >= pageblock_order) 221 if (PageBuddy(page) && page_order(page) >= pageblock_order)
231 return true; 222 return false;
232 223
233 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 224 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
234 if (migrate_async_suitable(migratetype)) 225 if (migrate_async_suitable(get_pageblock_migratetype(page)))
235 return true; 226 return true;
236 227
237 /* Otherwise skip the block */ 228 /* Otherwise skip the block */
@@ -253,6 +244,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
253 struct page *cursor, *valid_page = NULL; 244 struct page *cursor, *valid_page = NULL;
254 unsigned long flags; 245 unsigned long flags;
255 bool locked = false; 246 bool locked = false;
247 bool checked_pageblock = false;
256 248
257 cursor = pfn_to_page(blockpfn); 249 cursor = pfn_to_page(blockpfn);
258 250
@@ -284,8 +276,16 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
284 break; 276 break;
285 277
286 /* Recheck this is a suitable migration target under lock */ 278 /* Recheck this is a suitable migration target under lock */
287 if (!strict && !suitable_migration_target(page)) 279 if (!strict && !checked_pageblock) {
288 break; 280 /*
281 * We need to check suitability of pageblock only once
282 * and this isolate_freepages_block() is called with
283 * pageblock range, so just check once is sufficient.
284 */
285 checked_pageblock = true;
286 if (!suitable_migration_target(page))
287 break;
288 }
289 289
290 /* Recheck this is a buddy page under lock */ 290 /* Recheck this is a buddy page under lock */
291 if (!PageBuddy(page)) 291 if (!PageBuddy(page))
@@ -460,12 +460,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
460 unsigned long last_pageblock_nr = 0, pageblock_nr; 460 unsigned long last_pageblock_nr = 0, pageblock_nr;
461 unsigned long nr_scanned = 0, nr_isolated = 0; 461 unsigned long nr_scanned = 0, nr_isolated = 0;
462 struct list_head *migratelist = &cc->migratepages; 462 struct list_head *migratelist = &cc->migratepages;
463 isolate_mode_t mode = 0;
464 struct lruvec *lruvec; 463 struct lruvec *lruvec;
465 unsigned long flags; 464 unsigned long flags;
466 bool locked = false; 465 bool locked = false;
467 struct page *page = NULL, *valid_page = NULL; 466 struct page *page = NULL, *valid_page = NULL;
468 bool skipped_async_unsuitable = false; 467 bool skipped_async_unsuitable = false;
468 const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
469 (unevictable ? ISOLATE_UNEVICTABLE : 0);
469 470
470 /* 471 /*
471 * Ensure that there are not too many pages isolated from the LRU 472 * Ensure that there are not too many pages isolated from the LRU
@@ -487,7 +488,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
487 cond_resched(); 488 cond_resched();
488 for (; low_pfn < end_pfn; low_pfn++) { 489 for (; low_pfn < end_pfn; low_pfn++) {
489 /* give a chance to irqs before checking need_resched() */ 490 /* give a chance to irqs before checking need_resched() */
490 if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) { 491 if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) {
491 if (should_release_lock(&zone->lru_lock)) { 492 if (should_release_lock(&zone->lru_lock)) {
492 spin_unlock_irqrestore(&zone->lru_lock, flags); 493 spin_unlock_irqrestore(&zone->lru_lock, flags);
493 locked = false; 494 locked = false;
@@ -526,8 +527,25 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
526 527
527 /* If isolation recently failed, do not retry */ 528 /* If isolation recently failed, do not retry */
528 pageblock_nr = low_pfn >> pageblock_order; 529 pageblock_nr = low_pfn >> pageblock_order;
529 if (!isolation_suitable(cc, page)) 530 if (last_pageblock_nr != pageblock_nr) {
530 goto next_pageblock; 531 int mt;
532
533 last_pageblock_nr = pageblock_nr;
534 if (!isolation_suitable(cc, page))
535 goto next_pageblock;
536
537 /*
538 * For async migration, also only scan in MOVABLE
539 * blocks. Async migration is optimistic to see if
540 * the minimum amount of work satisfies the allocation
541 */
542 mt = get_pageblock_migratetype(page);
543 if (!cc->sync && !migrate_async_suitable(mt)) {
544 cc->finished_update_migrate = true;
545 skipped_async_unsuitable = true;
546 goto next_pageblock;
547 }
548 }
531 549
532 /* 550 /*
533 * Skip if free. page_order cannot be used without zone->lock 551 * Skip if free. page_order cannot be used without zone->lock
@@ -537,18 +555,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
537 continue; 555 continue;
538 556
539 /* 557 /*
540 * For async migration, also only scan in MOVABLE blocks. Async
541 * migration is optimistic to see if the minimum amount of work
542 * satisfies the allocation
543 */
544 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
545 !migrate_async_suitable(get_pageblock_migratetype(page))) {
546 cc->finished_update_migrate = true;
547 skipped_async_unsuitable = true;
548 goto next_pageblock;
549 }
550
551 /*
552 * Check may be lockless but that's ok as we recheck later. 558 * Check may be lockless but that's ok as we recheck later.
553 * It's possible to migrate LRU pages and balloon pages 559 * It's possible to migrate LRU pages and balloon pages
554 * Skip any other type of page 560 * Skip any other type of page
@@ -557,11 +563,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
557 if (unlikely(balloon_page_movable(page))) { 563 if (unlikely(balloon_page_movable(page))) {
558 if (locked && balloon_page_isolate(page)) { 564 if (locked && balloon_page_isolate(page)) {
559 /* Successfully isolated */ 565 /* Successfully isolated */
560 cc->finished_update_migrate = true; 566 goto isolate_success;
561 list_add(&page->lru, migratelist);
562 cc->nr_migratepages++;
563 nr_isolated++;
564 goto check_compact_cluster;
565 } 567 }
566 } 568 }
567 continue; 569 continue;
@@ -607,12 +609,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
607 continue; 609 continue;
608 } 610 }
609 611
610 if (!cc->sync)
611 mode |= ISOLATE_ASYNC_MIGRATE;
612
613 if (unevictable)
614 mode |= ISOLATE_UNEVICTABLE;
615
616 lruvec = mem_cgroup_page_lruvec(page, zone); 612 lruvec = mem_cgroup_page_lruvec(page, zone);
617 613
618 /* Try isolate the page */ 614 /* Try isolate the page */
@@ -622,13 +618,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
622 VM_BUG_ON_PAGE(PageTransCompound(page), page); 618 VM_BUG_ON_PAGE(PageTransCompound(page), page);
623 619
624 /* Successfully isolated */ 620 /* Successfully isolated */
625 cc->finished_update_migrate = true;
626 del_page_from_lru_list(page, lruvec, page_lru(page)); 621 del_page_from_lru_list(page, lruvec, page_lru(page));
622
623isolate_success:
624 cc->finished_update_migrate = true;
627 list_add(&page->lru, migratelist); 625 list_add(&page->lru, migratelist);
628 cc->nr_migratepages++; 626 cc->nr_migratepages++;
629 nr_isolated++; 627 nr_isolated++;
630 628
631check_compact_cluster:
632 /* Avoid isolating too much */ 629 /* Avoid isolating too much */
633 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { 630 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
634 ++low_pfn; 631 ++low_pfn;
@@ -639,7 +636,6 @@ check_compact_cluster:
639 636
640next_pageblock: 637next_pageblock:
641 low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1; 638 low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
642 last_pageblock_nr = pageblock_nr;
643 } 639 }
644 640
645 acct_isolated(zone, locked, cc); 641 acct_isolated(zone, locked, cc);
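
The isolate_migratepages_range() rework above moves the migratetype and suitability tests into a branch that runs once per pageblock, keyed on last_pageblock_nr, instead of re-deriving them for every pfn. Stripped of the compaction specifics, the control flow is "evaluate the block predicate only when crossing a block boundary"; a userspace-style sketch of that idea, with an arbitrary predicate standing in for the real checks:

#include <stdbool.h>
#include <stdio.h>

#define BLOCK_ORDER	9		/* stand-in for pageblock_order */
#define BLOCK_PAGES	(1UL << BLOCK_ORDER)

static bool block_is_suitable(unsigned long block)
{
	return block % 3 != 0;		/* arbitrary demo predicate */
}

static unsigned long scan(unsigned long start, unsigned long end)
{
	unsigned long pfn, isolated = 0;
	unsigned long last_block = (unsigned long)-1;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long block = pfn >> BLOCK_ORDER;

		/* Run the expensive per-block check once per block, as the patch does. */
		if (block != last_block) {
			last_block = block;
			if (!block_is_suitable(block)) {
				/* Skip to the last pfn of the block; pfn++ enters the next one. */
				pfn = (block + 1) * BLOCK_PAGES - 1;
				continue;
			}
		}
		isolated++;		/* the real code would try to isolate this page */
	}
	return isolated;
}

int main(void)
{
	printf("isolated %lu of %lu pages\n", scan(0, 4 * BLOCK_PAGES), 4 * BLOCK_PAGES);
	return 0;
}
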
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
new file mode 100644
index 000000000000..e10ccd299d66
--- /dev/null
+++ b/mm/early_ioremap.c
@@ -0,0 +1,245 @@
1/*
2 * Provide common bits of early_ioremap() support for architectures needing
3 * temporary mappings during boot before ioremap() is available.
4 *
5 * This is mostly a direct copy of the x86 early_ioremap implementation.
6 *
7 * (C) Copyright 1995 1996, 2014 Linus Torvalds
8 *
9 */
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/io.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/mm.h>
16#include <linux/vmalloc.h>
17#include <asm/fixmap.h>
18
19#ifdef CONFIG_MMU
20static int early_ioremap_debug __initdata;
21
22static int __init early_ioremap_debug_setup(char *str)
23{
24 early_ioremap_debug = 1;
25
26 return 0;
27}
28early_param("early_ioremap_debug", early_ioremap_debug_setup);
29
30static int after_paging_init __initdata;
31
32void __init __weak early_ioremap_shutdown(void)
33{
34}
35
36void __init early_ioremap_reset(void)
37{
38 early_ioremap_shutdown();
39 after_paging_init = 1;
40}
41
42/*
43 * Generally, ioremap() is available after paging_init() has been called.
44 * Architectures wanting to allow early_ioremap after paging_init() can
45 * define __late_set_fixmap and __late_clear_fixmap to do the right thing.
46 */
47#ifndef __late_set_fixmap
48static inline void __init __late_set_fixmap(enum fixed_addresses idx,
49 phys_addr_t phys, pgprot_t prot)
50{
51 BUG();
52}
53#endif
54
55#ifndef __late_clear_fixmap
56static inline void __init __late_clear_fixmap(enum fixed_addresses idx)
57{
58 BUG();
59}
60#endif
61
62static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
63static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
64static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
65
66void __init early_ioremap_setup(void)
67{
68 int i;
69
70 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
71 if (WARN_ON(prev_map[i]))
72 break;
73
74 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
75 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
76}
77
78static int __init check_early_ioremap_leak(void)
79{
80 int count = 0;
81 int i;
82
83 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
84 if (prev_map[i])
85 count++;
86
87 if (WARN(count, KERN_WARNING
88 "Debug warning: early ioremap leak of %d areas detected.\n"
89 "please boot with early_ioremap_debug and report the dmesg.\n",
90 count))
91 return 1;
92 return 0;
93}
94late_initcall(check_early_ioremap_leak);
95
96static void __init __iomem *
97__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
98{
99 unsigned long offset;
100 resource_size_t last_addr;
101 unsigned int nrpages;
102 enum fixed_addresses idx;
103 int i, slot;
104
105 WARN_ON(system_state != SYSTEM_BOOTING);
106
107 slot = -1;
108 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
109 if (!prev_map[i]) {
110 slot = i;
111 break;
112 }
113 }
114
115 if (WARN(slot < 0, "%s(%08llx, %08lx) not found slot\n",
116 __func__, (u64)phys_addr, size))
117 return NULL;
118
119 /* Don't allow wraparound or zero size */
120 last_addr = phys_addr + size - 1;
121 if (WARN_ON(!size || last_addr < phys_addr))
122 return NULL;
123
124 prev_size[slot] = size;
125 /*
126 * Mappings have to be page-aligned
127 */
128 offset = phys_addr & ~PAGE_MASK;
129 phys_addr &= PAGE_MASK;
130 size = PAGE_ALIGN(last_addr + 1) - phys_addr;
131
132 /*
133 * Mappings have to fit in the FIX_BTMAP area.
134 */
135 nrpages = size >> PAGE_SHIFT;
136 if (WARN_ON(nrpages > NR_FIX_BTMAPS))
137 return NULL;
138
139 /*
140 * Ok, go for it..
141 */
142 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
143 while (nrpages > 0) {
144 if (after_paging_init)
145 __late_set_fixmap(idx, phys_addr, prot);
146 else
147 __early_set_fixmap(idx, phys_addr, prot);
148 phys_addr += PAGE_SIZE;
149 --idx;
150 --nrpages;
151 }
152 WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n",
153 __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]);
154
155 prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
156 return prev_map[slot];
157}
158
159void __init early_iounmap(void __iomem *addr, unsigned long size)
160{
161 unsigned long virt_addr;
162 unsigned long offset;
163 unsigned int nrpages;
164 enum fixed_addresses idx;
165 int i, slot;
166
167 slot = -1;
168 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
169 if (prev_map[i] == addr) {
170 slot = i;
171 break;
172 }
173 }
174
175 if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n",
176 addr, size))
177 return;
178
179 if (WARN(prev_size[slot] != size,
180 "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
181 addr, size, slot, prev_size[slot]))
182 return;
183
184 WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n",
185 addr, size, slot);
186
187 virt_addr = (unsigned long)addr;
188 if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)))
189 return;
190
191 offset = virt_addr & ~PAGE_MASK;
192 nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
193
194 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
195 while (nrpages > 0) {
196 if (after_paging_init)
197 __late_clear_fixmap(idx);
198 else
199 __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR);
200 --idx;
201 --nrpages;
202 }
203 prev_map[slot] = NULL;
204}
205
206/* Remap an IO device */
207void __init __iomem *
208early_ioremap(resource_size_t phys_addr, unsigned long size)
209{
210 return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO);
211}
212
213/* Remap memory */
214void __init *
215early_memremap(resource_size_t phys_addr, unsigned long size)
216{
217 return (__force void *)__early_ioremap(phys_addr, size,
218 FIXMAP_PAGE_NORMAL);
219}
220#else /* CONFIG_MMU */
221
222void __init __iomem *
223early_ioremap(resource_size_t phys_addr, unsigned long size)
224{
225 return (__force void __iomem *)phys_addr;
226}
227
228/* Remap memory */
229void __init *
230early_memremap(resource_size_t phys_addr, unsigned long size)
231{
232 return (void *)phys_addr;
233}
234
235void __init early_iounmap(void __iomem *addr, unsigned long size)
236{
237}
238
239#endif /* CONFIG_MMU */
240
241
242void __init early_memunmap(void *addr, unsigned long size)
243{
244 early_iounmap((__force void __iomem *)addr, size);
245}
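
mm/early_ioremap.c gives every architecture that selects GENERIC_EARLY_IOREMAP the slot-based fixmap scheme previously private to x86: FIX_BTMAPS_SLOTS temporary mappings, each NR_FIX_BTMAPS pages wide, usable before ioremap() works. The caller-side contract is small; a hedged usage sketch for early boot code -- the device, register offset, and helper name are made up, and the includes are indicative:

#include <linux/init.h>
#include <linux/io.h>
#include <linux/sizes.h>
#include <linux/types.h>

/* Read a 32-bit board ID register before ioremap() is available. */
static u32 __init read_board_id(phys_addr_t regs_phys)
{
	void __iomem *regs;
	u32 id;

	regs = early_ioremap(regs_phys, SZ_4K);	/* consumes one BTMAP slot */
	if (!regs)
		return 0;

	id = readl(regs + 0x10);		/* hypothetical ID register */

	/* Must be undone with the same size, and before the slots run out. */
	early_iounmap(regs, SZ_4K);
	return id;
}
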
diff --git a/mm/filemap.c b/mm/filemap.c
index 21781f1fe52b..27ebc0c9571b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
34#include <linux/memcontrol.h> 34#include <linux/memcontrol.h>
35#include <linux/cleancache.h> 35#include <linux/cleancache.h>
36#include <linux/rmap.h>
36#include "internal.h" 37#include "internal.h"
37 38
38#define CREATE_TRACE_POINTS 39#define CREATE_TRACE_POINTS
@@ -562,7 +563,7 @@ static int __add_to_page_cache_locked(struct page *page,
562 VM_BUG_ON_PAGE(!PageLocked(page), page); 563 VM_BUG_ON_PAGE(!PageLocked(page), page);
563 VM_BUG_ON_PAGE(PageSwapBacked(page), page); 564 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
564 565
565 error = mem_cgroup_cache_charge(page, current->mm, 566 error = mem_cgroup_charge_file(page, current->mm,
566 gfp_mask & GFP_RECLAIM_MASK); 567 gfp_mask & GFP_RECLAIM_MASK);
567 if (error) 568 if (error)
568 return error; 569 return error;
@@ -1952,11 +1953,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1952 struct inode *inode = mapping->host; 1953 struct inode *inode = mapping->host;
1953 pgoff_t offset = vmf->pgoff; 1954 pgoff_t offset = vmf->pgoff;
1954 struct page *page; 1955 struct page *page;
1955 pgoff_t size; 1956 loff_t size;
1956 int ret = 0; 1957 int ret = 0;
1957 1958
1958 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1959 size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
1959 if (offset >= size) 1960 if (offset >= size >> PAGE_CACHE_SHIFT)
1960 return VM_FAULT_SIGBUS; 1961 return VM_FAULT_SIGBUS;
1961 1962
1962 /* 1963 /*
@@ -2005,8 +2006,8 @@ retry_find:
2005 * Found the page and have a reference on it. 2006 * Found the page and have a reference on it.
2006 * We must recheck i_size under page lock. 2007 * We must recheck i_size under page lock.
2007 */ 2008 */
2008 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2009 size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
2009 if (unlikely(offset >= size)) { 2010 if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
2010 unlock_page(page); 2011 unlock_page(page);
2011 page_cache_release(page); 2012 page_cache_release(page);
2012 return VM_FAULT_SIGBUS; 2013 return VM_FAULT_SIGBUS;
@@ -2064,6 +2065,78 @@ page_not_uptodate:
2064} 2065}
2065EXPORT_SYMBOL(filemap_fault); 2066EXPORT_SYMBOL(filemap_fault);
2066 2067
2068void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
2069{
2070 struct radix_tree_iter iter;
2071 void **slot;
2072 struct file *file = vma->vm_file;
2073 struct address_space *mapping = file->f_mapping;
2074 loff_t size;
2075 struct page *page;
2076 unsigned long address = (unsigned long) vmf->virtual_address;
2077 unsigned long addr;
2078 pte_t *pte;
2079
2080 rcu_read_lock();
2081 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
2082 if (iter.index > vmf->max_pgoff)
2083 break;
2084repeat:
2085 page = radix_tree_deref_slot(slot);
2086 if (unlikely(!page))
2087 goto next;
2088 if (radix_tree_exception(page)) {
2089 if (radix_tree_deref_retry(page))
2090 break;
2091 else
2092 goto next;
2093 }
2094
2095 if (!page_cache_get_speculative(page))
2096 goto repeat;
2097
2098 /* Has the page moved? */
2099 if (unlikely(page != *slot)) {
2100 page_cache_release(page);
2101 goto repeat;
2102 }
2103
2104 if (!PageUptodate(page) ||
2105 PageReadahead(page) ||
2106 PageHWPoison(page))
2107 goto skip;
2108 if (!trylock_page(page))
2109 goto skip;
2110
2111 if (page->mapping != mapping || !PageUptodate(page))
2112 goto unlock;
2113
2114 size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
2115 if (page->index >= size >> PAGE_CACHE_SHIFT)
2116 goto unlock;
2117
2118 pte = vmf->pte + page->index - vmf->pgoff;
2119 if (!pte_none(*pte))
2120 goto unlock;
2121
2122 if (file->f_ra.mmap_miss > 0)
2123 file->f_ra.mmap_miss--;
2124 addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
2125 do_set_pte(vma, addr, page, pte, false, false);
2126 unlock_page(page);
2127 goto next;
2128unlock:
2129 unlock_page(page);
2130skip:
2131 page_cache_release(page);
2132next:
2133 if (iter.index == vmf->max_pgoff)
2134 break;
2135 }
2136 rcu_read_unlock();
2137}
2138EXPORT_SYMBOL(filemap_map_pages);
2139
2067int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 2140int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2068{ 2141{
2069 struct page *page = vmf->page; 2142 struct page *page = vmf->page;
@@ -2093,6 +2166,7 @@ EXPORT_SYMBOL(filemap_page_mkwrite);
2093 2166
2094const struct vm_operations_struct generic_file_vm_ops = { 2167const struct vm_operations_struct generic_file_vm_ops = {
2095 .fault = filemap_fault, 2168 .fault = filemap_fault,
2169 .map_pages = filemap_map_pages,
2096 .page_mkwrite = filemap_page_mkwrite, 2170 .page_mkwrite = filemap_page_mkwrite,
2097 .remap_pages = generic_file_remap_pages, 2171 .remap_pages = generic_file_remap_pages,
2098}; 2172};
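
filemap_map_pages() is the new ->map_pages handler: on a fault it walks the radix tree around the faulting index under RCU and maps every page that is already present and uptodate, avoiding one full fault per page. Filesystems that already rely on filemap_fault() opt in exactly as generic_file_vm_ops does above; a sketch for a hypothetical filesystem (the myfs_* names are invented):

#include <linux/fs.h>
#include <linux/mm.h>

static const struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,	/* new: batched fault-around mapping */
	.page_mkwrite	= filemap_page_mkwrite,
};

static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &myfs_file_vm_ops;
	return 0;
}
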
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6ac89e9f82ef..64635f5278ff 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
827 count_vm_event(THP_FAULT_FALLBACK); 827 count_vm_event(THP_FAULT_FALLBACK);
828 return VM_FAULT_FALLBACK; 828 return VM_FAULT_FALLBACK;
829 } 829 }
830 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { 830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
831 put_page(page); 831 put_page(page);
832 count_vm_event(THP_FAULT_FALLBACK); 832 count_vm_event(THP_FAULT_FALLBACK);
833 return VM_FAULT_FALLBACK; 833 return VM_FAULT_FALLBACK;
@@ -968,7 +968,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
968 __GFP_OTHER_NODE, 968 __GFP_OTHER_NODE,
969 vma, address, page_to_nid(page)); 969 vma, address, page_to_nid(page));
970 if (unlikely(!pages[i] || 970 if (unlikely(!pages[i] ||
971 mem_cgroup_newpage_charge(pages[i], mm, 971 mem_cgroup_charge_anon(pages[i], mm,
972 GFP_KERNEL))) { 972 GFP_KERNEL))) {
973 if (pages[i]) 973 if (pages[i])
974 put_page(pages[i]); 974 put_page(pages[i]);
@@ -1101,7 +1101,7 @@ alloc:
1101 goto out; 1101 goto out;
1102 } 1102 }
1103 1103
1104 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 1104 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
1105 put_page(new_page); 1105 put_page(new_page);
1106 if (page) { 1106 if (page) {
1107 split_huge_page(page); 1107 split_huge_page(page);
@@ -1891,17 +1891,22 @@ out:
1891int hugepage_madvise(struct vm_area_struct *vma, 1891int hugepage_madvise(struct vm_area_struct *vma,
1892 unsigned long *vm_flags, int advice) 1892 unsigned long *vm_flags, int advice)
1893{ 1893{
1894 struct mm_struct *mm = vma->vm_mm;
1895
1896 switch (advice) { 1894 switch (advice) {
1897 case MADV_HUGEPAGE: 1895 case MADV_HUGEPAGE:
1896#ifdef CONFIG_S390
1897 /*
1898 * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
1899 * can't handle this properly after s390_enable_sie, so we simply
1900 * ignore the madvise to prevent qemu from causing a SIGSEGV.
1901 */
1902 if (mm_has_pgste(vma->vm_mm))
1903 return 0;
1904#endif
1898 /* 1905 /*
1899 * Be somewhat over-protective like KSM for now! 1906 * Be somewhat over-protective like KSM for now!
1900 */ 1907 */
1901 if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) 1908 if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
1902 return -EINVAL; 1909 return -EINVAL;
1903 if (mm->def_flags & VM_NOHUGEPAGE)
1904 return -EINVAL;
1905 *vm_flags &= ~VM_NOHUGEPAGE; 1910 *vm_flags &= ~VM_NOHUGEPAGE;
1906 *vm_flags |= VM_HUGEPAGE; 1911 *vm_flags |= VM_HUGEPAGE;
1907 /* 1912 /*
@@ -2354,7 +2359,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2354 if (!new_page) 2359 if (!new_page)
2355 return; 2360 return;
2356 2361
2357 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) 2362 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
2358 return; 2363 return;
2359 2364
2360 /* 2365 /*
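
The huge_memory.c hunks are mostly the mem_cgroup_newpage_charge() -> mem_cgroup_charge_anon() rename, plus the s390 special case: on mms using pgste (after s390_enable_sie()), MADV_HUGEPAGE is now silently ignored rather than allowed to set VM_HUGEPAGE and trip the guest up later. The call that reaches hugepage_madvise() is ordinary madvise(); a runnable userspace illustration, with an arbitrary region size:

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 8UL << 20;		/* 8 MiB anonymous region */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	/* Ask for transparent huge pages; with the patch this becomes a
	 * successful no-op on s390 mms that have pgste enabled. */
	if (madvise(p, len, MADV_HUGEPAGE) != 0)
		perror("madvise");

	munmap(p, len);
	return 0;
}
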
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c02b9dadfb0..dd30f22b35e0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -13,6 +13,7 @@
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mempolicy.h> 15#include <linux/mempolicy.h>
16#include <linux/compiler.h>
16#include <linux/cpuset.h> 17#include <linux/cpuset.h>
17#include <linux/mutex.h> 18#include <linux/mutex.h>
18#include <linux/bootmem.h> 19#include <linux/bootmem.h>
@@ -1535,6 +1536,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
1535 while (min_count < persistent_huge_pages(h)) { 1536 while (min_count < persistent_huge_pages(h)) {
1536 if (!free_pool_huge_page(h, nodes_allowed, 0)) 1537 if (!free_pool_huge_page(h, nodes_allowed, 0))
1537 break; 1538 break;
1539 cond_resched_lock(&hugetlb_lock);
1538 } 1540 }
1539 while (count < persistent_huge_pages(h)) { 1541 while (count < persistent_huge_pages(h)) {
1540 if (!adjust_pool_surplus(h, nodes_allowed, 1)) 1542 if (!adjust_pool_surplus(h, nodes_allowed, 1))
@@ -2690,7 +2692,8 @@ retry_avoidcopy:
2690 BUG_ON(huge_pte_none(pte)); 2692 BUG_ON(huge_pte_none(pte));
2691 spin_lock(ptl); 2693 spin_lock(ptl);
2692 ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2694 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
2693 if (likely(pte_same(huge_ptep_get(ptep), pte))) 2695 if (likely(ptep &&
2696 pte_same(huge_ptep_get(ptep), pte)))
2694 goto retry_avoidcopy; 2697 goto retry_avoidcopy;
2695 /* 2698 /*
2696 * race occurs while re-acquiring page table 2699 * race occurs while re-acquiring page table
@@ -2734,7 +2737,7 @@ retry_avoidcopy:
2734 */ 2737 */
2735 spin_lock(ptl); 2738 spin_lock(ptl);
2736 ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2739 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
2737 if (likely(pte_same(huge_ptep_get(ptep), pte))) { 2740 if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
2738 ClearPagePrivate(new_page); 2741 ClearPagePrivate(new_page);
2739 2742
2740 /* Break COW */ 2743 /* Break COW */
@@ -2896,8 +2899,7 @@ retry:
2896 if (anon_rmap) { 2899 if (anon_rmap) {
2897 ClearPagePrivate(page); 2900 ClearPagePrivate(page);
2898 hugepage_add_new_anon_rmap(page, vma, address); 2901 hugepage_add_new_anon_rmap(page, vma, address);
2899 } 2902 } else
2900 else
2901 page_dup_rmap(page); 2903 page_dup_rmap(page);
2902 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) 2904 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
2903 && (vma->vm_flags & VM_SHARED))); 2905 && (vma->vm_flags & VM_SHARED)));
@@ -3185,6 +3187,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
3185 BUG_ON(address >= end); 3187 BUG_ON(address >= end);
3186 flush_cache_range(vma, address, end); 3188 flush_cache_range(vma, address, end);
3187 3189
3190 mmu_notifier_invalidate_range_start(mm, start, end);
3188 mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); 3191 mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
3189 for (; address < end; address += huge_page_size(h)) { 3192 for (; address < end; address += huge_page_size(h)) {
3190 spinlock_t *ptl; 3193 spinlock_t *ptl;
@@ -3214,6 +3217,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
3214 */ 3217 */
3215 flush_tlb_range(vma, start, end); 3218 flush_tlb_range(vma, start, end);
3216 mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); 3219 mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
3220 mmu_notifier_invalidate_range_end(mm, start, end);
3217 3221
3218 return pages << h->order; 3222 return pages << h->order;
3219} 3223}
@@ -3518,7 +3522,7 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
3518#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ 3522#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
3519 3523
3520/* Can be overriden by architectures */ 3524/* Can be overriden by architectures */
3521__attribute__((weak)) struct page * 3525struct page * __weak
3522follow_huge_pud(struct mm_struct *mm, unsigned long address, 3526follow_huge_pud(struct mm_struct *mm, unsigned long address,
3523 pud_t *pud, int write) 3527 pud_t *pud, int write)
3524{ 3528{
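
Besides the ptep NULL checks and the mmu_notifier bracketing of hugetlb_change_protection(), the hugetlb.c changes add cond_resched_lock() to the pool-shrinking loop so a long free-list walk under hugetlb_lock no longer stalls other tasks. The pattern generalizes; a sketch with a hypothetical lock and list, not taken from this patch:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(pool_lock);
static LIST_HEAD(pool);

static void drain_pool(void)
{
	spin_lock(&pool_lock);
	while (!list_empty(&pool)) {
		list_del(pool.next);
		/* ... return the entry to the allocator ... */

		/*
		 * If a reschedule is due (or the lock is contended), drop
		 * pool_lock, yield, and retake it -- what cond_resched_lock()
		 * does for hugetlb_lock in the hunk above.
		 */
		cond_resched_lock(&pool_lock);
	}
	spin_unlock(&pool_lock);
}
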
diff --git a/mm/internal.h b/mm/internal.h
index 29e1e761f9eb..07b67361a40a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,7 @@
11#ifndef __MM_INTERNAL_H 11#ifndef __MM_INTERNAL_H
12#define __MM_INTERNAL_H 12#define __MM_INTERNAL_H
13 13
14#include <linux/fs.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15 16
16void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, 17void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
@@ -21,6 +22,20 @@ static inline void set_page_count(struct page *page, int v)
21 atomic_set(&page->_count, v); 22 atomic_set(&page->_count, v);
22} 23}
23 24
25extern int __do_page_cache_readahead(struct address_space *mapping,
26 struct file *filp, pgoff_t offset, unsigned long nr_to_read,
27 unsigned long lookahead_size);
28
29/*
30 * Submit IO for the read-ahead request in file_ra_state.
31 */
32static inline unsigned long ra_submit(struct file_ra_state *ra,
33 struct address_space *mapping, struct file *filp)
34{
35 return __do_page_cache_readahead(mapping, filp,
36 ra->start, ra->size, ra->async_size);
37}
38
24/* 39/*
25 * Turn a non-refcounted page (->_count == 0) into refcounted with 40 * Turn a non-refcounted page (->_count == 0) into refcounted with
26 * a count of one. 41 * a count of one.
@@ -370,5 +385,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
370#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ 385#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
371#define ALLOC_CPUSET 0x40 /* check for correct cpuset */ 386#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
372#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ 387#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
388#define ALLOC_FAIR 0x100 /* fair zone allocation */
373 389
374#endif /* __MM_INTERNAL_H */ 390#endif /* __MM_INTERNAL_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index 7fe5354e7552..e9d6ca9a01a9 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1253,7 +1253,7 @@ phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
1253 pages += end_pfn - start_pfn; 1253 pages += end_pfn - start_pfn;
1254 } 1254 }
1255 1255
1256 return (phys_addr_t)pages << PAGE_SHIFT; 1256 return PFN_PHYS(pages);
1257} 1257}
1258 1258
1259/* lowest address */ 1259/* lowest address */
@@ -1271,16 +1271,14 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
1271 1271
1272void __init memblock_enforce_memory_limit(phys_addr_t limit) 1272void __init memblock_enforce_memory_limit(phys_addr_t limit)
1273{ 1273{
1274 unsigned long i;
1275 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; 1274 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
1275 struct memblock_region *r;
1276 1276
1277 if (!limit) 1277 if (!limit)
1278 return; 1278 return;
1279 1279
1280 /* find out max address */ 1280 /* find out max address */
1281 for (i = 0; i < memblock.memory.cnt; i++) { 1281 for_each_memblock(memory, r) {
1282 struct memblock_region *r = &memblock.memory.regions[i];
1283
1284 if (limit <= r->size) { 1282 if (limit <= r->size) {
1285 max_addr = r->base + limit; 1283 max_addr = r->base + limit;
1286 break; 1284 break;
@@ -1326,7 +1324,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
1326 unsigned long *start_pfn, unsigned long *end_pfn) 1324 unsigned long *start_pfn, unsigned long *end_pfn)
1327{ 1325{
1328 struct memblock_type *type = &memblock.memory; 1326 struct memblock_type *type = &memblock.memory;
1329 int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT); 1327 int mid = memblock_search(type, PFN_PHYS(pfn));
1330 1328
1331 if (mid == -1) 1329 if (mid == -1)
1332 return -1; 1330 return -1;
@@ -1379,13 +1377,12 @@ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t si
1379 1377
1380void __init_memblock memblock_trim_memory(phys_addr_t align) 1378void __init_memblock memblock_trim_memory(phys_addr_t align)
1381{ 1379{
1382 int i;
1383 phys_addr_t start, end, orig_start, orig_end; 1380 phys_addr_t start, end, orig_start, orig_end;
1384 struct memblock_type *mem = &memblock.memory; 1381 struct memblock_region *r;
1385 1382
1386 for (i = 0; i < mem->cnt; i++) { 1383 for_each_memblock(memory, r) {
1387 orig_start = mem->regions[i].base; 1384 orig_start = r->base;
1388 orig_end = mem->regions[i].base + mem->regions[i].size; 1385 orig_end = r->base + r->size;
1389 start = round_up(orig_start, align); 1386 start = round_up(orig_start, align);
1390 end = round_down(orig_end, align); 1387 end = round_down(orig_end, align);
1391 1388
@@ -1393,11 +1390,12 @@ void __init_memblock memblock_trim_memory(phys_addr_t align)
1393 continue; 1390 continue;
1394 1391
1395 if (start < end) { 1392 if (start < end) {
1396 mem->regions[i].base = start; 1393 r->base = start;
1397 mem->regions[i].size = end - start; 1394 r->size = end - start;
1398 } else { 1395 } else {
1399 memblock_remove_region(mem, i); 1396 memblock_remove_region(&memblock.memory,
1400 i--; 1397 r - memblock.memory.regions);
1398 r--;
1401 } 1399 }
1402 } 1400 }
1403} 1401}
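
The memblock.c cleanups replace open-coded index loops with for_each_memblock(), which yields a struct memblock_region pointer directly, and use PFN_PHYS() for the pfn-to-address conversion. A hedged sketch of the same iterator idiom, reduced to a debug dump (not part of the patch):

#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/printk.h>

static void __init dump_memory_regions(void)
{
	struct memblock_region *r;

	/* Walk every registered "memory" region, as the converted loops do. */
	for_each_memblock(memory, r) {
		phys_addr_t end = r->base + r->size;

		pr_info("memblock: %pa - %pa\n", &r->base, &end);
	}
}
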
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dcc8153a1681..29501f040568 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -921,8 +921,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
921 struct page *page, 921 struct page *page,
922 bool anon, int nr_pages) 922 bool anon, int nr_pages)
923{ 923{
924 preempt_disable();
925
926 /* 924 /*
927 * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is 925 * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
928 * counted as CACHE even if it's on ANON LRU. 926 * counted as CACHE even if it's on ANON LRU.
@@ -947,8 +945,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
947 } 945 }
948 946
949 __this_cpu_add(memcg->stat->nr_page_events, nr_pages); 947 __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
950
951 preempt_enable();
952} 948}
953 949
954unsigned long 950unsigned long
@@ -1075,22 +1071,15 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
1075 return mem_cgroup_from_css(task_css(p, memory_cgrp_id)); 1071 return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
1076} 1072}
1077 1073
1078struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) 1074static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
1079{ 1075{
1080 struct mem_cgroup *memcg = NULL; 1076 struct mem_cgroup *memcg = NULL;
1081 1077
1082 if (!mm)
1083 return NULL;
1084 /*
1085 * Because we have no locks, mm->owner's may be being moved to other
1086 * cgroup. We use css_tryget() here even if this looks
1087 * pessimistic (rather than adding locks here).
1088 */
1089 rcu_read_lock(); 1078 rcu_read_lock();
1090 do { 1079 do {
1091 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); 1080 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
1092 if (unlikely(!memcg)) 1081 if (unlikely(!memcg))
1093 break; 1082 memcg = root_mem_cgroup;
1094 } while (!css_tryget(&memcg->css)); 1083 } while (!css_tryget(&memcg->css));
1095 rcu_read_unlock(); 1084 rcu_read_unlock();
1096 return memcg; 1085 return memcg;
@@ -1486,7 +1475,7 @@ bool task_in_mem_cgroup(struct task_struct *task,
1486 1475
1487 p = find_lock_task_mm(task); 1476 p = find_lock_task_mm(task);
1488 if (p) { 1477 if (p) {
1489 curr = try_get_mem_cgroup_from_mm(p->mm); 1478 curr = get_mem_cgroup_from_mm(p->mm);
1490 task_unlock(p); 1479 task_unlock(p);
1491 } else { 1480 } else {
1492 /* 1481 /*
@@ -1500,8 +1489,6 @@ bool task_in_mem_cgroup(struct task_struct *task,
1500 css_get(&curr->css); 1489 css_get(&curr->css);
1501 rcu_read_unlock(); 1490 rcu_read_unlock();
1502 } 1491 }
1503 if (!curr)
1504 return false;
1505 /* 1492 /*
1506 * We should check use_hierarchy of "memcg" not "curr". Because checking 1493 * We should check use_hierarchy of "memcg" not "curr". Because checking
1507 * use_hierarchy of "curr" here make this function true if hierarchy is 1494 * use_hierarchy of "curr" here make this function true if hierarchy is
@@ -2588,7 +2575,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
2588} 2575}
2589 2576
2590 2577
2591/* See __mem_cgroup_try_charge() for details */ 2578/* See mem_cgroup_try_charge() for details */
2592enum { 2579enum {
2593 CHARGE_OK, /* success */ 2580 CHARGE_OK, /* success */
2594 CHARGE_RETRY, /* need to retry but retry is not bad */ 2581 CHARGE_RETRY, /* need to retry but retry is not bad */
@@ -2661,45 +2648,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2661 return CHARGE_NOMEM; 2648 return CHARGE_NOMEM;
2662} 2649}
2663 2650
2664/* 2651/**
2665 * __mem_cgroup_try_charge() does 2652 * mem_cgroup_try_charge - try charging a memcg
2666 * 1. detect memcg to be charged against from passed *mm and *ptr, 2653 * @memcg: memcg to charge
2667 * 2. update res_counter 2654 * @nr_pages: number of pages to charge
2668 * 3. call memory reclaim if necessary. 2655 * @oom: trigger OOM if reclaim fails
2669 *
2670 * In some special case, if the task is fatal, fatal_signal_pending() or
2671 * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
2672 * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
2673 * as possible without any hazards. 2: all pages should have a valid
2674 * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
2675 * pointer, that is treated as a charge to root_mem_cgroup.
2676 *
2677 * So __mem_cgroup_try_charge() will return
2678 * 0 ... on success, filling *ptr with a valid memcg pointer.
2679 * -ENOMEM ... charge failure because of resource limits.
2680 * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup.
2681 * 2656 *
2682 * Unlike the exported interface, an "oom" parameter is added. if oom==true, 2657 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2683 * the oom-killer can be invoked. 2658 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2684 */ 2659 */
2685static int __mem_cgroup_try_charge(struct mm_struct *mm, 2660static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2686 gfp_t gfp_mask, 2661 gfp_t gfp_mask,
2687 unsigned int nr_pages, 2662 unsigned int nr_pages,
2688 struct mem_cgroup **ptr, 2663 bool oom)
2689 bool oom)
2690{ 2664{
2691 unsigned int batch = max(CHARGE_BATCH, nr_pages); 2665 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2692 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2666 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2693 struct mem_cgroup *memcg = NULL;
2694 int ret; 2667 int ret;
2695 2668
2669 if (mem_cgroup_is_root(memcg))
2670 goto done;
2696 /* 2671 /*
2697 * Unlike gloval-vm's OOM-kill, we're not in memory shortage 2672 * Unlike in global OOM situations, memcg is not in a physical
2698 * in system level. So, allow to go ahead dying process in addition to 2673 * memory shortage. Allow dying and OOM-killed tasks to
2699 * MEMDIE process. 2674 * bypass the last charges so that they can exit quickly and
2675 * free their memory.
2700 */ 2676 */
2701 if (unlikely(test_thread_flag(TIF_MEMDIE) 2677 if (unlikely(test_thread_flag(TIF_MEMDIE) ||
2702 || fatal_signal_pending(current))) 2678 fatal_signal_pending(current)))
2703 goto bypass; 2679 goto bypass;
2704 2680
2705 if (unlikely(task_in_memcg_oom(current))) 2681 if (unlikely(task_in_memcg_oom(current)))
@@ -2707,73 +2683,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
2707 2683
2708 if (gfp_mask & __GFP_NOFAIL) 2684 if (gfp_mask & __GFP_NOFAIL)
2709 oom = false; 2685 oom = false;
2710
2711 /*
2712 * We always charge the cgroup the mm_struct belongs to.
2713 * The mm_struct's mem_cgroup changes on task migration if the
2714 * thread group leader migrates. It's possible that mm is not
2715 * set, if so charge the root memcg (happens for pagecache usage).
2716 */
2717 if (!*ptr && !mm)
2718 *ptr = root_mem_cgroup;
2719again: 2686again:
2720 if (*ptr) { /* css should be a valid one */ 2687 if (consume_stock(memcg, nr_pages))
2721 memcg = *ptr; 2688 goto done;
2722 if (mem_cgroup_is_root(memcg))
2723 goto done;
2724 if (consume_stock(memcg, nr_pages))
2725 goto done;
2726 css_get(&memcg->css);
2727 } else {
2728 struct task_struct *p;
2729
2730 rcu_read_lock();
2731 p = rcu_dereference(mm->owner);
2732 /*
2733 * Because we don't have task_lock(), "p" can exit.
2734 * In that case, "memcg" can point to root or p can be NULL with
2735 * race with swapoff. Then, we have small risk of mis-accouning.
2736 * But such kind of mis-account by race always happens because
2737 * we don't have cgroup_mutex(). It's overkill and we allo that
2738 * small race, here.
2739 * (*) swapoff at el will charge against mm-struct not against
2740 * task-struct. So, mm->owner can be NULL.
2741 */
2742 memcg = mem_cgroup_from_task(p);
2743 if (!memcg)
2744 memcg = root_mem_cgroup;
2745 if (mem_cgroup_is_root(memcg)) {
2746 rcu_read_unlock();
2747 goto done;
2748 }
2749 if (consume_stock(memcg, nr_pages)) {
2750 /*
2751 * It seems dagerous to access memcg without css_get().
2752 * But considering how consume_stok works, it's not
2753 * necessary. If consume_stock success, some charges
2754 * from this memcg are cached on this cpu. So, we
2755 * don't need to call css_get()/css_tryget() before
2756 * calling consume_stock().
2757 */
2758 rcu_read_unlock();
2759 goto done;
2760 }
2761 /* after here, we may be blocked. we need to get refcnt */
2762 if (!css_tryget(&memcg->css)) {
2763 rcu_read_unlock();
2764 goto again;
2765 }
2766 rcu_read_unlock();
2767 }
2768 2689
2769 do { 2690 do {
2770 bool invoke_oom = oom && !nr_oom_retries; 2691 bool invoke_oom = oom && !nr_oom_retries;
2771 2692
2772 /* If killed, bypass charge */ 2693 /* If killed, bypass charge */
2773 if (fatal_signal_pending(current)) { 2694 if (fatal_signal_pending(current))
2774 css_put(&memcg->css);
2775 goto bypass; 2695 goto bypass;
2776 }
2777 2696
2778 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, 2697 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
2779 nr_pages, invoke_oom); 2698 nr_pages, invoke_oom);
@@ -2782,17 +2701,12 @@ again:
2782 break; 2701 break;
2783 case CHARGE_RETRY: /* not in OOM situation but retry */ 2702 case CHARGE_RETRY: /* not in OOM situation but retry */
2784 batch = nr_pages; 2703 batch = nr_pages;
2785 css_put(&memcg->css);
2786 memcg = NULL;
2787 goto again; 2704 goto again;
2788 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ 2705 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
2789 css_put(&memcg->css);
2790 goto nomem; 2706 goto nomem;
2791 case CHARGE_NOMEM: /* OOM routine works */ 2707 case CHARGE_NOMEM: /* OOM routine works */
2792 if (!oom || invoke_oom) { 2708 if (!oom || invoke_oom)
2793 css_put(&memcg->css);
2794 goto nomem; 2709 goto nomem;
2795 }
2796 nr_oom_retries--; 2710 nr_oom_retries--;
2797 break; 2711 break;
2798 } 2712 }
@@ -2800,20 +2714,44 @@ again:
2800 2714
2801 if (batch > nr_pages) 2715 if (batch > nr_pages)
2802 refill_stock(memcg, batch - nr_pages); 2716 refill_stock(memcg, batch - nr_pages);
2803 css_put(&memcg->css);
2804done: 2717done:
2805 *ptr = memcg;
2806 return 0; 2718 return 0;
2807nomem: 2719nomem:
2808 if (!(gfp_mask & __GFP_NOFAIL)) { 2720 if (!(gfp_mask & __GFP_NOFAIL))
2809 *ptr = NULL;
2810 return -ENOMEM; 2721 return -ENOMEM;
2811 }
2812bypass: 2722bypass:
2813 *ptr = root_mem_cgroup;
2814 return -EINTR; 2723 return -EINTR;
2815} 2724}
2816 2725
2726/**
2727 * mem_cgroup_try_charge_mm - try charging a mm
2728 * @mm: mm_struct to charge
2729 * @nr_pages: number of pages to charge
2730 * @oom: trigger OOM if reclaim fails
2731 *
2732 * Returns the charged mem_cgroup associated with the given mm_struct or
2733 * NULL the charge failed.
2734 */
2735static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2736 gfp_t gfp_mask,
2737 unsigned int nr_pages,
2738 bool oom)
2739
2740{
2741 struct mem_cgroup *memcg;
2742 int ret;
2743
2744 memcg = get_mem_cgroup_from_mm(mm);
2745 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
2746 css_put(&memcg->css);
2747 if (ret == -EINTR)
2748 memcg = root_mem_cgroup;
2749 else if (ret)
2750 memcg = NULL;
2751
2752 return memcg;
2753}
2754
2817/* 2755/*
2818 * Somemtimes we have to undo a charge we got by try_charge(). 2756 * Somemtimes we have to undo a charge we got by try_charge().
2819 * This function is for that and do uncharge, put css's refcnt. 2757 * This function is for that and do uncharge, put css's refcnt.
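
To summarize the refactor above: mem_cgroup_try_charge() now charges an already-resolved memcg, while mem_cgroup_try_charge_mm() resolves the memcg from the mm via get_mem_cgroup_from_mm() and folds the -EINTR bypass into its return value (root_mem_cgroup on bypass, NULL on failure). A condensed sketch of how a charging path inside memcontrol.c is expected to use the mm variant, based only on the kerneldoc above; the caller name is invented:

/* Sketch only -- mirrors the documented contract of mem_cgroup_try_charge_mm(). */
static int charge_one_page(struct mm_struct *mm, struct page *page, gfp_t gfp)
{
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_try_charge_mm(mm, gfp, 1, true);
	if (!memcg)
		return -ENOMEM;		/* charge against the resolved memcg failed */

	/* ... commit the charge to the page's page_cgroup here ... */
	return 0;
}
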
@@ -3009,20 +2947,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
3009static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) 2947static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
3010{ 2948{
3011 struct res_counter *fail_res; 2949 struct res_counter *fail_res;
3012 struct mem_cgroup *_memcg;
3013 int ret = 0; 2950 int ret = 0;
3014 2951
3015 ret = res_counter_charge(&memcg->kmem, size, &fail_res); 2952 ret = res_counter_charge(&memcg->kmem, size, &fail_res);
3016 if (ret) 2953 if (ret)
3017 return ret; 2954 return ret;
3018 2955
3019 _memcg = memcg; 2956 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
3020 ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT, 2957 oom_gfp_allowed(gfp));
3021 &_memcg, oom_gfp_allowed(gfp));
3022
3023 if (ret == -EINTR) { 2958 if (ret == -EINTR) {
3024 /* 2959 /*
3025 * __mem_cgroup_try_charge() chosed to bypass to root due to 2960 * mem_cgroup_try_charge() chosed to bypass to root due to
3026 * OOM kill or fatal signal. Since our only options are to 2961 * OOM kill or fatal signal. Since our only options are to
3027 * either fail the allocation or charge it to this cgroup, do 2962 * either fail the allocation or charge it to this cgroup, do
3028 * it as a temporary condition. But we can't fail. From a 2963 * it as a temporary condition. But we can't fail. From a
@@ -3032,7 +2967,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
3032 * 2967 *
3033 * This condition will only trigger if the task entered 2968 * This condition will only trigger if the task entered
3034 * memcg_charge_kmem in a sane state, but was OOM-killed during 2969 * memcg_charge_kmem in a sane state, but was OOM-killed during
3035 * __mem_cgroup_try_charge() above. Tasks that were already 2970 * mem_cgroup_try_charge() above. Tasks that were already
3036 * dying when the allocation triggers should have been already 2971 * dying when the allocation triggers should have been already
3037 * directed to the root cgroup in memcontrol.h 2972 * directed to the root cgroup in memcontrol.h
3038 */ 2973 */
@@ -3159,6 +3094,29 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
3159 return 0; 3094 return 0;
3160} 3095}
3161 3096
3097char *memcg_create_cache_name(struct mem_cgroup *memcg,
3098 struct kmem_cache *root_cache)
3099{
3100 static char *buf = NULL;
3101
3102 /*
3103 * We need a mutex here to protect the shared buffer. Since this is
3104 * expected to be called only on cache creation, we can employ the
3105 * slab_mutex for that purpose.
3106 */
3107 lockdep_assert_held(&slab_mutex);
3108
3109 if (!buf) {
3110 buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
3111 if (!buf)
3112 return NULL;
3113 }
3114
3115 cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1);
3116 return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
3117 memcg_cache_id(memcg), buf);
3118}
3119
3162int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, 3120int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
3163 struct kmem_cache *root_cache) 3121 struct kmem_cache *root_cache)
3164{ 3122{
@@ -3182,6 +3140,7 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
3182 s->memcg_params->root_cache = root_cache; 3140 s->memcg_params->root_cache = root_cache;
3183 INIT_WORK(&s->memcg_params->destroy, 3141 INIT_WORK(&s->memcg_params->destroy,
3184 kmem_cache_destroy_work_func); 3142 kmem_cache_destroy_work_func);
3143 css_get(&memcg->css);
3185 } else 3144 } else
3186 s->memcg_params->is_root_cache = true; 3145 s->memcg_params->is_root_cache = true;
3187 3146
@@ -3190,6 +3149,10 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
3190 3149
3191void memcg_free_cache_params(struct kmem_cache *s) 3150void memcg_free_cache_params(struct kmem_cache *s)
3192{ 3151{
3152 if (!s->memcg_params)
3153 return;
3154 if (!s->memcg_params->is_root_cache)
3155 css_put(&s->memcg_params->memcg->css);
3193 kfree(s->memcg_params); 3156 kfree(s->memcg_params);
3194} 3157}
3195 3158
@@ -3212,9 +3175,6 @@ void memcg_register_cache(struct kmem_cache *s)
3212 memcg = s->memcg_params->memcg; 3175 memcg = s->memcg_params->memcg;
3213 id = memcg_cache_id(memcg); 3176 id = memcg_cache_id(memcg);
3214 3177
3215 css_get(&memcg->css);
3216
3217
3218 /* 3178 /*
3219 * Since readers won't lock (see cache_from_memcg_idx()), we need a 3179 * Since readers won't lock (see cache_from_memcg_idx()), we need a
3220 * barrier here to ensure nobody will see the kmem_cache partially 3180 * barrier here to ensure nobody will see the kmem_cache partially
@@ -3263,10 +3223,8 @@ void memcg_unregister_cache(struct kmem_cache *s)
3263 * after removing it from the memcg_slab_caches list, otherwise we can 3223 * after removing it from the memcg_slab_caches list, otherwise we can
3264 * fail to convert memcg_params_to_cache() while traversing the list. 3224 * fail to convert memcg_params_to_cache() while traversing the list.
3265 */ 3225 */
3266 VM_BUG_ON(!root->memcg_params->memcg_caches[id]); 3226 VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
3267 root->memcg_params->memcg_caches[id] = NULL; 3227 root->memcg_params->memcg_caches[id] = NULL;
3268
3269 css_put(&memcg->css);
3270} 3228}
3271 3229
3272/* 3230/*
@@ -3363,55 +3321,10 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
3363 schedule_work(&cachep->memcg_params->destroy); 3321 schedule_work(&cachep->memcg_params->destroy);
3364} 3322}
3365 3323
3366static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, 3324int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3367 struct kmem_cache *s)
3368{
3369 struct kmem_cache *new = NULL;
3370 static char *tmp_path = NULL, *tmp_name = NULL;
3371 static DEFINE_MUTEX(mutex); /* protects tmp_name */
3372
3373 BUG_ON(!memcg_can_account_kmem(memcg));
3374
3375 mutex_lock(&mutex);
3376 /*
3377 * kmem_cache_create_memcg duplicates the given name and
3378 * cgroup_name for this name requires RCU context.
3379 * This static temporary buffer is used to prevent from
3380 * pointless shortliving allocation.
3381 */
3382 if (!tmp_path || !tmp_name) {
3383 if (!tmp_path)
3384 tmp_path = kmalloc(PATH_MAX, GFP_KERNEL);
3385 if (!tmp_name)
3386 tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
3387 if (!tmp_path || !tmp_name)
3388 goto out;
3389 }
3390
3391 cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1);
3392 snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name,
3393 memcg_cache_id(memcg), tmp_name);
3394
3395 new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align,
3396 (s->flags & ~SLAB_PANIC), s->ctor, s);
3397 if (new)
3398 new->allocflags |= __GFP_KMEMCG;
3399 else
3400 new = s;
3401out:
3402 mutex_unlock(&mutex);
3403 return new;
3404}
3405
3406void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3407{ 3325{
3408 struct kmem_cache *c; 3326 struct kmem_cache *c;
3409 int i; 3327 int i, failed = 0;
3410
3411 if (!s->memcg_params)
3412 return;
3413 if (!s->memcg_params->is_root_cache)
3414 return;
3415 3328
3416 /* 3329 /*
3417 * If the cache is being destroyed, we trust that there is no one else 3330 * If the cache is being destroyed, we trust that there is no one else
@@ -3445,16 +3358,14 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3445 c->memcg_params->dead = false; 3358 c->memcg_params->dead = false;
3446 cancel_work_sync(&c->memcg_params->destroy); 3359 cancel_work_sync(&c->memcg_params->destroy);
3447 kmem_cache_destroy(c); 3360 kmem_cache_destroy(c);
3361
3362 if (cache_from_memcg_idx(s, i))
3363 failed++;
3448 } 3364 }
3449 mutex_unlock(&activate_kmem_mutex); 3365 mutex_unlock(&activate_kmem_mutex);
3366 return failed;
3450} 3367}
3451 3368
3452struct create_work {
3453 struct mem_cgroup *memcg;
3454 struct kmem_cache *cachep;
3455 struct work_struct work;
3456};
3457
3458static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) 3369static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3459{ 3370{
3460 struct kmem_cache *cachep; 3371 struct kmem_cache *cachep;
@@ -3472,13 +3383,20 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3472 mutex_unlock(&memcg->slab_caches_mutex); 3383 mutex_unlock(&memcg->slab_caches_mutex);
3473} 3384}
3474 3385
3386struct create_work {
3387 struct mem_cgroup *memcg;
3388 struct kmem_cache *cachep;
3389 struct work_struct work;
3390};
3391
3475static void memcg_create_cache_work_func(struct work_struct *w) 3392static void memcg_create_cache_work_func(struct work_struct *w)
3476{ 3393{
3477 struct create_work *cw; 3394 struct create_work *cw = container_of(w, struct create_work, work);
3395 struct mem_cgroup *memcg = cw->memcg;
3396 struct kmem_cache *cachep = cw->cachep;
3478 3397
3479 cw = container_of(w, struct create_work, work); 3398 kmem_cache_create_memcg(memcg, cachep);
3480 memcg_create_kmem_cache(cw->memcg, cw->cachep); 3399 css_put(&memcg->css);
3481 css_put(&cw->memcg->css);
3482 kfree(cw); 3400 kfree(cw);
3483} 3401}
3484 3402
@@ -3637,15 +3555,7 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
3637 if (!current->mm || current->memcg_kmem_skip_account) 3555 if (!current->mm || current->memcg_kmem_skip_account)
3638 return true; 3556 return true;
3639 3557
3640 memcg = try_get_mem_cgroup_from_mm(current->mm); 3558 memcg = get_mem_cgroup_from_mm(current->mm);
3641
3642 /*
3643 * very rare case described in mem_cgroup_from_task. Unfortunately there
3644 * isn't much we can do without complicating this too much, and it would
3645 * be gfp-dependent anyway. Just let it go
3646 */
3647 if (unlikely(!memcg))
3648 return true;
3649 3559
3650 if (!memcg_can_account_kmem(memcg)) { 3560 if (!memcg_can_account_kmem(memcg)) {
3651 css_put(&memcg->css); 3561 css_put(&memcg->css);
@@ -3748,19 +3658,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
3748} 3658}
3749#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 3659#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
3750 3660
3751static inline
3752void mem_cgroup_move_account_page_stat(struct mem_cgroup *from,
3753 struct mem_cgroup *to,
3754 unsigned int nr_pages,
3755 enum mem_cgroup_stat_index idx)
3756{
3757 /* Update stat data for mem_cgroup */
3758 preempt_disable();
3759 __this_cpu_sub(from->stat->count[idx], nr_pages);
3760 __this_cpu_add(to->stat->count[idx], nr_pages);
3761 preempt_enable();
3762}
3763
3764/** 3661/**
3765 * mem_cgroup_move_account - move account of the page 3662 * mem_cgroup_move_account - move account of the page
3766 * @page: the page 3663 * @page: the page
@@ -3806,13 +3703,19 @@ static int mem_cgroup_move_account(struct page *page,
3806 3703
3807 move_lock_mem_cgroup(from, &flags); 3704 move_lock_mem_cgroup(from, &flags);
3808 3705
3809 if (!anon && page_mapped(page)) 3706 if (!anon && page_mapped(page)) {
3810 mem_cgroup_move_account_page_stat(from, to, nr_pages, 3707 __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
3811 MEM_CGROUP_STAT_FILE_MAPPED); 3708 nr_pages);
3709 __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
3710 nr_pages);
3711 }
3812 3712
3813 if (PageWriteback(page)) 3713 if (PageWriteback(page)) {
3814 mem_cgroup_move_account_page_stat(from, to, nr_pages, 3714 __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK],
3815 MEM_CGROUP_STAT_WRITEBACK); 3715 nr_pages);
3716 __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK],
3717 nr_pages);
3718 }
3816 3719
3817 mem_cgroup_charge_statistics(from, page, anon, -nr_pages); 3720 mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
3818 3721
@@ -3898,19 +3801,19 @@ out:
3898 return ret; 3801 return ret;
3899} 3802}
3900 3803
3901/* 3804int mem_cgroup_charge_anon(struct page *page,
3902 * Charge the memory controller for page usage. 3805 struct mm_struct *mm, gfp_t gfp_mask)
3903 * Return
3904 * 0 if the charge was successful
3905 * < 0 if the cgroup is over its limit
3906 */
3907static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
3908 gfp_t gfp_mask, enum charge_type ctype)
3909{ 3806{
3910 struct mem_cgroup *memcg = NULL;
3911 unsigned int nr_pages = 1; 3807 unsigned int nr_pages = 1;
3808 struct mem_cgroup *memcg;
3912 bool oom = true; 3809 bool oom = true;
3913 int ret; 3810
3811 if (mem_cgroup_disabled())
3812 return 0;
3813
3814 VM_BUG_ON_PAGE(page_mapped(page), page);
3815 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
3816 VM_BUG_ON(!mm);
3914 3817
3915 if (PageTransHuge(page)) { 3818 if (PageTransHuge(page)) {
3916 nr_pages <<= compound_order(page); 3819 nr_pages <<= compound_order(page);
@@ -3922,25 +3825,14 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
3922 oom = false; 3825 oom = false;
3923 } 3826 }
3924 3827
3925 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); 3828 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
3926 if (ret == -ENOMEM) 3829 if (!memcg)
3927 return ret; 3830 return -ENOMEM;
3928 __mem_cgroup_commit_charge(memcg, page, nr_pages, ctype, false); 3831 __mem_cgroup_commit_charge(memcg, page, nr_pages,
3832 MEM_CGROUP_CHARGE_TYPE_ANON, false);
3929 return 0; 3833 return 0;
3930} 3834}
3931 3835
3932int mem_cgroup_newpage_charge(struct page *page,
3933 struct mm_struct *mm, gfp_t gfp_mask)
3934{
3935 if (mem_cgroup_disabled())
3936 return 0;
3937 VM_BUG_ON_PAGE(page_mapped(page), page);
3938 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
3939 VM_BUG_ON(!mm);
3940 return mem_cgroup_charge_common(page, mm, gfp_mask,
3941 MEM_CGROUP_CHARGE_TYPE_ANON);
3942}
3943
3944/* 3836/*
3945 * While swap-in, try_charge -> commit or cancel, the page is locked. 3837 * While swap-in, try_charge -> commit or cancel, the page is locked.
3946 * And when try_charge() successfully returns, one refcnt to memcg without 3838 * And when try_charge() successfully returns, one refcnt to memcg without
@@ -3952,7 +3844,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3952 gfp_t mask, 3844 gfp_t mask,
3953 struct mem_cgroup **memcgp) 3845 struct mem_cgroup **memcgp)
3954{ 3846{
3955 struct mem_cgroup *memcg; 3847 struct mem_cgroup *memcg = NULL;
3956 struct page_cgroup *pc; 3848 struct page_cgroup *pc;
3957 int ret; 3849 int ret;
3958 3850
@@ -3965,31 +3857,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3965 * in turn serializes uncharging. 3857 * in turn serializes uncharging.
3966 */ 3858 */
3967 if (PageCgroupUsed(pc)) 3859 if (PageCgroupUsed(pc))
3968 return 0; 3860 goto out;
3969 if (!do_swap_account) 3861 if (do_swap_account)
3970 goto charge_cur_mm; 3862 memcg = try_get_mem_cgroup_from_page(page);
3971 memcg = try_get_mem_cgroup_from_page(page);
3972 if (!memcg) 3863 if (!memcg)
3973 goto charge_cur_mm; 3864 memcg = get_mem_cgroup_from_mm(mm);
3974 *memcgp = memcg; 3865 ret = mem_cgroup_try_charge(memcg, mask, 1, true);
3975 ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
3976 css_put(&memcg->css); 3866 css_put(&memcg->css);
3977 if (ret == -EINTR) 3867 if (ret == -EINTR)
3978 ret = 0; 3868 memcg = root_mem_cgroup;
3979 return ret; 3869 else if (ret)
3980charge_cur_mm: 3870 return ret;
3981 ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); 3871out:
3982 if (ret == -EINTR) 3872 *memcgp = memcg;
3983 ret = 0; 3873 return 0;
3984 return ret;
3985} 3874}
3986 3875
3987int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, 3876int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
3988 gfp_t gfp_mask, struct mem_cgroup **memcgp) 3877 gfp_t gfp_mask, struct mem_cgroup **memcgp)
3989{ 3878{
3990 *memcgp = NULL; 3879 if (mem_cgroup_disabled()) {
3991 if (mem_cgroup_disabled()) 3880 *memcgp = NULL;
3992 return 0; 3881 return 0;
3882 }
3993 /* 3883 /*
3994 * A racing thread's fault, or swapoff, may have already 3884 * A racing thread's fault, or swapoff, may have already
3995 * updated the pte, and even removed page from swap cache: in 3885 * updated the pte, and even removed page from swap cache: in
@@ -3997,12 +3887,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
3997 * there's also a KSM case which does need to charge the page. 3887 * there's also a KSM case which does need to charge the page.
3998 */ 3888 */
3999 if (!PageSwapCache(page)) { 3889 if (!PageSwapCache(page)) {
4000 int ret; 3890 struct mem_cgroup *memcg;
4001 3891
4002 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true); 3892 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
4003 if (ret == -EINTR) 3893 if (!memcg)
4004 ret = 0; 3894 return -ENOMEM;
4005 return ret; 3895 *memcgp = memcg;
3896 return 0;
4006 } 3897 }
4007 return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); 3898 return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
4008} 3899}
@@ -4046,11 +3937,11 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
4046 MEM_CGROUP_CHARGE_TYPE_ANON); 3937 MEM_CGROUP_CHARGE_TYPE_ANON);
4047} 3938}
4048 3939
4049int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 3940int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
4050 gfp_t gfp_mask) 3941 gfp_t gfp_mask)
4051{ 3942{
4052 struct mem_cgroup *memcg = NULL;
4053 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; 3943 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3944 struct mem_cgroup *memcg;
4054 int ret; 3945 int ret;
4055 3946
4056 if (mem_cgroup_disabled()) 3947 if (mem_cgroup_disabled())
@@ -4058,15 +3949,28 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
4058 if (PageCompound(page)) 3949 if (PageCompound(page))
4059 return 0; 3950 return 0;
4060 3951
4061 if (!PageSwapCache(page)) 3952 if (PageSwapCache(page)) { /* shmem */
4062 ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
4063 else { /* page is swapcache/shmem */
4064 ret = __mem_cgroup_try_charge_swapin(mm, page, 3953 ret = __mem_cgroup_try_charge_swapin(mm, page,
4065 gfp_mask, &memcg); 3954 gfp_mask, &memcg);
4066 if (!ret) 3955 if (ret)
4067 __mem_cgroup_commit_charge_swapin(page, memcg, type); 3956 return ret;
3957 __mem_cgroup_commit_charge_swapin(page, memcg, type);
3958 return 0;
4068 } 3959 }
4069 return ret; 3960
3961 /*
3962 * Page cache insertions can happen without an actual mm
3963 * context, e.g. during disk probing on boot.
3964 */
3965 if (unlikely(!mm))
3966 memcg = root_mem_cgroup;
3967 else {
3968 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
3969 if (!memcg)
3970 return -ENOMEM;
3971 }
3972 __mem_cgroup_commit_charge(memcg, page, 1, type, false);
3973 return 0;
4070} 3974}
4071 3975
4072static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, 3976static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -6678,8 +6582,7 @@ one_by_one:
6678 batch_count = PRECHARGE_COUNT_AT_ONCE; 6582 batch_count = PRECHARGE_COUNT_AT_ONCE;
6679 cond_resched(); 6583 cond_resched();
6680 } 6584 }
6681 ret = __mem_cgroup_try_charge(NULL, 6585 ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
6682 GFP_KERNEL, 1, &memcg, false);
6683 if (ret) 6586 if (ret)
6684 /* mem_cgroup_clear_mc() will do uncharge later */ 6587 /* mem_cgroup_clear_mc() will do uncharge later */
6685 return ret; 6588 return ret;
diff --git a/mm/memory.c b/mm/memory.c
index 82c1e4cf00d1..d0f0bef3be48 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -60,6 +60,7 @@
60#include <linux/migrate.h> 60#include <linux/migrate.h>
61#include <linux/string.h> 61#include <linux/string.h>
62#include <linux/dma-debug.h> 62#include <linux/dma-debug.h>
63#include <linux/debugfs.h>
63 64
64#include <asm/io.h> 65#include <asm/io.h>
65#include <asm/pgalloc.h> 66#include <asm/pgalloc.h>
@@ -1320,9 +1321,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
1320 * It is undesirable to test vma->vm_file as it 1321 * It is undesirable to test vma->vm_file as it
1321 * should be non-null for valid hugetlb area. 1322 * should be non-null for valid hugetlb area.
1322 * However, vm_file will be NULL in the error 1323 * However, vm_file will be NULL in the error
1323 * cleanup path of do_mmap_pgoff. When 1324 * cleanup path of mmap_region. When
1324 * hugetlbfs ->mmap method fails, 1325 * hugetlbfs ->mmap method fails,
1325 * do_mmap_pgoff() nullifies vma->vm_file 1326 * mmap_region() nullifies vma->vm_file
1326 * before calling this function to clean up. 1327 * before calling this function to clean up.
1327 * Since no pte has actually been setup, it is 1328 * Since no pte has actually been setup, it is
1328 * safe to do nothing in this case. 1329 * safe to do nothing in this case.
@@ -2781,7 +2782,7 @@ reuse:
2781 */ 2782 */
2782 if (!page_mkwrite) { 2783 if (!page_mkwrite) {
2783 wait_on_page_locked(dirty_page); 2784 wait_on_page_locked(dirty_page);
2784 set_page_dirty_balance(dirty_page, page_mkwrite); 2785 set_page_dirty_balance(dirty_page);
2785 /* file_update_time outside page_lock */ 2786 /* file_update_time outside page_lock */
2786 if (vma->vm_file) 2787 if (vma->vm_file)
2787 file_update_time(vma->vm_file); 2788 file_update_time(vma->vm_file);
@@ -2827,7 +2828,7 @@ gotten:
2827 } 2828 }
2828 __SetPageUptodate(new_page); 2829 __SetPageUptodate(new_page);
2829 2830
2830 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) 2831 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
2831 goto oom_free_new; 2832 goto oom_free_new;
2832 2833
2833 mmun_start = address & PAGE_MASK; 2834 mmun_start = address & PAGE_MASK;
@@ -3280,7 +3281,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
3280 */ 3281 */
3281 __SetPageUptodate(page); 3282 __SetPageUptodate(page);
3282 3283
3283 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) 3284 if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
3284 goto oom_free_page; 3285 goto oom_free_page;
3285 3286
3286 entry = mk_pte(page, vma->vm_page_prot); 3287 entry = mk_pte(page, vma->vm_page_prot);
@@ -3342,7 +3343,22 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
3342 return ret; 3343 return ret;
3343} 3344}
3344 3345
3345static void do_set_pte(struct vm_area_struct *vma, unsigned long address, 3346/**
3347 * do_set_pte - setup new PTE entry for given page and add reverse page mapping.
3348 *
3349 * @vma: virtual memory area
3350 * @address: user virtual address
3351 * @page: page to map
3352 * @pte: pointer to target page table entry
3353 * @write: true, if new entry is writable
3354 * @anon: true, if it's anonymous page
3355 *
3356 * Caller must hold page table lock relevant for @pte.
3357 *
3358 * Target users are page handler itself and implementations of
3359 * vm_ops->map_pages.
3360 */
3361void do_set_pte(struct vm_area_struct *vma, unsigned long address,
3346 struct page *page, pte_t *pte, bool write, bool anon) 3362 struct page *page, pte_t *pte, bool write, bool anon)
3347{ 3363{
3348 pte_t entry; 3364 pte_t entry;
@@ -3366,6 +3382,105 @@ static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
3366 update_mmu_cache(vma, address, pte); 3382 update_mmu_cache(vma, address, pte);
3367} 3383}
3368 3384
3385#define FAULT_AROUND_ORDER 4
3386
3387#ifdef CONFIG_DEBUG_FS
3388static unsigned int fault_around_order = FAULT_AROUND_ORDER;
3389
3390static int fault_around_order_get(void *data, u64 *val)
3391{
3392 *val = fault_around_order;
3393 return 0;
3394}
3395
3396static int fault_around_order_set(void *data, u64 val)
3397{
3398 BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE);
3399 if (1UL << val > PTRS_PER_PTE)
3400 return -EINVAL;
3401 fault_around_order = val;
3402 return 0;
3403}
3404DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops,
3405 fault_around_order_get, fault_around_order_set, "%llu\n");
3406
3407static int __init fault_around_debugfs(void)
3408{
3409 void *ret;
3410
3411 ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL,
3412 &fault_around_order_fops);
3413 if (!ret)
 3414		pr_warn("Failed to create fault_around_order in debugfs\n");
3415 return 0;
3416}
3417late_initcall(fault_around_debugfs);
3418
3419static inline unsigned long fault_around_pages(void)
3420{
3421 return 1UL << fault_around_order;
3422}
3423
3424static inline unsigned long fault_around_mask(void)
3425{
3426 return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1);
3427}
3428#else
3429static inline unsigned long fault_around_pages(void)
3430{
3431 unsigned long nr_pages;
3432
3433 nr_pages = 1UL << FAULT_AROUND_ORDER;
3434 BUILD_BUG_ON(nr_pages > PTRS_PER_PTE);
3435 return nr_pages;
3436}
3437
3438static inline unsigned long fault_around_mask(void)
3439{
3440 return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1);
3441}
3442#endif
3443
3444static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
3445 pte_t *pte, pgoff_t pgoff, unsigned int flags)
3446{
3447 unsigned long start_addr;
3448 pgoff_t max_pgoff;
3449 struct vm_fault vmf;
3450 int off;
3451
3452 start_addr = max(address & fault_around_mask(), vma->vm_start);
3453 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
3454 pte -= off;
3455 pgoff -= off;
3456
3457 /*
 3458	 * max_pgoff is either the end of the page table, the end of the vma,
 3459	 * or fault_around_pages() from pgoff, depending on what is nearest.
3460 */
3461 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
3462 PTRS_PER_PTE - 1;
3463 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
3464 pgoff + fault_around_pages() - 1);
3465
3466 /* Check if it makes any sense to call ->map_pages */
3467 while (!pte_none(*pte)) {
3468 if (++pgoff > max_pgoff)
3469 return;
3470 start_addr += PAGE_SIZE;
3471 if (start_addr >= vma->vm_end)
3472 return;
3473 pte++;
3474 }
3475
3476 vmf.virtual_address = (void __user *) start_addr;
3477 vmf.pte = pte;
3478 vmf.pgoff = pgoff;
3479 vmf.max_pgoff = max_pgoff;
3480 vmf.flags = flags;
3481 vma->vm_ops->map_pages(vma, &vmf);
3482}
3483
3369static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3484static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3370 unsigned long address, pmd_t *pmd, 3485 unsigned long address, pmd_t *pmd,
3371 pgoff_t pgoff, unsigned int flags, pte_t orig_pte) 3486 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
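The fault-around window arithmetic in do_fault_around() is easy to misread, so here is a small, self-contained userspace model of it: a sketch only, assuming 4K pages, 512 PTEs per page table and the patch's FAULT_AROUND_ORDER of 4; the vma layout and faulting address are made up. It prints the start address and pgoff range the kernel code would consider.

#include <stdio.h>

#define PAGE_SHIFT		12
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define PTRS_PER_PTE		512UL
#define FAULT_AROUND_ORDER	4	/* 16-page window, as in the patch */

static unsigned long fault_around_pages(void)
{
	return 1UL << FAULT_AROUND_ORDER;
}

static unsigned long fault_around_mask(void)
{
	return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1);
}

static unsigned long min3ul(unsigned long a, unsigned long b, unsigned long c)
{
	unsigned long m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	/* Made-up vma: starts at 0x400000, 100 pages long, file pgoff 0. */
	unsigned long vm_start = 0x400000, vm_pgoff = 0, vma_pages = 100;
	unsigned long address = 0x400000 + 37 * PAGE_SIZE;	/* faulting addr */
	unsigned long pgoff = vm_pgoff + ((address - vm_start) >> PAGE_SHIFT);

	/* Align down to the fault-around window, but never below the vma. */
	unsigned long start_addr = address & fault_around_mask();
	if (start_addr < vm_start)
		start_addr = vm_start;

	unsigned long off = ((address - start_addr) >> PAGE_SHIFT) &
			    (PTRS_PER_PTE - 1);
	pgoff -= off;

	/* Clamp to end of page table, end of vma, or window size. */
	unsigned long max_pgoff = pgoff -
		((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
		PTRS_PER_PTE - 1;
	max_pgoff = min3ul(max_pgoff, vma_pages + vm_pgoff - 1,
			   pgoff + fault_around_pages() - 1);

	printf("start_addr=%#lx pgoff=%lu..%lu (%lu pages)\n",
	       start_addr, pgoff, max_pgoff, max_pgoff - pgoff + 1);
	return 0;
}

For the sample address this yields a 16-page window starting at 0x420000, i.e. the fault plus the surrounding pages of the same page table, clipped to the vma.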
@@ -3373,7 +3488,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3373 struct page *fault_page; 3488 struct page *fault_page;
3374 spinlock_t *ptl; 3489 spinlock_t *ptl;
3375 pte_t *pte; 3490 pte_t *pte;
3376 int ret; 3491 int ret = 0;
3492
3493 /*
3494 * Let's call ->map_pages() first and use ->fault() as fallback
 3495	 * if the page at that offset is not ready to be mapped (cold cache or
3496 * something).
3497 */
3498 if (vma->vm_ops->map_pages) {
3499 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
3500 do_fault_around(vma, address, pte, pgoff, flags);
3501 if (!pte_same(*pte, orig_pte))
3502 goto unlock_out;
3503 pte_unmap_unlock(pte, ptl);
3504 }
3377 3505
3378 ret = __do_fault(vma, address, pgoff, flags, &fault_page); 3506 ret = __do_fault(vma, address, pgoff, flags, &fault_page);
3379 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 3507 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3387,8 +3515,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3387 return ret; 3515 return ret;
3388 } 3516 }
3389 do_set_pte(vma, address, fault_page, pte, false, false); 3517 do_set_pte(vma, address, fault_page, pte, false, false);
3390 pte_unmap_unlock(pte, ptl);
3391 unlock_page(fault_page); 3518 unlock_page(fault_page);
3519unlock_out:
3520 pte_unmap_unlock(pte, ptl);
3392 return ret; 3521 return ret;
3393} 3522}
3394 3523
@@ -3408,7 +3537,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3408 if (!new_page) 3537 if (!new_page)
3409 return VM_FAULT_OOM; 3538 return VM_FAULT_OOM;
3410 3539
3411 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) { 3540 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
3412 page_cache_release(new_page); 3541 page_cache_release(new_page);
3413 return VM_FAULT_OOM; 3542 return VM_FAULT_OOM;
3414 } 3543 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e3ab02822799..78e1472933ea 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -795,36 +795,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
795 return err; 795 return err;
796} 796}
797 797
798/*
799 * Update task->flags PF_MEMPOLICY bit: set iff non-default
800 * mempolicy. Allows more rapid checking of this (combined perhaps
801 * with other PF_* flag bits) on memory allocation hot code paths.
802 *
803 * If called from outside this file, the task 'p' should -only- be
804 * a newly forked child not yet visible on the task list, because
805 * manipulating the task flags of a visible task is not safe.
806 *
807 * The above limitation is why this routine has the funny name
808 * mpol_fix_fork_child_flag().
809 *
810 * It is also safe to call this with a task pointer of current,
811 * which the static wrapper mpol_set_task_struct_flag() does,
812 * for use within this file.
813 */
814
815void mpol_fix_fork_child_flag(struct task_struct *p)
816{
817 if (p->mempolicy)
818 p->flags |= PF_MEMPOLICY;
819 else
820 p->flags &= ~PF_MEMPOLICY;
821}
822
823static void mpol_set_task_struct_flag(void)
824{
825 mpol_fix_fork_child_flag(current);
826}
827
828/* Set the process memory policy */ 798/* Set the process memory policy */
829static long do_set_mempolicy(unsigned short mode, unsigned short flags, 799static long do_set_mempolicy(unsigned short mode, unsigned short flags,
830 nodemask_t *nodes) 800 nodemask_t *nodes)
@@ -861,7 +831,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
861 } 831 }
862 old = current->mempolicy; 832 old = current->mempolicy;
863 current->mempolicy = new; 833 current->mempolicy = new;
864 mpol_set_task_struct_flag();
865 if (new && new->mode == MPOL_INTERLEAVE && 834 if (new && new->mode == MPOL_INTERLEAVE &&
866 nodes_weight(new->v.nodes)) 835 nodes_weight(new->v.nodes))
867 current->il_next = first_node(new->v.nodes); 836 current->il_next = first_node(new->v.nodes);
@@ -1782,21 +1751,18 @@ static unsigned interleave_nodes(struct mempolicy *policy)
1782/* 1751/*
1783 * Depending on the memory policy provide a node from which to allocate the 1752 * Depending on the memory policy provide a node from which to allocate the
1784 * next slab entry. 1753 * next slab entry.
1785 * @policy must be protected by freeing by the caller. If @policy is
1786 * the current task's mempolicy, this protection is implicit, as only the
1787 * task can change it's policy. The system default policy requires no
1788 * such protection.
1789 */ 1754 */
1790unsigned slab_node(void) 1755unsigned int mempolicy_slab_node(void)
1791{ 1756{
1792 struct mempolicy *policy; 1757 struct mempolicy *policy;
1758 int node = numa_mem_id();
1793 1759
1794 if (in_interrupt()) 1760 if (in_interrupt())
1795 return numa_node_id(); 1761 return node;
1796 1762
1797 policy = current->mempolicy; 1763 policy = current->mempolicy;
1798 if (!policy || policy->flags & MPOL_F_LOCAL) 1764 if (!policy || policy->flags & MPOL_F_LOCAL)
1799 return numa_node_id(); 1765 return node;
1800 1766
1801 switch (policy->mode) { 1767 switch (policy->mode) {
1802 case MPOL_PREFERRED: 1768 case MPOL_PREFERRED:
@@ -1816,11 +1782,11 @@ unsigned slab_node(void)
1816 struct zonelist *zonelist; 1782 struct zonelist *zonelist;
1817 struct zone *zone; 1783 struct zone *zone;
1818 enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL); 1784 enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
1819 zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0]; 1785 zonelist = &NODE_DATA(node)->node_zonelists[0];
1820 (void)first_zones_zonelist(zonelist, highest_zoneidx, 1786 (void)first_zones_zonelist(zonelist, highest_zoneidx,
1821 &policy->v.nodes, 1787 &policy->v.nodes,
1822 &zone); 1788 &zone);
1823 return zone ? zone->node : numa_node_id(); 1789 return zone ? zone->node : node;
1824 } 1790 }
1825 1791
1826 default: 1792 default:
diff --git a/mm/mempool.c b/mm/mempool.c
index 659aa42bad16..905434f18c97 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -304,9 +304,9 @@ void mempool_free(void *element, mempool_t *pool)
304 * ensures that there will be frees which return elements to the 304 * ensures that there will be frees which return elements to the
305 * pool waking up the waiters. 305 * pool waking up the waiters.
306 */ 306 */
307 if (pool->curr_nr < pool->min_nr) { 307 if (unlikely(pool->curr_nr < pool->min_nr)) {
308 spin_lock_irqsave(&pool->lock, flags); 308 spin_lock_irqsave(&pool->lock, flags);
309 if (pool->curr_nr < pool->min_nr) { 309 if (likely(pool->curr_nr < pool->min_nr)) {
310 add_element(pool, element); 310 add_element(pool, element);
311 spin_unlock_irqrestore(&pool->lock, flags); 311 spin_unlock_irqrestore(&pool->lock, flags);
312 wake_up(&pool->wait); 312 wake_up(&pool->wait);
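The unlikely()/likely() hints added above annotate a classic double-checked locking pattern: the cheap, racy comparison is done outside the lock and repeated under it before the element is actually returned to the reserve. A minimal userspace sketch of the same shape with pthreads (illustration only; the "pool" is reduced to a counter):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static int curr_nr;			/* elements currently held in reserve */
static const int min_nr = 4;		/* reserve size we try to maintain */

static void pool_free(void)
{
	/* Racy check first: in the common case the reserve is already full. */
	if (curr_nr < min_nr) {
		pthread_mutex_lock(&pool_lock);
		/* Re-check under the lock; another thread may have refilled. */
		if (curr_nr < min_nr) {
			curr_nr++;	/* return element to the reserve */
			pthread_mutex_unlock(&pool_lock);
			/* here the kernel would wake_up(&pool->wait) */
			return;
		}
		pthread_mutex_unlock(&pool_lock);
	}
	/* Reserve already full: free the element directly. */
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		pool_free();
	printf("reserve after 8 frees: %d (capped at %d)\n", curr_nr, min_nr);
	return 0;
}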
diff --git a/mm/mlock.c b/mm/mlock.c
index 4e1a68162285..b1eb53634005 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -79,6 +79,7 @@ void clear_page_mlock(struct page *page)
79 */ 79 */
80void mlock_vma_page(struct page *page) 80void mlock_vma_page(struct page *page)
81{ 81{
82 /* Serialize with page migration */
82 BUG_ON(!PageLocked(page)); 83 BUG_ON(!PageLocked(page));
83 84
84 if (!TestSetPageMlocked(page)) { 85 if (!TestSetPageMlocked(page)) {
@@ -174,6 +175,7 @@ unsigned int munlock_vma_page(struct page *page)
174 unsigned int nr_pages; 175 unsigned int nr_pages;
175 struct zone *zone = page_zone(page); 176 struct zone *zone = page_zone(page);
176 177
178 /* For try_to_munlock() and to serialize with page migration */
177 BUG_ON(!PageLocked(page)); 179 BUG_ON(!PageLocked(page));
178 180
179 /* 181 /*
diff --git a/mm/mmap.c b/mm/mmap.c
index 46433e137abc..b1202cf81f4b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/backing-dev.h> 11#include <linux/backing-dev.h>
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/vmacache.h>
13#include <linux/shm.h> 14#include <linux/shm.h>
14#include <linux/mman.h> 15#include <linux/mman.h>
15#include <linux/pagemap.h> 16#include <linux/pagemap.h>
@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
681 prev->vm_next = next = vma->vm_next; 682 prev->vm_next = next = vma->vm_next;
682 if (next) 683 if (next)
683 next->vm_prev = prev; 684 next->vm_prev = prev;
684 if (mm->mmap_cache == vma) 685
685 mm->mmap_cache = prev; 686 /* Kill the cache */
687 vmacache_invalidate(mm);
686} 688}
687 689
688/* 690/*
@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area);
1989/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 1991/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
1990struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 1992struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1991{ 1993{
1992 struct vm_area_struct *vma = NULL; 1994 struct rb_node *rb_node;
1995 struct vm_area_struct *vma;
1993 1996
1994 /* Check the cache first. */ 1997 /* Check the cache first. */
1995 /* (Cache hit rate is typically around 35%.) */ 1998 vma = vmacache_find(mm, addr);
1996 vma = ACCESS_ONCE(mm->mmap_cache); 1999 if (likely(vma))
1997 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { 2000 return vma;
1998 struct rb_node *rb_node;
1999 2001
2000 rb_node = mm->mm_rb.rb_node; 2002 rb_node = mm->mm_rb.rb_node;
2001 vma = NULL; 2003 vma = NULL;
2002 2004
2003 while (rb_node) { 2005 while (rb_node) {
2004 struct vm_area_struct *vma_tmp; 2006 struct vm_area_struct *tmp;
2005 2007
2006 vma_tmp = rb_entry(rb_node, 2008 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2007 struct vm_area_struct, vm_rb); 2009
2008 2010 if (tmp->vm_end > addr) {
2009 if (vma_tmp->vm_end > addr) { 2011 vma = tmp;
2010 vma = vma_tmp; 2012 if (tmp->vm_start <= addr)
2011 if (vma_tmp->vm_start <= addr) 2013 break;
2012 break; 2014 rb_node = rb_node->rb_left;
2013 rb_node = rb_node->rb_left; 2015 } else
2014 } else 2016 rb_node = rb_node->rb_right;
2015 rb_node = rb_node->rb_right;
2016 }
2017 if (vma)
2018 mm->mmap_cache = vma;
2019 } 2017 }
2018
2019 if (vma)
2020 vmacache_update(addr, vma);
2020 return vma; 2021 return vma;
2021} 2022}
2022 2023
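find_vma() now consults a small per-thread cache instead of the single mm->mmap_cache pointer: vmacache_find() hashes the address into a few slots, and a per-mm sequence number lets the whole cache be invalidated in O(1) whenever the VMA tree changes. A simplified single-threaded userspace model of that idea follows; the slot count, hash and the toy struct vma are invented for illustration and do not match the real vmacache layout.

#include <stdio.h>
#include <stdint.h>

#define CACHE_SLOTS 4U

struct vma { uintptr_t start, end; };

/* Per-"mm" generation counter and the cache's snapshot of it. */
static uint32_t mm_seqnum;
static uint32_t cache_seqnum;
static const struct vma *cache[CACHE_SLOTS];

static unsigned int slot(uintptr_t addr)
{
	return (addr >> 12) & (CACHE_SLOTS - 1);	/* hash on page number */
}

static void cache_invalidate(void)
{
	mm_seqnum++;			/* all cached entries become stale */
}

static const struct vma *cache_find(uintptr_t addr)
{
	if (cache_seqnum != mm_seqnum)
		return NULL;		/* stale cache: caller walks the tree */
	const struct vma *v = cache[slot(addr)];
	if (v && v->start <= addr && addr < v->end)
		return v;
	return NULL;
}

static void cache_update(uintptr_t addr, const struct vma *v)
{
	if (cache_seqnum != mm_seqnum) {
		/* Lazily flush stale entries before reusing the cache. */
		for (unsigned int i = 0; i < CACHE_SLOTS; i++)
			cache[i] = NULL;
		cache_seqnum = mm_seqnum;
	}
	cache[slot(addr)] = v;
}

int main(void)
{
	static const struct vma v = { 0x400000, 0x500000 };

	cache_update(0x401000, &v);
	printf("lookup 0x401000: %s\n", cache_find(0x401000) ? "hit" : "miss");
	cache_invalidate();		/* e.g. after an munmap() */
	printf("after invalidate: %s\n", cache_find(0x401000) ? "hit" : "miss");
	return 0;
}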
@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2388 } else 2389 } else
2389 mm->highest_vm_end = prev ? prev->vm_end : 0; 2390 mm->highest_vm_end = prev ? prev->vm_end : 0;
2390 tail_vma->vm_next = NULL; 2391 tail_vma->vm_next = NULL;
2391 mm->mmap_cache = NULL; /* Kill the cache. */ 2392
2393 /* Kill the cache */
2394 vmacache_invalidate(mm);
2392} 2395}
2393 2396
2394/* 2397/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 769a67a15803..c43d557941f8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -36,6 +36,34 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
36} 36}
37#endif 37#endif
38 38
39/*
40 * For a prot_numa update we only hold mmap_sem for read so there is a
41 * potential race with faulting where a pmd was temporarily none. This
42 * function checks for a transhuge pmd under the appropriate lock. It
43 * returns a pte if it was successfully locked or NULL if it raced with
44 * a transhuge insertion.
45 */
46static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
47 unsigned long addr, int prot_numa, spinlock_t **ptl)
48{
49 pte_t *pte;
50 spinlock_t *pmdl;
51
52 /* !prot_numa is protected by mmap_sem held for write */
53 if (!prot_numa)
54 return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
55
56 pmdl = pmd_lock(vma->vm_mm, pmd);
57 if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
58 spin_unlock(pmdl);
59 return NULL;
60 }
61
62 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
63 spin_unlock(pmdl);
64 return pte;
65}
66
39static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 67static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
40 unsigned long addr, unsigned long end, pgprot_t newprot, 68 unsigned long addr, unsigned long end, pgprot_t newprot,
41 int dirty_accountable, int prot_numa) 69 int dirty_accountable, int prot_numa)
@@ -45,7 +73,10 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
45 spinlock_t *ptl; 73 spinlock_t *ptl;
46 unsigned long pages = 0; 74 unsigned long pages = 0;
47 75
48 pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 76 pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
77 if (!pte)
78 return 0;
79
49 arch_enter_lazy_mmu_mode(); 80 arch_enter_lazy_mmu_mode();
50 do { 81 do {
51 oldpte = *pte; 82 oldpte = *pte;
@@ -109,15 +140,26 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
109 pgprot_t newprot, int dirty_accountable, int prot_numa) 140 pgprot_t newprot, int dirty_accountable, int prot_numa)
110{ 141{
111 pmd_t *pmd; 142 pmd_t *pmd;
143 struct mm_struct *mm = vma->vm_mm;
112 unsigned long next; 144 unsigned long next;
113 unsigned long pages = 0; 145 unsigned long pages = 0;
114 unsigned long nr_huge_updates = 0; 146 unsigned long nr_huge_updates = 0;
147 unsigned long mni_start = 0;
115 148
116 pmd = pmd_offset(pud, addr); 149 pmd = pmd_offset(pud, addr);
117 do { 150 do {
118 unsigned long this_pages; 151 unsigned long this_pages;
119 152
120 next = pmd_addr_end(addr, end); 153 next = pmd_addr_end(addr, end);
154 if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
155 continue;
156
157 /* invoke the mmu notifier if the pmd is populated */
158 if (!mni_start) {
159 mni_start = addr;
160 mmu_notifier_invalidate_range_start(mm, mni_start, end);
161 }
162
121 if (pmd_trans_huge(*pmd)) { 163 if (pmd_trans_huge(*pmd)) {
122 if (next - addr != HPAGE_PMD_SIZE) 164 if (next - addr != HPAGE_PMD_SIZE)
123 split_huge_page_pmd(vma, addr, pmd); 165 split_huge_page_pmd(vma, addr, pmd);
@@ -130,18 +172,21 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
130 pages += HPAGE_PMD_NR; 172 pages += HPAGE_PMD_NR;
131 nr_huge_updates++; 173 nr_huge_updates++;
132 } 174 }
175
176 /* huge pmd was handled */
133 continue; 177 continue;
134 } 178 }
135 } 179 }
136 /* fall through */ 180 /* fall through, the trans huge pmd just split */
137 } 181 }
138 if (pmd_none_or_clear_bad(pmd))
139 continue;
140 this_pages = change_pte_range(vma, pmd, addr, next, newprot, 182 this_pages = change_pte_range(vma, pmd, addr, next, newprot,
141 dirty_accountable, prot_numa); 183 dirty_accountable, prot_numa);
142 pages += this_pages; 184 pages += this_pages;
143 } while (pmd++, addr = next, addr != end); 185 } while (pmd++, addr = next, addr != end);
144 186
187 if (mni_start)
188 mmu_notifier_invalidate_range_end(mm, mni_start, end);
189
145 if (nr_huge_updates) 190 if (nr_huge_updates)
146 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); 191 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
147 return pages; 192 return pages;
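The mni_start bookkeeping above defers the potentially expensive mmu_notifier_invalidate_range_start() call until the first populated pmd is actually reached, and pairs it with a single range_end afterwards, so fully empty ranges generate no notifier traffic at all. The same defer-until-first-work shape in a tiny userspace sketch; begin()/end_range() merely stand in for the notifier calls and the populated() test is made up.

#include <stdio.h>

/* Stand-ins for mmu_notifier_invalidate_range_{start,end}(). */
static void begin(unsigned long start, unsigned long end)
{
	printf("begin(%lu, %lu)\n", start, end);
}
static void end_range(unsigned long start, unsigned long end)
{
	printf("end(%lu, %lu)\n", start, end);
}

/* Pretend entries 0..9 exist but only 3..6 are populated. */
static int populated(unsigned long i) { return i >= 3 && i < 7; }

static void walk(unsigned long start, unsigned long end)
{
	/*
	 * 0 means "not started yet", mirroring the kernel's use of
	 * address 0 as the sentinel (index 0 is assumed never populated).
	 */
	unsigned long mni_start = 0;

	for (unsigned long i = start; i < end; i++) {
		if (!populated(i))
			continue;		/* skip empty entries cheaply */
		if (!mni_start) {
			mni_start = i;
			begin(mni_start, end);	/* first real work: open range */
		}
		/* ... per-entry protection change would happen here ... */
	}
	if (mni_start)
		end_range(mni_start, end);
}

int main(void)
{
	walk(0, 10);	/* begin()/end() issued exactly once */
	walk(8, 10);	/* nothing populated: no notifier calls at all */
	return 0;
}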
@@ -201,15 +246,12 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
201 unsigned long end, pgprot_t newprot, 246 unsigned long end, pgprot_t newprot,
202 int dirty_accountable, int prot_numa) 247 int dirty_accountable, int prot_numa)
203{ 248{
204 struct mm_struct *mm = vma->vm_mm;
205 unsigned long pages; 249 unsigned long pages;
206 250
207 mmu_notifier_invalidate_range_start(mm, start, end);
208 if (is_vm_hugetlb_page(vma)) 251 if (is_vm_hugetlb_page(vma))
209 pages = hugetlb_change_protection(vma, start, end, newprot); 252 pages = hugetlb_change_protection(vma, start, end, newprot);
210 else 253 else
211 pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); 254 pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
212 mmu_notifier_invalidate_range_end(mm, start, end);
213 255
214 return pages; 256 return pages;
215} 257}
diff --git a/mm/nommu.c b/mm/nommu.c
index a554e5a451cd..85f8d6698d48 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/vmacache.h>
18#include <linux/mman.h> 19#include <linux/mman.h>
19#include <linux/swap.h> 20#include <linux/swap.h>
20#include <linux/file.h> 21#include <linux/file.h>
@@ -24,6 +25,7 @@
24#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
25#include <linux/blkdev.h> 26#include <linux/blkdev.h>
26#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/compiler.h>
27#include <linux/mount.h> 29#include <linux/mount.h>
28#include <linux/personality.h> 30#include <linux/personality.h>
29#include <linux/security.h> 31#include <linux/security.h>
@@ -296,7 +298,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
296 count = -(unsigned long) addr; 298 count = -(unsigned long) addr;
297 299
298 memcpy(addr, buf, count); 300 memcpy(addr, buf, count);
299 return(count); 301 return count;
300} 302}
301 303
302/* 304/*
@@ -459,7 +461,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
459 * Implement a stub for vmalloc_sync_all() if the architecture chose not to 461 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
460 * have one. 462 * have one.
461 */ 463 */
462void __attribute__((weak)) vmalloc_sync_all(void) 464void __weak vmalloc_sync_all(void)
463{ 465{
464} 466}
465 467
@@ -768,16 +770,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
768 */ 770 */
769static void delete_vma_from_mm(struct vm_area_struct *vma) 771static void delete_vma_from_mm(struct vm_area_struct *vma)
770{ 772{
773 int i;
771 struct address_space *mapping; 774 struct address_space *mapping;
772 struct mm_struct *mm = vma->vm_mm; 775 struct mm_struct *mm = vma->vm_mm;
776 struct task_struct *curr = current;
773 777
774 kenter("%p", vma); 778 kenter("%p", vma);
775 779
776 protect_vma(vma, 0); 780 protect_vma(vma, 0);
777 781
778 mm->map_count--; 782 mm->map_count--;
779 if (mm->mmap_cache == vma) 783 for (i = 0; i < VMACACHE_SIZE; i++) {
780 mm->mmap_cache = NULL; 784 /* if the vma is cached, invalidate the entire cache */
785 if (curr->vmacache[i] == vma) {
786 vmacache_invalidate(curr->mm);
787 break;
788 }
789 }
781 790
782 /* remove the VMA from the mapping */ 791 /* remove the VMA from the mapping */
783 if (vma->vm_file) { 792 if (vma->vm_file) {
@@ -825,8 +834,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
825 struct vm_area_struct *vma; 834 struct vm_area_struct *vma;
826 835
827 /* check the cache first */ 836 /* check the cache first */
828 vma = ACCESS_ONCE(mm->mmap_cache); 837 vma = vmacache_find(mm, addr);
829 if (vma && vma->vm_start <= addr && vma->vm_end > addr) 838 if (likely(vma))
830 return vma; 839 return vma;
831 840
832 /* trawl the list (there may be multiple mappings in which addr 841 /* trawl the list (there may be multiple mappings in which addr
@@ -835,7 +844,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
835 if (vma->vm_start > addr) 844 if (vma->vm_start > addr)
836 return NULL; 845 return NULL;
837 if (vma->vm_end > addr) { 846 if (vma->vm_end > addr) {
838 mm->mmap_cache = vma; 847 vmacache_update(addr, vma);
839 return vma; 848 return vma;
840 } 849 }
841 } 850 }
@@ -874,8 +883,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
874 unsigned long end = addr + len; 883 unsigned long end = addr + len;
875 884
876 /* check the cache first */ 885 /* check the cache first */
877 vma = mm->mmap_cache; 886 vma = vmacache_find_exact(mm, addr, end);
878 if (vma && vma->vm_start == addr && vma->vm_end == end) 887 if (vma)
879 return vma; 888 return vma;
880 889
881 /* trawl the list (there may be multiple mappings in which addr 890 /* trawl the list (there may be multiple mappings in which addr
@@ -886,7 +895,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
886 if (vma->vm_start > addr) 895 if (vma->vm_start > addr)
887 return NULL; 896 return NULL;
888 if (vma->vm_end == end) { 897 if (vma->vm_end == end) {
889 mm->mmap_cache = vma; 898 vmacache_update(addr, vma);
890 return vma; 899 return vma;
891 } 900 }
892 } 901 }
@@ -1003,8 +1012,7 @@ static int validate_mmap_request(struct file *file,
1003 1012
1004 /* we mustn't privatise shared mappings */ 1013 /* we mustn't privatise shared mappings */
1005 capabilities &= ~BDI_CAP_MAP_COPY; 1014 capabilities &= ~BDI_CAP_MAP_COPY;
1006 } 1015 } else {
1007 else {
1008 /* we're going to read the file into private memory we 1016 /* we're going to read the file into private memory we
1009 * allocate */ 1017 * allocate */
1010 if (!(capabilities & BDI_CAP_MAP_COPY)) 1018 if (!(capabilities & BDI_CAP_MAP_COPY))
@@ -1035,23 +1043,20 @@ static int validate_mmap_request(struct file *file,
1035 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { 1043 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1036 if (prot & PROT_EXEC) 1044 if (prot & PROT_EXEC)
1037 return -EPERM; 1045 return -EPERM;
1038 } 1046 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
1039 else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
1040 /* handle implication of PROT_EXEC by PROT_READ */ 1047 /* handle implication of PROT_EXEC by PROT_READ */
1041 if (current->personality & READ_IMPLIES_EXEC) { 1048 if (current->personality & READ_IMPLIES_EXEC) {
1042 if (capabilities & BDI_CAP_EXEC_MAP) 1049 if (capabilities & BDI_CAP_EXEC_MAP)
1043 prot |= PROT_EXEC; 1050 prot |= PROT_EXEC;
1044 } 1051 }
1045 } 1052 } else if ((prot & PROT_READ) &&
1046 else if ((prot & PROT_READ) &&
1047 (prot & PROT_EXEC) && 1053 (prot & PROT_EXEC) &&
1048 !(capabilities & BDI_CAP_EXEC_MAP) 1054 !(capabilities & BDI_CAP_EXEC_MAP)
1049 ) { 1055 ) {
1050 /* backing file is not executable, try to copy */ 1056 /* backing file is not executable, try to copy */
1051 capabilities &= ~BDI_CAP_MAP_DIRECT; 1057 capabilities &= ~BDI_CAP_MAP_DIRECT;
1052 } 1058 }
1053 } 1059 } else {
1054 else {
1055 /* anonymous mappings are always memory backed and can be 1060 /* anonymous mappings are always memory backed and can be
1056 * privately mapped 1061 * privately mapped
1057 */ 1062 */
@@ -1659,7 +1664,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1659 /* find the first potentially overlapping VMA */ 1664 /* find the first potentially overlapping VMA */
1660 vma = find_vma(mm, start); 1665 vma = find_vma(mm, start);
1661 if (!vma) { 1666 if (!vma) {
1662 static int limit = 0; 1667 static int limit;
1663 if (limit < 5) { 1668 if (limit < 5) {
1664 printk(KERN_WARNING 1669 printk(KERN_WARNING
1665 "munmap of memory not mmapped by process %d" 1670 "munmap of memory not mmapped by process %d"
@@ -1985,6 +1990,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1985} 1990}
1986EXPORT_SYMBOL(filemap_fault); 1991EXPORT_SYMBOL(filemap_fault);
1987 1992
1993void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
1994{
1995 BUG();
1996}
1997EXPORT_SYMBOL(filemap_map_pages);
1998
1988int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, 1999int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
1989 unsigned long size, pgoff_t pgoff) 2000 unsigned long size, pgoff_t pgoff)
1990{ 2001{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7106cb1aca8e..ef413492a149 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1562,9 +1562,9 @@ pause:
1562 bdi_start_background_writeback(bdi); 1562 bdi_start_background_writeback(bdi);
1563} 1563}
1564 1564
1565void set_page_dirty_balance(struct page *page, int page_mkwrite) 1565void set_page_dirty_balance(struct page *page)
1566{ 1566{
1567 if (set_page_dirty(page) || page_mkwrite) { 1567 if (set_page_dirty(page)) {
1568 struct address_space *mapping = page_mapping(page); 1568 struct address_space *mapping = page_mapping(page);
1569 1569
1570 if (mapping) 1570 if (mapping)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 979378deccbf..5dba2933c9c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,7 +295,8 @@ static inline int bad_range(struct zone *zone, struct page *page)
295} 295}
296#endif 296#endif
297 297
298static void bad_page(struct page *page, char *reason, unsigned long bad_flags) 298static void bad_page(struct page *page, const char *reason,
299 unsigned long bad_flags)
299{ 300{
300 static unsigned long resume; 301 static unsigned long resume;
301 static unsigned long nr_shown; 302 static unsigned long nr_shown;
@@ -623,7 +624,7 @@ out:
623 624
624static inline int free_pages_check(struct page *page) 625static inline int free_pages_check(struct page *page)
625{ 626{
626 char *bad_reason = NULL; 627 const char *bad_reason = NULL;
627 unsigned long bad_flags = 0; 628 unsigned long bad_flags = 0;
628 629
629 if (unlikely(page_mapcount(page))) 630 if (unlikely(page_mapcount(page)))
@@ -859,7 +860,7 @@ static inline void expand(struct zone *zone, struct page *page,
859 */ 860 */
860static inline int check_new_page(struct page *page) 861static inline int check_new_page(struct page *page)
861{ 862{
862 char *bad_reason = NULL; 863 const char *bad_reason = NULL;
863 unsigned long bad_flags = 0; 864 unsigned long bad_flags = 0;
864 865
865 if (unlikely(page_mapcount(page))) 866 if (unlikely(page_mapcount(page)))
@@ -1238,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
1238 } 1239 }
1239 local_irq_restore(flags); 1240 local_irq_restore(flags);
1240} 1241}
1241static bool gfp_thisnode_allocation(gfp_t gfp_mask)
1242{
1243 return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
1244}
1245#else
1246static bool gfp_thisnode_allocation(gfp_t gfp_mask)
1247{
1248 return false;
1249}
1250#endif 1242#endif
1251 1243
1252/* 1244/*
@@ -1583,12 +1575,7 @@ again:
1583 get_pageblock_migratetype(page)); 1575 get_pageblock_migratetype(page));
1584 } 1576 }
1585 1577
1586 /* 1578 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
1587 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
1588 * aging protocol, so they can't be fair.
1589 */
1590 if (!gfp_thisnode_allocation(gfp_flags))
1591 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
1592 1579
1593 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1580 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1594 zone_statistics(preferred_zone, zone, gfp_flags); 1581 zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1870,7 +1857,7 @@ static void __paginginit init_zone_allows_reclaim(int nid)
1870{ 1857{
1871 int i; 1858 int i;
1872 1859
1873 for_each_online_node(i) 1860 for_each_node_state(i, N_MEMORY)
1874 if (node_distance(nid, i) <= RECLAIM_DISTANCE) 1861 if (node_distance(nid, i) <= RECLAIM_DISTANCE)
1875 node_set(i, NODE_DATA(nid)->reclaim_nodes); 1862 node_set(i, NODE_DATA(nid)->reclaim_nodes);
1876 else 1863 else
@@ -1954,23 +1941,12 @@ zonelist_scan:
1954 * zone size to ensure fair page aging. The zone a 1941 * zone size to ensure fair page aging. The zone a
1955 * page was allocated in should have no effect on the 1942 * page was allocated in should have no effect on the
1956 * time the page has in memory before being reclaimed. 1943 * time the page has in memory before being reclaimed.
1957 *
1958 * Try to stay in local zones in the fastpath. If
1959 * that fails, the slowpath is entered, which will do
1960 * another pass starting with the local zones, but
1961 * ultimately fall back to remote zones that do not
1962 * partake in the fairness round-robin cycle of this
1963 * zonelist.
1964 *
1965 * NOTE: GFP_THISNODE allocations do not partake in
1966 * the kswapd aging protocol, so they can't be fair.
1967 */ 1944 */
1968 if ((alloc_flags & ALLOC_WMARK_LOW) && 1945 if (alloc_flags & ALLOC_FAIR) {
1969 !gfp_thisnode_allocation(gfp_mask)) {
1970 if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
1971 continue;
1972 if (!zone_local(preferred_zone, zone)) 1946 if (!zone_local(preferred_zone, zone))
1973 continue; 1947 continue;
1948 if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
1949 continue;
1974 } 1950 }
1975 /* 1951 /*
1976 * When allocating a page cache page for writing, we 1952 * When allocating a page cache page for writing, we
@@ -2408,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
2408 return page; 2384 return page;
2409} 2385}
2410 2386
2411static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, 2387static void reset_alloc_batches(struct zonelist *zonelist,
2412 struct zonelist *zonelist, 2388 enum zone_type high_zoneidx,
2413 enum zone_type high_zoneidx, 2389 struct zone *preferred_zone)
2414 struct zone *preferred_zone)
2415{ 2390{
2416 struct zoneref *z; 2391 struct zoneref *z;
2417 struct zone *zone; 2392 struct zone *zone;
2418 2393
2419 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 2394 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2420 if (!(gfp_mask & __GFP_NO_KSWAPD))
2421 wakeup_kswapd(zone, order, zone_idx(preferred_zone));
2422 /* 2395 /*
2423 * Only reset the batches of zones that were actually 2396 * Only reset the batches of zones that were actually
2424 * considered in the fast path, we don't want to 2397 * considered in the fairness pass, we don't want to
2425 * thrash fairness information for zones that are not 2398 * trash fairness information for zones that are not
2426 * actually part of this zonelist's round-robin cycle. 2399 * actually part of this zonelist's round-robin cycle.
2427 */ 2400 */
2428 if (!zone_local(preferred_zone, zone)) 2401 if (!zone_local(preferred_zone, zone))
2429 continue; 2402 continue;
2430 mod_zone_page_state(zone, NR_ALLOC_BATCH, 2403 mod_zone_page_state(zone, NR_ALLOC_BATCH,
2431 high_wmark_pages(zone) - 2404 high_wmark_pages(zone) - low_wmark_pages(zone) -
2432 low_wmark_pages(zone) - 2405 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
2433 zone_page_state(zone, NR_ALLOC_BATCH));
2434 } 2406 }
2435} 2407}
2436 2408
2409static void wake_all_kswapds(unsigned int order,
2410 struct zonelist *zonelist,
2411 enum zone_type high_zoneidx,
2412 struct zone *preferred_zone)
2413{
2414 struct zoneref *z;
2415 struct zone *zone;
2416
2417 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
2418 wakeup_kswapd(zone, order, zone_idx(preferred_zone));
2419}
2420
2437static inline int 2421static inline int
2438gfp_to_alloc_flags(gfp_t gfp_mask) 2422gfp_to_alloc_flags(gfp_t gfp_mask)
2439{ 2423{
@@ -2522,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2522 * allowed per node queues are empty and that nodes are 2506 * allowed per node queues are empty and that nodes are
2523 * over allocated. 2507 * over allocated.
2524 */ 2508 */
2525 if (gfp_thisnode_allocation(gfp_mask)) 2509 if (IS_ENABLED(CONFIG_NUMA) &&
2510 (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
2526 goto nopage; 2511 goto nopage;
2527 2512
2528restart: 2513restart:
2529 prepare_slowpath(gfp_mask, order, zonelist, 2514 if (!(gfp_mask & __GFP_NO_KSWAPD))
2530 high_zoneidx, preferred_zone); 2515 wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
2531 2516
2532 /* 2517 /*
2533 * OK, we're below the kswapd watermark and have kicked background 2518 * OK, we're below the kswapd watermark and have kicked background
@@ -2711,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
2711 struct page *page = NULL; 2696 struct page *page = NULL;
2712 int migratetype = allocflags_to_migratetype(gfp_mask); 2697 int migratetype = allocflags_to_migratetype(gfp_mask);
2713 unsigned int cpuset_mems_cookie; 2698 unsigned int cpuset_mems_cookie;
2714 int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; 2699 int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
2715 struct mem_cgroup *memcg = NULL; 2700 struct mem_cgroup *memcg = NULL;
2716 2701
2717 gfp_mask &= gfp_allowed_mask; 2702 gfp_mask &= gfp_allowed_mask;
@@ -2752,12 +2737,29 @@ retry_cpuset:
2752 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) 2737 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
2753 alloc_flags |= ALLOC_CMA; 2738 alloc_flags |= ALLOC_CMA;
2754#endif 2739#endif
2740retry:
2755 /* First allocation attempt */ 2741 /* First allocation attempt */
2756 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, 2742 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
2757 zonelist, high_zoneidx, alloc_flags, 2743 zonelist, high_zoneidx, alloc_flags,
2758 preferred_zone, migratetype); 2744 preferred_zone, migratetype);
2759 if (unlikely(!page)) { 2745 if (unlikely(!page)) {
2760 /* 2746 /*
2747 * The first pass makes sure allocations are spread
2748 * fairly within the local node. However, the local
2749 * node might have free pages left after the fairness
2750 * batches are exhausted, and remote zones haven't
2751 * even been considered yet. Try once more without
2752 * fairness, and include remote zones now, before
2753 * entering the slowpath and waking kswapd: prefer
2754 * spilling to a remote zone over swapping locally.
2755 */
2756 if (alloc_flags & ALLOC_FAIR) {
2757 reset_alloc_batches(zonelist, high_zoneidx,
2758 preferred_zone);
2759 alloc_flags &= ~ALLOC_FAIR;
2760 goto retry;
2761 }
2762 /*
2761 * Runtime PM, block IO and its error handling path 2763 * Runtime PM, block IO and its error handling path
2762 * can deadlock because I/O on the device might not 2764 * can deadlock because I/O on the device might not
2763 * complete. 2765 * complete.
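The retry above implements a two-pass scheme: the first get_page_from_freelist() pass honours ALLOC_FAIR and only takes pages from local zones that still have allocation batch left; if that fails, the batches are reset and a second pass runs without the fairness constraint before the slowpath is entered. A compact userspace model of that control flow, with invented zone data and batch sizes purely for illustration:

#include <stdio.h>
#include <stdbool.h>

struct zone {
	const char *name;
	bool local;		/* belongs to the preferred node */
	long free;		/* free pages */
	long batch;		/* remaining fair-allocation batch */
};

static struct zone zones[] = {
	{ "node0/Normal", true,  1, 0 },	/* local, batch exhausted */
	{ "node1/Normal", false, 8, 4 },	/* remote, has memory */
};
#define NZONES (sizeof(zones) / sizeof(zones[0]))

static struct zone *get_page(bool fair)
{
	for (unsigned int i = 0; i < NZONES; i++) {
		struct zone *z = &zones[i];

		if (fair) {
			if (!z->local)
				continue;	/* fair pass: local zones only */
			if (z->batch <= 0)
				continue;	/* fairness batch used up */
		}
		if (z->free > 0) {
			z->free--;
			z->batch--;
			return z;
		}
	}
	return NULL;
}

int main(void)
{
	bool fair = true;
	struct zone *z;
retry:
	z = get_page(fair);
	if (!z && fair) {
		/* reset_alloc_batches() would refill the local batches here */
		for (unsigned int i = 0; i < NZONES; i++)
			if (zones[i].local)
				zones[i].batch = 4;
		fair = false;
		goto retry;		/* second pass: remote zones allowed */
	}
	printf("allocated from %s\n", z ? z->name : "(slowpath)");
	return 0;
}

With this data the fair pass fails (the local zone's batch is exhausted), and the unfair retry finds the leftover local page, matching the scenario described in the comment above.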
@@ -4919,7 +4921,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4919 4921
4920 pgdat->node_id = nid; 4922 pgdat->node_id = nid;
4921 pgdat->node_start_pfn = node_start_pfn; 4923 pgdat->node_start_pfn = node_start_pfn;
4922 init_zone_allows_reclaim(nid); 4924 if (node_state(nid, N_MEMORY))
4925 init_zone_allows_reclaim(nid);
4923#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 4926#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4924 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 4927 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
4925#endif 4928#endif
@@ -5070,7 +5073,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
5070 nodemask_t saved_node_state = node_states[N_MEMORY]; 5073 nodemask_t saved_node_state = node_states[N_MEMORY];
5071 unsigned long totalpages = early_calculate_totalpages(); 5074 unsigned long totalpages = early_calculate_totalpages();
5072 int usable_nodes = nodes_weight(node_states[N_MEMORY]); 5075 int usable_nodes = nodes_weight(node_states[N_MEMORY]);
5073 struct memblock_type *type = &memblock.memory; 5076 struct memblock_region *r;
5074 5077
5075 /* Need to find movable_zone earlier when movable_node is specified. */ 5078 /* Need to find movable_zone earlier when movable_node is specified. */
5076 find_usable_zone_for_movable(); 5079 find_usable_zone_for_movable();
@@ -5080,13 +5083,13 @@ static void __init find_zone_movable_pfns_for_nodes(void)
5080 * options. 5083 * options.
5081 */ 5084 */
5082 if (movable_node_is_enabled()) { 5085 if (movable_node_is_enabled()) {
5083 for (i = 0; i < type->cnt; i++) { 5086 for_each_memblock(memory, r) {
5084 if (!memblock_is_hotpluggable(&type->regions[i])) 5087 if (!memblock_is_hotpluggable(r))
5085 continue; 5088 continue;
5086 5089
5087 nid = type->regions[i].nid; 5090 nid = r->nid;
5088 5091
5089 usable_startpfn = PFN_DOWN(type->regions[i].base); 5092 usable_startpfn = PFN_DOWN(r->base);
5090 zone_movable_pfn[nid] = zone_movable_pfn[nid] ? 5093 zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
5091 min(usable_startpfn, zone_movable_pfn[nid]) : 5094 min(usable_startpfn, zone_movable_pfn[nid]) :
5092 usable_startpfn; 5095 usable_startpfn;
@@ -6544,7 +6547,8 @@ static void dump_page_flags(unsigned long flags)
6544 printk(")\n"); 6547 printk(")\n");
6545} 6548}
6546 6549
6547void dump_page_badflags(struct page *page, char *reason, unsigned long badflags) 6550void dump_page_badflags(struct page *page, const char *reason,
6551 unsigned long badflags)
6548{ 6552{
6549 printk(KERN_ALERT 6553 printk(KERN_ALERT
6550 "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", 6554 "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
@@ -6560,8 +6564,8 @@ void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
6560 mem_cgroup_print_bad_page(page); 6564 mem_cgroup_print_bad_page(page);
6561} 6565}
6562 6566
6563void dump_page(struct page *page, char *reason) 6567void dump_page(struct page *page, const char *reason)
6564{ 6568{
6565 dump_page_badflags(page, reason, 0); 6569 dump_page_badflags(page, reason, 0);
6566} 6570}
6567EXPORT_SYMBOL_GPL(dump_page); 6571EXPORT_SYMBOL(dump_page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 29c5e1af5a0c..0ca36a7770b1 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -8,9 +8,7 @@
8 */ 8 */
9 9
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/gfp.h> 11#include <linux/gfp.h>
13#include <linux/mm.h>
14#include <linux/export.h> 12#include <linux/export.h>
15#include <linux/blkdev.h> 13#include <linux/blkdev.h>
16#include <linux/backing-dev.h> 14#include <linux/backing-dev.h>
@@ -20,6 +18,8 @@
20#include <linux/syscalls.h> 18#include <linux/syscalls.h>
21#include <linux/file.h> 19#include <linux/file.h>
22 20
21#include "internal.h"
22
23/* 23/*
24 * Initialise a struct file's readahead state. Assumes that the caller has 24 * Initialise a struct file's readahead state. Assumes that the caller has
25 * memset *ra to zero. 25 * memset *ra to zero.
@@ -149,8 +149,7 @@ out:
149 * 149 *
150 * Returns the number of pages requested, or the maximum amount of I/O allowed. 150 * Returns the number of pages requested, or the maximum amount of I/O allowed.
151 */ 151 */
152static int 152int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
153__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
154 pgoff_t offset, unsigned long nr_to_read, 153 pgoff_t offset, unsigned long nr_to_read,
155 unsigned long lookahead_size) 154 unsigned long lookahead_size)
156{ 155{
@@ -244,20 +243,6 @@ unsigned long max_sane_readahead(unsigned long nr)
244} 243}
245 244
246/* 245/*
247 * Submit IO for the read-ahead request in file_ra_state.
248 */
249unsigned long ra_submit(struct file_ra_state *ra,
250 struct address_space *mapping, struct file *filp)
251{
252 int actual;
253
254 actual = __do_page_cache_readahead(mapping, filp,
255 ra->start, ra->size, ra->async_size);
256
257 return actual;
258}
259
260/*
261 * Set the initial window size, round to next power of 2 and square 246 * Set the initial window size, round to next power of 2 and square
262 * for small size, x 4 for medium, and x 2 for large 247 * for small size, x 4 for medium, and x 2 for large
263 * for 128k (32 page) max ra 248 * for 128k (32 page) max ra
diff --git a/mm/rmap.c b/mm/rmap.c
index 11cf322f8133..9c3e77396d1a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1332,9 +1332,19 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1332 BUG_ON(!page || PageAnon(page)); 1332 BUG_ON(!page || PageAnon(page));
1333 1333
1334 if (locked_vma) { 1334 if (locked_vma) {
1335 mlock_vma_page(page); /* no-op if already mlocked */ 1335 if (page == check_page) {
1336 if (page == check_page) 1336 /* we know we have check_page locked */
1337 mlock_vma_page(page);
1337 ret = SWAP_MLOCK; 1338 ret = SWAP_MLOCK;
1339 } else if (trylock_page(page)) {
1340 /*
1341 * If we can lock the page, perform mlock.
 1342 * Otherwise leave the page alone; it will eventually
 1343 * be encountered again later.
1344 */
1345 mlock_vma_page(page);
1346 unlock_page(page);
1347 }
1338 continue; /* don't unmap */ 1348 continue; /* don't unmap */
1339 } 1349 }
1340 1350
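The trylock_page() branch above is the standard "lock it if you can, otherwise skip and let a later pass deal with it" idiom. For illustration only, the same idiom with a pthread mutex standing in for the page lock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

/* Mark the "page" mlocked only if we can take its lock right now. */
static int try_mlock(void)
{
	if (pthread_mutex_trylock(&page_lock) != 0)
		return 0;	/* contended: leave it for a later scan */

	/* ... the mlock_vma_page(page) equivalent would go here ... */
	pthread_mutex_unlock(&page_lock);
	return 1;
}

int main(void)
{
	printf("uncontended: %s\n", try_mlock() ? "mlocked" : "skipped");

	pthread_mutex_lock(&page_lock);		/* simulate contention */
	printf("contended:   %s\n", try_mlock() ? "mlocked" : "skipped");
	pthread_mutex_unlock(&page_lock);
	return 0;
}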
diff --git a/mm/shmem.c b/mm/shmem.c
index a3ba988ec946..70273f8df586 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -683,7 +683,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
683 * the shmem_swaplist_mutex which might hold up shmem_writepage(). 683 * the shmem_swaplist_mutex which might hold up shmem_writepage().
684 * Charged back to the user (not to caller) when swap account is used. 684 * Charged back to the user (not to caller) when swap account is used.
685 */ 685 */
686 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 686 error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
687 if (error) 687 if (error)
688 goto out; 688 goto out;
689 /* No radix_tree_preload: swap entry keeps a place for page in tree */ 689 /* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1080,7 +1080,7 @@ repeat:
1080 goto failed; 1080 goto failed;
1081 } 1081 }
1082 1082
1083 error = mem_cgroup_cache_charge(page, current->mm, 1083 error = mem_cgroup_charge_file(page, current->mm,
1084 gfp & GFP_RECLAIM_MASK); 1084 gfp & GFP_RECLAIM_MASK);
1085 if (!error) { 1085 if (!error) {
1086 error = shmem_add_to_page_cache(page, mapping, index, 1086 error = shmem_add_to_page_cache(page, mapping, index,
@@ -1134,7 +1134,7 @@ repeat:
1134 1134
1135 SetPageSwapBacked(page); 1135 SetPageSwapBacked(page);
1136 __set_page_locked(page); 1136 __set_page_locked(page);
1137 error = mem_cgroup_cache_charge(page, current->mm, 1137 error = mem_cgroup_charge_file(page, current->mm,
1138 gfp & GFP_RECLAIM_MASK); 1138 gfp & GFP_RECLAIM_MASK);
1139 if (error) 1139 if (error)
1140 goto decused; 1140 goto decused;
@@ -2723,6 +2723,7 @@ static const struct super_operations shmem_ops = {
2723 2723
2724static const struct vm_operations_struct shmem_vm_ops = { 2724static const struct vm_operations_struct shmem_vm_ops = {
2725 .fault = shmem_fault, 2725 .fault = shmem_fault,
2726 .map_pages = filemap_map_pages,
2726#ifdef CONFIG_NUMA 2727#ifdef CONFIG_NUMA
2727 .set_policy = shmem_set_policy, 2728 .set_policy = shmem_set_policy,
2728 .get_policy = shmem_get_policy, 2729 .get_policy = shmem_get_policy,
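Besides switching to the renamed mem_cgroup_charge_file(), the shmem hunk wires .map_pages to filemap_map_pages(), opting tmpfs into fault-around: once one page faults, neighbouring pages already up to date in the page cache are mapped without separate faults. A generic sketch of how a filesystem's vm_operations_struct enables this (not taken from the patch; the ops table name is made up):

	static const struct vm_operations_struct example_file_vm_ops = {
		.fault		= filemap_fault,	/* bring in the faulting page */
		.map_pages	= filemap_map_pages,	/* map surrounding uptodate pages */
	};

shmem keeps its own .fault (shmem_fault) because it must handle swap entries and holes, but it can reuse the generic .map_pages helper unchanged.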
diff --git a/mm/slab.c b/mm/slab.c
index 9153c802e2fe..3db4cb06e32e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3027,7 +3027,7 @@ out:
3027 3027
3028#ifdef CONFIG_NUMA 3028#ifdef CONFIG_NUMA
3029/* 3029/*
3030 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 3030 * Try allocating on another node if PF_SPREAD_SLAB or a mempolicy is set.
3031 * 3031 *
3032 * If we are in_interrupt, then process context, including cpusets and 3032 * If we are in_interrupt, then process context, including cpusets and
3033 * mempolicy, may not apply and should not be used for allocation policy. 3033 * mempolicy, may not apply and should not be used for allocation policy.
@@ -3042,7 +3042,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3042 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 3042 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3043 nid_alloc = cpuset_slab_spread_node(); 3043 nid_alloc = cpuset_slab_spread_node();
3044 else if (current->mempolicy) 3044 else if (current->mempolicy)
3045 nid_alloc = slab_node(); 3045 nid_alloc = mempolicy_slab_node();
3046 if (nid_alloc != nid_here) 3046 if (nid_alloc != nid_here)
3047 return ____cache_alloc_node(cachep, flags, nid_alloc); 3047 return ____cache_alloc_node(cachep, flags, nid_alloc);
3048 return NULL; 3048 return NULL;
@@ -3074,7 +3074,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3074 3074
3075retry_cpuset: 3075retry_cpuset:
3076 cpuset_mems_cookie = read_mems_allowed_begin(); 3076 cpuset_mems_cookie = read_mems_allowed_begin();
3077 zonelist = node_zonelist(slab_node(), flags); 3077 zonelist = node_zonelist(mempolicy_slab_node(), flags);
3078 3078
3079retry: 3079retry:
3080 /* 3080 /*
@@ -3259,7 +3259,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3259{ 3259{
3260 void *objp; 3260 void *objp;
3261 3261
3262 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { 3262 if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
3263 objp = alternate_node_alloc(cache, flags); 3263 objp = alternate_node_alloc(cache, flags);
3264 if (objp) 3264 if (objp)
3265 goto out; 3265 goto out;
diff --git a/mm/slab.h b/mm/slab.h
index 8184a7cde272..3045316b7c9d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -55,12 +55,12 @@ extern void create_boot_cache(struct kmem_cache *, const char *name,
55struct mem_cgroup; 55struct mem_cgroup;
56#ifdef CONFIG_SLUB 56#ifdef CONFIG_SLUB
57struct kmem_cache * 57struct kmem_cache *
58__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, 58__kmem_cache_alias(const char *name, size_t size, size_t align,
59 size_t align, unsigned long flags, void (*ctor)(void *)); 59 unsigned long flags, void (*ctor)(void *));
60#else 60#else
61static inline struct kmem_cache * 61static inline struct kmem_cache *
62__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, 62__kmem_cache_alias(const char *name, size_t size, size_t align,
63 size_t align, unsigned long flags, void (*ctor)(void *)) 63 unsigned long flags, void (*ctor)(void *))
64{ return NULL; } 64{ return NULL; }
65#endif 65#endif
66 66
@@ -119,13 +119,6 @@ static inline bool is_root_cache(struct kmem_cache *s)
119 return !s->memcg_params || s->memcg_params->is_root_cache; 119 return !s->memcg_params || s->memcg_params->is_root_cache;
120} 120}
121 121
122static inline bool cache_match_memcg(struct kmem_cache *cachep,
123 struct mem_cgroup *memcg)
124{
125 return (is_root_cache(cachep) && !memcg) ||
126 (cachep->memcg_params->memcg == memcg);
127}
128
129static inline void memcg_bind_pages(struct kmem_cache *s, int order) 122static inline void memcg_bind_pages(struct kmem_cache *s, int order)
130{ 123{
131 if (!is_root_cache(s)) 124 if (!is_root_cache(s))
@@ -204,12 +197,6 @@ static inline bool is_root_cache(struct kmem_cache *s)
204 return true; 197 return true;
205} 198}
206 199
207static inline bool cache_match_memcg(struct kmem_cache *cachep,
208 struct mem_cgroup *memcg)
209{
210 return true;
211}
212
213static inline void memcg_bind_pages(struct kmem_cache *s, int order) 200static inline void memcg_bind_pages(struct kmem_cache *s, int order)
214{ 201{
215} 202}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1ec3c619ba04..f3cfccf76dda 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -29,8 +29,7 @@ DEFINE_MUTEX(slab_mutex);
29struct kmem_cache *kmem_cache; 29struct kmem_cache *kmem_cache;
30 30
31#ifdef CONFIG_DEBUG_VM 31#ifdef CONFIG_DEBUG_VM
32static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, 32static int kmem_cache_sanity_check(const char *name, size_t size)
33 size_t size)
34{ 33{
35 struct kmem_cache *s = NULL; 34 struct kmem_cache *s = NULL;
36 35
@@ -57,13 +56,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
57 } 56 }
58 57
59#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) 58#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
60 /* 59 if (!strcmp(s->name, name)) {
61 * For simplicity, we won't check this in the list of memcg
62 * caches. We have control over memcg naming, and if there
63 * aren't duplicates in the global list, there won't be any
64 * duplicates in the memcg lists as well.
65 */
66 if (!memcg && !strcmp(s->name, name)) {
67 pr_err("%s (%s): Cache name already exists.\n", 60 pr_err("%s (%s): Cache name already exists.\n",
68 __func__, name); 61 __func__, name);
69 dump_stack(); 62 dump_stack();
@@ -77,8 +70,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
77 return 0; 70 return 0;
78} 71}
79#else 72#else
80static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg, 73static inline int kmem_cache_sanity_check(const char *name, size_t size)
81 const char *name, size_t size)
82{ 74{
83 return 0; 75 return 0;
84} 76}
@@ -139,6 +131,46 @@ unsigned long calculate_alignment(unsigned long flags,
139 return ALIGN(align, sizeof(void *)); 131 return ALIGN(align, sizeof(void *));
140} 132}
141 133
134static struct kmem_cache *
135do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
136 unsigned long flags, void (*ctor)(void *),
137 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
138{
139 struct kmem_cache *s;
140 int err;
141
142 err = -ENOMEM;
143 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
144 if (!s)
145 goto out;
146
147 s->name = name;
148 s->object_size = object_size;
149 s->size = size;
150 s->align = align;
151 s->ctor = ctor;
152
153 err = memcg_alloc_cache_params(memcg, s, root_cache);
154 if (err)
155 goto out_free_cache;
156
157 err = __kmem_cache_create(s, flags);
158 if (err)
159 goto out_free_cache;
160
161 s->refcount = 1;
162 list_add(&s->list, &slab_caches);
163 memcg_register_cache(s);
164out:
165 if (err)
166 return ERR_PTR(err);
167 return s;
168
169out_free_cache:
170 memcg_free_cache_params(s);
171 kfree(s);
172 goto out;
173}
142 174
143/* 175/*
144 * kmem_cache_create - Create a cache. 176 * kmem_cache_create - Create a cache.
@@ -164,34 +196,21 @@ unsigned long calculate_alignment(unsigned long flags,
164 * cacheline. This can be beneficial if you're counting cycles as closely 196 * cacheline. This can be beneficial if you're counting cycles as closely
165 * as davem. 197 * as davem.
166 */ 198 */
167
168struct kmem_cache * 199struct kmem_cache *
169kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, 200kmem_cache_create(const char *name, size_t size, size_t align,
170 size_t align, unsigned long flags, void (*ctor)(void *), 201 unsigned long flags, void (*ctor)(void *))
171 struct kmem_cache *parent_cache)
172{ 202{
173 struct kmem_cache *s = NULL; 203 struct kmem_cache *s;
204 char *cache_name;
174 int err; 205 int err;
175 206
176 get_online_cpus(); 207 get_online_cpus();
177 mutex_lock(&slab_mutex); 208 mutex_lock(&slab_mutex);
178 209
179 err = kmem_cache_sanity_check(memcg, name, size); 210 err = kmem_cache_sanity_check(name, size);
180 if (err) 211 if (err)
181 goto out_unlock; 212 goto out_unlock;
182 213
183 if (memcg) {
184 /*
185 * Since per-memcg caches are created asynchronously on first
186 * allocation (see memcg_kmem_get_cache()), several threads can
187 * try to create the same cache, but only one of them may
188 * succeed. Therefore if we get here and see the cache has
189 * already been created, we silently return NULL.
190 */
191 if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg)))
192 goto out_unlock;
193 }
194
195 /* 214 /*
196 * Some allocators will constrain the set of valid flags to a subset 215
197 * of all flags. We expect them to define CACHE_CREATE_MASK in this 216 * of all flags. We expect them to define CACHE_CREATE_MASK in this
@@ -200,50 +219,29 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
200 */ 219 */
201 flags &= CACHE_CREATE_MASK; 220 flags &= CACHE_CREATE_MASK;
202 221
203 s = __kmem_cache_alias(memcg, name, size, align, flags, ctor); 222 s = __kmem_cache_alias(name, size, align, flags, ctor);
204 if (s) 223 if (s)
205 goto out_unlock; 224 goto out_unlock;
206 225
207 err = -ENOMEM; 226 cache_name = kstrdup(name, GFP_KERNEL);
208 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); 227 if (!cache_name) {
209 if (!s) 228 err = -ENOMEM;
210 goto out_unlock; 229 goto out_unlock;
230 }
211 231
212 s->object_size = s->size = size; 232 s = do_kmem_cache_create(cache_name, size, size,
213 s->align = calculate_alignment(flags, align, size); 233 calculate_alignment(flags, align, size),
214 s->ctor = ctor; 234 flags, ctor, NULL, NULL);
215 235 if (IS_ERR(s)) {
216 s->name = kstrdup(name, GFP_KERNEL); 236 err = PTR_ERR(s);
217 if (!s->name) 237 kfree(cache_name);
218 goto out_free_cache; 238 }
219
220 err = memcg_alloc_cache_params(memcg, s, parent_cache);
221 if (err)
222 goto out_free_cache;
223
224 err = __kmem_cache_create(s, flags);
225 if (err)
226 goto out_free_cache;
227
228 s->refcount = 1;
229 list_add(&s->list, &slab_caches);
230 memcg_register_cache(s);
231 239
232out_unlock: 240out_unlock:
233 mutex_unlock(&slab_mutex); 241 mutex_unlock(&slab_mutex);
234 put_online_cpus(); 242 put_online_cpus();
235 243
236 if (err) { 244 if (err) {
237 /*
238 * There is no point in flooding logs with warnings or
239 * especially crashing the system if we fail to create a cache
240 * for a memcg. In this case we will be accounting the memcg
241 * allocation to the root cgroup until we succeed to create its
242 * own cache, but it isn't that critical.
243 */
244 if (!memcg)
245 return NULL;
246
247 if (flags & SLAB_PANIC) 245 if (flags & SLAB_PANIC)
248 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", 246 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
249 name, err); 247 name, err);
@@ -255,52 +253,112 @@ out_unlock:
255 return NULL; 253 return NULL;
256 } 254 }
257 return s; 255 return s;
256}
257EXPORT_SYMBOL(kmem_cache_create);
258 258
259out_free_cache: 259#ifdef CONFIG_MEMCG_KMEM
260 memcg_free_cache_params(s); 260/*
261 kfree(s->name); 261 * kmem_cache_create_memcg - Create a cache for a memory cgroup.
262 kmem_cache_free(kmem_cache, s); 262 * @memcg: The memory cgroup the new cache is for.
263 goto out_unlock; 263 * @root_cache: The parent of the new cache.
264 *
265 * This function attempts to create a kmem cache that will serve allocation
266 * requests going from @memcg to @root_cache. The new cache inherits properties
267 * from its parent.
268 */
269void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
270{
271 struct kmem_cache *s;
272 char *cache_name;
273
274 get_online_cpus();
275 mutex_lock(&slab_mutex);
276
277 /*
278 * Since per-memcg caches are created asynchronously on first
279 * allocation (see memcg_kmem_get_cache()), several threads can try to
280 * create the same cache, but only one of them may succeed.
281 */
282 if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
283 goto out_unlock;
284
285 cache_name = memcg_create_cache_name(memcg, root_cache);
286 if (!cache_name)
287 goto out_unlock;
288
289 s = do_kmem_cache_create(cache_name, root_cache->object_size,
290 root_cache->size, root_cache->align,
291 root_cache->flags, root_cache->ctor,
292 memcg, root_cache);
293 if (IS_ERR(s)) {
294 kfree(cache_name);
295 goto out_unlock;
296 }
297
298 s->allocflags |= __GFP_KMEMCG;
299
300out_unlock:
301 mutex_unlock(&slab_mutex);
302 put_online_cpus();
264} 303}
265 304
266struct kmem_cache * 305static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
267kmem_cache_create(const char *name, size_t size, size_t align,
268 unsigned long flags, void (*ctor)(void *))
269{ 306{
270 return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL); 307 int rc;
308
309 if (!s->memcg_params ||
310 !s->memcg_params->is_root_cache)
311 return 0;
312
313 mutex_unlock(&slab_mutex);
314 rc = __kmem_cache_destroy_memcg_children(s);
315 mutex_lock(&slab_mutex);
316
317 return rc;
271} 318}
272EXPORT_SYMBOL(kmem_cache_create); 319#else
320static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
321{
322 return 0;
323}
324#endif /* CONFIG_MEMCG_KMEM */
273 325
274void kmem_cache_destroy(struct kmem_cache *s) 326void kmem_cache_destroy(struct kmem_cache *s)
275{ 327{
276 /* Destroy all the children caches if we aren't a memcg cache */
277 kmem_cache_destroy_memcg_children(s);
278
279 get_online_cpus(); 328 get_online_cpus();
280 mutex_lock(&slab_mutex); 329 mutex_lock(&slab_mutex);
330
281 s->refcount--; 331 s->refcount--;
282 if (!s->refcount) { 332 if (s->refcount)
283 list_del(&s->list); 333 goto out_unlock;
284 334
285 if (!__kmem_cache_shutdown(s)) { 335 if (kmem_cache_destroy_memcg_children(s) != 0)
286 memcg_unregister_cache(s); 336 goto out_unlock;
287 mutex_unlock(&slab_mutex); 337
288 if (s->flags & SLAB_DESTROY_BY_RCU) 338 list_del(&s->list);
289 rcu_barrier(); 339 memcg_unregister_cache(s);
290 340
291 memcg_free_cache_params(s); 341 if (__kmem_cache_shutdown(s) != 0) {
292 kfree(s->name); 342 list_add(&s->list, &slab_caches);
293 kmem_cache_free(kmem_cache, s); 343 memcg_register_cache(s);
294 } else { 344 printk(KERN_ERR "kmem_cache_destroy %s: "
295 list_add(&s->list, &slab_caches); 345 "Slab cache still has objects\n", s->name);
296 mutex_unlock(&slab_mutex); 346 dump_stack();
297 printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n", 347 goto out_unlock;
298 s->name);
299 dump_stack();
300 }
301 } else {
302 mutex_unlock(&slab_mutex);
303 } 348 }
349
350 mutex_unlock(&slab_mutex);
351 if (s->flags & SLAB_DESTROY_BY_RCU)
352 rcu_barrier();
353
354 memcg_free_cache_params(s);
355 kfree(s->name);
356 kmem_cache_free(kmem_cache, s);
357 goto out_put_cpus;
358
359out_unlock:
360 mutex_unlock(&slab_mutex);
361out_put_cpus:
304 put_online_cpus(); 362 put_online_cpus();
305} 363}
306EXPORT_SYMBOL(kmem_cache_destroy); 364EXPORT_SYMBOL(kmem_cache_destroy);
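With the memcg plumbing split out into kmem_cache_create_memcg(), kmem_cache_create() keeps its familiar five-argument form and still returns NULL (or panics with SLAB_PANIC) on failure. A minimal usage sketch, with made-up cache and structure names:

	#include <linux/module.h>
	#include <linux/slab.h>

	struct foo {
		int id;
		char name[32];
	};

	static struct kmem_cache *foo_cachep;

	static int __init foo_init(void)
	{
		foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
					       0, SLAB_HWCACHE_ALIGN, NULL);
		if (!foo_cachep)
			return -ENOMEM;
		return 0;
	}

	static void __exit foo_exit(void)
	{
		/* all objects must already be freed; destroy may now also have
		 * to tear down per-memcg children, as the rework above shows */
		kmem_cache_destroy(foo_cachep);
	}

	module_init(foo_init);
	module_exit(foo_exit);

Note the reordered kmem_cache_destroy(): the refcount drop and memcg-children teardown happen before the cache is unlinked, and the list_add()/memcg_register_cache() pair undoes the removal if __kmem_cache_shutdown() finds live objects.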
diff --git a/mm/slub.c b/mm/slub.c
index fe6d7be22ef0..f620bbf4054a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -224,7 +224,11 @@ static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
224static inline void stat(const struct kmem_cache *s, enum stat_item si) 224static inline void stat(const struct kmem_cache *s, enum stat_item si)
225{ 225{
226#ifdef CONFIG_SLUB_STATS 226#ifdef CONFIG_SLUB_STATS
227 __this_cpu_inc(s->cpu_slab->stat[si]); 227 /*
228 * The rmw is racy on a preemptible kernel but this is acceptable, so
229 * avoid this_cpu_add()'s irq-disable overhead.
230 */
231 raw_cpu_inc(s->cpu_slab->stat[si]);
228#endif 232#endif
229} 233}
230 234
@@ -1685,7 +1689,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1685 1689
1686 do { 1690 do {
1687 cpuset_mems_cookie = read_mems_allowed_begin(); 1691 cpuset_mems_cookie = read_mems_allowed_begin();
1688 zonelist = node_zonelist(slab_node(), flags); 1692 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1689 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1693 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1690 struct kmem_cache_node *n; 1694 struct kmem_cache_node *n;
1691 1695
@@ -3685,6 +3689,9 @@ static int slab_unmergeable(struct kmem_cache *s)
3685 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3689 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3686 return 1; 3690 return 1;
3687 3691
3692 if (!is_root_cache(s))
3693 return 1;
3694
3688 if (s->ctor) 3695 if (s->ctor)
3689 return 1; 3696 return 1;
3690 3697
@@ -3697,9 +3704,8 @@ static int slab_unmergeable(struct kmem_cache *s)
3697 return 0; 3704 return 0;
3698} 3705}
3699 3706
3700static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, 3707static struct kmem_cache *find_mergeable(size_t size, size_t align,
3701 size_t align, unsigned long flags, const char *name, 3708 unsigned long flags, const char *name, void (*ctor)(void *))
3702 void (*ctor)(void *))
3703{ 3709{
3704 struct kmem_cache *s; 3710 struct kmem_cache *s;
3705 3711
@@ -3722,7 +3728,7 @@ static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
3722 continue; 3728 continue;
3723 3729
3724 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) 3730 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3725 continue; 3731 continue;
3726 /* 3732 /*
3727 * Check if alignment is compatible. 3733 * Check if alignment is compatible.
3728 * Courtesy of Adrian Drzewiecki 3734 * Courtesy of Adrian Drzewiecki
@@ -3733,23 +3739,24 @@ static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
3733 if (s->size - size >= sizeof(void *)) 3739 if (s->size - size >= sizeof(void *))
3734 continue; 3740 continue;
3735 3741
3736 if (!cache_match_memcg(s, memcg))
3737 continue;
3738
3739 return s; 3742 return s;
3740 } 3743 }
3741 return NULL; 3744 return NULL;
3742} 3745}
3743 3746
3744struct kmem_cache * 3747struct kmem_cache *
3745__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, 3748__kmem_cache_alias(const char *name, size_t size, size_t align,
3746 size_t align, unsigned long flags, void (*ctor)(void *)) 3749 unsigned long flags, void (*ctor)(void *))
3747{ 3750{
3748 struct kmem_cache *s; 3751 struct kmem_cache *s;
3749 3752
3750 s = find_mergeable(memcg, size, align, flags, name, ctor); 3753 s = find_mergeable(size, align, flags, name, ctor);
3751 if (s) { 3754 if (s) {
3755 int i;
3756 struct kmem_cache *c;
3757
3752 s->refcount++; 3758 s->refcount++;
3759
3753 /* 3760 /*
3754 * Adjust the object sizes so that we clear 3761 * Adjust the object sizes so that we clear
3755 * the complete object on kzalloc. 3762 * the complete object on kzalloc.
@@ -3757,6 +3764,15 @@ __kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
3757 s->object_size = max(s->object_size, (int)size); 3764 s->object_size = max(s->object_size, (int)size);
3758 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3765 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3759 3766
3767 for_each_memcg_cache_index(i) {
3768 c = cache_from_memcg_idx(s, i);
3769 if (!c)
3770 continue;
3771 c->object_size = s->object_size;
3772 c->inuse = max_t(int, c->inuse,
3773 ALIGN(size, sizeof(void *)));
3774 }
3775
3760 if (sysfs_slab_alias(s, name)) { 3776 if (sysfs_slab_alias(s, name)) {
3761 s->refcount--; 3777 s->refcount--;
3762 s = NULL; 3778 s = NULL;
@@ -5126,6 +5142,15 @@ static const struct kset_uevent_ops slab_uevent_ops = {
5126 5142
5127static struct kset *slab_kset; 5143static struct kset *slab_kset;
5128 5144
5145static inline struct kset *cache_kset(struct kmem_cache *s)
5146{
5147#ifdef CONFIG_MEMCG_KMEM
5148 if (!is_root_cache(s))
5149 return s->memcg_params->root_cache->memcg_kset;
5150#endif
5151 return slab_kset;
5152}
5153
5129#define ID_STR_LENGTH 64 5154#define ID_STR_LENGTH 64
5130 5155
5131/* Create a unique string id for a slab cache: 5156/* Create a unique string id for a slab cache:
@@ -5191,26 +5216,39 @@ static int sysfs_slab_add(struct kmem_cache *s)
5191 name = create_unique_id(s); 5216 name = create_unique_id(s);
5192 } 5217 }
5193 5218
5194 s->kobj.kset = slab_kset; 5219 s->kobj.kset = cache_kset(s);
5195 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name); 5220 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5196 if (err) { 5221 if (err)
5197 kobject_put(&s->kobj); 5222 goto out_put_kobj;
5198 return err;
5199 }
5200 5223
5201 err = sysfs_create_group(&s->kobj, &slab_attr_group); 5224 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5202 if (err) { 5225 if (err)
5203 kobject_del(&s->kobj); 5226 goto out_del_kobj;
5204 kobject_put(&s->kobj); 5227
5205 return err; 5228#ifdef CONFIG_MEMCG_KMEM
5229 if (is_root_cache(s)) {
5230 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5231 if (!s->memcg_kset) {
5232 err = -ENOMEM;
5233 goto out_del_kobj;
5234 }
5206 } 5235 }
5236#endif
5237
5207 kobject_uevent(&s->kobj, KOBJ_ADD); 5238 kobject_uevent(&s->kobj, KOBJ_ADD);
5208 if (!unmergeable) { 5239 if (!unmergeable) {
5209 /* Setup first alias */ 5240 /* Setup first alias */
5210 sysfs_slab_alias(s, s->name); 5241 sysfs_slab_alias(s, s->name);
5211 kfree(name);
5212 } 5242 }
5213 return 0; 5243out:
5244 if (!unmergeable)
5245 kfree(name);
5246 return err;
5247out_del_kobj:
5248 kobject_del(&s->kobj);
5249out_put_kobj:
5250 kobject_put(&s->kobj);
5251 goto out;
5214} 5252}
5215 5253
5216static void sysfs_slab_remove(struct kmem_cache *s) 5254static void sysfs_slab_remove(struct kmem_cache *s)
@@ -5222,6 +5260,9 @@ static void sysfs_slab_remove(struct kmem_cache *s)
5222 */ 5260 */
5223 return; 5261 return;
5224 5262
5263#ifdef CONFIG_MEMCG_KMEM
5264 kset_unregister(s->memcg_kset);
5265#endif
5225 kobject_uevent(&s->kobj, KOBJ_REMOVE); 5266 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5226 kobject_del(&s->kobj); 5267 kobject_del(&s->kobj);
5227 kobject_put(&s->kobj); 5268 kobject_put(&s->kobj);
diff --git a/mm/sparse.c b/mm/sparse.c
index 38cad8fd7397..d1b48b691ac8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -5,10 +5,12 @@
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/mmzone.h> 6#include <linux/mmzone.h>
7#include <linux/bootmem.h> 7#include <linux/bootmem.h>
8#include <linux/compiler.h>
8#include <linux/highmem.h> 9#include <linux/highmem.h>
9#include <linux/export.h> 10#include <linux/export.h>
10#include <linux/spinlock.h> 11#include <linux/spinlock.h>
11#include <linux/vmalloc.h> 12#include <linux/vmalloc.h>
13
12#include "internal.h" 14#include "internal.h"
13#include <asm/dma.h> 15#include <asm/dma.h>
14#include <asm/pgalloc.h> 16#include <asm/pgalloc.h>
@@ -461,7 +463,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
461} 463}
462#endif 464#endif
463 465
464void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) 466void __weak __meminit vmemmap_populate_print_last(void)
465{ 467{
466} 468}
467 469
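__weak (from <linux/compiler.h>, which the hunk now includes explicitly) is shorthand for __attribute__((weak)): the definition acts as a link-time default that an architecture can override with an ordinary strong definition of the same symbol. A sketch of the pattern with hypothetical function names, split across two translation units:

	/* lib/example.c: generic fallback, used when no arch provides one */
	int __weak arch_example_quirk(void)
	{
		return 0;	/* nothing to do on most architectures */
	}

	/* arch/foo/kernel/example.c: strong definition, wins at link time */
	int arch_example_quirk(void)
	{
		return 1;	/* arch-specific behaviour */
	}

The conversions in this series (vmemmap_populate_print_last, vmalloc_sync_all, the get_user_pages_fast stubs) change spelling only, not semantics.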
diff --git a/mm/util.c b/mm/util.c
index a24aa22f2473..d7813e6d4cc7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1,6 +1,7 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/slab.h> 2#include <linux/slab.h>
3#include <linux/string.h> 3#include <linux/string.h>
4#include <linux/compiler.h>
4#include <linux/export.h> 5#include <linux/export.h>
5#include <linux/err.h> 6#include <linux/err.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
@@ -307,7 +308,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
307 * If the architecture does not support this function, simply return with no 308
308 * page pinned 309 * page pinned
309 */ 310 */
310int __attribute__((weak)) __get_user_pages_fast(unsigned long start, 311int __weak __get_user_pages_fast(unsigned long start,
311 int nr_pages, int write, struct page **pages) 312 int nr_pages, int write, struct page **pages)
312{ 313{
313 return 0; 314 return 0;
@@ -338,7 +339,7 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
338 * callers need to carefully consider what to use. On many architectures, 339 * callers need to carefully consider what to use. On many architectures,
339 * get_user_pages_fast simply falls back to get_user_pages. 340 * get_user_pages_fast simply falls back to get_user_pages.
340 */ 341 */
341int __attribute__((weak)) get_user_pages_fast(unsigned long start, 342int __weak get_user_pages_fast(unsigned long start,
342 int nr_pages, int write, struct page **pages) 343 int nr_pages, int write, struct page **pages)
343{ 344{
344 struct mm_struct *mm = current->mm; 345 struct mm_struct *mm = current->mm;
diff --git a/mm/vmacache.c b/mm/vmacache.c
new file mode 100644
index 000000000000..d4224b397c0e
--- /dev/null
+++ b/mm/vmacache.c
@@ -0,0 +1,112 @@
1/*
2 * Copyright (C) 2014 Davidlohr Bueso.
3 */
4#include <linux/sched.h>
5#include <linux/mm.h>
6#include <linux/vmacache.h>
7
8/*
9 * Flush vma caches for threads that share a given mm.
10 *
11 * The operation is safe because the caller holds the mmap_sem
12 * exclusively and other threads accessing the vma cache will
13 * have mmap_sem held at least for read, so no extra locking
14 * is required to maintain the vma cache.
15 */
16void vmacache_flush_all(struct mm_struct *mm)
17{
18 struct task_struct *g, *p;
19
20 rcu_read_lock();
21 for_each_process_thread(g, p) {
22 /*
23 * Only flush the vmacache pointers as the
24 * mm seqnum is already set and curr's will
25 * be set upon invalidation when the next
26 * lookup is done.
27 */
28 if (mm == p->mm)
29 vmacache_flush(p);
30 }
31 rcu_read_unlock();
32}
33
34/*
35 * This task may be accessing a foreign mm via (for example)
36 * get_user_pages()->find_vma(). The vmacache is task-local and this
37 * task's vmacache pertains to a different mm (ie, its own). There is
38 * nothing we can do here.
39 *
40 * Also handle the case where a kernel thread has adopted this mm via use_mm().
41 * That kernel thread's vmacache is not applicable to this mm.
42 */
43static bool vmacache_valid_mm(struct mm_struct *mm)
44{
45 return current->mm == mm && !(current->flags & PF_KTHREAD);
46}
47
48void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
49{
50 if (vmacache_valid_mm(newvma->vm_mm))
51 current->vmacache[VMACACHE_HASH(addr)] = newvma;
52}
53
54static bool vmacache_valid(struct mm_struct *mm)
55{
56 struct task_struct *curr;
57
58 if (!vmacache_valid_mm(mm))
59 return false;
60
61 curr = current;
62 if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
63 /*
64 * First attempt will always be invalid, initialize
65 * the new cache for this task here.
66 */
67 curr->vmacache_seqnum = mm->vmacache_seqnum;
68 vmacache_flush(curr);
69 return false;
70 }
71 return true;
72}
73
74struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
75{
76 int i;
77
78 if (!vmacache_valid(mm))
79 return NULL;
80
81 for (i = 0; i < VMACACHE_SIZE; i++) {
82 struct vm_area_struct *vma = current->vmacache[i];
83
84 if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
85 BUG_ON(vma->vm_mm != mm);
86 return vma;
87 }
88 }
89
90 return NULL;
91}
92
93#ifndef CONFIG_MMU
94struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
95 unsigned long start,
96 unsigned long end)
97{
98 int i;
99
100 if (!vmacache_valid(mm))
101 return NULL;
102
103 for (i = 0; i < VMACACHE_SIZE; i++) {
104 struct vm_area_struct *vma = current->vmacache[i];
105
106 if (vma && vma->vm_start == start && vma->vm_end == end)
107 return vma;
108 }
109
110 return NULL;
111}
112#endif
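The new per-task cache is meant to sit in front of the mm rbtree: a lookup first probes the few cached slots and only on a miss walks the tree, recording the result for next time. A sketch of how a caller such as find_vma() is expected to use it (the slow-path helper name here is hypothetical):

	struct vm_area_struct *lookup_vma(struct mm_struct *mm, unsigned long addr)
	{
		struct vm_area_struct *vma;

		vma = vmacache_find(mm, addr);		/* O(1), per-task slots */
		if (vma)
			return vma;

		vma = rbtree_search_vma(mm, addr);	/* hypothetical slow path */
		if (vma)
			vmacache_update(addr, vma);	/* cache for the next fault */
		return vma;
	}

Invalidation is indirect: writers bump mm->vmacache_seqnum, and vmacache_valid() flushes a task's slots the first time it notices the sequence number changed.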
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0fdf96803c5b..bf233b283319 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -27,7 +27,9 @@
27#include <linux/pfn.h> 27#include <linux/pfn.h>
28#include <linux/kmemleak.h> 28#include <linux/kmemleak.h>
29#include <linux/atomic.h> 29#include <linux/atomic.h>
30#include <linux/compiler.h>
30#include <linux/llist.h> 31#include <linux/llist.h>
32
31#include <asm/uaccess.h> 33#include <asm/uaccess.h>
32#include <asm/tlbflush.h> 34#include <asm/tlbflush.h>
33#include <asm/shmparam.h> 35#include <asm/shmparam.h>
@@ -1083,6 +1085,12 @@ EXPORT_SYMBOL(vm_unmap_ram);
1083 * @node: prefer to allocate data structures on this node 1085 * @node: prefer to allocate data structures on this node
1084 * @prot: memory protection to use. PAGE_KERNEL for regular RAM 1086 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
1085 * 1087 *
1088 * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
1089 * faster than vmap so it's good. But if you mix long-life and short-life
1090 * objects with vm_map_ram(), it could consume lots of address space through
1091 * fragmentation (especially on a 32bit machine). You could see failures in
1092 * the end. Please use this function for short-lived objects.
1093 *
1086 * Returns: a pointer to the address that has been mapped, or %NULL on failure 1094 * Returns: a pointer to the address that has been mapped, or %NULL on failure
1087 */ 1095 */
1088void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 1096void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
@@ -2181,7 +2189,7 @@ EXPORT_SYMBOL(remap_vmalloc_range);
2181 * Implement a stub for vmalloc_sync_all() if the architecture chose not to 2189 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
2182 * have one. 2190 * have one.
2183 */ 2191 */
2184void __attribute__((weak)) vmalloc_sync_all(void) 2192void __weak vmalloc_sync_all(void)
2185{ 2193{
2186} 2194}
2187 2195
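The new kernel-doc paragraph for vm_map_ram() warns that it only pays off for small, short-lived mappings (under VMAP_MAX_ALLOC pages) and that mixing lifetimes fragments the vmalloc address space. A minimal usage sketch honouring that advice (node -1 means any node; page allocation and error handling trimmed):

	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	static void *map_briefly(struct page **pages, unsigned int count)
	{
		/* short-lived mapping of a few pages, torn down promptly */
		return vm_map_ram(pages, count, -1, PAGE_KERNEL);
	}

	static void unmap_briefly(void *addr, unsigned int count)
	{
		vm_unmap_ram(addr, count);
	}

For long-lived or large mappings, plain vmap()/vunmap() remains the better choice.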
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1f56a80a7c41..06879ead7380 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2314,15 +2314,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2314 unsigned long lru_pages = 0; 2314 unsigned long lru_pages = 0;
2315 bool aborted_reclaim = false; 2315 bool aborted_reclaim = false;
2316 struct reclaim_state *reclaim_state = current->reclaim_state; 2316 struct reclaim_state *reclaim_state = current->reclaim_state;
2317 gfp_t orig_mask;
2317 struct shrink_control shrink = { 2318 struct shrink_control shrink = {
2318 .gfp_mask = sc->gfp_mask, 2319 .gfp_mask = sc->gfp_mask,
2319 }; 2320 };
2321 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
2320 2322
2321 /* 2323 /*
2322 * If the number of buffer_heads in the machine exceeds the maximum 2324 * If the number of buffer_heads in the machine exceeds the maximum
2323 * allowed level, force direct reclaim to scan the highmem zone as 2325 * allowed level, force direct reclaim to scan the highmem zone as
2324 * highmem pages could be pinning lowmem pages storing buffer_heads 2326 * highmem pages could be pinning lowmem pages storing buffer_heads
2325 */ 2327 */
2328 orig_mask = sc->gfp_mask;
2326 if (buffer_heads_over_limit) 2329 if (buffer_heads_over_limit)
2327 sc->gfp_mask |= __GFP_HIGHMEM; 2330 sc->gfp_mask |= __GFP_HIGHMEM;
2328 2331
@@ -2356,7 +2359,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2356 * noticeable problem, like transparent huge 2359 * noticeable problem, like transparent huge
2357 * page allocations. 2360 * page allocations.
2358 */ 2361 */
2359 if (compaction_ready(zone, sc)) { 2362 if ((zonelist_zone_idx(z) <= requested_highidx)
2363 && compaction_ready(zone, sc)) {
2360 aborted_reclaim = true; 2364 aborted_reclaim = true;
2361 continue; 2365 continue;
2362 } 2366 }
@@ -2393,6 +2397,12 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2393 } 2397 }
2394 } 2398 }
2395 2399
2400 /*
2401 * Restore to original mask to avoid the impact on the caller if we
2402 * promoted it to __GFP_HIGHMEM.
2403 */
2404 sc->gfp_mask = orig_mask;
2405
2396 return aborted_reclaim; 2406 return aborted_reclaim;
2397} 2407}
2398 2408
diff --git a/mm/zswap.c b/mm/zswap.c
index d7337fbf6605..aeaef0fb5624 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -89,6 +89,9 @@ static unsigned int zswap_max_pool_percent = 20;
89module_param_named(max_pool_percent, 89module_param_named(max_pool_percent,
90 zswap_max_pool_percent, uint, 0644); 90 zswap_max_pool_percent, uint, 0644);
91 91
92/* zbud_pool is shared by all of zswap backend */
93static struct zbud_pool *zswap_pool;
94
92/********************************* 95/*********************************
93* compression functions 96* compression functions
94**********************************/ 97**********************************/
@@ -160,14 +163,14 @@ static void zswap_comp_exit(void)
160 * rbnode - links the entry into red-black tree for the appropriate swap type 163 * rbnode - links the entry into red-black tree for the appropriate swap type
161 * refcount - the number of outstanding reference to the entry. This is needed 164 * refcount - the number of outstanding reference to the entry. This is needed
162 * to protect against premature freeing of the entry by code 165 * to protect against premature freeing of the entry by code
163 * concurent calls to load, invalidate, and writeback. The lock 166 * concurrent calls to load, invalidate, and writeback. The lock
164 * for the zswap_tree structure that contains the entry must 167 * for the zswap_tree structure that contains the entry must
165 * be held while changing the refcount. Since the lock must 168 * be held while changing the refcount. Since the lock must
166 * be held, there is no reason to also make refcount atomic. 169 * be held, there is no reason to also make refcount atomic.
167 * offset - the swap offset for the entry. Index into the red-black tree. 170 * offset - the swap offset for the entry. Index into the red-black tree.
168 * handle - zsmalloc allocation handle that stores the compressed page data 171 * handle - zbud allocation handle that stores the compressed page data
169 * length - the length in bytes of the compressed page data. Needed during 172 * length - the length in bytes of the compressed page data. Needed during
170 * decompression 173 * decompression
171 */ 174 */
172struct zswap_entry { 175struct zswap_entry {
173 struct rb_node rbnode; 176 struct rb_node rbnode;
@@ -189,7 +192,6 @@ struct zswap_header {
189struct zswap_tree { 192struct zswap_tree {
190 struct rb_root rbroot; 193 struct rb_root rbroot;
191 spinlock_t lock; 194 spinlock_t lock;
192 struct zbud_pool *pool;
193}; 195};
194 196
195static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; 197static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -202,7 +204,7 @@ static struct kmem_cache *zswap_entry_cache;
202static int zswap_entry_cache_create(void) 204static int zswap_entry_cache_create(void)
203{ 205{
204 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); 206 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
205 return (zswap_entry_cache == NULL); 207 return zswap_entry_cache == NULL;
206} 208}
207 209
208static void zswap_entry_cache_destory(void) 210static void zswap_entry_cache_destory(void)
@@ -282,16 +284,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
282} 284}
283 285
284/* 286/*
285 * Carries out the common pattern of freeing an entry's zsmalloc allocation, 287 * Carries out the common pattern of freeing an entry's zbud allocation,
286 * freeing the entry itself, and decrementing the number of stored pages. 288 * freeing the entry itself, and decrementing the number of stored pages.
287 */ 289 */
288static void zswap_free_entry(struct zswap_tree *tree, 290static void zswap_free_entry(struct zswap_entry *entry)
289 struct zswap_entry *entry)
290{ 291{
291 zbud_free(tree->pool, entry->handle); 292 zbud_free(zswap_pool, entry->handle);
292 zswap_entry_cache_free(entry); 293 zswap_entry_cache_free(entry);
293 atomic_dec(&zswap_stored_pages); 294 atomic_dec(&zswap_stored_pages);
294 zswap_pool_pages = zbud_get_pool_size(tree->pool); 295 zswap_pool_pages = zbud_get_pool_size(zswap_pool);
295} 296}
296 297
297/* caller must hold the tree lock */ 298/* caller must hold the tree lock */
@@ -311,7 +312,7 @@ static void zswap_entry_put(struct zswap_tree *tree,
311 BUG_ON(refcount < 0); 312 BUG_ON(refcount < 0);
312 if (refcount == 0) { 313 if (refcount == 0) {
313 zswap_rb_erase(&tree->rbroot, entry); 314 zswap_rb_erase(&tree->rbroot, entry);
314 zswap_free_entry(tree, entry); 315 zswap_free_entry(entry);
315 } 316 }
316} 317}
317 318
@@ -407,8 +408,8 @@ cleanup:
407**********************************/ 408**********************************/
408static bool zswap_is_full(void) 409static bool zswap_is_full(void)
409{ 410{
410 return (totalram_pages * zswap_max_pool_percent / 100 < 411 return totalram_pages * zswap_max_pool_percent / 100 <
411 zswap_pool_pages); 412 zswap_pool_pages;
412} 413}
413 414
414/********************************* 415/*********************************
@@ -545,7 +546,6 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
545 zbud_unmap(pool, handle); 546 zbud_unmap(pool, handle);
546 tree = zswap_trees[swp_type(swpentry)]; 547 tree = zswap_trees[swp_type(swpentry)];
547 offset = swp_offset(swpentry); 548 offset = swp_offset(swpentry);
548 BUG_ON(pool != tree->pool);
549 549
550 /* find and ref zswap entry */ 550 /* find and ref zswap entry */
551 spin_lock(&tree->lock); 551 spin_lock(&tree->lock);
@@ -573,13 +573,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
574 /* decompress */ 574 /* decompress */
575 dlen = PAGE_SIZE; 575 dlen = PAGE_SIZE;
576 src = (u8 *)zbud_map(tree->pool, entry->handle) + 576 src = (u8 *)zbud_map(zswap_pool, entry->handle) +
577 sizeof(struct zswap_header); 577 sizeof(struct zswap_header);
578 dst = kmap_atomic(page); 578 dst = kmap_atomic(page);
579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
580 entry->length, dst, &dlen); 580 entry->length, dst, &dlen);
581 kunmap_atomic(dst); 581 kunmap_atomic(dst);
582 zbud_unmap(tree->pool, entry->handle); 582 zbud_unmap(zswap_pool, entry->handle);
583 BUG_ON(ret); 583 BUG_ON(ret);
584 BUG_ON(dlen != PAGE_SIZE); 584 BUG_ON(dlen != PAGE_SIZE);
585 585
@@ -652,7 +652,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
652 /* reclaim space if needed */ 652 /* reclaim space if needed */
653 if (zswap_is_full()) { 653 if (zswap_is_full()) {
654 zswap_pool_limit_hit++; 654 zswap_pool_limit_hit++;
655 if (zbud_reclaim_page(tree->pool, 8)) { 655 if (zbud_reclaim_page(zswap_pool, 8)) {
656 zswap_reject_reclaim_fail++; 656 zswap_reject_reclaim_fail++;
657 ret = -ENOMEM; 657 ret = -ENOMEM;
658 goto reject; 658 goto reject;
@@ -679,7 +679,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
679 679
680 /* store */ 680 /* store */
681 len = dlen + sizeof(struct zswap_header); 681 len = dlen + sizeof(struct zswap_header);
682 ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN, 682 ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
683 &handle); 683 &handle);
684 if (ret == -ENOSPC) { 684 if (ret == -ENOSPC) {
685 zswap_reject_compress_poor++; 685 zswap_reject_compress_poor++;
@@ -689,11 +689,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
689 zswap_reject_alloc_fail++; 689 zswap_reject_alloc_fail++;
690 goto freepage; 690 goto freepage;
691 } 691 }
692 zhdr = zbud_map(tree->pool, handle); 692 zhdr = zbud_map(zswap_pool, handle);
693 zhdr->swpentry = swp_entry(type, offset); 693 zhdr->swpentry = swp_entry(type, offset);
694 buf = (u8 *)(zhdr + 1); 694 buf = (u8 *)(zhdr + 1);
695 memcpy(buf, dst, dlen); 695 memcpy(buf, dst, dlen);
696 zbud_unmap(tree->pool, handle); 696 zbud_unmap(zswap_pool, handle);
697 put_cpu_var(zswap_dstmem); 697 put_cpu_var(zswap_dstmem);
698 698
699 /* populate entry */ 699 /* populate entry */
@@ -716,7 +716,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
716 716
717 /* update stats */ 717 /* update stats */
718 atomic_inc(&zswap_stored_pages); 718 atomic_inc(&zswap_stored_pages);
719 zswap_pool_pages = zbud_get_pool_size(tree->pool); 719 zswap_pool_pages = zbud_get_pool_size(zswap_pool);
720 720
721 return 0; 721 return 0;
722 722
@@ -752,13 +752,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
752 752
753 /* decompress */ 753 /* decompress */
754 dlen = PAGE_SIZE; 754 dlen = PAGE_SIZE;
755 src = (u8 *)zbud_map(tree->pool, entry->handle) + 755 src = (u8 *)zbud_map(zswap_pool, entry->handle) +
756 sizeof(struct zswap_header); 756 sizeof(struct zswap_header);
757 dst = kmap_atomic(page); 757 dst = kmap_atomic(page);
758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, 758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
759 dst, &dlen); 759 dst, &dlen);
760 kunmap_atomic(dst); 760 kunmap_atomic(dst);
761 zbud_unmap(tree->pool, entry->handle); 761 zbud_unmap(zswap_pool, entry->handle);
762 BUG_ON(ret); 762 BUG_ON(ret);
763 763
764 spin_lock(&tree->lock); 764 spin_lock(&tree->lock);
@@ -804,11 +804,9 @@ static void zswap_frontswap_invalidate_area(unsigned type)
804 /* walk the tree and free everything */ 804 /* walk the tree and free everything */
805 spin_lock(&tree->lock); 805 spin_lock(&tree->lock);
806 rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) 806 rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
807 zswap_free_entry(tree, entry); 807 zswap_free_entry(entry);
808 tree->rbroot = RB_ROOT; 808 tree->rbroot = RB_ROOT;
809 spin_unlock(&tree->lock); 809 spin_unlock(&tree->lock);
810
811 zbud_destroy_pool(tree->pool);
812 kfree(tree); 810 kfree(tree);
813 zswap_trees[type] = NULL; 811 zswap_trees[type] = NULL;
814} 812}
@@ -822,20 +820,14 @@ static void zswap_frontswap_init(unsigned type)
822 struct zswap_tree *tree; 820 struct zswap_tree *tree;
823 821
824 tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL); 822 tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
825 if (!tree) 823 if (!tree) {
826 goto err; 824 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
827 tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); 825 return;
828 if (!tree->pool) 826 }
829 goto freetree; 827
830 tree->rbroot = RB_ROOT; 828 tree->rbroot = RB_ROOT;
831 spin_lock_init(&tree->lock); 829 spin_lock_init(&tree->lock);
832 zswap_trees[type] = tree; 830 zswap_trees[type] = tree;
833 return;
834
835freetree:
836 kfree(tree);
837err:
838 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
839} 831}
840 832
841static struct frontswap_ops zswap_frontswap_ops = { 833static struct frontswap_ops zswap_frontswap_ops = {
@@ -907,9 +899,16 @@ static int __init init_zswap(void)
907 return 0; 899 return 0;
908 900
909 pr_info("loading zswap\n"); 901 pr_info("loading zswap\n");
902
903 zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
904 if (!zswap_pool) {
905 pr_err("zbud pool creation failed\n");
906 goto error;
907 }
908
910 if (zswap_entry_cache_create()) { 909 if (zswap_entry_cache_create()) {
911 pr_err("entry cache creation failed\n"); 910 pr_err("entry cache creation failed\n");
912 goto error; 911 goto cachefail;
913 } 912 }
914 if (zswap_comp_init()) { 913 if (zswap_comp_init()) {
915 pr_err("compressor initialization failed\n"); 914 pr_err("compressor initialization failed\n");
@@ -919,6 +918,7 @@ static int __init init_zswap(void)
919 pr_err("per-cpu initialization failed\n"); 918 pr_err("per-cpu initialization failed\n");
920 goto pcpufail; 919 goto pcpufail;
921 } 920 }
921
922 frontswap_register_ops(&zswap_frontswap_ops); 922 frontswap_register_ops(&zswap_frontswap_ops);
923 if (zswap_debugfs_init()) 923 if (zswap_debugfs_init())
924 pr_warn("debugfs initialization failed\n"); 924 pr_warn("debugfs initialization failed\n");
@@ -927,6 +927,8 @@ pcpufail:
927 zswap_comp_exit(); 927 zswap_comp_exit();
928compfail: 928compfail:
929 zswap_entry_cache_destory(); 929 zswap_entry_cache_destory();
930cachefail:
931 zbud_destroy_pool(zswap_pool);
930error: 932error:
931 return -ENOMEM; 933 return -ENOMEM;
932} 934}
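With the per-tree pools collapsed into one global zswap_pool, every zbud call now goes through the same pool regardless of swap type, and the pool is created once in init_zswap() before any frontswap operation can run. A condensed sketch of the store-side round trip against a single pool, based only on the calls visible in this hunk (helper name is made up; error paths other than allocation are trimmed):

	#include <linux/string.h>
	#include <linux/zbud.h>

	static int store_compressed(struct zbud_pool *pool, const void *src,
				    size_t len, unsigned long *handle)
	{
		void *dst;
		int ret;

		ret = zbud_alloc(pool, len, __GFP_NORETRY | __GFP_NOWARN, handle);
		if (ret)
			return ret;

		dst = zbud_map(pool, *handle);	/* temporarily map the allocation */
		memcpy(dst, src, len);
		zbud_unmap(pool, *handle);
		return 0;
	}

Because the pool is global, invalidate_area() no longer destroys it; only the per-type tree is freed.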
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1be9e990514d..34d094cadb11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -188,7 +188,7 @@ const __u8 ip_tos2prio[16] = {
188EXPORT_SYMBOL(ip_tos2prio); 188EXPORT_SYMBOL(ip_tos2prio);
189 189
190static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 190static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
191#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) 191#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
192 192
193#ifdef CONFIG_PROC_FS 193#ifdef CONFIG_PROC_FS
194static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 194static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
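RT_CACHE_STAT_INC, like the SLUB stat() change earlier in this diff, switches from __this_cpu_inc() to the new raw_cpu_inc(): the raw variant does the per-cpu read-modify-write without any preemption or irq protection and without the debug checks, accepting that an increment may occasionally be lost, which is fine for statistics counters. A sketch with a hypothetical counter:

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, example_hits);

	static inline void count_example_hit(void)
	{
		/* racy on a preemptible kernel; acceptable for a statistic */
		raw_cpu_inc(example_hits);
	}

Counters whose exactness matters should keep using this_cpu_inc() or take explicit protection.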
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 87f723804079..f88d90f20228 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -1178,7 +1178,10 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode)
1178 sym->def[S_DEF_USER].tri = mod; 1178 sym->def[S_DEF_USER].tri = mod;
1179 break; 1179 break;
1180 case def_no: 1180 case def_no:
1181 sym->def[S_DEF_USER].tri = no; 1181 if (sym->flags & SYMBOL_ALLNOCONFIG_Y)
1182 sym->def[S_DEF_USER].tri = yes;
1183 else
1184 sym->def[S_DEF_USER].tri = no;
1182 break; 1185 break;
1183 case def_random: 1186 case def_random:
1184 sym->def[S_DEF_USER].tri = no; 1187 sym->def[S_DEF_USER].tri = no;
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index ba663e1dc7e3..412ea8a2abb8 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -109,6 +109,9 @@ struct symbol {
109/* choice values need to be set before calculating this symbol value */ 109/* choice values need to be set before calculating this symbol value */
110#define SYMBOL_NEED_SET_CHOICE_VALUES 0x100000 110#define SYMBOL_NEED_SET_CHOICE_VALUES 0x100000
111 111
112/* Set symbol to y if allnoconfig; used for symbols that hide others */
113#define SYMBOL_ALLNOCONFIG_Y 0x200000
114
112#define SYMBOL_MAXLENGTH 256 115#define SYMBOL_MAXLENGTH 256
113#define SYMBOL_HASHSIZE 9973 116#define SYMBOL_HASHSIZE 9973
114 117
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 09f4edfdc911..d5daa7af8b49 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -61,6 +61,7 @@ enum conf_def_mode {
61#define T_OPT_MODULES 1 61#define T_OPT_MODULES 1
62#define T_OPT_DEFCONFIG_LIST 2 62#define T_OPT_DEFCONFIG_LIST 2
63#define T_OPT_ENV 3 63#define T_OPT_ENV 3
64#define T_OPT_ALLNOCONFIG_Y 4
64 65
65struct kconf_id { 66struct kconf_id {
66 int name; 67 int name;
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index db1512ae30cc..3ac2c9c6e280 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -217,6 +217,9 @@ void menu_add_option(int token, char *arg)
217 case T_OPT_ENV: 217 case T_OPT_ENV:
218 prop_add_env(arg); 218 prop_add_env(arg);
219 break; 219 break;
220 case T_OPT_ALLNOCONFIG_Y:
221 current_entry->sym->flags |= SYMBOL_ALLNOCONFIG_Y;
222 break;
220 } 223 }
221} 224}
222 225
diff --git a/scripts/kconfig/zconf.gperf b/scripts/kconfig/zconf.gperf
index f14ab41154b6..b6ac02d604f1 100644
--- a/scripts/kconfig/zconf.gperf
+++ b/scripts/kconfig/zconf.gperf
@@ -44,4 +44,5 @@ on, T_ON, TF_PARAM
44modules, T_OPT_MODULES, TF_OPTION 44modules, T_OPT_MODULES, TF_OPTION
45defconfig_list, T_OPT_DEFCONFIG_LIST,TF_OPTION 45defconfig_list, T_OPT_DEFCONFIG_LIST,TF_OPTION
46env, T_OPT_ENV, TF_OPTION 46env, T_OPT_ENV, TF_OPTION
47allnoconfig_y, T_OPT_ALLNOCONFIG_Y,TF_OPTION
47%% 48%%
diff --git a/scripts/kconfig/zconf.hash.c_shipped b/scripts/kconfig/zconf.hash.c_shipped
index 40df0005daa9..c77a8eff1ef2 100644
--- a/scripts/kconfig/zconf.hash.c_shipped
+++ b/scripts/kconfig/zconf.hash.c_shipped
@@ -55,10 +55,10 @@ kconf_id_hash (register const char *str, register unsigned int len)
55 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 55 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
56 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 56 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
57 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 57 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
58 73, 73, 73, 73, 73, 73, 73, 73, 25, 25, 58 73, 73, 73, 73, 73, 73, 73, 5, 25, 25,
59 0, 0, 0, 5, 0, 0, 73, 73, 5, 0, 59 0, 0, 0, 5, 0, 0, 73, 73, 5, 0,
60 10, 5, 45, 73, 20, 20, 0, 15, 15, 73, 60 10, 5, 45, 73, 20, 20, 0, 15, 15, 73,
61 20, 73, 73, 73, 73, 73, 73, 73, 73, 73, 61 20, 5, 73, 73, 73, 73, 73, 73, 73, 73,
62 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 62 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
63 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 63 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
64 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 64 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
@@ -106,6 +106,7 @@ struct kconf_id_strings_t
106 char kconf_id_strings_str23[sizeof("mainmenu")]; 106 char kconf_id_strings_str23[sizeof("mainmenu")];
107 char kconf_id_strings_str25[sizeof("menuconfig")]; 107 char kconf_id_strings_str25[sizeof("menuconfig")];
108 char kconf_id_strings_str27[sizeof("modules")]; 108 char kconf_id_strings_str27[sizeof("modules")];
109 char kconf_id_strings_str28[sizeof("allnoconfig_y")];
109 char kconf_id_strings_str29[sizeof("menu")]; 110 char kconf_id_strings_str29[sizeof("menu")];
110 char kconf_id_strings_str31[sizeof("select")]; 111 char kconf_id_strings_str31[sizeof("select")];
111 char kconf_id_strings_str32[sizeof("comment")]; 112 char kconf_id_strings_str32[sizeof("comment")];
@@ -141,6 +142,7 @@ static const struct kconf_id_strings_t kconf_id_strings_contents =
141 "mainmenu", 142 "mainmenu",
142 "menuconfig", 143 "menuconfig",
143 "modules", 144 "modules",
145 "allnoconfig_y",
144 "menu", 146 "menu",
145 "select", 147 "select",
146 "comment", 148 "comment",
@@ -170,7 +172,7 @@ kconf_id_lookup (register const char *str, register unsigned int len)
170{ 172{
171 enum 173 enum
172 { 174 {
173 TOTAL_KEYWORDS = 32, 175 TOTAL_KEYWORDS = 33,
174 MIN_WORD_LENGTH = 2, 176 MIN_WORD_LENGTH = 2,
175 MAX_WORD_LENGTH = 14, 177 MAX_WORD_LENGTH = 14,
176 MIN_HASH_VALUE = 2, 178 MIN_HASH_VALUE = 2,
@@ -219,7 +221,8 @@ kconf_id_lookup (register const char *str, register unsigned int len)
219 {-1}, 221 {-1},
220#line 44 "scripts/kconfig/zconf.gperf" 222#line 44 "scripts/kconfig/zconf.gperf"
221 {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str27, T_OPT_MODULES, TF_OPTION}, 223 {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str27, T_OPT_MODULES, TF_OPTION},
222 {-1}, 224#line 47 "scripts/kconfig/zconf.gperf"
225 {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str28, T_OPT_ALLNOCONFIG_Y,TF_OPTION},
223#line 16 "scripts/kconfig/zconf.gperf" 226#line 16 "scripts/kconfig/zconf.gperf"
224 {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str29, T_MENU, TF_COMMAND}, 227 {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str29, T_MENU, TF_COMMAND},
225 {-1}, 228 {-1},
@@ -282,5 +285,5 @@ kconf_id_lookup (register const char *str, register unsigned int len)
282 } 285 }
283 return 0; 286 return 0;
284} 287}
285#line 47 "scripts/kconfig/zconf.gperf" 288#line 48 "scripts/kconfig/zconf.gperf"
286 289
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index affa13480659..0216475fc759 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -191,7 +191,7 @@ config SND_ES18XX
191 191
192config SND_SC6000 192config SND_SC6000
193 tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16" 193 tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16"
194 depends on HAS_IOPORT 194 depends on HAS_IOPORT_MAP
195 select SND_WSS_LIB 195 select SND_WSS_LIB
196 select SND_OPL3_LIB 196 select SND_OPL3_LIB
197 select SND_MPU401_UART 197 select SND_MPU401_UART
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 0b0c0cf13f74..3a3a3a71088b 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -688,7 +688,7 @@ config SND_LOLA
688 688
689config SND_LX6464ES 689config SND_LX6464ES
690 tristate "Digigram LX6464ES" 690 tristate "Digigram LX6464ES"
691 depends on HAS_IOPORT 691 depends on HAS_IOPORT_MAP
692 select SND_PCM 692 select SND_PCM
693 help 693 help
694 Say Y here to include support for Digigram LX6464ES boards. 694 Say Y here to include support for Digigram LX6464ES boards.
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index f9be24d9efac..05654f5e48d5 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -19,7 +19,8 @@
19 * Authors: Wu Fengguang <fengguang.wu@intel.com> 19 * Authors: Wu Fengguang <fengguang.wu@intel.com>
20 */ 20 */
21 21
22#define _LARGEFILE64_SOURCE 22#define _FILE_OFFSET_BITS 64
23#define _GNU_SOURCE
23#include <stdio.h> 24#include <stdio.h>
24#include <stdlib.h> 25#include <stdlib.h>
25#include <unistd.h> 26#include <unistd.h>
@@ -29,11 +30,14 @@
29#include <getopt.h> 30#include <getopt.h>
30#include <limits.h> 31#include <limits.h>
31#include <assert.h> 32#include <assert.h>
33#include <ftw.h>
34#include <time.h>
32#include <sys/types.h> 35#include <sys/types.h>
33#include <sys/errno.h> 36#include <sys/errno.h>
34#include <sys/fcntl.h> 37#include <sys/fcntl.h>
35#include <sys/mount.h> 38#include <sys/mount.h>
36#include <sys/statfs.h> 39#include <sys/statfs.h>
40#include <sys/mman.h>
37#include "../../include/uapi/linux/magic.h" 41#include "../../include/uapi/linux/magic.h"
38#include "../../include/uapi/linux/kernel-page-flags.h" 42#include "../../include/uapi/linux/kernel-page-flags.h"
39#include <api/fs/debugfs.h> 43#include <api/fs/debugfs.h>
@@ -158,6 +162,7 @@ static int opt_raw; /* for kernel developers */
158static int opt_list; /* list pages (in ranges) */ 162static int opt_list; /* list pages (in ranges) */
159static int opt_no_summary; /* don't show summary */ 163static int opt_no_summary; /* don't show summary */
160static pid_t opt_pid; /* process to walk */ 164static pid_t opt_pid; /* process to walk */
165const char * opt_file;
161 166
162#define MAX_ADDR_RANGES 1024 167#define MAX_ADDR_RANGES 1024
163static int nr_addr_ranges; 168static int nr_addr_ranges;
@@ -253,12 +258,7 @@ static unsigned long do_u64_read(int fd, char *name,
253 if (index > ULONG_MAX / 8) 258 if (index > ULONG_MAX / 8)
254 fatal("index overflow: %lu\n", index); 259 fatal("index overflow: %lu\n", index);
255 260
256 if (lseek(fd, index * 8, SEEK_SET) < 0) { 261 bytes = pread(fd, buf, count * 8, (off_t)index * 8);
257 perror(name);
258 exit(EXIT_FAILURE);
259 }
260
261 bytes = read(fd, buf, count * 8);
262 if (bytes < 0) { 262 if (bytes < 0) {
263 perror(name); 263 perror(name);
264 exit(EXIT_FAILURE); 264 exit(EXIT_FAILURE);
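Replacing the lseek()+read() pair with a single pread() drops one syscall and stops mutating the descriptor's file offset, so concurrent readers of the same fd cannot race on the seek position. A standalone userspace sketch of the same pattern (hypothetical helper name):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* read 'count' 8-byte records starting at record 'index' */
	static ssize_t read_u64_records(int fd, uint64_t *buf,
					unsigned long index, unsigned long count)
	{
		ssize_t bytes = pread(fd, buf, count * 8, (off_t)index * 8);

		if (bytes < 0) {
			perror("pread");
			exit(EXIT_FAILURE);
		}
		return bytes / 8;	/* number of whole records read */
	}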
@@ -343,8 +343,8 @@ static char *page_flag_longname(uint64_t flags)
  * page list and summary
  */
 
-static void show_page_range(unsigned long voffset,
-			    unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long voffset, unsigned long offset,
+			    unsigned long size, uint64_t flags)
 {
 	static uint64_t flags0;
 	static unsigned long voff;
@@ -352,14 +352,16 @@ static void show_page_range(unsigned long voffset,
 	static unsigned long count;
 
 	if (flags == flags0 && offset == index + count &&
-	    (!opt_pid || voffset == voff + count)) {
-		count++;
+	    size && voffset == voff + count) {
+		count += size;
 		return;
 	}
 
 	if (count) {
 		if (opt_pid)
 			printf("%lx\t", voff);
+		if (opt_file)
+			printf("%lu\t", voff);
 		printf("%lx\t%lx\t%s\n",
 				index, count, page_flag_name(flags0));
 	}
@@ -367,7 +369,12 @@ static void show_page_range(unsigned long voffset,
 	flags0 = flags;
 	index = offset;
 	voff = voffset;
-	count = 1;
+	count = size;
+}
+
+static void flush_page_range(void)
+{
+	show_page_range(0, 0, 0, 0);
 }
 
 static void show_page(unsigned long voffset,
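
The zero-argument call above acts as a drain: a size of 0 can never satisfy the range-extension test, so the pending range is printed and the static state is reset. A simplified standalone sketch of that accumulate-then-flush pattern (emit_range and its output format are illustrative, not the tool's own):

#include <stdio.h>

static unsigned long start, len;

/* Coalesce contiguous records; a size of 0 forces the pending range out. */
static void emit_range(unsigned long offset, unsigned long size)
{
	if (size && offset == start + len) {	/* contiguous: extend */
		len += size;
		return;
	}
	if (len)	/* flush the previously accumulated range */
		printf("%lx\t%lx\n", start, len);
	start = offset;
	len = size;
}

int main(void)
{
	emit_range(0x100, 1);
	emit_range(0x101, 1);	/* merged with the previous page */
	emit_range(0x200, 1);	/* gap: the first range is flushed */
	emit_range(0, 0);	/* sentinel: drain the last range */
	return 0;
}
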
@@ -375,6 +382,8 @@ static void show_page(unsigned long voffset,
 {
 	if (opt_pid)
 		printf("%lx\t", voffset);
+	if (opt_file)
+		printf("%lu\t", voffset);
 	printf("%lx\t%s\n", offset, page_flag_name(flags));
 }
 
@@ -565,7 +574,7 @@ static void add_page(unsigned long voffset,
 		unpoison_page(offset);
 
 	if (opt_list == 1)
-		show_page_range(voffset, offset, flags);
+		show_page_range(voffset, offset, 1, flags);
 	else if (opt_list == 2)
 		show_page(voffset, offset, flags);
 
@@ -667,7 +676,7 @@ static void walk_addr_ranges(void)
 
 	for (i = 0; i < nr_addr_ranges; i++)
 		if (!opt_pid)
-			walk_pfn(0, opt_offset[i], opt_size[i], 0);
+			walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0);
 		else
 			walk_task(opt_offset[i], opt_size[i]);
 
@@ -699,9 +708,7 @@ static void usage(void)
699" -a|--addr addr-spec Walk a range of pages\n" 708" -a|--addr addr-spec Walk a range of pages\n"
700" -b|--bits bits-spec Walk pages with specified bits\n" 709" -b|--bits bits-spec Walk pages with specified bits\n"
701" -p|--pid pid Walk process address space\n" 710" -p|--pid pid Walk process address space\n"
702#if 0 /* planned features */
703" -f|--file filename Walk file address space\n" 711" -f|--file filename Walk file address space\n"
704#endif
705" -l|--list Show page details in ranges\n" 712" -l|--list Show page details in ranges\n"
706" -L|--list-each Show page details one by one\n" 713" -L|--list-each Show page details one by one\n"
707" -N|--no-summary Don't show summary info\n" 714" -N|--no-summary Don't show summary info\n"
@@ -799,8 +806,130 @@ static void parse_pid(const char *str)
 	fclose(file);
 }
 
+static void show_file(const char *name, const struct stat *st)
+{
+	unsigned long long size = st->st_size;
+	char atime[64], mtime[64];
+	long now = time(NULL);
+
+	printf("%s\tInode: %u\tSize: %llu (%llu pages)\n",
+			name, (unsigned)st->st_ino,
+			size, (size + page_size - 1) / page_size);
+
+	strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime));
+	strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime));
+
+	printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n",
+			mtime, now - st->st_mtime,
+			atime, now - st->st_atime);
+}
+
+static void walk_file(const char *name, const struct stat *st)
+{
+	uint8_t vec[PAGEMAP_BATCH];
+	uint64_t buf[PAGEMAP_BATCH], flags;
+	unsigned long nr_pages, pfn, i;
+	int fd;
+	off_t off;
+	ssize_t len;
+	void *ptr;
+	int first = 1;
+
+	fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
+
+	for (off = 0; off < st->st_size; off += len) {
+		nr_pages = (st->st_size - off + page_size - 1) / page_size;
+		if (nr_pages > PAGEMAP_BATCH)
+			nr_pages = PAGEMAP_BATCH;
+		len = nr_pages * page_size;
+
+		ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
+		if (ptr == MAP_FAILED)
+			fatal("mmap failed: %s", name);
+
+		/* determine cached pages */
+		if (mincore(ptr, len, vec))
+			fatal("mincore failed: %s", name);
+
+		/* turn off readahead */
+		if (madvise(ptr, len, MADV_RANDOM))
+			fatal("madvice failed: %s", name);
+
+		/* populate ptes */
+		for (i = 0; i < nr_pages ; i++) {
+			if (vec[i] & 1)
+				(void)*(volatile int *)(ptr + i * page_size);
+		}
+
+		/* turn off harvesting reference bits */
+		if (madvise(ptr, len, MADV_SEQUENTIAL))
+			fatal("madvice failed: %s", name);
+
+		if (pagemap_read(buf, (unsigned long)ptr / page_size,
+					nr_pages) != nr_pages)
+			fatal("cannot read pagemap");
+
+		munmap(ptr, len);
+
+		for (i = 0; i < nr_pages; i++) {
+			pfn = pagemap_pfn(buf[i]);
+			if (!pfn)
+				continue;
+			if (!kpageflags_read(&flags, pfn, 1))
+				continue;
+			if (first && opt_list) {
+				first = 0;
+				flush_page_range();
+				show_file(name, st);
+			}
+			add_page(off / page_size + i, pfn, flags, buf[i]);
+		}
+	}
+
+	close(fd);
+}
+
+int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
+{
+	(void)f;
+	switch (type) {
+	case FTW_F:
+		if (S_ISREG(st->st_mode))
+			walk_file(name, st);
+		break;
+	case FTW_DNR:
+		fprintf(stderr, "cannot read dir: %s\n", name);
+		break;
+	}
+	return 0;
+}
+
+static void walk_page_cache(void)
+{
+	struct stat st;
+
+	kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+	pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
+
+	if (stat(opt_file, &st))
+		fatal("stat failed: %s\n", opt_file);
+
+	if (S_ISREG(st.st_mode)) {
+		walk_file(opt_file, &st);
+	} else if (S_ISDIR(st.st_mode)) {
+		/* do not follow symlinks and mountpoints */
+		if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0)
+			fatal("nftw failed: %s\n", opt_file);
+	} else
+		fatal("unhandled file type: %s\n", opt_file);
+
+	close(kpageflags_fd);
+	close(pagemap_fd);
+}
+
 static void parse_file(const char *name)
 {
+	opt_file = name;
 }
 
 static void parse_addr_range(const char *optarg)
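
As a rough standalone illustration of the mmap()/mincore()/madvise() probe that walk_file() performs per batch, the sketch below (file name, error handling, and output format are examples, not the tool's) counts how many pages of a file are currently resident in the page cache:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	struct stat st;
	size_t i, nr_pages, resident = 0;
	long page_size = sysconf(_SC_PAGESIZE);
	const char *name = argc > 1 ? argv[1] : "/etc/hostname";	/* example path */
	int fd = open(name, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) || st.st_size == 0) {
		perror(name);
		return EXIT_FAILURE;
	}
	nr_pages = (st.st_size + page_size - 1) / page_size;

	void *ptr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	unsigned char *vec = malloc(nr_pages);

	if (ptr == MAP_FAILED || !vec) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* keep the probe itself from kicking off readahead */
	madvise(ptr, st.st_size, MADV_RANDOM);

	/* one byte per page: bit 0 set means the page is in the page cache */
	if (mincore(ptr, st.st_size, vec)) {
		perror("mincore");
		return EXIT_FAILURE;
	}

	for (i = 0; i < nr_pages; i++)
		resident += vec[i] & 1;

	printf("%s: %zu of %zu pages resident\n", name, resident, nr_pages);

	munmap(ptr, st.st_size);
	free(vec);
	close(fd);
	return 0;
}
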
@@ -991,15 +1120,20 @@ int main(int argc, char *argv[])
 
 	if (opt_list && opt_pid)
 		printf("voffset\t");
+	if (opt_list && opt_file)
+		printf("foffset\t");
 	if (opt_list == 1)
 		printf("offset\tlen\tflags\n");
 	if (opt_list == 2)
 		printf("offset\tflags\n");
 
-	walk_addr_ranges();
+	if (opt_file)
+		walk_page_cache();
+	else
+		walk_addr_ranges();
 
 	if (opt_list == 1)
-		show_page_range(0, 0, 0);	/* drain the buffer */
+		flush_page_range();
 
 	if (opt_no_summary)
 		return 0;