-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt93
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--Documentation/vm/frontswap.txt278
-rw-r--r--MAINTAINERS50
-rw-r--r--Makefile2
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/mach-shmobile/Kconfig6
-rw-r--r--arch/arm/mach-ux500/board-mop500.c14
-rw-r--r--arch/arm/mm/dma-mapping.c10
-rw-r--r--arch/avr32/kernel/signal.c2
-rw-r--r--arch/blackfin/kernel/process.c2
-rw-r--r--arch/parisc/Makefile3
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/bug.h2
-rw-r--r--arch/powerpc/kernel/module_32.c11
-rw-r--r--arch/powerpc/kernel/time.c14
-rw-r--r--arch/tile/include/asm/thread_info.h5
-rw-r--r--arch/tile/kernel/entry.S14
-rw-r--r--arch/tile/kernel/setup.c1
-rw-r--r--arch/x86/boot/header.S42
-rw-r--r--arch/x86/boot/tools/build.c172
-rw-r--r--arch/x86/include/asm/nmi.h14
-rw-r--r--arch/x86/include/asm/uaccess.h12
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h1
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c145
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c9
-rw-r--r--arch/x86/kernel/nmi_selftest.c4
-rw-r--r--arch/x86/kernel/reboot.c6
-rw-r--r--arch/x86/kernel/smpboot.c19
-rw-r--r--arch/x86/lib/usercopy.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt8
-rw-r--r--arch/x86/mm/init.c3
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c1
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk14
-rw-r--r--arch/xtensa/include/asm/syscall.h4
-rw-r--r--arch/xtensa/kernel/signal.c2
-rw-r--r--drivers/acpi/Kconfig2
-rw-r--r--drivers/acpi/battery.c10
-rw-r--r--drivers/acpi/bus.c88
-rw-r--r--drivers/acpi/power.c2
-rw-r--r--drivers/acpi/processor_perflib.c30
-rw-r--r--drivers/acpi/scan.c1
-rw-r--r--drivers/acpi/sleep.c49
-rw-r--r--drivers/acpi/video.c33
-rw-r--r--drivers/char/agp/intel-agp.c1
-rw-r--r--drivers/char/agp/intel-agp.h1
-rw-r--r--drivers/clocksource/Makefile1
-rw-r--r--drivers/clocksource/em_sti.c406
-rw-r--r--drivers/gpio/gpio-samsung.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_encoder.c7
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.c19
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.h4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c9
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c12
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c13
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h3
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c38
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h43
-rw-r--r--drivers/gpu/drm/i915/intel_display.c19
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c21
-rw-r--r--drivers/gpu/drm/radeon/ni.c21
-rw-r--r--drivers/gpu/drm/radeon/r600.c15
-rw-r--r--drivers/gpu/drm/radeon/r600_audio.c5
-rw-r--r--drivers/gpu/drm/radeon/r600_hdmi.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon.h5
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c19
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c2
-rw-r--r--drivers/gpu/drm/radeon/rs600.c12
-rw-r--r--drivers/gpu/drm/radeon/rs690.c12
-rw-r--r--drivers/gpu/drm/radeon/rv770.c18
-rw-r--r--drivers/gpu/drm/radeon/si.c477
-rw-r--r--drivers/gpu/drm/radeon/sid.h19
-rw-r--r--drivers/i2c/muxes/Kconfig12
-rw-r--r--drivers/i2c/muxes/Makefile1
-rw-r--r--drivers/i2c/muxes/i2c-mux-pinctrl.c279
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c4
-rw-r--r--drivers/infiniband/hw/mlx4/main.c21
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h8
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c21
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_abi.h5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c9
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h1
-rw-r--r--drivers/iommu/amd_iommu.c71
-rw-r--r--drivers/iommu/amd_iommu_init.c13
-rw-r--r--drivers/iommu/amd_iommu_types.h3
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c4
-rw-r--r--drivers/mtd/ubi/debug.c12
-rw-r--r--drivers/mtd/ubi/wl.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c4
-rw-r--r--drivers/platform/x86/acerhdf.c2
-rw-r--r--drivers/rtc/rtc-cmos.c9
-rw-r--r--drivers/staging/ramster/zcache-main.c8
-rw-r--r--drivers/staging/zcache/zcache-main.c10
-rw-r--r--drivers/target/sbp/sbp_target.c8
-rw-r--r--drivers/target/target_core_file.c70
-rw-r--r--drivers/target/target_core_file.h1
-rw-r--r--drivers/xen/tmem.c8
-rw-r--r--fs/cifs/cifsglob.h7
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/file.c106
-rw-r--r--fs/cifs/misc.c89
-rw-r--r--fs/cifs/smb1ops.c89
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/ext4/balloc.c8
-rw-r--r--fs/ext4/ioctl.c1
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/dir.c11
-rw-r--r--fs/fuse/file.c40
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c17
-rw-r--r--fs/proc/base.c17
-rw-r--r--fs/ubifs/debug.c12
-rw-r--r--include/acpi/acpi_bus.h4
-rw-r--r--include/drm/drm_pciids.h17
-rw-r--r--include/drm/exynos_drm.h4
-rw-r--r--include/linux/clockchips.h1
-rw-r--r--include/linux/compaction.h19
-rw-r--r--include/linux/frontswap.h127
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/fuse.h14
-rw-r--r--include/linux/i2c-mux-pinctrl.h41
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/mfd/abx500/ab8500-codec.h52
-rw-r--r--include/linux/mfd/abx500/ab8500.h2
-rw-r--r--include/linux/moduleparam.h10
-rw-r--r--include/linux/perf_event.h4
-rw-r--r--include/linux/prctl.h10
-rw-r--r--include/linux/radix-tree.h5
-rw-r--r--include/linux/sched.h15
-rw-r--r--include/linux/swap.h4
-rw-r--r--include/linux/swapfile.h13
-rw-r--r--init/main.c9
-rw-r--r--ipc/shm.c12
-rw-r--r--kernel/cgroup.c17
-rw-r--r--kernel/events/core.c1
-rw-r--r--kernel/irq/chip.c8
-rw-r--r--kernel/irq/internals.h3
-rw-r--r--kernel/irq/manage.c39
-rw-r--r--kernel/irq/migration.c13
-rw-r--r--kernel/sched/core.c249
-rw-r--r--kernel/sched/fair.c71
-rw-r--r--kernel/sched/rt.c53
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/smpboot.c17
-rw-r--r--kernel/sys.c60
-rw-r--r--kernel/time/clockevents.c3
-rw-r--r--kernel/time/tick-sched.c19
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--lib/btree.c5
-rw-r--r--lib/radix-tree.c3
-rw-r--r--lib/raid6/recov.c7
-rw-r--r--lib/raid6/recov_ssse3.c7
-rw-r--r--mm/Kconfig17
-rw-r--r--mm/Makefile1
-rw-r--r--mm/compaction.c142
-rw-r--r--mm/frontswap.c314
-rw-r--r--mm/internal.h9
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c4
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/page_io.c12
-rw-r--r--mm/shmem.c57
-rw-r--r--mm/swapfile.c54
-rw-r--r--tools/perf/MANIFEST2
-rw-r--r--tools/perf/builtin-report.c4
-rw-r--r--tools/perf/builtin-stat.c8
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/design.txt7
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/callchain.h2
-rw-r--r--tools/perf/util/evlist.c17
-rw-r--r--tools/perf/util/evlist.h4
-rw-r--r--tools/perf/util/evsel.c29
-rw-r--r--tools/perf/util/hist.c7
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/pager.c4
-rw-r--r--tools/perf/util/probe-event.c8
-rw-r--r--tools/perf/util/session.c97
-rw-r--r--tools/perf/util/symbol.c38
-rw-r--r--tools/perf/util/symbol.h30
-rw-r--r--tools/power/x86/turbostat/turbostat.c30
-rw-r--r--virt/kvm/irq_comm.c1
200 files changed, 3902 insertions, 1464 deletions
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
new file mode 100644
index 000000000000..ae8af1694e95
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
@@ -0,0 +1,93 @@
1Pinctrl-based I2C Bus Mux
2
3This binding describes an I2C bus multiplexer that uses pin multiplexing to
4route the I2C signals, and represents the pin multiplexing configuration
5using the pinctrl device tree bindings.
6
7 +-----+ +-----+
8 | dev | | dev |
9 +------------------------+ +-----+ +-----+
10 | SoC | | |
11 | /----|------+--------+
12 | +---+ +------+ | child bus A, on first set of pins
13 | |I2C|---|Pinmux| |
14 | +---+ +------+ | child bus B, on second set of pins
15 | \----|------+--------+--------+
16 | | | | |
17 +------------------------+ +-----+ +-----+ +-----+
18 | dev | | dev | | dev |
19 +-----+ +-----+ +-----+
20
21Required properties:
22- compatible: i2c-mux-pinctrl
23- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
24 port is connected to.
25
26Also required are:
27
28* Standard pinctrl properties that specify the pin mux state for each child
29 bus. See ../pinctrl/pinctrl-bindings.txt.
30
31* Standard I2C mux properties. See mux.txt in this directory.
32
33* I2C child bus nodes. See mux.txt in this directory.
34
35For each named state defined in the pinctrl-names property, an I2C child bus
36will be created. I2C child bus numbers are assigned based on the index into
37the pinctrl-names property.
38
39The only exception is that no bus will be created for a state named "idle". If
40such a state is defined, it must be the last entry in pinctrl-names. For
41example:
42
43 pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1
44 pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last)
45 pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last)
46
47Whenever an access is made to a device on a child bus, the relevant pinctrl
48state will be programmed into hardware.
49
50If an idle state is defined, whenever an access is not being made to a device
51on a child bus, the idle pinctrl state will be programmed into hardware.
52
53If an idle state is not defined, the most recently used pinctrl state will be
54left programmed into hardware whenever no access is being made of a device on
55a child bus.
56
57Example:
58
59 i2cmux {
60 compatible = "i2c-mux-pinctrl";
61 #address-cells = <1>;
62 #size-cells = <0>;
63
64 i2c-parent = <&i2c1>;
65
66 pinctrl-names = "ddc", "pta", "idle";
67 pinctrl-0 = <&state_i2cmux_ddc>;
68 pinctrl-1 = <&state_i2cmux_pta>;
69 pinctrl-2 = <&state_i2cmux_idle>;
70
71 i2c@0 {
72 reg = <0>;
73 #address-cells = <1>;
74 #size-cells = <0>;
75
76 eeprom {
77 compatible = "eeprom";
78 reg = <0x50>;
79 };
80 };
81
82 i2c@1 {
83 reg = <1>;
84 #address-cells = <1>;
85 #size-cells = <0>;
86
87 eeprom {
88 compatible = "eeprom";
89 reg = <0x50>;
90 };
91 };
92 };
93
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c45513d806ab..a92c5ebf373e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2543,6 +2543,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2543 2543
2544 sched_debug [KNL] Enables verbose scheduler debug messages. 2544 sched_debug [KNL] Enables verbose scheduler debug messages.
2545 2545
2546 skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
2547 xtime_lock contention on larger systems, and/or RCU lock
2548 contention on all systems with CONFIG_MAXSMP set.
2549 Format: { "0" | "1" }
2550 0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1")
2551 1 -- enable.
2552 Note: increases power consumption, thus should only be
2553 enabled if running jitter sensitive (HPC/RT) workloads.
2554
2546 security= [SECURITY] Choose a security module to enable at boot. 2555 security= [SECURITY] Choose a security module to enable at boot.
2547 If this boot parameter is not specified, only the first 2556 If this boot parameter is not specified, only the first
2548 security module asking for security registration will be 2557 security module asking for security registration will be
diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt
new file mode 100644
index 000000000000..37067cf455f4
--- /dev/null
+++ b/Documentation/vm/frontswap.txt
@@ -0,0 +1,278 @@
1Frontswap provides a "transcendent memory" interface for swap pages.
2In some environments, dramatic performance savings may be obtained because
3swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
4
5(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
6and the only necessary changes to the core kernel for transcendent memory;
7all other supporting code -- the "backends" -- is implemented as drivers.
8See the LWN.net article "Transcendent memory in a nutshell" for a detailed
9overview of frontswap and related kernel parts:
10https://lwn.net/Articles/454795/ )
11
12Frontswap is so named because it can be thought of as the opposite of
13a "backing" store for a swap device. The storage is assumed to be
14a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
15to the requirements of transcendent memory (such as Xen's "tmem", or
16in-kernel compressed memory, aka "zcache", or future RAM-like devices);
17this pseudo-RAM device is not directly accessible or addressable by the
18kernel and is of unknown and possibly time-varying size. The driver
19links itself to frontswap by calling frontswap_register_ops to set the
20frontswap_ops funcs appropriately and the functions it provides must
21conform to certain policies as follows:
22
23An "init" prepares the device to receive frontswap pages associated
24with the specified swap device number (aka "type"). A "store" will
25copy the page to transcendent memory and associate it with the type and
26offset associated with the page. A "load" will copy the page, if found,
27from transcendent memory into kernel memory, but will NOT remove the page
28from transcendent memory. An "invalidate_page" will remove the page
29from transcendent memory and an "invalidate_area" will remove ALL pages
30associated with the swap type (e.g., like swapoff) and notify the "device"
31to refuse further stores with that swap type.
32
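To make these policies concrete, here is a minimal sketch of a do-nothing
backend registering itself. The "toy_*" names are purely illustrative, and
the exact struct frontswap_ops layout and frontswap_register_ops() signature
are assumptions here -- take them from include/linux/frontswap.h, not from
this sketch:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/frontswap.h>

    static void toy_init(unsigned type)
    {
            /* prepare bookkeeping for this swap device ("type") */
    }

    static int toy_store(unsigned type, pgoff_t offset, struct page *page)
    {
            /* copy the page into backend memory; 0 = accepted, -1 = rejected */
            return -1;      /* a backend may lawfully refuse every page */
    }

    static int toy_load(unsigned type, pgoff_t offset, struct page *page)
    {
            /* fill "page" from backend memory if it was previously accepted */
            return -1;
    }

    static void toy_invalidate_page(unsigned type, pgoff_t offset)
    {
            /* drop one stored page */
    }

    static void toy_invalidate_area(unsigned type)
    {
            /* drop every page of this swap type, e.g. on swapoff */
    }

    static struct frontswap_ops toy_ops = {
            .init            = toy_init,
            .store           = toy_store,
            .load            = toy_load,
            .invalidate_page = toy_invalidate_page,
            .invalidate_area = toy_invalidate_area,
    };

    static int __init toy_frontswap_init(void)
    {
            frontswap_register_ops(&toy_ops);
            return 0;
    }
    module_init(toy_frontswap_init);
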
33Once a page is successfully stored, a matching load on the page will normally
34succeed. So when the kernel finds itself in a situation where it needs
35to swap out a page, it first attempts to use frontswap. If the store returns
36success, the data has been successfully saved to transcendent memory and
37a disk write and, if the data is later read back, a disk read are avoided.
38If a store returns failure, transcendent memory has rejected the data, and the
39page can be written to swap as usual.
40
41If a backend chooses, frontswap can be configured as a "writethrough
42cache" by calling frontswap_writethrough(). In this mode, the reduction
43in swap device writes is lost (and also a non-trivial performance advantage)
44in order to allow the backend to arbitrarily "reclaim" space used to
45store frontswap pages to more completely manage its memory usage.
46
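Assuming frontswap_writethrough() takes a single boolean, as declared in
include/linux/frontswap.h of this series, a backend that wants this mode
would simply turn it on when it registers; e.g. the registration step of the
toy backend sketched above might become:

    static int __init toy_frontswap_init(void)
    {
            /* writethrough: disk writes still happen, so the backend
             * may drop stored pages whenever it likes */
            frontswap_writethrough(true);
            frontswap_register_ops(&toy_ops);
            return 0;
    }
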
47Note that if a page is stored and the page already exists in transcendent memory
48(a "duplicate" store), either the store succeeds and the data is overwritten,
49or the store fails AND the page is invalidated. This ensures stale data may
50never be obtained from frontswap.
51
52If properly configured, monitoring of frontswap is done via debugfs in
53the /sys/kernel/debug/frontswap directory. The effectiveness of
54frontswap can be measured (across all swap devices) with:
55
56failed_stores - how many store attempts have failed
57loads - how many loads were attempted (all should succeed)
58succ_stores - how many store attempts have succeeded
59invalidates - how many invalidates were attempted
60
61A backend implementation may provide additional metrics.
62
63FAQ
64
651) Where's the value?
66
67When a workload starts swapping, performance falls through the floor.
68Frontswap significantly increases performance in many such workloads by
69providing a clean, dynamic interface to read and write swap pages to
70"transcendent memory" that is otherwise not directly addressable to the kernel.
71This interface is ideal when data is transformed to a different form
72and size (such as with compression) or secretly moved (as might be
73useful for write-balancing for some RAM-like devices). Swap pages (and
74evicted page-cache pages) are a great use for this kind of slower-than-RAM-
75but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
76cleancache) interface to transcendent memory provides a nice way to read
77and write -- and indirectly "name" -- the pages.
78
79Frontswap -- and cleancache -- with a fairly small impact on the kernel,
80provides a huge amount of flexibility for more dynamic, flexible RAM
81utilization in various system configurations:
82
83In the single kernel case, aka "zcache", pages are compressed and
84stored in local memory, thus increasing the total anonymous pages
85that can be safely kept in RAM. Zcache essentially trades off CPU
86cycles used in compression/decompression for better memory utilization.
87Benchmarks have shown little or no impact when memory pressure is
88low while providing a significant performance improvement (25%+)
89on some workloads under high memory pressure.
90
91"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
92support for clustered systems. Frontswap pages are locally compressed
93as in zcache, but then "remotified" to another system's RAM. This
94allows RAM to be dynamically load-balanced back-and-forth as needed,
95i.e. when system A is overcommitted, it can swap to system B, and
96vice versa. RAMster can also be configured as a memory server so
97many servers in a cluster can swap, dynamically as needed, to a single
98server configured with a large amount of RAM... without pre-configuring
99how much of the RAM is available for each of the clients!
100
101In the virtual case, the whole point of virtualization is to statistically
102multiplex physical resources across the varying demands of multiple
103virtual machines. This is really hard to do with RAM and efforts to do
104it well with no kernel changes have essentially failed (except in some
105well-publicized special-case workloads).
106Specifically, the Xen Transcendent Memory backend allows otherwise
107"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
108virtual machines, but the pages can be compressed and deduplicated to
109optimize RAM utilization. And when guest OS's are induced to surrender
110underutilized RAM (e.g. with "selfballooning"), sudden unexpected
111memory pressure may result in swapping; frontswap allows those pages
112to be swapped to and from hypervisor RAM (if overall host system memory
113conditions allow), thus mitigating the potentially awful performance impact
114of unplanned swapping.
115
116A KVM implementation is underway and has been RFC'ed to lkml. And,
117using frontswap, investigation is also underway on the use of NVM as
118a memory extension technology.
119
1202) Sure there may be performance advantages in some situations, but
121 what's the space/time overhead of frontswap?
122
123If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
124nothingness and the only overhead is a few extra bytes per swapon'ed
125swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
126registers, there is one extra compare of a global variable against zero for
127every swap page read or written. If CONFIG_FRONTSWAP is enabled
128AND a frontswap backend registers AND the backend fails every "store"
129request (i.e. provides no memory despite claiming it might),
130CPU overhead is still negligible -- and since every frontswap fail
131precedes a swap page write-to-disk, the system is highly likely
132to be I/O bound and using a small fraction of a percent of a CPU
133will be irrelevant anyway.
134
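The "one extra compare" comes from the shape of the hooks themselves; the
store-side hook most likely looks roughly like the sketch below (the
authoritative definition is in include/linux/frontswap.h; the variable and
function names here are inferred from this description):

    static inline int frontswap_store(struct page *page)
    {
            int ret = -1;

            if (frontswap_enabled)          /* the single test of a global variable */
                    ret = __frontswap_store(page);
            return ret;
    }
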
135As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
136registers, one bit is allocated for every swap page for every swap
137device that is swapon'd. This is added to the EIGHT bits (which
138were sixteen until about 2.6.34) that the kernel already allocates
139for every swap page for every swap device that is swapon'd. (Hugh
140Dickins has observed that frontswap could probably steal one of
141the existing eight bits, but let's worry about that minor optimization
142later.) For very large swap disks (which are rare) on a standard
1434K pagesize, this is 1MB per 32GB swap.
144
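As a quick sanity check of that figure: 32GB of swap at a 4K page size is
32GB / 4KB = 8,388,608 pages; one bit per page is then 8,388,608 bits =
1,048,576 bytes = 1MB.
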
145When swap pages are stored in transcendent memory instead of written
146out to disk, there is a side effect that this may create more memory
147pressure that can potentially outweigh the other advantages. A
148backend, such as zcache, must implement policies to carefully (but
149dynamically) manage memory limits to ensure this doesn't happen.
150
1513) OK, how about a quick overview of what this frontswap patch does
152 in terms that a kernel hacker can grok?
153
154Let's assume that a frontswap "backend" has registered during
155kernel initialization; this registration indicates that this
156frontswap backend has access to some "memory" that is not directly
157accessible by the kernel. Exactly how much memory it provides is
158entirely dynamic and random.
159
160Whenever a swap-device is swapon'd frontswap_init() is called,
161passing the swap device number (aka "type") as a parameter.
162This notifies frontswap to expect attempts to "store" swap pages
163associated with that number.
164
165Whenever the swap subsystem is readying a page to write to a swap
166device (cf. swap_writepage()), frontswap_store is called. Frontswap
167consults with the frontswap backend and if the backend says it does NOT
168have room, frontswap_store returns -1 and the kernel swaps the page
169to the swap device as normal. Note that the response from the frontswap
170backend is unpredictable to the kernel; it may choose to never accept a
171page, it could accept every ninth page, or it might accept every
172page. But if the backend does accept a page, the data from the page
173has already been copied and associated with the type and offset,
174and the backend guarantees the persistence of the data. In this case,
175frontswap sets a bit in the "frontswap_map" for the swap device
176corresponding to the page offset on the swap device to which it would
177otherwise have written the data.
178
179When the swap subsystem needs to swap-in a page (swap_readpage()),
180it first calls frontswap_load() which checks the frontswap_map to
181see if the page was earlier accepted by the frontswap backend. If
182it was, the page of data is filled from the frontswap backend and
183the swap-in is complete. If not, the normal swap-in code is
184executed to obtain the page of data from the real swap device.
185
186So every time the frontswap backend accepts a page, a swap device read
187and (potentially) a swap device write are replaced by a "frontswap backend
188store" and (possibly) a "frontswap backend loads", which are presumably much
189faster.
190
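A sketch of that decision flow is below; do_disk_write() and do_disk_read()
are hypothetical stand-ins for the normal block-I/O paths (the real hooks
are placed in mm/page_io.c by this series), and the return conventions of
frontswap_store()/frontswap_load() are assumed to be 0 on success:

    #include <linux/frontswap.h>
    #include <linux/mm.h>

    static int swap_out_one_page(struct page *page,
                                 int (*do_disk_write)(struct page *))
    {
            if (frontswap_store(page) == 0)
                    return 0;               /* accepted; frontswap_map bit is now set */
            return do_disk_write(page);     /* rejected; use the real swap device */
    }

    static int swap_in_one_page(struct page *page,
                                int (*do_disk_read)(struct page *))
    {
            if (frontswap_load(page) == 0)
                    return 0;               /* page filled from the backend */
            return do_disk_read(page);      /* not in frontswap; read the swap device */
    }
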
1914) Can't frontswap be configured as a "special" swap device that is
192 just higher priority than any real swap device (e.g. like zswap,
193 or maybe swap-over-nbd/NFS)?
194
195No. First, the existing swap subsystem doesn't allow for any kind of
196swap hierarchy. Perhaps it could be rewritten to accommodate a hierarchy,
197but this would require fairly drastic changes. Even if it were
198rewritten, the existing swap subsystem uses the block I/O layer which
199assumes a swap device is fixed size and any page in it is linearly
200addressable. Frontswap barely touches the existing swap subsystem,
201and works around the constraints of the block I/O subsystem to provide
202a great deal of flexibility and dynamicity.
203
204For example, the acceptance of any swap page by the frontswap backend is
205entirely unpredictable. This is critical to the definition of frontswap
206backends because it grants completely dynamic discretion to the
207backend. In zcache, one cannot know a priori how compressible a page is.
208"Poorly" compressible pages can be rejected, and "poorly" can itself be
209defined dynamically depending on current memory constraints.
210
211Further, frontswap is entirely synchronous whereas a real swap
212device is, by definition, asynchronous and uses block I/O. The
213block I/O layer is not only unnecessary, but may perform "optimizations"
214that are inappropriate for a RAM-oriented device including delaying
215the write of some pages for a significant amount of time. Synchrony is
216required to ensure the dynamicity of the backend and to avoid thorny race
217conditions that would unnecessarily and greatly complicate frontswap
218and/or the block I/O subsystem. That said, only the initial "store"
219and "load" operations need be synchronous. A separate asynchronous thread
220is free to manipulate the pages stored by frontswap. For example,
221the "remotification" thread in RAMster uses standard asynchronous
222kernel sockets to move compressed frontswap pages to a remote machine.
223Similarly, a KVM guest-side implementation could do in-guest compression
224and use "batched" hypercalls.
225
226In a virtualized environment, the dynamicity allows the hypervisor
227(or host OS) to do "intelligent overcommit". For example, it can
228choose to accept pages only until host-swapping might be imminent,
229then force guests to do their own swapping.
230
231There is a downside to the transcendent memory specifications for
232frontswap: Since any "store" might fail, there must always be a real
233slot on a real swap device to swap the page. Thus frontswap must be
234implemented as a "shadow" to every swapon'd device with the potential
235capability of holding every page that the swap device might have held
236and the possibility that it might hold no pages at all. This means
237that frontswap cannot contain more pages than the total of swapon'd
238swap devices. For example, if NO swap device is configured on some
239installation, frontswap is useless. Swapless portable devices
240can still use frontswap but a backend for such devices must configure
241some kind of "ghost" swap device and ensure that it is never used.
242
2435) Why this weird definition about "duplicate stores"? If a page
244 has been previously successfully stored, can't it always be
245 successfully overwritten?
246
247Nearly always it can, but no, sometimes it cannot. Consider an example
248where data is compressed and the original 4K page has been compressed
249to 1K. Now an attempt is made to overwrite the page with data that
250is non-compressible and so would take the entire 4K. But the backend
251has no more space. In this case, the store must be rejected. Whenever
252frontswap rejects a store that would overwrite, it also must invalidate
253the old data and ensure that it is no longer accessible. Since the
254swap subsystem then writes the new data to the real swap device,
255this is the correct course of action to ensure coherency.
256
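Inside a backend, that rule might look like the following refinement of the
toy store callback sketched earlier; toy_lookup(), toy_copy_in() and
toy_invalidate() are hypothetical helpers:

    static int toy_store(unsigned type, pgoff_t offset, struct page *page)
    {
            bool had_old_copy = toy_lookup(type, offset);

            if (toy_copy_in(type, offset, page) == 0)
                    return 0;       /* duplicate store succeeded, old data overwritten */

            if (had_old_copy)
                    toy_invalidate(type, offset);   /* stale copy must not survive */
            return -1;              /* kernel writes the new data to the real swap device */
    }
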
2576) What is frontswap_shrink for?
258
259When the (non-frontswap) swap subsystem swaps out a page to a real
260swap device, that page is only taking up low-value pre-allocated disk
261space. But if frontswap has placed a page in transcendent memory, that
262page may be taking up valuable real estate. The frontswap_shrink
263routine allows code outside of the swap subsystem to force pages out
264of the memory managed by frontswap and back into kernel-addressable memory.
265For example, in RAMster, a "suction driver" thread will attempt
266to "repatriate" pages sent to a remote machine back to the local machine;
267this is driven using the frontswap_shrink mechanism when memory pressure
268subsides.
269
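A caller might use it roughly as follows, assuming (per mm/frontswap.c in
this series) that frontswap_shrink() takes the number of pages allowed to
remain in frontswap and that frontswap_curr_pages() reports how many pages
are currently stored:

    static void relieve_frontswap_pressure(void)
    {
            unsigned long target = frontswap_curr_pages() / 2;

            /* push roughly half of the stored pages back toward the kernel */
            frontswap_shrink(target);
    }
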
2707) Why does the frontswap patch create the new include file swapfile.h?
271
272The frontswap code depends on some swap-subsystem-internal data
273structures that have, over the years, moved back and forth between
274static and global. This seemed a reasonable compromise: Define
275them as global but declare them in a new include file that isn't
276included by the large number of source files that include swap.h.
277
278Dan Magenheimer, last updated April 9, 2012
diff --git a/MAINTAINERS b/MAINTAINERS
index d0526640f717..62b94662506f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1077,7 +1077,7 @@ F: drivers/media/video/s5p-fimc/
1077ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT 1077ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
1078M: Kyungmin Park <kyungmin.park@samsung.com> 1078M: Kyungmin Park <kyungmin.park@samsung.com>
1079M: Kamil Debski <k.debski@samsung.com> 1079M: Kamil Debski <k.debski@samsung.com>
1080M: Jeongtae Park <jtp.park@samsung.com> 1080M: Jeongtae Park <jtp.park@samsung.com>
1081L: linux-arm-kernel@lists.infradead.org 1081L: linux-arm-kernel@lists.infradead.org
1082L: linux-media@vger.kernel.org 1082L: linux-media@vger.kernel.org
1083S: Maintained 1083S: Maintained
@@ -1743,10 +1743,10 @@ F: include/linux/can/platform/
1743CAPABILITIES 1743CAPABILITIES
1744M: Serge Hallyn <serge.hallyn@canonical.com> 1744M: Serge Hallyn <serge.hallyn@canonical.com>
1745L: linux-security-module@vger.kernel.org 1745L: linux-security-module@vger.kernel.org
1746S: Supported 1746S: Supported
1747F: include/linux/capability.h 1747F: include/linux/capability.h
1748F: security/capability.c 1748F: security/capability.c
1749F: security/commoncap.c 1749F: security/commoncap.c
1750F: kernel/capability.c 1750F: kernel/capability.c
1751 1751
1752CELL BROADBAND ENGINE ARCHITECTURE 1752CELL BROADBAND ENGINE ARCHITECTURE
@@ -2146,11 +2146,11 @@ S: Orphan
2146F: drivers/net/wan/pc300* 2146F: drivers/net/wan/pc300*
2147 2147
2148CYTTSP TOUCHSCREEN DRIVER 2148CYTTSP TOUCHSCREEN DRIVER
2149M: Javier Martinez Canillas <javier@dowhile0.org> 2149M: Javier Martinez Canillas <javier@dowhile0.org>
2150L: linux-input@vger.kernel.org 2150L: linux-input@vger.kernel.org
2151S: Maintained 2151S: Maintained
2152F: drivers/input/touchscreen/cyttsp* 2152F: drivers/input/touchscreen/cyttsp*
2153F: include/linux/input/cyttsp.h 2153F: include/linux/input/cyttsp.h
2154 2154
2155DAMA SLAVE for AX.25 2155DAMA SLAVE for AX.25
2156M: Joerg Reuter <jreuter@yaina.de> 2156M: Joerg Reuter <jreuter@yaina.de>
@@ -2270,7 +2270,7 @@ F: include/linux/device-mapper.h
2270F: include/linux/dm-*.h 2270F: include/linux/dm-*.h
2271 2271
2272DIOLAN U2C-12 I2C DRIVER 2272DIOLAN U2C-12 I2C DRIVER
2273M: Guenter Roeck <guenter.roeck@ericsson.com> 2273M: Guenter Roeck <linux@roeck-us.net>
2274L: linux-i2c@vger.kernel.org 2274L: linux-i2c@vger.kernel.org
2275S: Maintained 2275S: Maintained
2276F: drivers/i2c/busses/i2c-diolan-u2c.c 2276F: drivers/i2c/busses/i2c-diolan-u2c.c
@@ -2930,6 +2930,13 @@ F: Documentation/power/freezing-of-tasks.txt
2930F: include/linux/freezer.h 2930F: include/linux/freezer.h
2931F: kernel/freezer.c 2931F: kernel/freezer.c
2932 2932
2933FRONTSWAP API
2934M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2935L: linux-kernel@vger.kernel.org
2936S: Maintained
2937F: mm/frontswap.c
2938F: include/linux/frontswap.h
2939
2933FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS 2940FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
2934M: David Howells <dhowells@redhat.com> 2941M: David Howells <dhowells@redhat.com>
2935L: linux-cachefs@redhat.com 2942L: linux-cachefs@redhat.com
@@ -3138,7 +3145,7 @@ F: drivers/tty/hvc/
3138 3145
3139HARDWARE MONITORING 3146HARDWARE MONITORING
3140M: Jean Delvare <khali@linux-fr.org> 3147M: Jean Delvare <khali@linux-fr.org>
3141M: Guenter Roeck <guenter.roeck@ericsson.com> 3148M: Guenter Roeck <linux@roeck-us.net>
3142L: lm-sensors@lm-sensors.org 3149L: lm-sensors@lm-sensors.org
3143W: http://www.lm-sensors.org/ 3150W: http://www.lm-sensors.org/
3144T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ 3151T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
@@ -4096,6 +4103,8 @@ F: drivers/scsi/53c700*
4096LED SUBSYSTEM 4103LED SUBSYSTEM
4097M: Bryan Wu <bryan.wu@canonical.com> 4104M: Bryan Wu <bryan.wu@canonical.com>
4098M: Richard Purdie <rpurdie@rpsys.net> 4105M: Richard Purdie <rpurdie@rpsys.net>
4106L: linux-leds@vger.kernel.org
4107T: git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git
4099S: Maintained 4108S: Maintained
4100F: drivers/leds/ 4109F: drivers/leds/
4101F: include/linux/leds.h 4110F: include/linux/leds.h
@@ -4411,6 +4420,13 @@ S: Orphan
4411F: drivers/video/matrox/matroxfb_* 4420F: drivers/video/matrox/matroxfb_*
4412F: include/linux/matroxfb.h 4421F: include/linux/matroxfb.h
4413 4422
4423MAX16065 HARDWARE MONITOR DRIVER
4424M: Guenter Roeck <linux@roeck-us.net>
4425L: lm-sensors@lm-sensors.org
4426S: Maintained
4427F: Documentation/hwmon/max16065
4428F: drivers/hwmon/max16065.c
4429
4414MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER 4430MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
4415M: "Hans J. Koch" <hjk@hansjkoch.de> 4431M: "Hans J. Koch" <hjk@hansjkoch.de>
4416L: lm-sensors@lm-sensors.org 4432L: lm-sensors@lm-sensors.org
@@ -5149,7 +5165,7 @@ F: drivers/leds/leds-pca9532.c
5149F: include/linux/leds-pca9532.h 5165F: include/linux/leds-pca9532.h
5150 5166
5151PCA9541 I2C BUS MASTER SELECTOR DRIVER 5167PCA9541 I2C BUS MASTER SELECTOR DRIVER
5152M: Guenter Roeck <guenter.roeck@ericsson.com> 5168M: Guenter Roeck <linux@roeck-us.net>
5153L: linux-i2c@vger.kernel.org 5169L: linux-i2c@vger.kernel.org
5154S: Maintained 5170S: Maintained
5155F: drivers/i2c/muxes/i2c-mux-pca9541.c 5171F: drivers/i2c/muxes/i2c-mux-pca9541.c
@@ -5169,7 +5185,7 @@ S: Maintained
5169F: drivers/firmware/pcdp.* 5185F: drivers/firmware/pcdp.*
5170 5186
5171PCI ERROR RECOVERY 5187PCI ERROR RECOVERY
5172M: Linas Vepstas <linasvepstas@gmail.com> 5188M: Linas Vepstas <linasvepstas@gmail.com>
5173L: linux-pci@vger.kernel.org 5189L: linux-pci@vger.kernel.org
5174S: Supported 5190S: Supported
5175F: Documentation/PCI/pci-error-recovery.txt 5191F: Documentation/PCI/pci-error-recovery.txt
@@ -5299,7 +5315,7 @@ F: drivers/video/fb-puv3.c
5299F: drivers/rtc/rtc-puv3.c 5315F: drivers/rtc/rtc-puv3.c
5300 5316
5301PMBUS HARDWARE MONITORING DRIVERS 5317PMBUS HARDWARE MONITORING DRIVERS
5302M: Guenter Roeck <guenter.roeck@ericsson.com> 5318M: Guenter Roeck <linux@roeck-us.net>
5303L: lm-sensors@lm-sensors.org 5319L: lm-sensors@lm-sensors.org
5304W: http://www.lm-sensors.org/ 5320W: http://www.lm-sensors.org/
5305W: http://www.roeck-us.net/linux/drivers/ 5321W: http://www.roeck-us.net/linux/drivers/
@@ -7293,11 +7309,11 @@ F: Documentation/DocBook/uio-howto.tmpl
7293F: drivers/uio/ 7309F: drivers/uio/
7294F: include/linux/uio*.h 7310F: include/linux/uio*.h
7295 7311
7296UTIL-LINUX-NG PACKAGE 7312UTIL-LINUX PACKAGE
7297M: Karel Zak <kzak@redhat.com> 7313M: Karel Zak <kzak@redhat.com>
7298L: util-linux-ng@vger.kernel.org 7314L: util-linux@vger.kernel.org
7299W: http://kernel.org/~kzak/util-linux-ng/ 7315W: http://en.wikipedia.org/wiki/Util-linux
7300T: git git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git 7316T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
7301S: Maintained 7317S: Maintained
7302 7318
7303UVESAFB DRIVER 7319UVESAFB DRIVER
diff --git a/Makefile b/Makefile
index 0d718ede9ea5..d845c2a1aa68 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 3 1VERSION = 3
2PATCHLEVEL = 5 2PATCHLEVEL = 5
3SUBLEVEL = 0 3SUBLEVEL = 0
4EXTRAVERSION = -rc1 4EXTRAVERSION = -rc2
5NAME = Saber-toothed Squirrel 5NAME = Saber-toothed Squirrel
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b649c5904a4f..84449dd8f031 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,7 +7,6 @@ config ARM
7 select HAVE_IDE if PCI || ISA || PCMCIA 7 select HAVE_IDE if PCI || ISA || PCMCIA
8 select HAVE_DMA_ATTRS 8 select HAVE_DMA_ATTRS
9 select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) 9 select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
10 select CMA if (CPU_V6 || CPU_V6K || CPU_V7)
11 select HAVE_MEMBLOCK 10 select HAVE_MEMBLOCK
12 select RTC_LIB 11 select RTC_LIB
13 select SYS_SUPPORTS_APM_EMULATION 12 select SYS_SUPPORTS_APM_EMULATION
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index f31383c32f9c..df33909205e2 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -186,6 +186,12 @@ config SH_TIMER_TMU
186 help 186 help
187 This enables build of the TMU timer driver. 187 This enables build of the TMU timer driver.
188 188
189config EM_TIMER_STI
190 bool "STI timer driver"
191 default y
192 help
193 This enables build of the STI timer driver.
194
189endmenu 195endmenu
190 196
191config SH_CLK_CPG 197config SH_CLK_CPG
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 9c74ac545849..c8a8fde777bb 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -25,6 +25,7 @@
25#include <linux/mfd/tc3589x.h> 25#include <linux/mfd/tc3589x.h>
26#include <linux/mfd/tps6105x.h> 26#include <linux/mfd/tps6105x.h>
27#include <linux/mfd/abx500/ab8500-gpio.h> 27#include <linux/mfd/abx500/ab8500-gpio.h>
28#include <linux/mfd/abx500/ab8500-codec.h>
28#include <linux/leds-lp5521.h> 29#include <linux/leds-lp5521.h>
29#include <linux/input.h> 30#include <linux/input.h>
30#include <linux/smsc911x.h> 31#include <linux/smsc911x.h>
@@ -97,6 +98,18 @@ static struct ab8500_gpio_platform_data ab8500_gpio_pdata = {
97 0x7A, 0x00, 0x00}, 98 0x7A, 0x00, 0x00},
98}; 99};
99 100
101/* ab8500-codec */
102static struct ab8500_codec_platform_data ab8500_codec_pdata = {
103 .amics = {
104 .mic1_type = AMIC_TYPE_DIFFERENTIAL,
105 .mic2_type = AMIC_TYPE_DIFFERENTIAL,
106 .mic1a_micbias = AMIC_MICBIAS_VAMIC1,
107 .mic1b_micbias = AMIC_MICBIAS_VAMIC1,
108 .mic2_micbias = AMIC_MICBIAS_VAMIC2
109 },
110 .ear_cmv = EAR_CMV_0_95V
111};
112
100static struct gpio_keys_button snowball_key_array[] = { 113static struct gpio_keys_button snowball_key_array[] = {
101 { 114 {
102 .gpio = 32, 115 .gpio = 32,
@@ -195,6 +208,7 @@ static struct ab8500_platform_data ab8500_platdata = {
195 .regulator = ab8500_regulators, 208 .regulator = ab8500_regulators,
196 .num_regulator = ARRAY_SIZE(ab8500_regulators), 209 .num_regulator = ARRAY_SIZE(ab8500_regulators),
197 .gpio = &ab8500_gpio_pdata, 210 .gpio = &ab8500_gpio_pdata,
211 .codec = &ab8500_codec_pdata,
198}; 212};
199 213
200static struct resource ab8500_resources[] = { 214static struct resource ab8500_resources[] = {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ea6b43154090..106c4c0ebccd 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -268,10 +268,8 @@ static int __init consistent_init(void)
268 unsigned long base = consistent_base; 268 unsigned long base = consistent_base;
269 unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; 269 unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
270 270
271#ifndef CONFIG_ARM_DMA_USE_IOMMU 271 if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
272 if (cpu_architecture() >= CPU_ARCH_ARMv6)
273 return 0; 272 return 0;
274#endif
275 273
276 consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); 274 consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
277 if (!consistent_pte) { 275 if (!consistent_pte) {
@@ -342,7 +340,7 @@ static int __init coherent_init(void)
342 struct page *page; 340 struct page *page;
343 void *ptr; 341 void *ptr;
344 342
345 if (cpu_architecture() < CPU_ARCH_ARMv6) 343 if (!IS_ENABLED(CONFIG_CMA))
346 return 0; 344 return 0;
347 345
348 ptr = __alloc_from_contiguous(NULL, size, prot, &page); 346 ptr = __alloc_from_contiguous(NULL, size, prot, &page);
@@ -704,7 +702,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
704 702
705 if (arch_is_coherent() || nommu()) 703 if (arch_is_coherent() || nommu())
706 addr = __alloc_simple_buffer(dev, size, gfp, &page); 704 addr = __alloc_simple_buffer(dev, size, gfp, &page);
707 else if (cpu_architecture() < CPU_ARCH_ARMv6) 705 else if (!IS_ENABLED(CONFIG_CMA))
708 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); 706 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
709 else if (gfp & GFP_ATOMIC) 707 else if (gfp & GFP_ATOMIC)
710 addr = __alloc_from_pool(dev, size, &page, caller); 708 addr = __alloc_from_pool(dev, size, &page, caller);
@@ -773,7 +771,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
773 771
774 if (arch_is_coherent() || nommu()) { 772 if (arch_is_coherent() || nommu()) {
775 __dma_free_buffer(page, size); 773 __dma_free_buffer(page, size);
776 } else if (cpu_architecture() < CPU_ARCH_ARMv6) { 774 } else if (!IS_ENABLED(CONFIG_CMA)) {
777 __dma_free_remap(cpu_addr, size); 775 __dma_free_remap(cpu_addr, size);
778 __dma_free_buffer(page, size); 776 __dma_free_buffer(page, size);
779 } else { 777 } else {
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index c140f9b41dce..d552a854dacc 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
300 if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) 300 if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
301 syscall = 1; 301 syscall = 1;
302 302
303 if (ti->flags & _TIF_SIGPENDING)) 303 if (ti->flags & _TIF_SIGPENDING)
304 do_signal(regs, syscall); 304 do_signal(regs, syscall);
305 305
306 if (ti->flags & _TIF_NOTIFY_RESUME) { 306 if (ti->flags & _TIF_NOTIFY_RESUME) {
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 2e3994b20169..62bcea7dcc6d 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
173 unsigned long newsp; 173 unsigned long newsp;
174 174
175#ifdef __ARCH_SYNC_CORE_DCACHE 175#ifdef __ARCH_SYNC_CORE_DCACHE
176 if (current->rt.nr_cpus_allowed == num_possible_cpus()) 176 if (current->nr_cpus_allowed == num_possible_cpus())
177 set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id())); 177 set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
178#endif 178#endif
179 179
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index dbc3850b1d0d..5707f1a62341 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -21,6 +21,7 @@ KBUILD_DEFCONFIG := default_defconfig
21 21
22NM = sh $(srctree)/arch/parisc/nm 22NM = sh $(srctree)/arch/parisc/nm
23CHECKFLAGS += -D__hppa__=1 23CHECKFLAGS += -D__hppa__=1
24LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
24 25
25MACHINE := $(shell uname -m) 26MACHINE := $(shell uname -m)
26ifeq ($(MACHINE),parisc*) 27ifeq ($(MACHINE),parisc*)
@@ -79,7 +80,7 @@ kernel-y := mm/ kernel/ math-emu/
79kernel-$(CONFIG_HPUX) += hpux/ 80kernel-$(CONFIG_HPUX) += hpux/
80 81
81core-y += $(addprefix arch/parisc/, $(kernel-y)) 82core-y += $(addprefix arch/parisc/, $(kernel-y))
82libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name` 83libs-y += arch/parisc/lib/ $(LIBGCC)
83 84
84drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ 85drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/
85 86
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 19a434f55059..4383707d9801 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,3 +1,4 @@
1include include/asm-generic/Kbuild.asm 1include include/asm-generic/Kbuild.asm
2 2
3header-y += pdc.h 3header-y += pdc.h
4generic-y += word-at-a-time.h
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
index 72cfdb0cfdd1..62a33338549c 100644
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -1,6 +1,8 @@
1#ifndef _PARISC_BUG_H 1#ifndef _PARISC_BUG_H
2#define _PARISC_BUG_H 2#define _PARISC_BUG_H
3 3
4#include <linux/kernel.h> /* for BUGFLAG_TAINT */
5
4/* 6/*
5 * Tell the user there is some problem. 7 * Tell the user there is some problem.
6 * The offending file and line are encoded in the __bug_table section. 8 * The offending file and line are encoded in the __bug_table section.
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 0b6d79617d7b..2e3200ca485f 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
176 176
177static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) 177static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
178{ 178{
179 if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) 179 if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
180 && entry->jump[1] == 0x396b0000 + (val & 0xffff)) 180 && entry->jump[1] == 0x398c0000 + (val & 0xffff))
181 return 1; 181 return 1;
182 return 0; 182 return 0;
183} 183}
@@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location,
204 entry++; 204 entry++;
205 } 205 }
206 206
207 /* Stolen from Paul Mackerras as well... */ 207 entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
208 entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ 208 entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
209 entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ 209 entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
210 entry->jump[2] = 0x7d6903a6; /* mtctr r11 */
211 entry->jump[3] = 0x4e800420; /* bctr */ 210 entry->jump[3] = 0x4e800420; /* bctr */
212 211
213 DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); 212 DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 99a995c2a3f2..be171ee73bf8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -475,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs)
475 struct pt_regs *old_regs; 475 struct pt_regs *old_regs;
476 u64 *next_tb = &__get_cpu_var(decrementers_next_tb); 476 u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
477 struct clock_event_device *evt = &__get_cpu_var(decrementers); 477 struct clock_event_device *evt = &__get_cpu_var(decrementers);
478 u64 now;
478 479
479 /* Ensure a positive value is written to the decrementer, or else 480 /* Ensure a positive value is written to the decrementer, or else
480 * some CPUs will continue to take decrementer exceptions. 481 * some CPUs will continue to take decrementer exceptions.
@@ -509,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs)
509 irq_work_run(); 510 irq_work_run();
510 } 511 }
511 512
512 *next_tb = ~(u64)0; 513 now = get_tb_or_rtc();
513 if (evt->event_handler) 514 if (now >= *next_tb) {
514 evt->event_handler(evt); 515 *next_tb = ~(u64)0;
516 if (evt->event_handler)
517 evt->event_handler(evt);
518 } else {
519 now = *next_tb - now;
520 if (now <= DECREMENTER_MAX)
521 set_dec((int)now);
522 }
515 523
516#ifdef CONFIG_PPC64 524#ifdef CONFIG_PPC64
517 /* collect purr register values often, for accurate calculations */ 525 /* collect purr register values often, for accurate calculations */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 7e1fef36bde6..e9c670d7a7fe 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -91,11 +91,6 @@ extern void smp_nap(void);
91/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ 91/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
92extern void _cpu_idle(void); 92extern void _cpu_idle(void);
93 93
94/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
95extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
96 unsigned long new_sp,
97 unsigned long new_ss10);
98
99#else /* __ASSEMBLY__ */ 94#else /* __ASSEMBLY__ */
100 95
101/* 96/*
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 133c4b56a99e..c31637baff28 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -68,20 +68,6 @@ STD_ENTRY(KBacktraceIterator_init_current)
68 jrp lr /* keep backtracer happy */ 68 jrp lr /* keep backtracer happy */
69 STD_ENDPROC(KBacktraceIterator_init_current) 69 STD_ENDPROC(KBacktraceIterator_init_current)
70 70
71/*
72 * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then
73 * free the old stack (passed in r0) and re-invoke cpu_idle().
74 * We update sp and ksp0 simultaneously to avoid backtracer warnings.
75 */
76STD_ENTRY(cpu_idle_on_new_stack)
77 {
78 move sp, r1
79 mtspr SPR_SYSTEM_SAVE_K_0, r2
80 }
81 jal free_thread_info
82 j cpu_idle
83 STD_ENDPROC(cpu_idle_on_new_stack)
84
85/* Loop forever on a nap during SMP boot. */ 71/* Loop forever on a nap during SMP boot. */
86STD_ENTRY(smp_nap) 72STD_ENTRY(smp_nap)
87 nap 73 nap
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 6098ccc59be2..dd87f3420390 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -29,6 +29,7 @@
29#include <linux/smp.h> 29#include <linux/smp.h>
30#include <linux/timex.h> 30#include <linux/timex.h>
31#include <linux/hugetlb.h> 31#include <linux/hugetlb.h>
32#include <linux/start_kernel.h>
32#include <asm/setup.h> 33#include <asm/setup.h>
33#include <asm/sections.h> 34#include <asm/sections.h>
34#include <asm/cacheflush.h> 35#include <asm/cacheflush.h>
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 8bbea6aa40d9..efe5acfc79c3 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -94,10 +94,10 @@ bs_die:
94 94
95 .section ".bsdata", "a" 95 .section ".bsdata", "a"
96bugger_off_msg: 96bugger_off_msg:
97 .ascii "Direct booting from floppy is no longer supported.\r\n" 97 .ascii "Direct floppy boot is not supported. "
98 .ascii "Please use a boot loader program instead.\r\n" 98 .ascii "Use a boot loader program instead.\r\n"
99 .ascii "\n" 99 .ascii "\n"
100 .ascii "Remove disk and press any key to reboot . . .\r\n" 100 .ascii "Remove disk and press any key to reboot ...\r\n"
101 .byte 0 101 .byte 0
102 102
103#ifdef CONFIG_EFI_STUB 103#ifdef CONFIG_EFI_STUB
@@ -111,7 +111,7 @@ coff_header:
111#else 111#else
112 .word 0x8664 # x86-64 112 .word 0x8664 # x86-64
113#endif 113#endif
114 .word 2 # nr_sections 114 .word 3 # nr_sections
115 .long 0 # TimeDateStamp 115 .long 0 # TimeDateStamp
116 .long 0 # PointerToSymbolTable 116 .long 0 # PointerToSymbolTable
117 .long 1 # NumberOfSymbols 117 .long 1 # NumberOfSymbols
@@ -158,8 +158,8 @@ extra_header_fields:
158#else 158#else
159 .quad 0 # ImageBase 159 .quad 0 # ImageBase
160#endif 160#endif
161 .long 0x1000 # SectionAlignment 161 .long 0x20 # SectionAlignment
162 .long 0x200 # FileAlignment 162 .long 0x20 # FileAlignment
163 .word 0 # MajorOperatingSystemVersion 163 .word 0 # MajorOperatingSystemVersion
164 .word 0 # MinorOperatingSystemVersion 164 .word 0 # MinorOperatingSystemVersion
165 .word 0 # MajorImageVersion 165 .word 0 # MajorImageVersion
@@ -200,8 +200,10 @@ extra_header_fields:
200 200
201 # Section table 201 # Section table
202section_table: 202section_table:
203 .ascii ".text" 203 #
204 .byte 0 204 # The offset & size fields are filled in by build.c.
205 #
206 .ascii ".setup"
205 .byte 0 207 .byte 0
206 .byte 0 208 .byte 0
207 .long 0 209 .long 0
@@ -217,9 +219,8 @@ section_table:
217 219
218 # 220 #
219 # The EFI application loader requires a relocation section 221 # The EFI application loader requires a relocation section
220 # because EFI applications must be relocatable. But since 222 # because EFI applications must be relocatable. The .reloc
221 # we don't need the loader to fixup any relocs for us, we 223 # offset & size fields are filled in by build.c.
222 # just create an empty (zero-length) .reloc section header.
223 # 224 #
224 .ascii ".reloc" 225 .ascii ".reloc"
225 .byte 0 226 .byte 0
@@ -233,6 +234,25 @@ section_table:
233 .word 0 # NumberOfRelocations 234 .word 0 # NumberOfRelocations
234 .word 0 # NumberOfLineNumbers 235 .word 0 # NumberOfLineNumbers
235 .long 0x42100040 # Characteristics (section flags) 236 .long 0x42100040 # Characteristics (section flags)
237
238 #
239 # The offset & size fields are filled in by build.c.
240 #
241 .ascii ".text"
242 .byte 0
243 .byte 0
244 .byte 0
245 .long 0
246 .long 0x0 # startup_{32,64}
247 .long 0 # Size of initialized data
248 # on disk
249 .long 0x0 # startup_{32,64}
250 .long 0 # PointerToRelocations
251 .long 0 # PointerToLineNumbers
252 .word 0 # NumberOfRelocations
253 .word 0 # NumberOfLineNumbers
254 .long 0x60500020 # Characteristics (section flags)
255
236#endif /* CONFIG_EFI_STUB */ 256#endif /* CONFIG_EFI_STUB */
237 257
238 # Kernel attributes; used by setup. This is part 1 of the 258 # Kernel attributes; used by setup. This is part 1 of the
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 3f61f6e2b46f..4b8e165ee572 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -50,6 +50,8 @@ typedef unsigned int u32;
50u8 buf[SETUP_SECT_MAX*512]; 50u8 buf[SETUP_SECT_MAX*512];
51int is_big_kernel; 51int is_big_kernel;
52 52
53#define PECOFF_RELOC_RESERVE 0x20
54
53/*----------------------------------------------------------------------*/ 55/*----------------------------------------------------------------------*/
54 56
55static const u32 crctab32[] = { 57static const u32 crctab32[] = {
@@ -133,11 +135,103 @@ static void usage(void)
133 die("Usage: build setup system [> image]"); 135 die("Usage: build setup system [> image]");
134} 136}
135 137
136int main(int argc, char ** argv)
137{
138#ifdef CONFIG_EFI_STUB 138#ifdef CONFIG_EFI_STUB
139 unsigned int file_sz, pe_header; 139
140static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
141{
142 unsigned int pe_header;
143 unsigned short num_sections;
144 u8 *section;
145
146 pe_header = get_unaligned_le32(&buf[0x3c]);
147 num_sections = get_unaligned_le16(&buf[pe_header + 6]);
148
149#ifdef CONFIG_X86_32
150 section = &buf[pe_header + 0xa8];
151#else
152 section = &buf[pe_header + 0xb8];
140#endif 153#endif
154
155 while (num_sections > 0) {
156 if (strncmp((char*)section, section_name, 8) == 0) {
157 /* section header size field */
158 put_unaligned_le32(size, section + 0x8);
159
160 /* section header vma field */
161 put_unaligned_le32(offset, section + 0xc);
162
163 /* section header 'size of initialised data' field */
164 put_unaligned_le32(size, section + 0x10);
165
166 /* section header 'file offset' field */
167 put_unaligned_le32(offset, section + 0x14);
168
169 break;
170 }
171 section += 0x28;
172 num_sections--;
173 }
174}
175
176static void update_pecoff_setup_and_reloc(unsigned int size)
177{
178 u32 setup_offset = 0x200;
179 u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
180 u32 setup_size = reloc_offset - setup_offset;
181
182 update_pecoff_section_header(".setup", setup_offset, setup_size);
183 update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
184
185 /*
186 * Modify .reloc section contents with a single entry. The
187 * relocation is applied to offset 10 of the relocation section.
188 */
189 put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
190 put_unaligned_le32(10, &buf[reloc_offset + 4]);
191}
192
193static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
194{
195 unsigned int pe_header;
196 unsigned int text_sz = file_sz - text_start;
197
198 pe_header = get_unaligned_le32(&buf[0x3c]);
199
200 /* Size of image */
201 put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
202
203 /*
204 * Size of code: Subtract the size of the first sector (512 bytes)
205 * which includes the header.
206 */
207 put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
208
209#ifdef CONFIG_X86_32
210 /*
211 * Address of entry point.
212 *
213 * The EFI stub entry point is +16 bytes from the start of
214 * the .text section.
215 */
216 put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
217#else
218 /*
219 * Address of entry point. startup_32 is at the beginning and
220 * the 64-bit entry point (startup_64) is always 512 bytes
221 * after. The EFI stub entry point is 16 bytes after that, as
222 * the first instruction allows legacy loaders to jump over
223 * the EFI stub initialisation
224 */
225 put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
226#endif /* CONFIG_X86_32 */
227
228 update_pecoff_section_header(".text", text_start, text_sz);
229}
230
231#endif /* CONFIG_EFI_STUB */
232
233int main(int argc, char ** argv)
234{
141 unsigned int i, sz, setup_sectors; 235 unsigned int i, sz, setup_sectors;
142 int c; 236 int c;
143 u32 sys_size; 237 u32 sys_size;
@@ -163,6 +257,12 @@ int main(int argc, char ** argv)
163 die("Boot block hasn't got boot flag (0xAA55)"); 257 die("Boot block hasn't got boot flag (0xAA55)");
164 fclose(file); 258 fclose(file);
165 259
260#ifdef CONFIG_EFI_STUB
261 /* Reserve 0x20 bytes for .reloc section */
262 memset(buf+c, 0, PECOFF_RELOC_RESERVE);
263 c += PECOFF_RELOC_RESERVE;
264#endif
265
166 /* Pad unused space with zeros */ 266 /* Pad unused space with zeros */
167 setup_sectors = (c + 511) / 512; 267 setup_sectors = (c + 511) / 512;
168 if (setup_sectors < SETUP_SECT_MIN) 268 if (setup_sectors < SETUP_SECT_MIN)
@@ -170,6 +270,10 @@ int main(int argc, char ** argv)
170 i = setup_sectors*512; 270 i = setup_sectors*512;
171 memset(buf+c, 0, i-c); 271 memset(buf+c, 0, i-c);
172 272
273#ifdef CONFIG_EFI_STUB
274 update_pecoff_setup_and_reloc(i);
275#endif
276
173 /* Set the default root device */ 277 /* Set the default root device */
174 put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); 278 put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
175 279
@@ -194,66 +298,8 @@ int main(int argc, char ** argv)
194 put_unaligned_le32(sys_size, &buf[0x1f4]); 298 put_unaligned_le32(sys_size, &buf[0x1f4]);
195 299
196#ifdef CONFIG_EFI_STUB 300#ifdef CONFIG_EFI_STUB
197 file_sz = sz + i + ((sys_size * 16) - sz); 301 update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
198 302#endif
199 pe_header = get_unaligned_le32(&buf[0x3c]);
200
201 /* Size of image */
202 put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
203
204 /*
205 * Subtract the size of the first section (512 bytes) which
206 * includes the header and .reloc section. The remaining size
207 * is that of the .text section.
208 */
209 file_sz -= 512;
210
211 /* Size of code */
212 put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
213
214#ifdef CONFIG_X86_32
215 /*
216 * Address of entry point.
217 *
218 * The EFI stub entry point is +16 bytes from the start of
219 * the .text section.
220 */
221 put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);
222
223 /* .text size */
224 put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
225
226 /* .text vma */
227 put_unaligned_le32(0x200, &buf[pe_header + 0xb4]);
228
229 /* .text size of initialised data */
230 put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
231
232 /* .text file offset */
233 put_unaligned_le32(0x200, &buf[pe_header + 0xbc]);
234#else
235 /*
236 * Address of entry point. startup_32 is at the beginning and
237 * the 64-bit entry point (startup_64) is always 512 bytes
238 * after. The EFI stub entry point is 16 bytes after that, as
239 * the first instruction allows legacy loaders to jump over
240 * the EFI stub initialisation
241 */
242 put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);
243
244 /* .text size */
245 put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
246
247 /* .text vma */
248 put_unaligned_le32(0x200, &buf[pe_header + 0xc4]);
249
250 /* .text size of initialised data */
251 put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
252
253 /* .text file offset */
254 put_unaligned_le32(0x200, &buf[pe_header + 0xcc]);
255#endif /* CONFIG_X86_32 */
256#endif /* CONFIG_EFI_STUB */
257 303
258 crc = partial_crc32(buf, i, crc); 304 crc = partial_crc32(buf, i, crc);
259 if (fwrite(buf, 1, i, stdout) != i) 305 if (fwrite(buf, 1, i, stdout) != i)
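The refactored build tool leans on a fixed PE/COFF layout: the little-endian 32-bit value at offset 0x3c locates the PE header, the 16-bit count at pe_header + 6 gives the number of sections, and section headers are 0x28 bytes apiece. The stand-alone sketch below is hypothetical (not part of the patch); list_pecoff_sections is an illustrative helper that walks a setup image's section table using exactly those assumptions, with buf holding the image as in build.c.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Minimal sketch: list the PE/COFF section names of a setup image held in buf. */
static void list_pecoff_sections(const uint8_t *buf)
{
	uint32_t pe_header;
	uint16_t num_sections;
	const uint8_t *section;

	/* offset of the PE header: little-endian 32-bit value at 0x3c */
	pe_header = buf[0x3c] | buf[0x3d] << 8 | buf[0x3e] << 16 |
		    (uint32_t)buf[0x3f] << 24;
	/* NumberOfSections sits 6 bytes into the COFF header */
	num_sections = buf[pe_header + 6] | buf[pe_header + 7] << 8;
	/* first section header: 0xa8 for the 32-bit image, 0xb8 for x86-64 */
	section = &buf[pe_header + 0xa8];

	while (num_sections--) {
		char name[9] = { 0 };

		memcpy(name, section, 8);	/* 8-byte name, possibly unterminated */
		printf("%s\n", name);
		section += 0x28;		/* one section header is 0x28 bytes */
	}
}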
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 0e3793b821ef..dc580c42851c 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -54,6 +54,20 @@ struct nmiaction {
54 __register_nmi_handler((t), &fn##_na); \ 54 __register_nmi_handler((t), &fn##_na); \
55}) 55})
56 56
57/*
58 * For special handlers that register/unregister in the
59 * init section only. This should be considered rare.
60 */
61#define register_nmi_handler_initonly(t, fn, fg, n) \
62({ \
63 static struct nmiaction fn##_na __initdata = { \
64 .handler = (fn), \
65 .name = (n), \
66 .flags = (fg), \
67 }; \
68 __register_nmi_handler((t), &fn##_na); \
69})
70
57int __register_nmi_handler(unsigned int, struct nmiaction *); 71int __register_nmi_handler(unsigned int, struct nmiaction *);
58 72
59void unregister_nmi_handler(unsigned int, const char *); 73void unregister_nmi_handler(unsigned int, const char *);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 04cd6882308e..e1f3a17034fc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -33,9 +33,8 @@
33#define segment_eq(a, b) ((a).seg == (b).seg) 33#define segment_eq(a, b) ((a).seg == (b).seg)
34 34
35#define user_addr_max() (current_thread_info()->addr_limit.seg) 35#define user_addr_max() (current_thread_info()->addr_limit.seg)
36#define __addr_ok(addr) \ 36#define __addr_ok(addr) \
37 ((unsigned long __force)(addr) < \ 37 ((unsigned long __force)(addr) < user_addr_max())
38 (current_thread_info()->addr_limit.seg))
39 38
40/* 39/*
41 * Test whether a block of memory is a valid user space address. 40 * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
47 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... 46 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
48 */ 47 */
49 48
50#define __range_not_ok(addr, size) \ 49#define __range_not_ok(addr, size, limit) \
51({ \ 50({ \
52 unsigned long flag, roksum; \ 51 unsigned long flag, roksum; \
53 __chk_user_ptr(addr); \ 52 __chk_user_ptr(addr); \
54 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ 53 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \
55 : "=&r" (flag), "=r" (roksum) \ 54 : "=&r" (flag), "=r" (roksum) \
56 : "1" (addr), "g" ((long)(size)), \ 55 : "1" (addr), "g" ((long)(size)), \
57 "rm" (current_thread_info()->addr_limit.seg)); \ 56 "rm" (limit)); \
58 flag; \ 57 flag; \
59}) 58})
60 59
@@ -77,7 +76,8 @@
77 * checks that the pointer is in the user space range - after calling 76 * checks that the pointer is in the user space range - after calling
78 * this function, memory access functions may still return -EFAULT. 77 * this function, memory access functions may still return -EFAULT.
79 */ 78 */
80#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) 79#define access_ok(type, addr, size) \
80 (likely(__range_not_ok(addr, size, user_addr_max()) == 0))
81 81
82/* 82/*
83 * The exception table consists of pairs of addresses relative to the 83 * The exception table consists of pairs of addresses relative to the
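__range_not_ok() now takes its limit as an explicit third argument, so callers that must not consult the current task's addr_limit (the NMI-safe perf paths further down pass TASK_SIZE) can still use it, while access_ok() simply supplies user_addr_max(). Ignoring the carry-flag trick in the asm, the check amounts to the rough C sketch below; range_not_ok is an illustrative name, not the kernel's definition.

/* Rough equivalent of __range_not_ok(addr, size, limit): non-zero when
 * [addr, addr + size) wraps around or reaches beyond limit. */
static inline int range_not_ok(unsigned long addr, unsigned long size,
			       unsigned long limit)
{
	unsigned long end = addr + size;

	return end < addr || end > limit;
}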
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index becf47b81735..6149b476d9df 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -149,7 +149,6 @@
149/* 4 bits of software ack period */ 149/* 4 bits of software ack period */
150#define UV2_ACK_MASK 0x7UL 150#define UV2_ACK_MASK 0x7UL
151#define UV2_ACK_UNITS_SHFT 3 151#define UV2_ACK_UNITS_SHFT 3
152#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT
153#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 152#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
154 153
155/* 154/*
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e76c191a835..d5fd66f0d4cd 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,7 +20,6 @@
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/ioport.h> 21#include <linux/ioport.h>
22#include <linux/suspend.h> 22#include <linux/suspend.h>
23#include <linux/kmemleak.h>
24#include <asm/e820.h> 23#include <asm/e820.h>
25#include <asm/io.h> 24#include <asm/io.h>
26#include <asm/iommu.h> 25#include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
95 return 0; 94 return 0;
96 } 95 }
97 memblock_reserve(addr, aper_size); 96 memblock_reserve(addr, aper_size);
98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping.
101 */
102 kmemleak_ignore(phys_to_virt(addr));
103 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", 97 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
104 aper_size >> 10, addr); 98 aper_size >> 10, addr);
105 insert_aperture_resource((u32)addr, aper_size); 99 insert_aperture_resource((u32)addr, aper_size);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac96561d1a99..5f0ff597437c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1195 BUG_ON(!cfg->vector); 1195 BUG_ON(!cfg->vector);
1196 1196
1197 vector = cfg->vector; 1197 vector = cfg->vector;
1198 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) 1198 for_each_cpu(cpu, cfg->domain)
1199 per_cpu(vector_irq, cpu)[vector] = -1; 1199 per_cpu(vector_irq, cpu)[vector] = -1;
1200 1200
1201 cfg->vector = 0; 1201 cfg->vector = 0;
@@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1203 1203
1204 if (likely(!cfg->move_in_progress)) 1204 if (likely(!cfg->move_in_progress))
1205 return; 1205 return;
1206 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { 1206 for_each_cpu(cpu, cfg->old_domain) {
1207 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1207 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1208 vector++) { 1208 vector++) {
1209 if (per_cpu(vector_irq, cpu)[vector] != irq) 1209 if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 0a687fd185e6..da27c5d2168a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1274,7 +1274,7 @@ static void mce_timer_fn(unsigned long data)
1274 */ 1274 */
1275 iv = __this_cpu_read(mce_next_interval); 1275 iv = __this_cpu_read(mce_next_interval);
1276 if (mce_notify_irq()) 1276 if (mce_notify_irq())
1277 iv = max(iv, (unsigned long) HZ/100); 1277 iv = max(iv / 2, (unsigned long) HZ/100);
1278 else 1278 else
1279 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); 1279 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
1280 __this_cpu_write(mce_next_interval, iv); 1280 __this_cpu_write(mce_next_interval, iv);
@@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1557static void __mcheck_cpu_init_timer(void) 1557static void __mcheck_cpu_init_timer(void)
1558{ 1558{
1559 struct timer_list *t = &__get_cpu_var(mce_timer); 1559 struct timer_list *t = &__get_cpu_var(mce_timer);
1560 unsigned long iv = __this_cpu_read(mce_next_interval); 1560 unsigned long iv = check_interval * HZ;
1561 1561
1562 setup_timer(t, mce_timer_fn, smp_processor_id()); 1562 setup_timer(t, mce_timer_fn, smp_processor_id());
1563 1563
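With this change the machine-check polling interval adapts in both directions: mce_timer_fn() halves it (with a floor of HZ/100) whenever mce_notify_irq() reports pending work and doubles it (with a ceiling of check_interval * HZ) otherwise, while __mcheck_cpu_init_timer() now restarts from the slow check_interval instead of inheriting the previous value. A toy model of that behaviour follows; next_interval, HZ and CHECK_INTERVAL here are hypothetical stand-ins, not kernel code.

#define HZ		1000UL
#define CHECK_INTERVAL	(5 * 60)	/* seconds; mirrors the kernel default */

/* Next polling interval in jiffies, mirroring mce_timer_fn(). */
static unsigned long next_interval(unsigned long iv, int found_work)
{
	unsigned long min_iv = HZ / 100;
	unsigned long max_iv = CHECK_INTERVAL * HZ;

	if (found_work)
		return iv / 2 > min_iv ? iv / 2 : min_iv;	/* poll faster */
	return iv * 2 < max_iv ? iv * 2 : max_iv;		/* back off */
}

Starting from CHECK_INTERVAL * HZ, five consecutive polls that find work drop the interval to roughly nine seconds, and quiet polls climb back toward the five-minute default.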
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..c4706cf9c011 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
1496 if (!cpuc->shared_regs) 1496 if (!cpuc->shared_regs)
1497 goto error; 1497 goto error;
1498 } 1498 }
1499 cpuc->is_fake = 1;
1499 return cpuc; 1500 return cpuc;
1500error: 1501error:
1501 free_fake_cpuc(cpuc); 1502 free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1756 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 1757 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1757} 1758}
1758 1759
1760static inline int
1761valid_user_frame(const void __user *fp, unsigned long size)
1762{
1763 return (__range_not_ok(fp, size, TASK_SIZE) == 0);
1764}
1765
1759#ifdef CONFIG_COMPAT 1766#ifdef CONFIG_COMPAT
1760 1767
1761#include <asm/compat.h> 1768#include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1780 if (bytes != sizeof(frame)) 1787 if (bytes != sizeof(frame))
1781 break; 1788 break;
1782 1789
1783 if (fp < compat_ptr(regs->sp)) 1790 if (!valid_user_frame(fp, sizeof(frame)))
1784 break; 1791 break;
1785 1792
1786 perf_callchain_store(entry, frame.return_address); 1793 perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1826 if (bytes != sizeof(frame)) 1833 if (bytes != sizeof(frame))
1827 break; 1834 break;
1828 1835
1829 if ((unsigned long)fp < regs->sp) 1836 if (!valid_user_frame(fp, sizeof(frame)))
1830 break; 1837 break;
1831 1838
1832 perf_callchain_store(entry, frame.return_address); 1839 perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
117 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 117 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
118 118
119 unsigned int group_flag; 119 unsigned int group_flag;
120 int is_fake;
120 121
121 /* 122 /*
122 * Intel DebugStore bits 123 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
364 int pebs_record_size; 365 int pebs_record_size;
365 void (*drain_pebs)(struct pt_regs *regs); 366 void (*drain_pebs)(struct pt_regs *regs);
366 struct event_constraint *pebs_constraints; 367 struct event_constraint *pebs_constraints;
368 void (*pebs_aliases)(struct perf_event *event);
367 369
368 /* 370 /*
369 * Intel LBR 371 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
1119 return NULL; 1119 return NULL;
1120} 1120}
1121 1121
1122static bool intel_try_alt_er(struct perf_event *event, int orig_idx) 1122static int intel_alt_er(int idx)
1123{ 1123{
1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) 1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1125 return false; 1125 return idx;
1126 1126
1127 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { 1127 if (idx == EXTRA_REG_RSP_0)
1128 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1128 return EXTRA_REG_RSP_1;
1129 event->hw.config |= 0x01bb; 1129
1130 event->hw.extra_reg.idx = EXTRA_REG_RSP_1; 1130 if (idx == EXTRA_REG_RSP_1)
1131 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 1131 return EXTRA_REG_RSP_0;
1132 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { 1132
1133 return idx;
1134}
1135
1136static void intel_fixup_er(struct perf_event *event, int idx)
1137{
1138 event->hw.extra_reg.idx = idx;
1139
1140 if (idx == EXTRA_REG_RSP_0) {
1133 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1141 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1134 event->hw.config |= 0x01b7; 1142 event->hw.config |= 0x01b7;
1135 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1136 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; 1143 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1144 } else if (idx == EXTRA_REG_RSP_1) {
1145 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1146 event->hw.config |= 0x01bb;
1147 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1137 } 1148 }
1138
1139 if (event->hw.extra_reg.idx == orig_idx)
1140 return false;
1141
1142 return true;
1143} 1149}
1144 1150
1145/* 1151/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1157 struct event_constraint *c = &emptyconstraint; 1163 struct event_constraint *c = &emptyconstraint;
1158 struct er_account *era; 1164 struct er_account *era;
1159 unsigned long flags; 1165 unsigned long flags;
1160 int orig_idx = reg->idx; 1166 int idx = reg->idx;
1161 1167
1162 /* already allocated shared msr */ 1168 /*
1163 if (reg->alloc) 1169 * reg->alloc can be set due to existing state, so for fake cpuc we
1170 * need to ignore this, otherwise we might fail to allocate proper fake
1171 * state for this extra reg constraint. Also see the comment below.
1172 */
1173 if (reg->alloc && !cpuc->is_fake)
1164 return NULL; /* call x86_get_event_constraint() */ 1174 return NULL; /* call x86_get_event_constraint() */
1165 1175
1166again: 1176again:
1167 era = &cpuc->shared_regs->regs[reg->idx]; 1177 era = &cpuc->shared_regs->regs[idx];
1168 /* 1178 /*
1169 * we use spin_lock_irqsave() to avoid lockdep issues when 1179 * we use spin_lock_irqsave() to avoid lockdep issues when
1170 * passing a fake cpuc 1180 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
1173 1183
1174 if (!atomic_read(&era->ref) || era->config == reg->config) { 1184 if (!atomic_read(&era->ref) || era->config == reg->config) {
1175 1185
1186 /*
1187 * If its a fake cpuc -- as per validate_{group,event}() we
1188 * shouldn't touch event state and we can avoid doing so
1189 * since both will only call get_event_constraints() once
1190 * on each event, this avoids the need for reg->alloc.
1191 *
1192 * Not doing the ER fixup will only result in era->reg being
1193 * wrong, but since we won't actually try and program hardware
1194 * this isn't a problem either.
1195 */
1196 if (!cpuc->is_fake) {
1197 if (idx != reg->idx)
1198 intel_fixup_er(event, idx);
1199
1200 /*
1201 * x86_schedule_events() can call get_event_constraints()
1202 * multiple times on events in the case of incremental
1203 * scheduling(). reg->alloc ensures we only do the ER
1204 * allocation once.
1205 */
1206 reg->alloc = 1;
1207 }
1208
1176 /* lock in msr value */ 1209 /* lock in msr value */
1177 era->config = reg->config; 1210 era->config = reg->config;
1178 era->reg = reg->reg; 1211 era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
1180 /* one more user */ 1213 /* one more user */
1181 atomic_inc(&era->ref); 1214 atomic_inc(&era->ref);
1182 1215
1183 /* no need to reallocate during incremental event scheduling */
1184 reg->alloc = 1;
1185
1186 /* 1216 /*
1187 * need to call x86_get_event_constraint() 1217 * need to call x86_get_event_constraint()
1188 * to check if associated event has constraints 1218 * to check if associated event has constraints
1189 */ 1219 */
1190 c = NULL; 1220 c = NULL;
1191 } else if (intel_try_alt_er(event, orig_idx)) { 1221 } else {
1192 raw_spin_unlock_irqrestore(&era->lock, flags); 1222 idx = intel_alt_er(idx);
1193 goto again; 1223 if (idx != reg->idx) {
1224 raw_spin_unlock_irqrestore(&era->lock, flags);
1225 goto again;
1226 }
1194 } 1227 }
1195 raw_spin_unlock_irqrestore(&era->lock, flags); 1228 raw_spin_unlock_irqrestore(&era->lock, flags);
1196 1229
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1204 struct er_account *era; 1237 struct er_account *era;
1205 1238
1206 /* 1239 /*
1207 * only put constraint if extra reg was actually 1240 * Only put constraint if extra reg was actually allocated. Also takes
1208 * allocated. Also takes care of event which do 1241 * care of event which do not use an extra shared reg.
1209 * not use an extra shared reg 1242 *
1243 * Also, if this is a fake cpuc we shouldn't touch any event state
1244 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
1245 * either since it'll be thrown out.
1210 */ 1246 */
1211 if (!reg->alloc) 1247 if (!reg->alloc || cpuc->is_fake)
1212 return; 1248 return;
1213 1249
1214 era = &cpuc->shared_regs->regs[reg->idx]; 1250 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1300 intel_put_shared_regs_event_constraints(cpuc, event); 1336 intel_put_shared_regs_event_constraints(cpuc, event);
1301} 1337}
1302 1338
1303static int intel_pmu_hw_config(struct perf_event *event) 1339static void intel_pebs_aliases_core2(struct perf_event *event)
1304{ 1340{
1305 int ret = x86_pmu_hw_config(event); 1341 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1306
1307 if (ret)
1308 return ret;
1309
1310 if (event->attr.precise_ip &&
1311 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1312 /* 1342 /*
1313 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 1343 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1314 * (0x003c) so that we can use it with PEBS. 1344 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
1329 */ 1359 */
1330 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 1360 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1331 1361
1362 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1363 event->hw.config = alt_config;
1364 }
1365}
1366
1367static void intel_pebs_aliases_snb(struct perf_event *event)
1368{
1369 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1370 /*
1371 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1372 * (0x003c) so that we can use it with PEBS.
1373 *
1374 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1375 * PEBS capable. However we can use UOPS_RETIRED.ALL
1376 * (0x01c2), which is a PEBS capable event, to get the same
1377 * count.
1378 *
1379 * UOPS_RETIRED.ALL counts the number of cycles that retires
1380 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
1381 * larger than the maximum number of micro-ops that can be
1382 * retired per cycle (4) and then inverting the condition, we
1383 * count all cycles that retire 16 or less micro-ops, which
1384 * is every cycle.
1385 *
1386 * Thereby we gain a PEBS capable cycle counter.
1387 */
1388 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
1332 1389
1333 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1390 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1334 event->hw.config = alt_config; 1391 event->hw.config = alt_config;
1335 } 1392 }
1393}
1394
1395static int intel_pmu_hw_config(struct perf_event *event)
1396{
1397 int ret = x86_pmu_hw_config(event);
1398
1399 if (ret)
1400 return ret;
1401
1402 if (event->attr.precise_ip && x86_pmu.pebs_aliases)
1403 x86_pmu.pebs_aliases(event);
1336 1404
1337 if (intel_pmu_needs_lbr_smpl(event)) { 1405 if (intel_pmu_needs_lbr_smpl(event)) {
1338 ret = intel_pmu_setup_lbr_filter(event); 1406 ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1607 .max_period = (1ULL << 31) - 1, 1675 .max_period = (1ULL << 31) - 1,
1608 .get_event_constraints = intel_get_event_constraints, 1676 .get_event_constraints = intel_get_event_constraints,
1609 .put_event_constraints = intel_put_event_constraints, 1677 .put_event_constraints = intel_put_event_constraints,
1678 .pebs_aliases = intel_pebs_aliases_core2,
1610 1679
1611 .format_attrs = intel_arch3_formats_attr, 1680 .format_attrs = intel_arch3_formats_attr,
1612 1681
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
1840 break; 1909 break;
1841 1910
1842 case 42: /* SandyBridge */ 1911 case 42: /* SandyBridge */
1843 x86_add_quirk(intel_sandybridge_quirk);
1844 case 45: /* SandyBridge, "Romely-EP" */ 1912 case 45: /* SandyBridge, "Romely-EP" */
1913 x86_add_quirk(intel_sandybridge_quirk);
1914 case 58: /* IvyBridge */
1845 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1915 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1846 sizeof(hw_cache_event_ids)); 1916 sizeof(hw_cache_event_ids));
1847 1917
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
1849 1919
1850 x86_pmu.event_constraints = intel_snb_event_constraints; 1920 x86_pmu.event_constraints = intel_snb_event_constraints;
1851 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1921 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1922 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
1852 x86_pmu.extra_regs = intel_snb_extra_regs; 1923 x86_pmu.extra_regs = intel_snb_extra_regs;
1853 /* all extra regs are per-cpu when HT is on */ 1924 /* all extra regs are per-cpu when HT is on */
1854 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1925 x86_pmu.er_flags |= ERF_HAS_RSP_1;
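The alternative encodings chosen by the new pebs_aliases callbacks are ordinary PERFEVTSEL values. Assuming the standard architectural layout (event select in bits 0-7, unit mask in bits 8-15, invert in bit 23, counter mask in bits 24-31), the SandyBridge alias X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16) packs to 0x108001c2. The sketch below is illustrative only; evsel is a hypothetical helper, not the kernel's X86_CONFIG macro.

#include <stdio.h>

/* Pack an architectural PERFEVTSEL value: event [7:0], umask [15:8],
 * inv bit 23, cmask [31:24]. */
static unsigned long long evsel(unsigned event, unsigned umask,
				unsigned inv, unsigned cmask)
{
	return event | umask << 8 | (unsigned long long)inv << 23 |
	       (unsigned long long)cmask << 24;
}

int main(void)
{
	/* event 0xc2, umask 0x01, cmask=16, inv=1: the SNB PEBS "cycles" alias */
	printf("snb pebs cycles alias:   0x%llx\n", evsel(0xc2, 0x01, 1, 16));
	/* event 0xc0, cmask=16, inv=1: the core2/Nehalem alias */
	printf("core2 pebs cycles alias: 0x%llx\n", evsel(0xc0, 0, 1, 16));
	return 0;
}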
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
400 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 400 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
401 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ 401 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
402 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ 402 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
403 INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ 403 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
404 INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
405 INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
406 INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
407 INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
408 INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
409 INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
410 INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
411 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 404 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
412 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 405 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
413 INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ 406 INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e31bf8d5c4d2..149b8d9c6ad4 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
42static void __init init_nmi_testsuite(void) 42static void __init init_nmi_testsuite(void)
43{ 43{
44 /* trap all the unknown NMIs we may generate */ 44 /* trap all the unknown NMIs we may generate */
45 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); 45 register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
46} 46}
47 47
48static void __init cleanup_nmi_testsuite(void) 48static void __init cleanup_nmi_testsuite(void)
@@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
64{ 64{
65 unsigned long timeout; 65 unsigned long timeout;
66 66
67 if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, 67 if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
68 NMI_FLAG_FIRST, "nmi_selftest")) { 68 NMI_FLAG_FIRST, "nmi_selftest")) {
69 nmi_fail = FAILURE; 69 nmi_fail = FAILURE;
70 return; 70 return;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 79c45af81604..25b48edb847c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -639,9 +639,11 @@ void native_machine_shutdown(void)
639 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); 639 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
640 640
641 /* 641 /*
642 * O.K Now that I'm on the appropriate processor, 642 * O.K Now that I'm on the appropriate processor, stop all of the
643 * stop all of the others. 643 * others. Also disable the local irq to not receive the per-cpu
644 * timer interrupt which may trigger scheduler's load balance.
644 */ 645 */
646 local_irq_disable();
645 stop_other_cpus(); 647 stop_other_cpus();
646#endif 648#endif
647 649
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..3fab55bea29b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
382 if ((i == cpu) || (has_mc && match_llc(c, o))) 382 if ((i == cpu) || (has_mc && match_llc(c, o)))
383 link_mask(llc_shared, cpu, i); 383 link_mask(llc_shared, cpu, i);
384 384
385 }
386
387 /*
388 * This needs a separate iteration over the cpus because we rely on all
389 * cpu_sibling_mask links to be set-up.
390 */
391 for_each_cpu(i, cpu_sibling_setup_mask) {
392 o = &cpu_data(i);
393
385 if ((i == cpu) || (has_mc && match_mc(c, o))) { 394 if ((i == cpu) || (has_mc && match_mc(c, o))) {
386 link_mask(core, cpu, i); 395 link_mask(core, cpu, i);
387 396
@@ -410,15 +419,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
410/* maps the cpu to the sched domain representing multi-core */ 419/* maps the cpu to the sched domain representing multi-core */
411const struct cpumask *cpu_coregroup_mask(int cpu) 420const struct cpumask *cpu_coregroup_mask(int cpu)
412{ 421{
413 struct cpuinfo_x86 *c = &cpu_data(cpu); 422 return cpu_llc_shared_mask(cpu);
414 /*
415 * For perf, we return last level cache shared map.
416 * And for power savings, we return cpu_core_map
417 */
418 if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
419 return cpu_core_mask(cpu);
420 else
421 return cpu_llc_shared_mask(cpu);
422} 423}
423 424
424static void impress_friends(void) 425static void impress_friends(void)
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index f61ee67ec00f..677b1ed184c9 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10#include <asm/word-at-a-time.h> 10#include <asm/word-at-a-time.h>
11#include <linux/sched.h>
11 12
12/* 13/*
13 * best effort, GUP based copy_from_user() that is NMI-safe 14 * best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
21 void *map; 22 void *map;
22 int ret; 23 int ret;
23 24
25 if (__range_not_ok(from, n, TASK_SIZE) == 0)
26 return len;
27
24 do { 28 do {
25 ret = __get_user_pages_fast(addr, 1, 0, &page); 29 ret = __get_user_pages_fast(addr, 1, 0, &page);
26 if (!ret) 30 if (!ret)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 819137904428..5d7e51f3fd28 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -28,7 +28,7 @@
28# - (66): the last prefix is 0x66 28# - (66): the last prefix is 0x66
29# - (F3): the last prefix is 0xF3 29# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2 30# - (F2): the last prefix is 0xF2
31# 31# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
32 32
33Table: one byte opcode 33Table: one byte opcode
34Referrer: 34Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
515b5: LGS Gv,Mp 515b5: LGS Gv,Mp
516b6: MOVZX Gv,Eb 516b6: MOVZX Gv,Eb
517b7: MOVZX Gv,Ew 517b7: MOVZX Gv,Ew
518b8: JMPE | POPCNT Gv,Ev (F3) 518b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
519b9: Grp10 (1A) 519b9: Grp10 (1A)
520ba: Grp8 Ev,Ib (1A) 520ba: Grp8 Ev,Ib (1A)
521bb: BTC Ev,Gv 521bb: BTC Ev,Gv
522bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) 522bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
523bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) 523bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
524be: MOVSX Gv,Eb 524be: MOVSX Gv,Eb
525bf: MOVSX Gv,Ew 525bf: MOVSX Gv,Ew
526# 0x0f 0xc0-0xcf 526# 0x0f 0xc0-0xcf
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 97141c26a13a..bc4e9d84157f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
62 extra += PMD_SIZE; 62 extra += PMD_SIZE;
63#endif 63#endif
64 /* The first 2/4M doesn't use large pages. */ 64 /* The first 2/4M doesn't use large pages. */
65 extra += mr->end - mr->start; 65 if (mr->start < PMD_SIZE)
66 extra += mr->end - mr->start;
66 67
67 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; 68 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
68 } else 69 } else
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 732af3a96183..4599c3e8bcb6 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
176 return; 176 return;
177 } 177 }
178 178
179 node_set(node, numa_nodes_parsed);
180
179 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", 181 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
180 node, pxm, 182 node, pxm,
181 (unsigned long long) start, (unsigned long long) end - 1); 183 (unsigned long long) start, (unsigned long long) end - 1);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index e31bcd8f2eee..fd41a9262d65 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
782EXPORT_SYMBOL_GPL(intel_scu_notifier); 782EXPORT_SYMBOL_GPL(intel_scu_notifier);
783 783
784/* Called by IPC driver */ 784/* Called by IPC driver */
785void intel_scu_devices_create(void) 785void __devinit intel_scu_devices_create(void)
786{ 786{
787 int i; 787 int i;
788 788
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3ae0e61abd23..59880afa851f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void)
1295 */ 1295 */
1296 mmr_image |= (1L << SOFTACK_MSHIFT); 1296 mmr_image |= (1L << SOFTACK_MSHIFT);
1297 if (is_uv2_hub()) { 1297 if (is_uv2_hub()) {
1298 mmr_image &= ~(1L << UV2_LEG_SHFT);
1299 mmr_image |= (1L << UV2_EXT_SHFT); 1298 mmr_image |= (1L << UV2_EXT_SHFT);
1300 } 1299 }
1301 write_mmr_misc_control(pnode, mmr_image); 1300 write_mmr_misc_control(pnode, mmr_image);
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 5f6a5b6c3a15..ddcf39b1a18d 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -66,9 +66,10 @@ BEGIN {
66 rex_expr = "^REX(\\.[XRWB]+)*" 66 rex_expr = "^REX(\\.[XRWB]+)*"
67 fpu_expr = "^ESC" # TODO 67 fpu_expr = "^ESC" # TODO
68 68
69 lprefix1_expr = "\\(66\\)" 69 lprefix1_expr = "\\((66|!F3)\\)"
70 lprefix2_expr = "\\(F3\\)" 70 lprefix2_expr = "\\(F3\\)"
71 lprefix3_expr = "\\(F2\\)" 71 lprefix3_expr = "\\((F2|!F3)\\)"
72 lprefix_expr = "\\((66|F2|F3)\\)"
72 max_lprefix = 4 73 max_lprefix = 4
73 74
74 # All opcodes starting with lower-case 'v' or with (v1) superscript 75 # All opcodes starting with lower-case 'v' or with (v1) superscript
@@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod)
333 if (match(ext, lprefix1_expr)) { 334 if (match(ext, lprefix1_expr)) {
334 lptable1[idx] = add_flags(lptable1[idx],flags) 335 lptable1[idx] = add_flags(lptable1[idx],flags)
335 variant = "INAT_VARIANT" 336 variant = "INAT_VARIANT"
336 } else if (match(ext, lprefix2_expr)) { 337 }
338 if (match(ext, lprefix2_expr)) {
337 lptable2[idx] = add_flags(lptable2[idx],flags) 339 lptable2[idx] = add_flags(lptable2[idx],flags)
338 variant = "INAT_VARIANT" 340 variant = "INAT_VARIANT"
339 } else if (match(ext, lprefix3_expr)) { 341 }
342 if (match(ext, lprefix3_expr)) {
340 lptable3[idx] = add_flags(lptable3[idx],flags) 343 lptable3[idx] = add_flags(lptable3[idx],flags)
341 variant = "INAT_VARIANT" 344 variant = "INAT_VARIANT"
342 } else { 345 }
346 if (!match(ext, lprefix_expr)){
343 table[idx] = add_flags(table[idx],flags) 347 table[idx] = add_flags(table[idx],flags)
344 } 348 }
345 } 349 }
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 0b9f2e13c781..c1dacca312f3 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
31asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 31asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
32 struct timespec __user *tsp, const sigset_t __user *sigmask, 32 struct timespec __user *tsp, const sigset_t __user *sigmask,
33 size_t sigsetsize); 33 size_t sigsetsize);
34 34asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
35 35 size_t sigsetsize);
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index b9f8e5850d3a..efe4e854b3cd 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs)
493 if (ret) 493 if (ret)
494 return; 494 return;
495 495
496 signal_delivered(signr, info, ka, regs, 0); 496 signal_delivered(signr, &info, &ka, regs, 0);
497 if (current->ptrace & PT_SINGLESTEP) 497 if (current->ptrace & PT_SINGLESTEP)
498 task_pt_regs(current)->icountlevel = 1; 498 task_pt_regs(current)->icountlevel = 1;
499 499
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 47768ff87343..80998958cf45 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -208,7 +208,7 @@ config ACPI_IPMI
208 208
209config ACPI_HOTPLUG_CPU 209config ACPI_HOTPLUG_CPU
210 bool 210 bool
211 depends on ACPI_PROCESSOR && HOTPLUG_CPU 211 depends on EXPERIMENTAL && ACPI_PROCESSOR && HOTPLUG_CPU
212 select ACPI_CONTAINER 212 select ACPI_CONTAINER
213 default y 213 default y
214 214
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 86933ca8b472..7dd3f9fb9f3f 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -643,11 +643,19 @@ static int acpi_battery_update(struct acpi_battery *battery)
643 643
644static void acpi_battery_refresh(struct acpi_battery *battery) 644static void acpi_battery_refresh(struct acpi_battery *battery)
645{ 645{
646 int power_unit;
647
646 if (!battery->bat.dev) 648 if (!battery->bat.dev)
647 return; 649 return;
648 650
651 power_unit = battery->power_unit;
652
649 acpi_battery_get_info(battery); 653 acpi_battery_get_info(battery);
650 /* The battery may have changed its reporting units. */ 654
655 if (power_unit == battery->power_unit)
656 return;
657
658 /* The battery has changed its reporting units. */
651 sysfs_remove_battery(battery); 659 sysfs_remove_battery(battery);
652 sysfs_add_battery(battery); 660 sysfs_add_battery(battery);
653} 661}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3188da3df8da..adceafda9c17 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -182,41 +182,66 @@ EXPORT_SYMBOL(acpi_bus_get_private_data);
182 Power Management 182 Power Management
183 -------------------------------------------------------------------------- */ 183 -------------------------------------------------------------------------- */
184 184
185static const char *state_string(int state)
186{
187 switch (state) {
188 case ACPI_STATE_D0:
189 return "D0";
190 case ACPI_STATE_D1:
191 return "D1";
192 case ACPI_STATE_D2:
193 return "D2";
194 case ACPI_STATE_D3_HOT:
195 return "D3hot";
196 case ACPI_STATE_D3_COLD:
197 return "D3";
198 default:
199 return "(unknown)";
200 }
201}
202
185static int __acpi_bus_get_power(struct acpi_device *device, int *state) 203static int __acpi_bus_get_power(struct acpi_device *device, int *state)
186{ 204{
187 int result = 0; 205 int result = ACPI_STATE_UNKNOWN;
188 acpi_status status = 0;
189 unsigned long long psc = 0;
190 206
191 if (!device || !state) 207 if (!device || !state)
192 return -EINVAL; 208 return -EINVAL;
193 209
194 *state = ACPI_STATE_UNKNOWN; 210 if (!device->flags.power_manageable) {
195
196 if (device->flags.power_manageable) {
197 /*
198 * Get the device's power state either directly (via _PSC) or
199 * indirectly (via power resources).
200 */
201 if (device->power.flags.power_resources) {
202 result = acpi_power_get_inferred_state(device, state);
203 if (result)
204 return result;
205 } else if (device->power.flags.explicit_get) {
206 status = acpi_evaluate_integer(device->handle, "_PSC",
207 NULL, &psc);
208 if (ACPI_FAILURE(status))
209 return -ENODEV;
210 *state = (int)psc;
211 }
212 } else {
213 /* TBD: Non-recursive algorithm for walking up hierarchy. */ 211 /* TBD: Non-recursive algorithm for walking up hierarchy. */
214 *state = device->parent ? 212 *state = device->parent ?
215 device->parent->power.state : ACPI_STATE_D0; 213 device->parent->power.state : ACPI_STATE_D0;
214 goto out;
215 }
216
217 /*
218 * Get the device's power state either directly (via _PSC) or
219 * indirectly (via power resources).
220 */
221 if (device->power.flags.explicit_get) {
222 unsigned long long psc;
223 acpi_status status = acpi_evaluate_integer(device->handle,
224 "_PSC", NULL, &psc);
225 if (ACPI_FAILURE(status))
226 return -ENODEV;
227
228 result = psc;
229 }
230 /* The test below covers ACPI_STATE_UNKNOWN too. */
231 if (result <= ACPI_STATE_D2) {
232 ; /* Do nothing. */
233 } else if (device->power.flags.power_resources) {
234 int error = acpi_power_get_inferred_state(device, &result);
235 if (error)
236 return error;
237 } else if (result == ACPI_STATE_D3_HOT) {
238 result = ACPI_STATE_D3;
216 } 239 }
240 *state = result;
217 241
218 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is D%d\n", 242 out:
219 device->pnp.bus_id, *state)); 243 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n",
244 device->pnp.bus_id, state_string(*state)));
220 245
221 return 0; 246 return 0;
222} 247}
@@ -234,13 +259,14 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
234 /* Make sure this is a valid target state */ 259 /* Make sure this is a valid target state */
235 260
236 if (state == device->power.state) { 261 if (state == device->power.state) {
237 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at D%d\n", 262 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at %s\n",
238 state)); 263 state_string(state)));
239 return 0; 264 return 0;
240 } 265 }
241 266
242 if (!device->power.states[state].flags.valid) { 267 if (!device->power.states[state].flags.valid) {
243 printk(KERN_WARNING PREFIX "Device does not support D%d\n", state); 268 printk(KERN_WARNING PREFIX "Device does not support %s\n",
269 state_string(state));
244 return -ENODEV; 270 return -ENODEV;
245 } 271 }
246 if (device->parent && (state < device->parent->power.state)) { 272 if (device->parent && (state < device->parent->power.state)) {
@@ -294,13 +320,13 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
294 end: 320 end:
295 if (result) 321 if (result)
296 printk(KERN_WARNING PREFIX 322 printk(KERN_WARNING PREFIX
297 "Device [%s] failed to transition to D%d\n", 323 "Device [%s] failed to transition to %s\n",
298 device->pnp.bus_id, state); 324 device->pnp.bus_id, state_string(state));
299 else { 325 else {
300 device->power.state = state; 326 device->power.state = state;
301 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 327 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
302 "Device [%s] transitioned to D%d\n", 328 "Device [%s] transitioned to %s\n",
303 device->pnp.bus_id, state)); 329 device->pnp.bus_id, state_string(state)));
304 } 330 }
305 331
306 return result; 332 return result;
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 0500f719f63e..dd6d6a3c6780 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -631,7 +631,7 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
631 * We know a device's inferred power state when all the resources 631 * We know a device's inferred power state when all the resources
632 * required for a given D-state are 'on'. 632 * required for a given D-state are 'on'.
633 */ 633 */
634 for (i = ACPI_STATE_D0; i < ACPI_STATE_D3_HOT; i++) { 634 for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) {
635 list = &device->power.states[i].resources; 635 list = &device->power.states[i].resources;
636 if (list->count < 1) 636 if (list->count < 1)
637 continue; 637 continue;
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 0af48a8554cd..a093dc163a42 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -333,6 +333,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
333 struct acpi_buffer state = { 0, NULL }; 333 struct acpi_buffer state = { 0, NULL };
334 union acpi_object *pss = NULL; 334 union acpi_object *pss = NULL;
335 int i; 335 int i;
336 int last_invalid = -1;
336 337
337 338
338 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); 339 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
@@ -394,14 +395,33 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
394 ((u32)(px->core_frequency * 1000) != 395 ((u32)(px->core_frequency * 1000) !=
395 (px->core_frequency * 1000))) { 396 (px->core_frequency * 1000))) {
396 printk(KERN_ERR FW_BUG PREFIX 397 printk(KERN_ERR FW_BUG PREFIX
397 "Invalid BIOS _PSS frequency: 0x%llx MHz\n", 398 "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
398 px->core_frequency); 399 pr->id, px->core_frequency);
399 result = -EFAULT; 400 if (last_invalid == -1)
400 kfree(pr->performance->states); 401 last_invalid = i;
401 goto end; 402 } else {
403 if (last_invalid != -1) {
404 /*
405 * Copy this valid entry over last_invalid entry
406 */
407 memcpy(&(pr->performance->states[last_invalid]),
408 px, sizeof(struct acpi_processor_px));
409 ++last_invalid;
410 }
402 } 411 }
403 } 412 }
404 413
414 if (last_invalid == 0) {
415 printk(KERN_ERR FW_BUG PREFIX
416 "No valid BIOS _PSS frequency found for processor %d\n", pr->id);
417 result = -EFAULT;
418 kfree(pr->performance->states);
419 pr->performance->states = NULL;
420 }
421
422 if (last_invalid > 0)
423 pr->performance->state_count = last_invalid;
424
405 end: 425 end:
406 kfree(buffer.pointer); 426 kfree(buffer.pointer);
407 427
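Rather than failing the whole _PSS table on the first bogus frequency, the parser now slides each later valid entry down over the earliest invalid slot and finally trims state_count, giving up only if no entry at all was valid. The same compaction idiom is shown in isolation below; compact and valid are generic illustrative names, not the ACPI code.

#include <stddef.h>

/* Keep only elements accepted by valid(); returns the new count.
 * last_invalid tracks the first hole, as in the _PSS parser. */
static size_t compact(int *a, size_t n, int (*valid)(int))
{
	size_t last_invalid = n;	/* "no hole yet" sentinel */
	size_t i;

	for (i = 0; i < n; i++) {
		if (!valid(a[i])) {
			if (last_invalid == n)
				last_invalid = i;	/* remember first hole */
		} else if (last_invalid != n) {
			a[last_invalid++] = a[i];	/* slide valid entry down */
		}
	}
	return last_invalid == n ? n : last_invalid;
}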
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 85cbfdccc97c..c8a1f3b68110 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1567,6 +1567,7 @@ static int acpi_bus_scan_fixed(void)
1567 ACPI_BUS_TYPE_POWER_BUTTON, 1567 ACPI_BUS_TYPE_POWER_BUTTON,
1568 ACPI_STA_DEFAULT, 1568 ACPI_STA_DEFAULT,
1569 &ops); 1569 &ops);
1570 device_init_wakeup(&device->dev, true);
1570 } 1571 }
1571 1572
1572 if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) { 1573 if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) {
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 74ee4ab577b6..88561029cca8 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -57,6 +57,7 @@ MODULE_PARM_DESC(gts, "Enable evaluation of _GTS on suspend.");
57MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".); 57MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".);
58 58
59static u8 sleep_states[ACPI_S_STATE_COUNT]; 59static u8 sleep_states[ACPI_S_STATE_COUNT];
60static bool pwr_btn_event_pending;
60 61
61static void acpi_sleep_tts_switch(u32 acpi_state) 62static void acpi_sleep_tts_switch(u32 acpi_state)
62{ 63{
@@ -184,6 +185,14 @@ static int acpi_pm_prepare(void)
184 return error; 185 return error;
185} 186}
186 187
188static int find_powerf_dev(struct device *dev, void *data)
189{
190 struct acpi_device *device = to_acpi_device(dev);
191 const char *hid = acpi_device_hid(device);
192
193 return !strcmp(hid, ACPI_BUTTON_HID_POWERF);
194}
195
187/** 196/**
188 * acpi_pm_finish - Instruct the platform to leave a sleep state. 197 * acpi_pm_finish - Instruct the platform to leave a sleep state.
189 * 198 *
@@ -192,6 +201,7 @@ static int acpi_pm_prepare(void)
192 */ 201 */
193static void acpi_pm_finish(void) 202static void acpi_pm_finish(void)
194{ 203{
204 struct device *pwr_btn_dev;
195 u32 acpi_state = acpi_target_sleep_state; 205 u32 acpi_state = acpi_target_sleep_state;
196 206
197 acpi_ec_unblock_transactions(); 207 acpi_ec_unblock_transactions();
@@ -209,6 +219,23 @@ static void acpi_pm_finish(void)
209 acpi_set_firmware_waking_vector((acpi_physical_address) 0); 219 acpi_set_firmware_waking_vector((acpi_physical_address) 0);
210 220
211 acpi_target_sleep_state = ACPI_STATE_S0; 221 acpi_target_sleep_state = ACPI_STATE_S0;
222
223 /* If we were woken with the fixed power button, provide a small
224 * hint to userspace in the form of a wakeup event on the fixed power
225 * button device (if it can be found).
226 *
227 * We delay the event generation til now, as the PM layer requires
228 * timekeeping to be running before we generate events. */
229 if (!pwr_btn_event_pending)
230 return;
231
232 pwr_btn_event_pending = false;
233 pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL,
234 find_powerf_dev);
235 if (pwr_btn_dev) {
236 pm_wakeup_event(pwr_btn_dev, 0);
237 put_device(pwr_btn_dev);
238 }
212} 239}
213 240
214/** 241/**
@@ -298,9 +325,23 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
298 /* ACPI 3.0 specs (P62) says that it's the responsibility 325 /* ACPI 3.0 specs (P62) says that it's the responsibility
299 * of the OSPM to clear the status bit [ implying that the 326 * of the OSPM to clear the status bit [ implying that the
300 * POWER_BUTTON event should not reach userspace ] 327 * POWER_BUTTON event should not reach userspace ]
328 *
329 * However, we do generate a small hint for userspace in the form of
330 * a wakeup event. We flag this condition for now and generate the
331 * event later, as we're currently too early in resume to be able to
332 * generate wakeup events.
301 */ 333 */
302 if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) 334 if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) {
303 acpi_clear_event(ACPI_EVENT_POWER_BUTTON); 335 acpi_event_status pwr_btn_status;
336
337 acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status);
338
339 if (pwr_btn_status & ACPI_EVENT_FLAG_SET) {
340 acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
341 /* Flag for later */
342 pwr_btn_event_pending = true;
343 }
344 }
304 345
305 /* 346 /*
306 * Disable and clear GPE status before interrupt is enabled. Some GPEs 347 * Disable and clear GPE status before interrupt is enabled. Some GPEs
@@ -730,8 +771,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
730 * can wake the system. _S0W may be valid, too. 771 * can wake the system. _S0W may be valid, too.
731 */ 772 */
732 if (acpi_target_sleep_state == ACPI_STATE_S0 || 773 if (acpi_target_sleep_state == ACPI_STATE_S0 ||
733 (device_may_wakeup(dev) && 774 (device_may_wakeup(dev) && adev->wakeup.flags.valid &&
734 adev->wakeup.sleep_state <= acpi_target_sleep_state)) { 775 adev->wakeup.sleep_state >= acpi_target_sleep_state)) {
735 acpi_status status; 776 acpi_status status;
736 777
737 acpi_method[3] = 'W'; 778 acpi_method[3] = 'W';
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 9577b6fa2650..a576575617d7 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -1687,10 +1687,6 @@ static int acpi_video_bus_add(struct acpi_device *device)
1687 set_bit(KEY_BRIGHTNESS_ZERO, input->keybit); 1687 set_bit(KEY_BRIGHTNESS_ZERO, input->keybit);
1688 set_bit(KEY_DISPLAY_OFF, input->keybit); 1688 set_bit(KEY_DISPLAY_OFF, input->keybit);
1689 1689
1690 error = input_register_device(input);
1691 if (error)
1692 goto err_stop_video;
1693
1694 printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s rom: %s post: %s)\n", 1690 printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s rom: %s post: %s)\n",
1695 ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), 1691 ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
1696 video->flags.multihead ? "yes" : "no", 1692 video->flags.multihead ? "yes" : "no",
@@ -1701,12 +1697,16 @@ static int acpi_video_bus_add(struct acpi_device *device)
1701 video->pm_nb.priority = 0; 1697 video->pm_nb.priority = 0;
1702 error = register_pm_notifier(&video->pm_nb); 1698 error = register_pm_notifier(&video->pm_nb);
1703 if (error) 1699 if (error)
1704 goto err_unregister_input_dev; 1700 goto err_stop_video;
1701
1702 error = input_register_device(input);
1703 if (error)
1704 goto err_unregister_pm_notifier;
1705 1705
1706 return 0; 1706 return 0;
1707 1707
1708 err_unregister_input_dev: 1708 err_unregister_pm_notifier:
1709 input_unregister_device(input); 1709 unregister_pm_notifier(&video->pm_nb);
1710 err_stop_video: 1710 err_stop_video:
1711 acpi_video_bus_stop_devices(video); 1711 acpi_video_bus_stop_devices(video);
1712 err_free_input_dev: 1712 err_free_input_dev:
@@ -1743,9 +1743,18 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type)
1743 return 0; 1743 return 0;
1744} 1744}
1745 1745
1746static int __init is_i740(struct pci_dev *dev)
1747{
1748 if (dev->device == 0x00D1)
1749 return 1;
1750 if (dev->device == 0x7000)
1751 return 1;
1752 return 0;
1753}
1754
1746static int __init intel_opregion_present(void) 1755static int __init intel_opregion_present(void)
1747{ 1756{
1748#if defined(CONFIG_DRM_I915) || defined(CONFIG_DRM_I915_MODULE) 1757 int opregion = 0;
1749 struct pci_dev *dev = NULL; 1758 struct pci_dev *dev = NULL;
1750 u32 address; 1759 u32 address;
1751 1760
@@ -1754,13 +1763,15 @@ static int __init intel_opregion_present(void)
1754 continue; 1763 continue;
1755 if (dev->vendor != PCI_VENDOR_ID_INTEL) 1764 if (dev->vendor != PCI_VENDOR_ID_INTEL)
1756 continue; 1765 continue;
1766 /* We don't want to poke around undefined i740 registers */
1767 if (is_i740(dev))
1768 continue;
1757 pci_read_config_dword(dev, 0xfc, &address); 1769 pci_read_config_dword(dev, 0xfc, &address);
1758 if (!address) 1770 if (!address)
1759 continue; 1771 continue;
1760 return 1; 1772 opregion = 1;
1761 } 1773 }
1762#endif 1774 return opregion;
1763 return 0;
1764} 1775}
1765 1776
1766int acpi_video_register(void) 1777int acpi_video_register(void)
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 764f70c5e690..0a4185279417 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -898,6 +898,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
898 ID(PCI_DEVICE_ID_INTEL_B43_HB), 898 ID(PCI_DEVICE_ID_INTEL_B43_HB),
899 ID(PCI_DEVICE_ID_INTEL_B43_1_HB), 899 ID(PCI_DEVICE_ID_INTEL_B43_1_HB),
900 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB), 900 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB),
901 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB),
901 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB), 902 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB),
902 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB), 903 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB),
903 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB), 904 ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB),
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index c0091753a0d1..8e2d9140f300 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -212,6 +212,7 @@
212#define PCI_DEVICE_ID_INTEL_G41_HB 0x2E30 212#define PCI_DEVICE_ID_INTEL_G41_HB 0x2E30
213#define PCI_DEVICE_ID_INTEL_G41_IG 0x2E32 213#define PCI_DEVICE_ID_INTEL_G41_IG 0x2E32
214#define PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB 0x0040 214#define PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB 0x0040
215#define PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB 0x0069
215#define PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG 0x0042 216#define PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG 0x0042
216#define PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB 0x0044 217#define PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB 0x0044
217#define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 0x0062 218#define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 0x0062
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 8d81a1d32653..dd3e661a124d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
6obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o 6obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o
7obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o 7obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o
8obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o 8obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o
9obj-$(CONFIG_EM_TIMER_STI) += em_sti.o
9obj-$(CONFIG_CLKBLD_I8253) += i8253.o 10obj-$(CONFIG_CLKBLD_I8253) += i8253.o
10obj-$(CONFIG_CLKSRC_MMIO) += mmio.o 11obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
11obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o 12obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
new file mode 100644
index 000000000000..372051d1bba8
--- /dev/null
+++ b/drivers/clocksource/em_sti.c
@@ -0,0 +1,406 @@
1/*
2 * Emma Mobile Timer Support - STI
3 *
4 * Copyright (C) 2012 Magnus Damm
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/init.h>
21#include <linux/platform_device.h>
22#include <linux/spinlock.h>
23#include <linux/interrupt.h>
24#include <linux/ioport.h>
25#include <linux/io.h>
26#include <linux/clk.h>
27#include <linux/irq.h>
28#include <linux/err.h>
29#include <linux/delay.h>
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/slab.h>
33#include <linux/module.h>
34
35enum { USER_CLOCKSOURCE, USER_CLOCKEVENT, USER_NR };
36
37struct em_sti_priv {
38 void __iomem *base;
39 struct clk *clk;
40 struct platform_device *pdev;
41 unsigned int active[USER_NR];
42 unsigned long rate;
43 raw_spinlock_t lock;
44 struct clock_event_device ced;
45 struct clocksource cs;
46};
47
48#define STI_CONTROL 0x00
49#define STI_COMPA_H 0x10
50#define STI_COMPA_L 0x14
51#define STI_COMPB_H 0x18
52#define STI_COMPB_L 0x1c
53#define STI_COUNT_H 0x20
54#define STI_COUNT_L 0x24
55#define STI_COUNT_RAW_H 0x28
56#define STI_COUNT_RAW_L 0x2c
57#define STI_SET_H 0x30
58#define STI_SET_L 0x34
59#define STI_INTSTATUS 0x40
60#define STI_INTRAWSTATUS 0x44
61#define STI_INTENSET 0x48
62#define STI_INTENCLR 0x4c
63#define STI_INTFFCLR 0x50
64
65static inline unsigned long em_sti_read(struct em_sti_priv *p, int offs)
66{
67 return ioread32(p->base + offs);
68}
69
70static inline void em_sti_write(struct em_sti_priv *p, int offs,
71 unsigned long value)
72{
73 iowrite32(value, p->base + offs);
74}
75
76static int em_sti_enable(struct em_sti_priv *p)
77{
78 int ret;
79
80 /* enable clock */
81 ret = clk_enable(p->clk);
82 if (ret) {
83 dev_err(&p->pdev->dev, "cannot enable clock\n");
84 return ret;
85 }
86
 87	/* configure the channel: use the clock rate as the timer frequency */
88 p->rate = clk_get_rate(p->clk);
89
90 /* reset the counter */
91 em_sti_write(p, STI_SET_H, 0x40000000);
92 em_sti_write(p, STI_SET_L, 0x00000000);
93
94 /* mask and clear pending interrupts */
95 em_sti_write(p, STI_INTENCLR, 3);
96 em_sti_write(p, STI_INTFFCLR, 3);
97
98 /* enable updates of counter registers */
99 em_sti_write(p, STI_CONTROL, 1);
100
101 return 0;
102}
103
104static void em_sti_disable(struct em_sti_priv *p)
105{
106 /* mask interrupts */
107 em_sti_write(p, STI_INTENCLR, 3);
108
109 /* stop clock */
110 clk_disable(p->clk);
111}
112
113static cycle_t em_sti_count(struct em_sti_priv *p)
114{
115 cycle_t ticks;
116 unsigned long flags;
117
118 /* the STI hardware buffers the 48-bit count, but to
 119	 * break it out into two 32-bit accesses the registers
120 * must be accessed in a certain order.
121 * Always read STI_COUNT_H before STI_COUNT_L.
122 */
123 raw_spin_lock_irqsave(&p->lock, flags);
124 ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32;
125 ticks |= em_sti_read(p, STI_COUNT_L);
126 raw_spin_unlock_irqrestore(&p->lock, flags);
127
128 return ticks;
129}
130
131static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next)
132{
133 unsigned long flags;
134
135 raw_spin_lock_irqsave(&p->lock, flags);
136
137 /* mask compare A interrupt */
138 em_sti_write(p, STI_INTENCLR, 1);
139
140 /* update compare A value */
141 em_sti_write(p, STI_COMPA_H, next >> 32);
142 em_sti_write(p, STI_COMPA_L, next & 0xffffffff);
143
144 /* clear compare A interrupt source */
145 em_sti_write(p, STI_INTFFCLR, 1);
146
147 /* unmask compare A interrupt */
148 em_sti_write(p, STI_INTENSET, 1);
149
150 raw_spin_unlock_irqrestore(&p->lock, flags);
151
152 return next;
153}
154
155static irqreturn_t em_sti_interrupt(int irq, void *dev_id)
156{
157 struct em_sti_priv *p = dev_id;
158
159 p->ced.event_handler(&p->ced);
160 return IRQ_HANDLED;
161}
162
163static int em_sti_start(struct em_sti_priv *p, unsigned int user)
164{
165 unsigned long flags;
166 int used_before;
167 int ret = 0;
168
169 raw_spin_lock_irqsave(&p->lock, flags);
170 used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
171 if (!used_before)
172 ret = em_sti_enable(p);
173
174 if (!ret)
175 p->active[user] = 1;
176 raw_spin_unlock_irqrestore(&p->lock, flags);
177
178 return ret;
179}
180
181static void em_sti_stop(struct em_sti_priv *p, unsigned int user)
182{
183 unsigned long flags;
184 int used_before, used_after;
185
186 raw_spin_lock_irqsave(&p->lock, flags);
187 used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
188 p->active[user] = 0;
189 used_after = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
190
191 if (used_before && !used_after)
192 em_sti_disable(p);
193 raw_spin_unlock_irqrestore(&p->lock, flags);
194}
195
196static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs)
197{
198 return container_of(cs, struct em_sti_priv, cs);
199}
200
201static cycle_t em_sti_clocksource_read(struct clocksource *cs)
202{
203 return em_sti_count(cs_to_em_sti(cs));
204}
205
206static int em_sti_clocksource_enable(struct clocksource *cs)
207{
208 int ret;
209 struct em_sti_priv *p = cs_to_em_sti(cs);
210
211 ret = em_sti_start(p, USER_CLOCKSOURCE);
212 if (!ret)
213 __clocksource_updatefreq_hz(cs, p->rate);
214 return ret;
215}
216
217static void em_sti_clocksource_disable(struct clocksource *cs)
218{
219 em_sti_stop(cs_to_em_sti(cs), USER_CLOCKSOURCE);
220}
221
222static void em_sti_clocksource_resume(struct clocksource *cs)
223{
224 em_sti_clocksource_enable(cs);
225}
226
227static int em_sti_register_clocksource(struct em_sti_priv *p)
228{
229 struct clocksource *cs = &p->cs;
230
231 memset(cs, 0, sizeof(*cs));
232 cs->name = dev_name(&p->pdev->dev);
233 cs->rating = 200;
234 cs->read = em_sti_clocksource_read;
235 cs->enable = em_sti_clocksource_enable;
236 cs->disable = em_sti_clocksource_disable;
237 cs->suspend = em_sti_clocksource_disable;
238 cs->resume = em_sti_clocksource_resume;
239 cs->mask = CLOCKSOURCE_MASK(48);
240 cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
241
242 dev_info(&p->pdev->dev, "used as clock source\n");
243
244 /* Register with dummy 1 Hz value, gets updated in ->enable() */
245 clocksource_register_hz(cs, 1);
246 return 0;
247}
248
249static struct em_sti_priv *ced_to_em_sti(struct clock_event_device *ced)
250{
251 return container_of(ced, struct em_sti_priv, ced);
252}
253
254static void em_sti_clock_event_mode(enum clock_event_mode mode,
255 struct clock_event_device *ced)
256{
257 struct em_sti_priv *p = ced_to_em_sti(ced);
258
259 /* deal with old setting first */
260 switch (ced->mode) {
261 case CLOCK_EVT_MODE_ONESHOT:
262 em_sti_stop(p, USER_CLOCKEVENT);
263 break;
264 default:
265 break;
266 }
267
268 switch (mode) {
269 case CLOCK_EVT_MODE_ONESHOT:
270 dev_info(&p->pdev->dev, "used for oneshot clock events\n");
271 em_sti_start(p, USER_CLOCKEVENT);
272 clockevents_config(&p->ced, p->rate);
273 break;
274 case CLOCK_EVT_MODE_SHUTDOWN:
275 case CLOCK_EVT_MODE_UNUSED:
276 em_sti_stop(p, USER_CLOCKEVENT);
277 break;
278 default:
279 break;
280 }
281}
282
283static int em_sti_clock_event_next(unsigned long delta,
284 struct clock_event_device *ced)
285{
286 struct em_sti_priv *p = ced_to_em_sti(ced);
287 cycle_t next;
288 int safe;
289
290 next = em_sti_set_next(p, em_sti_count(p) + delta);
291 safe = em_sti_count(p) < (next - 1);
292
293 return !safe;
294}
295
296static void em_sti_register_clockevent(struct em_sti_priv *p)
297{
298 struct clock_event_device *ced = &p->ced;
299
300 memset(ced, 0, sizeof(*ced));
301 ced->name = dev_name(&p->pdev->dev);
302 ced->features = CLOCK_EVT_FEAT_ONESHOT;
303 ced->rating = 200;
304 ced->cpumask = cpumask_of(0);
305 ced->set_next_event = em_sti_clock_event_next;
306 ced->set_mode = em_sti_clock_event_mode;
307
308 dev_info(&p->pdev->dev, "used for clock events\n");
309
310 /* Register with dummy 1 Hz value, gets updated in ->set_mode() */
311 clockevents_config_and_register(ced, 1, 2, 0xffffffff);
312}
313
314static int __devinit em_sti_probe(struct platform_device *pdev)
315{
316 struct em_sti_priv *p;
317 struct resource *res;
318 int irq, ret;
319
320 p = kzalloc(sizeof(*p), GFP_KERNEL);
321 if (p == NULL) {
322 dev_err(&pdev->dev, "failed to allocate driver data\n");
323 ret = -ENOMEM;
324 goto err0;
325 }
326
327 p->pdev = pdev;
328 platform_set_drvdata(pdev, p);
329
330 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
331 if (!res) {
332 dev_err(&pdev->dev, "failed to get I/O memory\n");
333 ret = -EINVAL;
334 goto err0;
335 }
336
337 irq = platform_get_irq(pdev, 0);
338 if (irq < 0) {
339 dev_err(&pdev->dev, "failed to get irq\n");
340 ret = -EINVAL;
341 goto err0;
342 }
343
344 /* map memory, let base point to the STI instance */
345 p->base = ioremap_nocache(res->start, resource_size(res));
346 if (p->base == NULL) {
347 dev_err(&pdev->dev, "failed to remap I/O memory\n");
348 ret = -ENXIO;
349 goto err0;
350 }
351
352 /* get hold of clock */
353 p->clk = clk_get(&pdev->dev, "sclk");
354 if (IS_ERR(p->clk)) {
355 dev_err(&pdev->dev, "cannot get clock\n");
356 ret = PTR_ERR(p->clk);
357 goto err1;
358 }
359
360 if (request_irq(irq, em_sti_interrupt,
361 IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
362 dev_name(&pdev->dev), p)) {
363 dev_err(&pdev->dev, "failed to request low IRQ\n");
364 ret = -ENOENT;
365 goto err2;
366 }
367
368 raw_spin_lock_init(&p->lock);
369 em_sti_register_clockevent(p);
370 em_sti_register_clocksource(p);
371 return 0;
372
373err2:
374 clk_put(p->clk);
375err1:
376 iounmap(p->base);
377err0:
378 kfree(p);
379 return ret;
380}
381
382static int __devexit em_sti_remove(struct platform_device *pdev)
383{
384 return -EBUSY; /* cannot unregister clockevent and clocksource */
385}
386
387static const struct of_device_id em_sti_dt_ids[] __devinitconst = {
388 { .compatible = "renesas,em-sti", },
389 {},
390};
391MODULE_DEVICE_TABLE(of, em_sti_dt_ids);
392
393static struct platform_driver em_sti_device_driver = {
394 .probe = em_sti_probe,
395 .remove = __devexit_p(em_sti_remove),
396 .driver = {
397 .name = "em_sti",
398 .of_match_table = em_sti_dt_ids,
399 }
400};
401
402module_platform_driver(em_sti_device_driver);
403
404MODULE_AUTHOR("Magnus Damm");
405MODULE_DESCRIPTION("Renesas Emma Mobile STI Timer Driver");
406MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c
index 7bb00448e13d..b6453d0e44ad 100644
--- a/drivers/gpio/gpio-samsung.c
+++ b/drivers/gpio/gpio-samsung.c
@@ -2833,7 +2833,7 @@ static __init void exynos5_gpiolib_init(void)
2833 } 2833 }
2834 2834
2835 /* need to set base address for gpc4 */ 2835 /* need to set base address for gpc4 */
2836 exonys5_gpios_1[11].base = gpio_base1 + 0x2E0; 2836 exynos5_gpios_1[11].base = gpio_base1 + 0x2E0;
2837 2837
2838 /* need to set base address for gpx */ 2838 /* need to set base address for gpx */
2839 chip = &exynos5_gpios_1[21]; 2839 chip = &exynos5_gpios_1[21];
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 420953197d0a..d6de2e07fa03 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -244,8 +244,8 @@ static const struct file_operations exynos_drm_driver_fops = {
244}; 244};
245 245
246static struct drm_driver exynos_drm_driver = { 246static struct drm_driver exynos_drm_driver = {
247 .driver_features = DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM | 247 .driver_features = DRIVER_HAVE_IRQ | DRIVER_MODESET |
248 DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, 248 DRIVER_GEM | DRIVER_PRIME,
249 .load = exynos_drm_load, 249 .load = exynos_drm_load,
250 .unload = exynos_drm_unload, 250 .unload = exynos_drm_unload,
251 .open = exynos_drm_open, 251 .open = exynos_drm_open,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
index 6e9ac7bd1dcf..23d5ad379f86 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
@@ -172,19 +172,12 @@ static void exynos_drm_encoder_commit(struct drm_encoder *encoder)
172 manager_ops->commit(manager->dev); 172 manager_ops->commit(manager->dev);
173} 173}
174 174
175static struct drm_crtc *
176exynos_drm_encoder_get_crtc(struct drm_encoder *encoder)
177{
178 return encoder->crtc;
179}
180
181static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = { 175static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = {
182 .dpms = exynos_drm_encoder_dpms, 176 .dpms = exynos_drm_encoder_dpms,
183 .mode_fixup = exynos_drm_encoder_mode_fixup, 177 .mode_fixup = exynos_drm_encoder_mode_fixup,
184 .mode_set = exynos_drm_encoder_mode_set, 178 .mode_set = exynos_drm_encoder_mode_set,
185 .prepare = exynos_drm_encoder_prepare, 179 .prepare = exynos_drm_encoder_prepare,
186 .commit = exynos_drm_encoder_commit, 180 .commit = exynos_drm_encoder_commit,
187 .get_crtc = exynos_drm_encoder_get_crtc,
188}; 181};
189 182
190static void exynos_drm_encoder_destroy(struct drm_encoder *encoder) 183static void exynos_drm_encoder_destroy(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index f82a299553fb..4ccfe4328fab 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -51,11 +51,22 @@ struct exynos_drm_fb {
51static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) 51static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
52{ 52{
53 struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); 53 struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
54 unsigned int i;
54 55
55 DRM_DEBUG_KMS("%s\n", __FILE__); 56 DRM_DEBUG_KMS("%s\n", __FILE__);
56 57
57 drm_framebuffer_cleanup(fb); 58 drm_framebuffer_cleanup(fb);
58 59
60 for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem_obj); i++) {
61 struct drm_gem_object *obj;
62
63 if (exynos_fb->exynos_gem_obj[i] == NULL)
64 continue;
65
66 obj = &exynos_fb->exynos_gem_obj[i]->base;
67 drm_gem_object_unreference_unlocked(obj);
68 }
69
59 kfree(exynos_fb); 70 kfree(exynos_fb);
60 exynos_fb = NULL; 71 exynos_fb = NULL;
61} 72}
@@ -134,11 +145,11 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
134 return ERR_PTR(-ENOENT); 145 return ERR_PTR(-ENOENT);
135 } 146 }
136 147
137 drm_gem_object_unreference_unlocked(obj);
138
139 fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj); 148 fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj);
140 if (IS_ERR(fb)) 149 if (IS_ERR(fb)) {
150 drm_gem_object_unreference_unlocked(obj);
141 return fb; 151 return fb;
152 }
142 153
143 exynos_fb = to_exynos_fb(fb); 154 exynos_fb = to_exynos_fb(fb);
144 nr = exynos_drm_format_num_buffers(fb->pixel_format); 155 nr = exynos_drm_format_num_buffers(fb->pixel_format);
@@ -152,8 +163,6 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
152 return ERR_PTR(-ENOENT); 163 return ERR_PTR(-ENOENT);
153 } 164 }
154 165
155 drm_gem_object_unreference_unlocked(obj);
156
157 exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj); 166 exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj);
158 } 167 }
159 168
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h
index 3ecb30d93552..50823756cdea 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h
@@ -31,10 +31,10 @@
31static inline int exynos_drm_format_num_buffers(uint32_t format) 31static inline int exynos_drm_format_num_buffers(uint32_t format)
32{ 32{
33 switch (format) { 33 switch (format) {
34 case DRM_FORMAT_NV12M: 34 case DRM_FORMAT_NV12:
35 case DRM_FORMAT_NV12MT: 35 case DRM_FORMAT_NV12MT:
36 return 2; 36 return 2;
37 case DRM_FORMAT_YUV420M: 37 case DRM_FORMAT_YUV420:
38 return 3; 38 return 3;
39 default: 39 default:
40 return 1; 40 return 1;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index fc91293c4560..5c8b683029ea 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -689,7 +689,6 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
689 struct drm_device *dev, uint32_t handle, 689 struct drm_device *dev, uint32_t handle,
690 uint64_t *offset) 690 uint64_t *offset)
691{ 691{
692 struct exynos_drm_gem_obj *exynos_gem_obj;
693 struct drm_gem_object *obj; 692 struct drm_gem_object *obj;
694 int ret = 0; 693 int ret = 0;
695 694
@@ -710,15 +709,13 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
710 goto unlock; 709 goto unlock;
711 } 710 }
712 711
713 exynos_gem_obj = to_exynos_gem_obj(obj); 712 if (!obj->map_list.map) {
714 713 ret = drm_gem_create_mmap_offset(obj);
715 if (!exynos_gem_obj->base.map_list.map) {
716 ret = drm_gem_create_mmap_offset(&exynos_gem_obj->base);
717 if (ret) 714 if (ret)
718 goto out; 715 goto out;
719 } 716 }
720 717
721 *offset = (u64)exynos_gem_obj->base.map_list.hash.key << PAGE_SHIFT; 718 *offset = (u64)obj->map_list.hash.key << PAGE_SHIFT;
722 DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset); 719 DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
723 720
724out: 721out:
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 68ef01028375..e2147a2ddcec 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -365,7 +365,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
365 switch (win_data->pixel_format) { 365 switch (win_data->pixel_format) {
366 case DRM_FORMAT_NV12MT: 366 case DRM_FORMAT_NV12MT:
367 tiled_mode = true; 367 tiled_mode = true;
368 case DRM_FORMAT_NV12M: 368 case DRM_FORMAT_NV12:
369 crcb_mode = false; 369 crcb_mode = false;
370 buf_num = 2; 370 buf_num = 2;
371 break; 371 break;
@@ -601,18 +601,20 @@ static void mixer_win_reset(struct mixer_context *ctx)
601 mixer_reg_write(res, MXR_BG_COLOR2, 0x008080); 601 mixer_reg_write(res, MXR_BG_COLOR2, 0x008080);
602 602
603 /* setting graphical layers */ 603 /* setting graphical layers */
604
605 val = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */ 604 val = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */
606 val |= MXR_GRP_CFG_WIN_BLEND_EN; 605 val |= MXR_GRP_CFG_WIN_BLEND_EN;
606 val |= MXR_GRP_CFG_BLEND_PRE_MUL;
607 val |= MXR_GRP_CFG_PIXEL_BLEND_EN;
607 val |= MXR_GRP_CFG_ALPHA_VAL(0xff); /* non-transparent alpha */ 608 val |= MXR_GRP_CFG_ALPHA_VAL(0xff); /* non-transparent alpha */
608 609
609 /* the same configuration for both layers */ 610 /* the same configuration for both layers */
610 mixer_reg_write(res, MXR_GRAPHIC_CFG(0), val); 611 mixer_reg_write(res, MXR_GRAPHIC_CFG(0), val);
611
612 val |= MXR_GRP_CFG_BLEND_PRE_MUL;
613 val |= MXR_GRP_CFG_PIXEL_BLEND_EN;
614 mixer_reg_write(res, MXR_GRAPHIC_CFG(1), val); 612 mixer_reg_write(res, MXR_GRAPHIC_CFG(1), val);
615 613
614 /* setting video layers */
615 val = MXR_GRP_CFG_ALPHA_VAL(0);
616 mixer_reg_write(res, MXR_VIDEO_CFG, val);
617
616 /* configuration of Video Processor Registers */ 618 /* configuration of Video Processor Registers */
617 vp_win_reset(ctx); 619 vp_win_reset(ctx);
618 vp_default_filter(res); 620 vp_default_filter(res);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 238a52165833..9fe9ebe52a7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -233,6 +233,7 @@ static const struct intel_device_info intel_sandybridge_d_info = {
233 .has_blt_ring = 1, 233 .has_blt_ring = 1,
234 .has_llc = 1, 234 .has_llc = 1,
235 .has_pch_split = 1, 235 .has_pch_split = 1,
236 .has_force_wake = 1,
236}; 237};
237 238
238static const struct intel_device_info intel_sandybridge_m_info = { 239static const struct intel_device_info intel_sandybridge_m_info = {
@@ -243,6 +244,7 @@ static const struct intel_device_info intel_sandybridge_m_info = {
243 .has_blt_ring = 1, 244 .has_blt_ring = 1,
244 .has_llc = 1, 245 .has_llc = 1,
245 .has_pch_split = 1, 246 .has_pch_split = 1,
247 .has_force_wake = 1,
246}; 248};
247 249
248static const struct intel_device_info intel_ivybridge_d_info = { 250static const struct intel_device_info intel_ivybridge_d_info = {
@@ -252,6 +254,7 @@ static const struct intel_device_info intel_ivybridge_d_info = {
252 .has_blt_ring = 1, 254 .has_blt_ring = 1,
253 .has_llc = 1, 255 .has_llc = 1,
254 .has_pch_split = 1, 256 .has_pch_split = 1,
257 .has_force_wake = 1,
255}; 258};
256 259
257static const struct intel_device_info intel_ivybridge_m_info = { 260static const struct intel_device_info intel_ivybridge_m_info = {
@@ -262,6 +265,7 @@ static const struct intel_device_info intel_ivybridge_m_info = {
262 .has_blt_ring = 1, 265 .has_blt_ring = 1,
263 .has_llc = 1, 266 .has_llc = 1,
264 .has_pch_split = 1, 267 .has_pch_split = 1,
268 .has_force_wake = 1,
265}; 269};
266 270
267static const struct intel_device_info intel_valleyview_m_info = { 271static const struct intel_device_info intel_valleyview_m_info = {
@@ -289,6 +293,7 @@ static const struct intel_device_info intel_haswell_d_info = {
289 .has_blt_ring = 1, 293 .has_blt_ring = 1,
290 .has_llc = 1, 294 .has_llc = 1,
291 .has_pch_split = 1, 295 .has_pch_split = 1,
296 .has_force_wake = 1,
292}; 297};
293 298
294static const struct intel_device_info intel_haswell_m_info = { 299static const struct intel_device_info intel_haswell_m_info = {
@@ -298,6 +303,7 @@ static const struct intel_device_info intel_haswell_m_info = {
298 .has_blt_ring = 1, 303 .has_blt_ring = 1,
299 .has_llc = 1, 304 .has_llc = 1,
300 .has_pch_split = 1, 305 .has_pch_split = 1,
306 .has_force_wake = 1,
301}; 307};
302 308
303static const struct pci_device_id pciidlist[] = { /* aka */ 309static const struct pci_device_id pciidlist[] = { /* aka */
@@ -1139,10 +1145,9 @@ MODULE_LICENSE("GPL and additional rights");
1139 1145
1140/* We give fast paths for the really cool registers */ 1146/* We give fast paths for the really cool registers */
1141#define NEEDS_FORCE_WAKE(dev_priv, reg) \ 1147#define NEEDS_FORCE_WAKE(dev_priv, reg) \
1142 (((dev_priv)->info->gen >= 6) && \ 1148 ((HAS_FORCE_WAKE((dev_priv)->dev)) && \
1143 ((reg) < 0x40000) && \ 1149 ((reg) < 0x40000) && \
1144 ((reg) != FORCEWAKE)) && \ 1150 ((reg) != FORCEWAKE))
1145 (!IS_VALLEYVIEW((dev_priv)->dev))
1146 1151
1147#define __i915_read(x, y) \ 1152#define __i915_read(x, y) \
1148u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ 1153u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9cfc67c2cf5..b0b676abde0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -285,6 +285,7 @@ struct intel_device_info {
285 u8 is_ivybridge:1; 285 u8 is_ivybridge:1;
286 u8 is_valleyview:1; 286 u8 is_valleyview:1;
287 u8 has_pch_split:1; 287 u8 has_pch_split:1;
288 u8 has_force_wake:1;
288 u8 is_haswell:1; 289 u8 is_haswell:1;
289 u8 has_fbc:1; 290 u8 has_fbc:1;
290 u8 has_pipe_cxsr:1; 291 u8 has_pipe_cxsr:1;
@@ -1101,6 +1102,8 @@ struct drm_i915_file_private {
1101#define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT) 1102#define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
1102#define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX) 1103#define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
1103 1104
1105#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
1106
1104#include "i915_trace.h" 1107#include "i915_trace.h"
1105 1108
1106/** 1109/**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1417660a93ec..b1fe0edda955 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -510,7 +510,7 @@ out:
510 return ret; 510 return ret;
511} 511}
512 512
513static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) 513static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
514{ 514{
515 drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; 515 drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
516 int pipe; 516 int pipe;
@@ -550,6 +550,35 @@ static void pch_irq_handler(struct drm_device *dev, u32 pch_iir)
550 DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n"); 550 DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n");
551} 551}
552 552
553static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
554{
555 drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
556 int pipe;
557
558 if (pch_iir & SDE_AUDIO_POWER_MASK_CPT)
559 DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
560 (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
561 SDE_AUDIO_POWER_SHIFT_CPT);
562
563 if (pch_iir & SDE_AUX_MASK_CPT)
564 DRM_DEBUG_DRIVER("AUX channel interrupt\n");
565
566 if (pch_iir & SDE_GMBUS_CPT)
567 DRM_DEBUG_DRIVER("PCH GMBUS interrupt\n");
568
569 if (pch_iir & SDE_AUDIO_CP_REQ_CPT)
570 DRM_DEBUG_DRIVER("Audio CP request interrupt\n");
571
572 if (pch_iir & SDE_AUDIO_CP_CHG_CPT)
573 DRM_DEBUG_DRIVER("Audio CP change interrupt\n");
574
575 if (pch_iir & SDE_FDI_MASK_CPT)
576 for_each_pipe(pipe)
577 DRM_DEBUG_DRIVER(" pipe %c FDI IIR: 0x%08x\n",
578 pipe_name(pipe),
579 I915_READ(FDI_RX_IIR(pipe)));
580}
581
553static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) 582static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
554{ 583{
555 struct drm_device *dev = (struct drm_device *) arg; 584 struct drm_device *dev = (struct drm_device *) arg;
@@ -591,7 +620,7 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
591 620
592 if (pch_iir & SDE_HOTPLUG_MASK_CPT) 621 if (pch_iir & SDE_HOTPLUG_MASK_CPT)
593 queue_work(dev_priv->wq, &dev_priv->hotplug_work); 622 queue_work(dev_priv->wq, &dev_priv->hotplug_work);
594 pch_irq_handler(dev, pch_iir); 623 cpt_irq_handler(dev, pch_iir);
595 624
596 /* clear PCH hotplug event before clear CPU irq */ 625 /* clear PCH hotplug event before clear CPU irq */
597 I915_WRITE(SDEIIR, pch_iir); 626 I915_WRITE(SDEIIR, pch_iir);
@@ -684,7 +713,10 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS)
684 if (de_iir & DE_PCH_EVENT) { 713 if (de_iir & DE_PCH_EVENT) {
685 if (pch_iir & hotplug_mask) 714 if (pch_iir & hotplug_mask)
686 queue_work(dev_priv->wq, &dev_priv->hotplug_work); 715 queue_work(dev_priv->wq, &dev_priv->hotplug_work);
687 pch_irq_handler(dev, pch_iir); 716 if (HAS_PCH_CPT(dev))
717 cpt_irq_handler(dev, pch_iir);
718 else
719 ibx_irq_handler(dev, pch_iir);
688 } 720 }
689 721
690 if (de_iir & DE_PCU_EVENT) { 722 if (de_iir & DE_PCU_EVENT) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2d49b9507ed0..48d5e8e051cf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -210,6 +210,14 @@
210#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) 210#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2)
211#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) 211#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1)
212#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) 212#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
213/* IVB has funny definitions for which plane to flip. */
214#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19)
215#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19)
216#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
217#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
218#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19)
219#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
220
213#define MI_SET_CONTEXT MI_INSTR(0x18, 0) 221#define MI_SET_CONTEXT MI_INSTR(0x18, 0)
214#define MI_MM_SPACE_GTT (1<<8) 222#define MI_MM_SPACE_GTT (1<<8)
215#define MI_MM_SPACE_PHYSICAL (0<<8) 223#define MI_MM_SPACE_PHYSICAL (0<<8)
@@ -3313,7 +3321,7 @@
3313 3321
3314/* PCH */ 3322/* PCH */
3315 3323
3316/* south display engine interrupt */ 3324/* south display engine interrupt: IBX */
3317#define SDE_AUDIO_POWER_D (1 << 27) 3325#define SDE_AUDIO_POWER_D (1 << 27)
3318#define SDE_AUDIO_POWER_C (1 << 26) 3326#define SDE_AUDIO_POWER_C (1 << 26)
3319#define SDE_AUDIO_POWER_B (1 << 25) 3327#define SDE_AUDIO_POWER_B (1 << 25)
@@ -3349,15 +3357,44 @@
3349#define SDE_TRANSA_CRC_ERR (1 << 1) 3357#define SDE_TRANSA_CRC_ERR (1 << 1)
3350#define SDE_TRANSA_FIFO_UNDER (1 << 0) 3358#define SDE_TRANSA_FIFO_UNDER (1 << 0)
3351#define SDE_TRANS_MASK (0x3f) 3359#define SDE_TRANS_MASK (0x3f)
3352/* CPT */ 3360
3353#define SDE_CRT_HOTPLUG_CPT (1 << 19) 3361/* south display engine interrupt: CPT/PPT */
3362#define SDE_AUDIO_POWER_D_CPT (1 << 31)
3363#define SDE_AUDIO_POWER_C_CPT (1 << 30)
3364#define SDE_AUDIO_POWER_B_CPT (1 << 29)
3365#define SDE_AUDIO_POWER_SHIFT_CPT 29
3366#define SDE_AUDIO_POWER_MASK_CPT (7 << 29)
3367#define SDE_AUXD_CPT (1 << 27)
3368#define SDE_AUXC_CPT (1 << 26)
3369#define SDE_AUXB_CPT (1 << 25)
3370#define SDE_AUX_MASK_CPT (7 << 25)
3354#define SDE_PORTD_HOTPLUG_CPT (1 << 23) 3371#define SDE_PORTD_HOTPLUG_CPT (1 << 23)
3355#define SDE_PORTC_HOTPLUG_CPT (1 << 22) 3372#define SDE_PORTC_HOTPLUG_CPT (1 << 22)
3356#define SDE_PORTB_HOTPLUG_CPT (1 << 21) 3373#define SDE_PORTB_HOTPLUG_CPT (1 << 21)
3374#define SDE_CRT_HOTPLUG_CPT (1 << 19)
3357#define SDE_HOTPLUG_MASK_CPT (SDE_CRT_HOTPLUG_CPT | \ 3375#define SDE_HOTPLUG_MASK_CPT (SDE_CRT_HOTPLUG_CPT | \
3358 SDE_PORTD_HOTPLUG_CPT | \ 3376 SDE_PORTD_HOTPLUG_CPT | \
3359 SDE_PORTC_HOTPLUG_CPT | \ 3377 SDE_PORTC_HOTPLUG_CPT | \
3360 SDE_PORTB_HOTPLUG_CPT) 3378 SDE_PORTB_HOTPLUG_CPT)
3379#define SDE_GMBUS_CPT (1 << 17)
3380#define SDE_AUDIO_CP_REQ_C_CPT (1 << 10)
3381#define SDE_AUDIO_CP_CHG_C_CPT (1 << 9)
3382#define SDE_FDI_RXC_CPT (1 << 8)
3383#define SDE_AUDIO_CP_REQ_B_CPT (1 << 6)
3384#define SDE_AUDIO_CP_CHG_B_CPT (1 << 5)
3385#define SDE_FDI_RXB_CPT (1 << 4)
3386#define SDE_AUDIO_CP_REQ_A_CPT (1 << 2)
3387#define SDE_AUDIO_CP_CHG_A_CPT (1 << 1)
3388#define SDE_FDI_RXA_CPT (1 << 0)
3389#define SDE_AUDIO_CP_REQ_CPT (SDE_AUDIO_CP_REQ_C_CPT | \
3390 SDE_AUDIO_CP_REQ_B_CPT | \
3391 SDE_AUDIO_CP_REQ_A_CPT)
3392#define SDE_AUDIO_CP_CHG_CPT (SDE_AUDIO_CP_CHG_C_CPT | \
3393 SDE_AUDIO_CP_CHG_B_CPT | \
3394 SDE_AUDIO_CP_CHG_A_CPT)
3395#define SDE_FDI_MASK_CPT (SDE_FDI_RXC_CPT | \
3396 SDE_FDI_RXB_CPT | \
3397 SDE_FDI_RXA_CPT)
3361 3398
3362#define SDEISR 0xc4000 3399#define SDEISR 0xc4000
3363#define SDEIMR 0xc4004 3400#define SDEIMR 0xc4004
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 914789420906..e0aa064def31 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6158,17 +6158,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
6158 struct drm_i915_private *dev_priv = dev->dev_private; 6158 struct drm_i915_private *dev_priv = dev->dev_private;
6159 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 6159 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
6160 struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; 6160 struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
6161 uint32_t plane_bit = 0;
6161 int ret; 6162 int ret;
6162 6163
6163 ret = intel_pin_and_fence_fb_obj(dev, obj, ring); 6164 ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
6164 if (ret) 6165 if (ret)
6165 goto err; 6166 goto err;
6166 6167
6168 switch(intel_crtc->plane) {
6169 case PLANE_A:
6170 plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
6171 break;
6172 case PLANE_B:
6173 plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B;
6174 break;
6175 case PLANE_C:
6176 plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C;
6177 break;
6178 default:
6179 WARN_ONCE(1, "unknown plane in flip command\n");
6180 ret = -ENODEV;
6181 goto err;
6182 }
6183
6167 ret = intel_ring_begin(ring, 4); 6184 ret = intel_ring_begin(ring, 4);
6168 if (ret) 6185 if (ret)
6169 goto err_unpin; 6186 goto err_unpin;
6170 6187
6171 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | (intel_crtc->plane << 19)); 6188 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
6172 intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); 6189 intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
6173 intel_ring_emit(ring, (obj->gtt_offset)); 6190 intel_ring_emit(ring, (obj->gtt_offset));
6174 intel_ring_emit(ring, (MI_NOOP)); 6191 intel_ring_emit(ring, (MI_NOOP));
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b59b6d5b7583..e5b84ff89ca5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -266,10 +266,15 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
266 266
267static int init_ring_common(struct intel_ring_buffer *ring) 267static int init_ring_common(struct intel_ring_buffer *ring)
268{ 268{
269 drm_i915_private_t *dev_priv = ring->dev->dev_private; 269 struct drm_device *dev = ring->dev;
270 drm_i915_private_t *dev_priv = dev->dev_private;
270 struct drm_i915_gem_object *obj = ring->obj; 271 struct drm_i915_gem_object *obj = ring->obj;
272 int ret = 0;
271 u32 head; 273 u32 head;
272 274
275 if (HAS_FORCE_WAKE(dev))
276 gen6_gt_force_wake_get(dev_priv);
277
273 /* Stop the ring if it's running. */ 278 /* Stop the ring if it's running. */
274 I915_WRITE_CTL(ring, 0); 279 I915_WRITE_CTL(ring, 0);
275 I915_WRITE_HEAD(ring, 0); 280 I915_WRITE_HEAD(ring, 0);
@@ -317,7 +322,8 @@ static int init_ring_common(struct intel_ring_buffer *ring)
317 I915_READ_HEAD(ring), 322 I915_READ_HEAD(ring),
318 I915_READ_TAIL(ring), 323 I915_READ_TAIL(ring),
319 I915_READ_START(ring)); 324 I915_READ_START(ring));
320 return -EIO; 325 ret = -EIO;
326 goto out;
321 } 327 }
322 328
323 if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) 329 if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
@@ -326,9 +332,14 @@ static int init_ring_common(struct intel_ring_buffer *ring)
326 ring->head = I915_READ_HEAD(ring); 332 ring->head = I915_READ_HEAD(ring);
327 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; 333 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
328 ring->space = ring_space(ring); 334 ring->space = ring_space(ring);
335 ring->last_retired_head = -1;
329 } 336 }
330 337
331 return 0; 338out:
339 if (HAS_FORCE_WAKE(dev))
340 gen6_gt_force_wake_put(dev_priv);
341
342 return ret;
332} 343}
333 344
334static int 345static int
@@ -987,6 +998,10 @@ static int intel_init_ring_buffer(struct drm_device *dev,
987 if (ret) 998 if (ret)
988 goto err_unref; 999 goto err_unref;
989 1000
1001 ret = i915_gem_object_set_to_gtt_domain(obj, true);
1002 if (ret)
1003 goto err_unpin;
1004
990 ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset, 1005 ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
991 ring->size); 1006 ring->size);
992 if (ring->virtual_start == NULL) { 1007 if (ring->virtual_start == NULL) {
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3df4efa11942..3186522a4458 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -460,15 +460,28 @@ static void cayman_gpu_init(struct radeon_device *rdev)
460 rdev->config.cayman.max_pipes_per_simd = 4; 460 rdev->config.cayman.max_pipes_per_simd = 4;
461 rdev->config.cayman.max_tile_pipes = 2; 461 rdev->config.cayman.max_tile_pipes = 2;
462 if ((rdev->pdev->device == 0x9900) || 462 if ((rdev->pdev->device == 0x9900) ||
463 (rdev->pdev->device == 0x9901)) { 463 (rdev->pdev->device == 0x9901) ||
464 (rdev->pdev->device == 0x9905) ||
465 (rdev->pdev->device == 0x9906) ||
466 (rdev->pdev->device == 0x9907) ||
467 (rdev->pdev->device == 0x9908) ||
468 (rdev->pdev->device == 0x9909) ||
469 (rdev->pdev->device == 0x9910) ||
470 (rdev->pdev->device == 0x9917)) {
464 rdev->config.cayman.max_simds_per_se = 6; 471 rdev->config.cayman.max_simds_per_se = 6;
465 rdev->config.cayman.max_backends_per_se = 2; 472 rdev->config.cayman.max_backends_per_se = 2;
466 } else if ((rdev->pdev->device == 0x9903) || 473 } else if ((rdev->pdev->device == 0x9903) ||
467 (rdev->pdev->device == 0x9904)) { 474 (rdev->pdev->device == 0x9904) ||
475 (rdev->pdev->device == 0x990A) ||
476 (rdev->pdev->device == 0x9913) ||
477 (rdev->pdev->device == 0x9918)) {
468 rdev->config.cayman.max_simds_per_se = 4; 478 rdev->config.cayman.max_simds_per_se = 4;
469 rdev->config.cayman.max_backends_per_se = 2; 479 rdev->config.cayman.max_backends_per_se = 2;
470 } else if ((rdev->pdev->device == 0x9990) || 480 } else if ((rdev->pdev->device == 0x9919) ||
471 (rdev->pdev->device == 0x9991)) { 481 (rdev->pdev->device == 0x9990) ||
482 (rdev->pdev->device == 0x9991) ||
483 (rdev->pdev->device == 0x9994) ||
484 (rdev->pdev->device == 0x99A0)) {
472 rdev->config.cayman.max_simds_per_se = 3; 485 rdev->config.cayman.max_simds_per_se = 3;
473 rdev->config.cayman.max_backends_per_se = 1; 486 rdev->config.cayman.max_backends_per_se = 1;
474 } else { 487 } else {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 45cfcea63507..f30dc95f83b1 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2426,6 +2426,12 @@ int r600_startup(struct radeon_device *rdev)
2426 if (r) 2426 if (r)
2427 return r; 2427 return r;
2428 2428
2429 r = r600_audio_init(rdev);
2430 if (r) {
2431 DRM_ERROR("radeon: audio init failed\n");
2432 return r;
2433 }
2434
2429 return 0; 2435 return 0;
2430} 2436}
2431 2437
@@ -2462,12 +2468,6 @@ int r600_resume(struct radeon_device *rdev)
2462 return r; 2468 return r;
2463 } 2469 }
2464 2470
2465 r = r600_audio_init(rdev);
2466 if (r) {
2467 DRM_ERROR("radeon: audio resume failed\n");
2468 return r;
2469 }
2470
2471 return r; 2471 return r;
2472} 2472}
2473 2473
@@ -2577,9 +2577,6 @@ int r600_init(struct radeon_device *rdev)
2577 rdev->accel_working = false; 2577 rdev->accel_working = false;
2578 } 2578 }
2579 2579
2580 r = r600_audio_init(rdev);
2581 if (r)
2582 return r; /* TODO error handling */
2583 return 0; 2580 return 0;
2584} 2581}
2585 2582
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index 7c4fa77f018f..7479a5c503e4 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -192,6 +192,7 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
192 struct radeon_device *rdev = dev->dev_private; 192 struct radeon_device *rdev = dev->dev_private;
193 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); 193 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
194 struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; 194 struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
195 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
195 int base_rate = 48000; 196 int base_rate = 48000;
196 197
197 switch (radeon_encoder->encoder_id) { 198 switch (radeon_encoder->encoder_id) {
@@ -217,8 +218,8 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
217 WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10); 218 WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10);
218 WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071); 219 WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071);
219 220
220 /* Some magic trigger or src sel? */ 221 /* Select DTO source */
221 WREG32_P(0x5ac, 0x01, ~0x77); 222 WREG32(0x5ac, radeon_crtc->crtc_id);
222 } else { 223 } else {
223 switch (dig->dig_encoder) { 224 switch (dig->dig_encoder) {
224 case 0: 225 case 0:
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index 226379e00ac1..969c27529dfe 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -348,7 +348,6 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
348 WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset, 348 WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
349 HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */ 349 HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
350 HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */ 350 HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
351 HDMI0_AUDIO_SEND_MAX_PACKETS | /* send NULL packets if no audio is available */
 352	 HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be sufficient for all audio modes and small enough for all hblanks */ 351	 HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be sufficient for all audio modes and small enough for all hblanks */
353 HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */ 352 HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
354 } 353 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 85dac33e3cce..fefcca55c1eb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1374,9 +1374,9 @@ struct cayman_asic {
1374 1374
1375struct si_asic { 1375struct si_asic {
1376 unsigned max_shader_engines; 1376 unsigned max_shader_engines;
1377 unsigned max_pipes_per_simd;
1378 unsigned max_tile_pipes; 1377 unsigned max_tile_pipes;
1379 unsigned max_simds_per_se; 1378 unsigned max_cu_per_sh;
1379 unsigned max_sh_per_se;
1380 unsigned max_backends_per_se; 1380 unsigned max_backends_per_se;
1381 unsigned max_texture_channel_caches; 1381 unsigned max_texture_channel_caches;
1382 unsigned max_gprs; 1382 unsigned max_gprs;
@@ -1387,7 +1387,6 @@ struct si_asic {
1387 unsigned sc_hiz_tile_fifo_size; 1387 unsigned sc_hiz_tile_fifo_size;
1388 unsigned sc_earlyz_tile_fifo_size; 1388 unsigned sc_earlyz_tile_fifo_size;
1389 1389
1390 unsigned num_shader_engines;
1391 unsigned num_tile_pipes; 1390 unsigned num_tile_pipes;
1392 unsigned num_backends_per_se; 1391 unsigned num_backends_per_se;
1393 unsigned backend_disable_mask_per_asic; 1392 unsigned backend_disable_mask_per_asic;
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 79db56e6c2ac..59d44937dd9f 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -476,12 +476,18 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
476 476
477 mutex_lock(&vm->mutex); 477 mutex_lock(&vm->mutex);
478 if (last_pfn > vm->last_pfn) { 478 if (last_pfn > vm->last_pfn) {
479 /* grow va space 32M by 32M */ 479 /* release mutex and lock in right order */
480 unsigned align = ((32 << 20) >> 12) - 1; 480 mutex_unlock(&vm->mutex);
481 radeon_mutex_lock(&rdev->cs_mutex); 481 radeon_mutex_lock(&rdev->cs_mutex);
482 radeon_vm_unbind_locked(rdev, vm); 482 mutex_lock(&vm->mutex);
483 /* and check again */
484 if (last_pfn > vm->last_pfn) {
485 /* grow va space 32M by 32M */
486 unsigned align = ((32 << 20) >> 12) - 1;
487 radeon_vm_unbind_locked(rdev, vm);
488 vm->last_pfn = (last_pfn + align) & ~align;
489 }
483 radeon_mutex_unlock(&rdev->cs_mutex); 490 radeon_mutex_unlock(&rdev->cs_mutex);
484 vm->last_pfn = (last_pfn + align) & ~align;
485 } 491 }
486 head = &vm->va; 492 head = &vm->va;
487 last_offset = 0; 493 last_offset = 0;
@@ -595,8 +601,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
595 if (bo_va == NULL) 601 if (bo_va == NULL)
596 return 0; 602 return 0;
597 603
598 mutex_lock(&vm->mutex);
599 radeon_mutex_lock(&rdev->cs_mutex); 604 radeon_mutex_lock(&rdev->cs_mutex);
605 mutex_lock(&vm->mutex);
600 radeon_vm_bo_update_pte(rdev, vm, bo, NULL); 606 radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
601 radeon_mutex_unlock(&rdev->cs_mutex); 607 radeon_mutex_unlock(&rdev->cs_mutex);
602 list_del(&bo_va->vm_list); 608 list_del(&bo_va->vm_list);
@@ -641,9 +647,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
641 struct radeon_bo_va *bo_va, *tmp; 647 struct radeon_bo_va *bo_va, *tmp;
642 int r; 648 int r;
643 649
644 mutex_lock(&vm->mutex);
645
646 radeon_mutex_lock(&rdev->cs_mutex); 650 radeon_mutex_lock(&rdev->cs_mutex);
651 mutex_lock(&vm->mutex);
647 radeon_vm_unbind_locked(rdev, vm); 652 radeon_vm_unbind_locked(rdev, vm);
648 radeon_mutex_unlock(&rdev->cs_mutex); 653 radeon_mutex_unlock(&rdev->cs_mutex);
649 654
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f1016a5820d1..5c58d7d90cb2 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -273,7 +273,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
273 break; 273 break;
274 case RADEON_INFO_MAX_PIPES: 274 case RADEON_INFO_MAX_PIPES:
275 if (rdev->family >= CHIP_TAHITI) 275 if (rdev->family >= CHIP_TAHITI)
276 value = rdev->config.si.max_pipes_per_simd; 276 value = rdev->config.si.max_cu_per_sh;
277 else if (rdev->family >= CHIP_CAYMAN) 277 else if (rdev->family >= CHIP_CAYMAN)
278 value = rdev->config.cayman.max_pipes_per_simd; 278 value = rdev->config.cayman.max_pipes_per_simd;
279 else if (rdev->family >= CHIP_CEDAR) 279 else if (rdev->family >= CHIP_CEDAR)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 25f9eef12c42..e95c5e61d4e2 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -908,12 +908,6 @@ static int rs600_startup(struct radeon_device *rdev)
908 return r; 908 return r;
909 } 909 }
910 910
911 r = r600_audio_init(rdev);
912 if (r) {
913 dev_err(rdev->dev, "failed initializing audio\n");
914 return r;
915 }
916
917 r = radeon_ib_pool_start(rdev); 911 r = radeon_ib_pool_start(rdev);
918 if (r) 912 if (r)
919 return r; 913 return r;
@@ -922,6 +916,12 @@ static int rs600_startup(struct radeon_device *rdev)
922 if (r) 916 if (r)
923 return r; 917 return r;
924 918
919 r = r600_audio_init(rdev);
920 if (r) {
921 dev_err(rdev->dev, "failed initializing audio\n");
922 return r;
923 }
924
925 return 0; 925 return 0;
926} 926}
927 927
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 3277ddecfe9f..159b6a43fda0 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -637,12 +637,6 @@ static int rs690_startup(struct radeon_device *rdev)
637 return r; 637 return r;
638 } 638 }
639 639
640 r = r600_audio_init(rdev);
641 if (r) {
642 dev_err(rdev->dev, "failed initializing audio\n");
643 return r;
644 }
645
646 r = radeon_ib_pool_start(rdev); 640 r = radeon_ib_pool_start(rdev);
647 if (r) 641 if (r)
648 return r; 642 return r;
@@ -651,6 +645,12 @@ static int rs690_startup(struct radeon_device *rdev)
651 if (r) 645 if (r)
652 return r; 646 return r;
653 647
648 r = r600_audio_init(rdev);
649 if (r) {
650 dev_err(rdev->dev, "failed initializing audio\n");
651 return r;
652 }
653
654 return 0; 654 return 0;
655} 655}
656 656
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 04ddc365a908..4ad0281fdc37 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -956,6 +956,12 @@ static int rv770_startup(struct radeon_device *rdev)
956 if (r) 956 if (r)
957 return r; 957 return r;
958 958
959 r = r600_audio_init(rdev);
960 if (r) {
961 DRM_ERROR("radeon: audio init failed\n");
962 return r;
963 }
964
959 return 0; 965 return 0;
960} 966}
961 967
@@ -978,12 +984,6 @@ int rv770_resume(struct radeon_device *rdev)
978 return r; 984 return r;
979 } 985 }
980 986
981 r = r600_audio_init(rdev);
982 if (r) {
983 dev_err(rdev->dev, "radeon: audio init failed\n");
984 return r;
985 }
986
987 return r; 987 return r;
988 988
989} 989}
@@ -1092,12 +1092,6 @@ int rv770_init(struct radeon_device *rdev)
1092 rdev->accel_working = false; 1092 rdev->accel_working = false;
1093 } 1093 }
1094 1094
1095 r = r600_audio_init(rdev);
1096 if (r) {
1097 dev_err(rdev->dev, "radeon: audio init failed\n");
1098 return r;
1099 }
1100
1101 return 0; 1095 return 0;
1102} 1096}
1103 1097
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 549732e56ca9..c7b61f16ecfd 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -867,200 +867,6 @@ void dce6_bandwidth_update(struct radeon_device *rdev)
867/* 867/*
868 * Core functions 868 * Core functions
869 */ 869 */
870static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
871 u32 num_tile_pipes,
872 u32 num_backends_per_asic,
873 u32 *backend_disable_mask_per_asic,
874 u32 num_shader_engines)
875{
876 u32 backend_map = 0;
877 u32 enabled_backends_mask = 0;
878 u32 enabled_backends_count = 0;
879 u32 num_backends_per_se;
880 u32 cur_pipe;
881 u32 swizzle_pipe[SI_MAX_PIPES];
882 u32 cur_backend = 0;
883 u32 i;
884 bool force_no_swizzle;
885
886 /* force legal values */
887 if (num_tile_pipes < 1)
888 num_tile_pipes = 1;
889 if (num_tile_pipes > rdev->config.si.max_tile_pipes)
890 num_tile_pipes = rdev->config.si.max_tile_pipes;
891 if (num_shader_engines < 1)
892 num_shader_engines = 1;
893 if (num_shader_engines > rdev->config.si.max_shader_engines)
894 num_shader_engines = rdev->config.si.max_shader_engines;
895 if (num_backends_per_asic < num_shader_engines)
896 num_backends_per_asic = num_shader_engines;
897 if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
898 num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
899
900 /* make sure we have the same number of backends per se */
901 num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
902 /* set up the number of backends per se */
903 num_backends_per_se = num_backends_per_asic / num_shader_engines;
904 if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
905 num_backends_per_se = rdev->config.si.max_backends_per_se;
906 num_backends_per_asic = num_backends_per_se * num_shader_engines;
907 }
908
909 /* create enable mask and count for enabled backends */
910 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
911 if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
912 enabled_backends_mask |= (1 << i);
913 ++enabled_backends_count;
914 }
915 if (enabled_backends_count == num_backends_per_asic)
916 break;
917 }
918
919 /* force the backends mask to match the current number of backends */
920 if (enabled_backends_count != num_backends_per_asic) {
921 u32 this_backend_enabled;
922 u32 shader_engine;
923 u32 backend_per_se;
924
925 enabled_backends_mask = 0;
926 enabled_backends_count = 0;
927 *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
928 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
929 /* calc the current se */
930 shader_engine = i / rdev->config.si.max_backends_per_se;
931 /* calc the backend per se */
932 backend_per_se = i % rdev->config.si.max_backends_per_se;
933 /* default to not enabled */
934 this_backend_enabled = 0;
935 if ((shader_engine < num_shader_engines) &&
936 (backend_per_se < num_backends_per_se))
937 this_backend_enabled = 1;
938 if (this_backend_enabled) {
939 enabled_backends_mask |= (1 << i);
940 *backend_disable_mask_per_asic &= ~(1 << i);
941 ++enabled_backends_count;
942 }
943 }
944 }
945
946
947 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
948 switch (rdev->family) {
949 case CHIP_TAHITI:
950 case CHIP_PITCAIRN:
951 case CHIP_VERDE:
952 force_no_swizzle = true;
953 break;
954 default:
955 force_no_swizzle = false;
956 break;
957 }
958 if (force_no_swizzle) {
959 bool last_backend_enabled = false;
960
961 force_no_swizzle = false;
962 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
963 if (((enabled_backends_mask >> i) & 1) == 1) {
964 if (last_backend_enabled)
965 force_no_swizzle = true;
966 last_backend_enabled = true;
967 } else
968 last_backend_enabled = false;
969 }
970 }
971
972 switch (num_tile_pipes) {
973 case 1:
974 case 3:
975 case 5:
976 case 7:
977 DRM_ERROR("odd number of pipes!\n");
978 break;
979 case 2:
980 swizzle_pipe[0] = 0;
981 swizzle_pipe[1] = 1;
982 break;
983 case 4:
984 if (force_no_swizzle) {
985 swizzle_pipe[0] = 0;
986 swizzle_pipe[1] = 1;
987 swizzle_pipe[2] = 2;
988 swizzle_pipe[3] = 3;
989 } else {
990 swizzle_pipe[0] = 0;
991 swizzle_pipe[1] = 2;
992 swizzle_pipe[2] = 1;
993 swizzle_pipe[3] = 3;
994 }
995 break;
996 case 6:
997 if (force_no_swizzle) {
998 swizzle_pipe[0] = 0;
999 swizzle_pipe[1] = 1;
1000 swizzle_pipe[2] = 2;
1001 swizzle_pipe[3] = 3;
1002 swizzle_pipe[4] = 4;
1003 swizzle_pipe[5] = 5;
1004 } else {
1005 swizzle_pipe[0] = 0;
1006 swizzle_pipe[1] = 2;
1007 swizzle_pipe[2] = 4;
1008 swizzle_pipe[3] = 1;
1009 swizzle_pipe[4] = 3;
1010 swizzle_pipe[5] = 5;
1011 }
1012 break;
1013 case 8:
1014 if (force_no_swizzle) {
1015 swizzle_pipe[0] = 0;
1016 swizzle_pipe[1] = 1;
1017 swizzle_pipe[2] = 2;
1018 swizzle_pipe[3] = 3;
1019 swizzle_pipe[4] = 4;
1020 swizzle_pipe[5] = 5;
1021 swizzle_pipe[6] = 6;
1022 swizzle_pipe[7] = 7;
1023 } else {
1024 swizzle_pipe[0] = 0;
1025 swizzle_pipe[1] = 2;
1026 swizzle_pipe[2] = 4;
1027 swizzle_pipe[3] = 6;
1028 swizzle_pipe[4] = 1;
1029 swizzle_pipe[5] = 3;
1030 swizzle_pipe[6] = 5;
1031 swizzle_pipe[7] = 7;
1032 }
1033 break;
1034 }
1035
1036 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1037 while (((1 << cur_backend) & enabled_backends_mask) == 0)
1038 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
1039
1040 backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
1041
1042 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
1043 }
1044
1045 return backend_map;
1046}
1047
1048static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
1049 u32 disable_mask_per_se,
1050 u32 max_disable_mask_per_se,
1051 u32 num_shader_engines)
1052{
1053 u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
1054 u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
1055
1056 if (num_shader_engines == 1)
1057 return disable_mask_per_asic;
1058 else if (num_shader_engines == 2)
1059 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
1060 else
1061 return 0xffffffff;
1062}
1063
1064static void si_tiling_mode_table_init(struct radeon_device *rdev) 870static void si_tiling_mode_table_init(struct radeon_device *rdev)
1065{ 871{
1066 const u32 num_tile_mode_states = 32; 872 const u32 num_tile_mode_states = 32;
@@ -1562,18 +1368,151 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev)
1562 DRM_ERROR("unknown asic: 0x%x\n", rdev->family); 1368 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1563} 1369}
1564 1370
1371static void si_select_se_sh(struct radeon_device *rdev,
1372 u32 se_num, u32 sh_num)
1373{
1374 u32 data = INSTANCE_BROADCAST_WRITES;
1375
1376 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378 else if (se_num == 0xffffffff)
1379 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380 else if (sh_num == 0xffffffff)
1381 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382 else
1383 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384 WREG32(GRBM_GFX_INDEX, data);
1385}
1386
1387static u32 si_create_bitmask(u32 bit_width)
1388{
1389 u32 i, mask = 0;
1390
1391 for (i = 0; i < bit_width; i++) {
1392 mask <<= 1;
1393 mask |= 1;
1394 }
1395 return mask;
1396}
1397
1398static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1399{
1400 u32 data, mask;
1401
1402 data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1403 if (data & 1)
1404 data &= INACTIVE_CUS_MASK;
1405 else
1406 data = 0;
1407 data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1408
1409 data >>= INACTIVE_CUS_SHIFT;
1410
1411 mask = si_create_bitmask(cu_per_sh);
1412
1413 return ~data & mask;
1414}
1415
1416static void si_setup_spi(struct radeon_device *rdev,
1417 u32 se_num, u32 sh_per_se,
1418 u32 cu_per_sh)
1419{
1420 int i, j, k;
1421 u32 data, mask, active_cu;
1422
1423 for (i = 0; i < se_num; i++) {
1424 for (j = 0; j < sh_per_se; j++) {
1425 si_select_se_sh(rdev, i, j);
1426 data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427 active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428
1429 mask = 1;
1430 for (k = 0; k < 16; k++) {
1431 mask <<= k;
1432 if (active_cu & mask) {
1433 data &= ~mask;
1434 WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435 break;
1436 }
1437 }
1438 }
1439 }
1440 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441}
1442
1443static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444 u32 max_rb_num, u32 se_num,
1445 u32 sh_per_se)
1446{
1447 u32 data, mask;
1448
1449 data = RREG32(CC_RB_BACKEND_DISABLE);
1450 if (data & 1)
1451 data &= BACKEND_DISABLE_MASK;
1452 else
1453 data = 0;
1454 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1455
1456 data >>= BACKEND_DISABLE_SHIFT;
1457
1458 mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459
1460 return data & mask;
1461}
1462
1463static void si_setup_rb(struct radeon_device *rdev,
1464 u32 se_num, u32 sh_per_se,
1465 u32 max_rb_num)
1466{
1467 int i, j;
1468 u32 data, mask;
1469 u32 disabled_rbs = 0;
1470 u32 enabled_rbs = 0;
1471
1472 for (i = 0; i < se_num; i++) {
1473 for (j = 0; j < sh_per_se; j++) {
1474 si_select_se_sh(rdev, i, j);
1475 data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476 disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477 }
1478 }
1479 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480
1481 mask = 1;
1482 for (i = 0; i < max_rb_num; i++) {
1483 if (!(disabled_rbs & mask))
1484 enabled_rbs |= mask;
1485 mask <<= 1;
1486 }
1487
1488 for (i = 0; i < se_num; i++) {
1489 si_select_se_sh(rdev, i, 0xffffffff);
1490 data = 0;
1491 for (j = 0; j < sh_per_se; j++) {
1492 switch (enabled_rbs & 3) {
1493 case 1:
1494 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495 break;
1496 case 2:
1497 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498 break;
1499 case 3:
1500 default:
1501 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502 break;
1503 }
1504 enabled_rbs >>= 2;
1505 }
1506 WREG32(PA_SC_RASTER_CONFIG, data);
1507 }
1508 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509}
1510
1565static void si_gpu_init(struct radeon_device *rdev) 1511static void si_gpu_init(struct radeon_device *rdev)
1566{ 1512{
1567 u32 cc_rb_backend_disable = 0;
1568 u32 cc_gc_shader_array_config;
1569 u32 gb_addr_config = 0; 1513 u32 gb_addr_config = 0;
1570 u32 mc_shared_chmap, mc_arb_ramcfg; 1514 u32 mc_shared_chmap, mc_arb_ramcfg;
1571 u32 gb_backend_map;
1572 u32 cgts_tcc_disable;
1573 u32 sx_debug_1; 1515 u32 sx_debug_1;
1574 u32 gc_user_shader_array_config;
1575 u32 gc_user_rb_backend_disable;
1576 u32 cgts_user_tcc_disable;
1577 u32 hdp_host_path_cntl; 1516 u32 hdp_host_path_cntl;
1578 u32 tmp; 1517 u32 tmp;
1579 int i, j; 1518 int i, j;
@@ -1581,9 +1520,9 @@ static void si_gpu_init(struct radeon_device *rdev)
1581 switch (rdev->family) { 1520 switch (rdev->family) {
1582 case CHIP_TAHITI: 1521 case CHIP_TAHITI:
1583 rdev->config.si.max_shader_engines = 2; 1522 rdev->config.si.max_shader_engines = 2;
1584 rdev->config.si.max_pipes_per_simd = 4;
1585 rdev->config.si.max_tile_pipes = 12; 1523 rdev->config.si.max_tile_pipes = 12;
1586 rdev->config.si.max_simds_per_se = 8; 1524 rdev->config.si.max_cu_per_sh = 8;
1525 rdev->config.si.max_sh_per_se = 2;
1587 rdev->config.si.max_backends_per_se = 4; 1526 rdev->config.si.max_backends_per_se = 4;
1588 rdev->config.si.max_texture_channel_caches = 12; 1527 rdev->config.si.max_texture_channel_caches = 12;
1589 rdev->config.si.max_gprs = 256; 1528 rdev->config.si.max_gprs = 256;
@@ -1594,12 +1533,13 @@ static void si_gpu_init(struct radeon_device *rdev)
1594 rdev->config.si.sc_prim_fifo_size_backend = 0x100; 1533 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1595 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1534 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1596 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1535 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1597 break; 1537 break;
1598 case CHIP_PITCAIRN: 1538 case CHIP_PITCAIRN:
1599 rdev->config.si.max_shader_engines = 2; 1539 rdev->config.si.max_shader_engines = 2;
1600 rdev->config.si.max_pipes_per_simd = 4;
1601 rdev->config.si.max_tile_pipes = 8; 1540 rdev->config.si.max_tile_pipes = 8;
1602 rdev->config.si.max_simds_per_se = 5; 1541 rdev->config.si.max_cu_per_sh = 5;
1542 rdev->config.si.max_sh_per_se = 2;
1603 rdev->config.si.max_backends_per_se = 4; 1543 rdev->config.si.max_backends_per_se = 4;
1604 rdev->config.si.max_texture_channel_caches = 8; 1544 rdev->config.si.max_texture_channel_caches = 8;
1605 rdev->config.si.max_gprs = 256; 1545 rdev->config.si.max_gprs = 256;
@@ -1610,13 +1550,14 @@ static void si_gpu_init(struct radeon_device *rdev)
1610 rdev->config.si.sc_prim_fifo_size_backend = 0x100; 1550 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1611 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1551 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1612 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1552 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1613 break; 1554 break;
1614 case CHIP_VERDE: 1555 case CHIP_VERDE:
1615 default: 1556 default:
1616 rdev->config.si.max_shader_engines = 1; 1557 rdev->config.si.max_shader_engines = 1;
1617 rdev->config.si.max_pipes_per_simd = 4;
1618 rdev->config.si.max_tile_pipes = 4; 1558 rdev->config.si.max_tile_pipes = 4;
1619 rdev->config.si.max_simds_per_se = 2; 1559 rdev->config.si.max_cu_per_sh = 2;
1560 rdev->config.si.max_sh_per_se = 2;
1620 rdev->config.si.max_backends_per_se = 4; 1561 rdev->config.si.max_backends_per_se = 4;
1621 rdev->config.si.max_texture_channel_caches = 4; 1562 rdev->config.si.max_texture_channel_caches = 4;
1622 rdev->config.si.max_gprs = 256; 1563 rdev->config.si.max_gprs = 256;
@@ -1627,6 +1568,7 @@ static void si_gpu_init(struct radeon_device *rdev)
1627 rdev->config.si.sc_prim_fifo_size_backend = 0x40; 1568 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1628 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1569 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1629 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1570 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1630 break; 1572 break;
1631 } 1573 }
1632 1574
@@ -1648,31 +1590,7 @@ static void si_gpu_init(struct radeon_device *rdev)
1648 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 1590 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1649 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 1591 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1650 1592
1651 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
1652 cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1653 cgts_tcc_disable = 0xffff0000;
1654 for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
1655 cgts_tcc_disable &= ~(1 << (16 + i));
1656 gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
1657 gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1658 cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
1659
1660 rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
1661 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; 1593 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1662 tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1663 rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
1664 tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1665 rdev->config.si.backend_disable_mask_per_asic =
1666 si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
1667 rdev->config.si.num_shader_engines);
1668 rdev->config.si.backend_map =
1669 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1670 rdev->config.si.num_backends_per_se *
1671 rdev->config.si.num_shader_engines,
1672 &rdev->config.si.backend_disable_mask_per_asic,
1673 rdev->config.si.num_shader_engines);
1674 tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
1675 rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
1676 rdev->config.si.mem_max_burst_length_bytes = 256; 1594 rdev->config.si.mem_max_burst_length_bytes = 256;
1677 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 1595 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1678 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1596 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
@@ -1683,55 +1601,8 @@ static void si_gpu_init(struct radeon_device *rdev)
1683 rdev->config.si.num_gpus = 1; 1601 rdev->config.si.num_gpus = 1;
1684 rdev->config.si.multi_gpu_tile_size = 64; 1602 rdev->config.si.multi_gpu_tile_size = 64;
1685 1603
1686 gb_addr_config = 0; 1604 /* fix up row size */
1687 switch (rdev->config.si.num_tile_pipes) { 1605 gb_addr_config &= ~ROW_SIZE_MASK;
1688 case 1:
1689 gb_addr_config |= NUM_PIPES(0);
1690 break;
1691 case 2:
1692 gb_addr_config |= NUM_PIPES(1);
1693 break;
1694 case 4:
1695 gb_addr_config |= NUM_PIPES(2);
1696 break;
1697 case 8:
1698 default:
1699 gb_addr_config |= NUM_PIPES(3);
1700 break;
1701 }
1702
1703 tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1704 gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1705 gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1706 tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1707 gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1708 switch (rdev->config.si.num_gpus) {
1709 case 1:
1710 default:
1711 gb_addr_config |= NUM_GPUS(0);
1712 break;
1713 case 2:
1714 gb_addr_config |= NUM_GPUS(1);
1715 break;
1716 case 4:
1717 gb_addr_config |= NUM_GPUS(2);
1718 break;
1719 }
1720 switch (rdev->config.si.multi_gpu_tile_size) {
1721 case 16:
1722 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1723 break;
1724 case 32:
1725 default:
1726 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1727 break;
1728 case 64:
1729 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1730 break;
1731 case 128:
1732 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1733 break;
1734 }
1735 switch (rdev->config.si.mem_row_size_in_kb) { 1606 switch (rdev->config.si.mem_row_size_in_kb) {
1736 case 1: 1607 case 1:
1737 default: 1608 default:
@@ -1745,26 +1616,6 @@ static void si_gpu_init(struct radeon_device *rdev)
1745 break; 1616 break;
1746 } 1617 }
1747 1618
1748 tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1749 rdev->config.si.num_tile_pipes = (1 << tmp);
1750 tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1751 rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1752 tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1753 rdev->config.si.num_shader_engines = tmp + 1;
1754 tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1755 rdev->config.si.num_gpus = tmp + 1;
1756 tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1757 rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1758 tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1759 rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1760
1761 gb_backend_map =
1762 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1763 rdev->config.si.num_backends_per_se *
1764 rdev->config.si.num_shader_engines,
1765 &rdev->config.si.backend_disable_mask_per_asic,
1766 rdev->config.si.num_shader_engines);
1767
1768 /* setup tiling info dword. gb_addr_config is not adequate since it does 1619 /* setup tiling info dword. gb_addr_config is not adequate since it does
1769 * not have bank info, so create a custom tiling dword. 1620 * not have bank info, so create a custom tiling dword.
1770 * bits 3:0 num_pipes 1621 * bits 3:0 num_pipes
@@ -1789,33 +1640,29 @@ static void si_gpu_init(struct radeon_device *rdev)
1789 rdev->config.si.tile_config |= (3 << 0); 1640 rdev->config.si.tile_config |= (3 << 0);
1790 break; 1641 break;
1791 } 1642 }
1792 rdev->config.si.tile_config |= 1643 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1793 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; 1644 rdev->config.si.tile_config |= 1 << 4;
1645 else
1646 rdev->config.si.tile_config |= 0 << 4;
1794 rdev->config.si.tile_config |= 1647 rdev->config.si.tile_config |=
1795 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; 1648 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1796 rdev->config.si.tile_config |= 1649 rdev->config.si.tile_config |=
1797 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; 1650 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1798 1651
1799 rdev->config.si.backend_map = gb_backend_map;
1800 WREG32(GB_ADDR_CONFIG, gb_addr_config); 1652 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1801 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 1653 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1802 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 1654 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1803 1655
1804 /* primary versions */ 1656 si_tiling_mode_table_init(rdev);
1805 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1806 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1807 WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1808
1809 WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1810 1657
1811 /* user versions */ 1658 si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1812 WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1659 rdev->config.si.max_sh_per_se,
1813 WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1660 rdev->config.si.max_backends_per_se);
1814 WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1815 1661
1816 WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); 1662 si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1663 rdev->config.si.max_sh_per_se,
1664 rdev->config.si.max_cu_per_sh);
1817 1665
1818 si_tiling_mode_table_init(rdev);
1819 1666
1820 /* set HW defaults for 3D engine */ 1667 /* set HW defaults for 3D engine */
1821 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | 1668 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
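The new si_create_bitmask()/si_get_cu_enabled() helpers reduce to straightforward mask arithmetic. A minimal standalone sketch, with the two register reads and the valid-bit handling folded into a single hw_cfg parameter and the INACTIVE_CUS_* values taken from the sid.h additions below:

#include <stdint.h>

#define INACTIVE_CUS_MASK  0xFFFF0000
#define INACTIVE_CUS_SHIFT 16

/* equivalent of si_create_bitmask(): bit_width consecutive low bits set */
static uint32_t create_bitmask(uint32_t bit_width)
{
	return (bit_width >= 32) ? 0xffffffffu : ((1u << bit_width) - 1);
}

/* hw_cfg stands in for the combined CC/GC_USER_SHADER_ARRAY_CONFIG value */
static uint32_t active_cu_mask(uint32_t hw_cfg, uint32_t cu_per_sh)
{
	uint32_t inactive = (hw_cfg & INACTIVE_CUS_MASK) >> INACTIVE_CUS_SHIFT;

	return ~inactive & create_bitmask(cu_per_sh);
}

/* Example: cu_per_sh = 8 with bits 16-17 set in hw_cfg (CUs 0 and 1 fused
 * off) gives inactive = 0x3 and an active-CU mask of 0xfc.
 */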
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 53ea2c42dbd6..db4067962868 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -24,6 +24,11 @@
24#ifndef SI_H 24#ifndef SI_H
25#define SI_H 25#define SI_H
26 26
27#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
28
29#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
30#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
31
27#define CG_MULT_THERMAL_STATUS 0x714 32#define CG_MULT_THERMAL_STATUS 0x714
28#define ASIC_MAX_TEMP(x) ((x) << 0) 33#define ASIC_MAX_TEMP(x) ((x) << 0)
29#define ASIC_MAX_TEMP_MASK 0x000001ff 34#define ASIC_MAX_TEMP_MASK 0x000001ff
@@ -408,6 +413,12 @@
408#define SOFT_RESET_IA (1 << 15) 413#define SOFT_RESET_IA (1 << 15)
409 414
410#define GRBM_GFX_INDEX 0x802C 415#define GRBM_GFX_INDEX 0x802C
416#define INSTANCE_INDEX(x) ((x) << 0)
417#define SH_INDEX(x) ((x) << 8)
418#define SE_INDEX(x) ((x) << 16)
419#define SH_BROADCAST_WRITES (1 << 29)
420#define INSTANCE_BROADCAST_WRITES (1 << 30)
421#define SE_BROADCAST_WRITES (1 << 31)
411 422
412#define GRBM_INT_CNTL 0x8060 423#define GRBM_INT_CNTL 0x8060
413# define RDERR_INT_ENABLE (1 << 0) 424# define RDERR_INT_ENABLE (1 << 0)
@@ -480,6 +491,8 @@
480#define VGT_TF_MEMORY_BASE 0x89B8 491#define VGT_TF_MEMORY_BASE 0x89B8
481 492
482#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc 493#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc
494#define INACTIVE_CUS_MASK 0xFFFF0000
495#define INACTIVE_CUS_SHIFT 16
483#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 496#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0
484 497
485#define PA_CL_ENHANCE 0x8A14 498#define PA_CL_ENHANCE 0x8A14
@@ -688,6 +701,12 @@
688#define RLC_MC_CNTL 0xC344 701#define RLC_MC_CNTL 0xC344
689#define RLC_UCODE_CNTL 0xC348 702#define RLC_UCODE_CNTL 0xC348
690 703
704#define PA_SC_RASTER_CONFIG 0x28350
705# define RASTER_CONFIG_RB_MAP_0 0
706# define RASTER_CONFIG_RB_MAP_1 1
707# define RASTER_CONFIG_RB_MAP_2 2
708# define RASTER_CONFIG_RB_MAP_3 3
709
691#define VGT_EVENT_INITIATOR 0x28a90 710#define VGT_EVENT_INITIATOR 0x28a90
692# define SAMPLE_STREAMOUTSTATS1 (1 << 0) 711# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
693# define SAMPLE_STREAMOUTSTATS2 (2 << 0) 712# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
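Worked values for the GRBM_GFX_INDEX word that si_select_se_sh() composes from these new fields:

/* se_num = 1, sh_num = 0 (address SE 1 / SH 0, broadcast to instances):
 *   INSTANCE_BROADCAST_WRITES | SE_INDEX(1) | SH_INDEX(0)
 *   = (1 << 30) | (1 << 16) | 0 = 0x40010000
 *
 * se_num = sh_num = 0xffffffff (broadcast to every SE and SH):
 *   SH_BROADCAST_WRITES | SE_BROADCAST_WRITES
 *   = (1 << 29) | (1 << 31) = 0xa0000000
 */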
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index beb2491db274..a0edd9854218 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -37,4 +37,16 @@ config I2C_MUX_PCA954x
37 This driver can also be built as a module. If so, the module 37 This driver can also be built as a module. If so, the module
38 will be called i2c-mux-pca954x. 38 will be called i2c-mux-pca954x.
39 39
40config I2C_MUX_PINCTRL
41 tristate "pinctrl-based I2C multiplexer"
42 depends on PINCTRL
43 help
44 If you say yes to this option, support will be included for an I2C
45 multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing.
46 This is useful for SoCs whose I2C module's signals can be routed to
47 different sets of pins at run-time.
48
49 This driver can also be built as a module. If so, the module will be
 50 called i2c-mux-pinctrl.
51
40endmenu 52endmenu
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index 5826249b29ca..76da8692afff 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -4,5 +4,6 @@
4obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o 4obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o
5obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o 5obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o
6obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o 6obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o
7obj-$(CONFIG_I2C_MUX_PINCTRL) += i2c-mux-pinctrl.o
7 8
8ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG 9ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c
new file mode 100644
index 000000000000..46a669763476
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c
@@ -0,0 +1,279 @@
1/*
2 * I2C multiplexer using pinctrl API
3 *
4 * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/i2c.h>
20#include <linux/i2c-mux.h>
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/of_i2c.h>
24#include <linux/pinctrl/consumer.h>
25#include <linux/i2c-mux-pinctrl.h>
26#include <linux/platform_device.h>
27#include <linux/slab.h>
28
29struct i2c_mux_pinctrl {
30 struct device *dev;
31 struct i2c_mux_pinctrl_platform_data *pdata;
32 struct pinctrl *pinctrl;
33 struct pinctrl_state **states;
34 struct pinctrl_state *state_idle;
35 struct i2c_adapter *parent;
36 struct i2c_adapter **busses;
37};
38
39static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data,
40 u32 chan)
41{
42 struct i2c_mux_pinctrl *mux = data;
43
44 return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
45}
46
47static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data,
48 u32 chan)
49{
50 struct i2c_mux_pinctrl *mux = data;
51
52 return pinctrl_select_state(mux->pinctrl, mux->state_idle);
53}
54
55#ifdef CONFIG_OF
56static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
57 struct platform_device *pdev)
58{
59 struct device_node *np = pdev->dev.of_node;
60 int num_names, i, ret;
61 struct device_node *adapter_np;
62 struct i2c_adapter *adapter;
63
64 if (!np)
65 return 0;
66
67 mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL);
68 if (!mux->pdata) {
69 dev_err(mux->dev,
70 "Cannot allocate i2c_mux_pinctrl_platform_data\n");
71 return -ENOMEM;
72 }
73
74 num_names = of_property_count_strings(np, "pinctrl-names");
75 if (num_names < 0) {
76 dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
77 num_names);
78 return num_names;
79 }
80
81 mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev,
82 sizeof(*mux->pdata->pinctrl_states) * num_names,
83 GFP_KERNEL);
84 if (!mux->pdata->pinctrl_states) {
85 dev_err(mux->dev, "Cannot allocate pinctrl_states\n");
86 return -ENOMEM;
87 }
88
89 for (i = 0; i < num_names; i++) {
90 ret = of_property_read_string_index(np, "pinctrl-names", i,
91 &mux->pdata->pinctrl_states[mux->pdata->bus_count]);
92 if (ret < 0) {
93 dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
94 ret);
95 return ret;
96 }
97 if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count],
98 "idle")) {
99 if (i != num_names - 1) {
100 dev_err(mux->dev, "idle state must be last\n");
101 return -EINVAL;
102 }
103 mux->pdata->pinctrl_state_idle = "idle";
104 } else {
105 mux->pdata->bus_count++;
106 }
107 }
108
109 adapter_np = of_parse_phandle(np, "i2c-parent", 0);
110 if (!adapter_np) {
111 dev_err(mux->dev, "Cannot parse i2c-parent\n");
112 return -ENODEV;
113 }
114 adapter = of_find_i2c_adapter_by_node(adapter_np);
115 if (!adapter) {
116 dev_err(mux->dev, "Cannot find parent bus\n");
117 return -ENODEV;
118 }
119 mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
120 put_device(&adapter->dev);
121
122 return 0;
123}
124#else
125static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
126 struct platform_device *pdev)
127{
128 return 0;
129}
130#endif
131
132static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev)
133{
134 struct i2c_mux_pinctrl *mux;
135 int (*deselect)(struct i2c_adapter *, void *, u32);
136 int i, ret;
137
138 mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
139 if (!mux) {
140 dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n");
141 ret = -ENOMEM;
142 goto err;
143 }
144 platform_set_drvdata(pdev, mux);
145
146 mux->dev = &pdev->dev;
147
148 mux->pdata = pdev->dev.platform_data;
149 if (!mux->pdata) {
150 ret = i2c_mux_pinctrl_parse_dt(mux, pdev);
151 if (ret < 0)
152 goto err;
153 }
154 if (!mux->pdata) {
155 dev_err(&pdev->dev, "Missing platform data\n");
156 ret = -ENODEV;
157 goto err;
158 }
159
160 mux->states = devm_kzalloc(&pdev->dev,
161 sizeof(*mux->states) * mux->pdata->bus_count,
162 GFP_KERNEL);
163 if (!mux->states) {
164 dev_err(&pdev->dev, "Cannot allocate states\n");
165 ret = -ENOMEM;
166 goto err;
167 }
168
 169	mux->busses = devm_kzalloc(&pdev->dev,
 170				   sizeof(*mux->busses) * mux->pdata->bus_count,
 171				   GFP_KERNEL);
 172	if (!mux->busses) {
173 dev_err(&pdev->dev, "Cannot allocate busses\n");
174 ret = -ENOMEM;
175 goto err;
176 }
177
178 mux->pinctrl = devm_pinctrl_get(&pdev->dev);
179 if (IS_ERR(mux->pinctrl)) {
180 ret = PTR_ERR(mux->pinctrl);
181 dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret);
182 goto err;
183 }
184 for (i = 0; i < mux->pdata->bus_count; i++) {
185 mux->states[i] = pinctrl_lookup_state(mux->pinctrl,
186 mux->pdata->pinctrl_states[i]);
187 if (IS_ERR(mux->states[i])) {
188 ret = PTR_ERR(mux->states[i]);
189 dev_err(&pdev->dev,
190 "Cannot look up pinctrl state %s: %d\n",
191 mux->pdata->pinctrl_states[i], ret);
192 goto err;
193 }
194 }
195 if (mux->pdata->pinctrl_state_idle) {
196 mux->state_idle = pinctrl_lookup_state(mux->pinctrl,
197 mux->pdata->pinctrl_state_idle);
198 if (IS_ERR(mux->state_idle)) {
199 ret = PTR_ERR(mux->state_idle);
200 dev_err(&pdev->dev,
201 "Cannot look up pinctrl state %s: %d\n",
202 mux->pdata->pinctrl_state_idle, ret);
203 goto err;
204 }
205
206 deselect = i2c_mux_pinctrl_deselect;
207 } else {
208 deselect = NULL;
209 }
210
211 mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
212 if (!mux->parent) {
213 dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
214 mux->pdata->parent_bus_num);
215 ret = -ENODEV;
216 goto err;
217 }
218
219 for (i = 0; i < mux->pdata->bus_count; i++) {
220 u32 bus = mux->pdata->base_bus_num ?
221 (mux->pdata->base_bus_num + i) : 0;
222
223 mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev,
224 mux, bus, i,
225 i2c_mux_pinctrl_select,
226 deselect);
227 if (!mux->busses[i]) {
228 ret = -ENODEV;
229 dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
230 goto err_del_adapter;
231 }
232 }
233
234 return 0;
235
236err_del_adapter:
237 for (; i > 0; i--)
238 i2c_del_mux_adapter(mux->busses[i - 1]);
239 i2c_put_adapter(mux->parent);
240err:
241 return ret;
242}
243
244static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev)
245{
246 struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev);
247 int i;
248
249 for (i = 0; i < mux->pdata->bus_count; i++)
250 i2c_del_mux_adapter(mux->busses[i]);
251
252 i2c_put_adapter(mux->parent);
253
254 return 0;
255}
256
257#ifdef CONFIG_OF
258static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = {
259 { .compatible = "i2c-mux-pinctrl", },
260 {},
261};
262MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match);
263#endif
264
265static struct platform_driver i2c_mux_pinctrl_driver = {
266 .driver = {
267 .name = "i2c-mux-pinctrl",
268 .owner = THIS_MODULE,
269 .of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match),
270 },
271 .probe = i2c_mux_pinctrl_probe,
272 .remove = __devexit_p(i2c_mux_pinctrl_remove),
273};
274module_platform_driver(i2c_mux_pinctrl_driver);
275
276MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver");
277MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>");
278MODULE_LICENSE("GPL v2");
279MODULE_ALIAS("platform:i2c-mux-pinctrl");
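For boards that do not use the device tree path, the probe code above expects platform data carrying the fields it dereferences (parent_bus_num, base_bus_num, bus_count, pinctrl_states, pinctrl_state_idle). A hedged sketch of board code supplying it; the struct is declared in <linux/i2c-mux-pinctrl.h>, which is not shown in this diff, and the state names here are placeholders:

#include <linux/i2c-mux-pinctrl.h>
#include <linux/platform_device.h>

/* Two mux positions plus an optional idle state; the names are placeholders
 * that must match pinctrl states defined for the parent controller.
 */
static const char *board_i2c_mux_states[] = { "i2c-pos-a", "i2c-pos-b" };

static struct i2c_mux_pinctrl_platform_data board_i2c_mux_pdata = {
	.parent_bus_num		= 0,	/* adapter whose signals are muxed */
	.base_bus_num		= 0,	/* 0 requests dynamic child bus numbers */
	.bus_count		= 2,	/* one child adapter per state above */
	.pinctrl_states		= board_i2c_mux_states,
	.pinctrl_state_idle	= "idle",	/* omit to skip deselect */
};

static struct platform_device board_i2c_mux_device = {
	.name	= "i2c-mux-pinctrl",
	.id	= -1,
	.dev	= { .platform_data = &board_i2c_mux_pdata },
};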
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 55ab284e22f2..b18870c455ad 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1593,6 +1593,10 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
1593 struct net_device *pdev; 1593 struct net_device *pdev;
1594 1594
1595 pdev = ip_dev_find(&init_net, peer_ip); 1595 pdev = ip_dev_find(&init_net, peer_ip);
1596 if (!pdev) {
1597 err = -ENODEV;
1598 goto out;
1599 }
1596 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 1600 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1597 n, pdev, 0); 1601 n, pdev, 0);
1598 if (!ep->l2t) 1602 if (!ep->l2t)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ee1c577238f7..3530c41fcd1f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -140,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
140 props->max_mr_size = ~0ull; 140 props->max_mr_size = ~0ull;
141 props->page_size_cap = dev->dev->caps.page_size_cap; 141 props->page_size_cap = dev->dev->caps.page_size_cap;
142 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; 142 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
143 props->max_qp_wr = dev->dev->caps.max_wqes; 143 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
144 props->max_sge = min(dev->dev->caps.max_sq_sg, 144 props->max_sge = min(dev->dev->caps.max_sq_sg,
145 dev->dev->caps.max_rq_sg); 145 dev->dev->caps.max_rq_sg);
146 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; 146 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
@@ -1084,12 +1084,9 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1084 int total_eqs = 0; 1084 int total_eqs = 0;
1085 int i, j, eq; 1085 int i, j, eq;
1086 1086
1087 /* Init eq table */ 1087 /* Legacy mode or comp_pool is not large enough */
1088 ibdev->eq_table = NULL; 1088 if (dev->caps.comp_pool == 0 ||
1089 ibdev->eq_added = 0; 1089 dev->caps.num_ports > dev->caps.comp_pool)
1090
1091 /* Legacy mode? */
1092 if (dev->caps.comp_pool == 0)
1093 return; 1090 return;
1094 1091
1095 eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ 1092 eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
@@ -1135,7 +1132,10 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1135static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) 1132static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1136{ 1133{
1137 int i; 1134 int i;
1138 int total_eqs; 1135
1136 /* no additional eqs were added */
1137 if (!ibdev->eq_table)
1138 return;
1139 1139
1140 /* Reset the advertised EQ number */ 1140 /* Reset the advertised EQ number */
1141 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; 1141 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
@@ -1148,12 +1148,7 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1148 mlx4_release_eq(dev, ibdev->eq_table[i]); 1148 mlx4_release_eq(dev, ibdev->eq_table[i]);
1149 } 1149 }
1150 1150
1151 total_eqs = dev->caps.num_comp_vectors + ibdev->eq_added;
1152 memset(ibdev->eq_table, 0, total_eqs * sizeof(int));
1153 kfree(ibdev->eq_table); 1151 kfree(ibdev->eq_table);
1154
1155 ibdev->eq_table = NULL;
1156 ibdev->eq_added = 0;
1157} 1152}
1158 1153
1159static void *mlx4_ib_add(struct mlx4_dev *dev) 1154static void *mlx4_ib_add(struct mlx4_dev *dev)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e62297cc77cc..ff36655d23d3 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -44,6 +44,14 @@
44#include <linux/mlx4/device.h> 44#include <linux/mlx4/device.h>
45#include <linux/mlx4/doorbell.h> 45#include <linux/mlx4/doorbell.h>
46 46
47enum {
48 MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
49 MLX4_IB_MAX_HEADROOM = 2048
50};
51
52#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
53#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
54
47struct mlx4_ib_ucontext { 55struct mlx4_ib_ucontext {
48 struct ib_ucontext ibucontext; 56 struct ib_ucontext ibucontext;
49 struct mlx4_uar uar; 57 struct mlx4_uar uar;
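The headroom these macros reserve works out as follows:

/* MLX4_IB_SQ_HEADROOM(shift) = (MLX4_IB_MAX_HEADROOM >> shift) + 1
 * MLX4_IB_SQ_MAX_SPARE       = MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)
 *                            = (2048 >> 6) + 1 = 33
 * i.e. 33 WQEs are held in reserve, which is why max_qp_wr and the SQ/RQ
 * size checks in qp.c below subtract MLX4_IB_SQ_MAX_SPARE from caps.max_wqes.
 */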
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ceb33327091a..8d4ed24aef93 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -310,8 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
310 int is_user, int has_rq, struct mlx4_ib_qp *qp) 310 int is_user, int has_rq, struct mlx4_ib_qp *qp)
311{ 311{
312 /* Sanity check RQ size before proceeding */ 312 /* Sanity check RQ size before proceeding */
313 if (cap->max_recv_wr > dev->dev->caps.max_wqes || 313 if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
314 cap->max_recv_sge > dev->dev->caps.max_rq_sg) 314 cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
315 return -EINVAL; 315 return -EINVAL;
316 316
317 if (!has_rq) { 317 if (!has_rq) {
@@ -329,8 +329,17 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
329 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); 329 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
330 } 330 }
331 331
332 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; 332 /* leave userspace return values as they were, so as not to break ABI */
333 cap->max_recv_sge = qp->rq.max_gs; 333 if (is_user) {
334 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
335 cap->max_recv_sge = qp->rq.max_gs;
336 } else {
337 cap->max_recv_wr = qp->rq.max_post =
338 min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
339 cap->max_recv_sge = min(qp->rq.max_gs,
340 min(dev->dev->caps.max_sq_sg,
341 dev->dev->caps.max_rq_sg));
342 }
334 343
335 return 0; 344 return 0;
336} 345}
@@ -341,8 +350,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
341 int s; 350 int s;
342 351
343 /* Sanity check SQ size before proceeding */ 352 /* Sanity check SQ size before proceeding */
344 if (cap->max_send_wr > dev->dev->caps.max_wqes || 353 if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
345 cap->max_send_sge > dev->dev->caps.max_sq_sg || 354 cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
346 cap->max_inline_data + send_wqe_overhead(type, qp->flags) + 355 cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
347 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) 356 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
348 return -EINVAL; 357 return -EINVAL;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 85a69c958559..037f5cea85bd 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -231,7 +231,6 @@ struct ocrdma_qp_hwq_info {
231 u32 entry_size; 231 u32 entry_size;
232 u32 max_cnt; 232 u32 max_cnt;
233 u32 max_wqe_idx; 233 u32 max_wqe_idx;
234 u32 free_delta;
235 u16 dbid; /* qid, where to ring the doorbell. */ 234 u16 dbid; /* qid, where to ring the doorbell. */
236 u32 len; 235 u32 len;
237 dma_addr_t pa; 236 dma_addr_t pa;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
index a411a4e3193d..517ab20b727c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -101,8 +101,6 @@ struct ocrdma_create_qp_uresp {
101 u32 rsvd1; 101 u32 rsvd1;
102 u32 num_wqe_allocated; 102 u32 num_wqe_allocated;
103 u32 num_rqe_allocated; 103 u32 num_rqe_allocated;
104 u32 free_wqe_delta;
105 u32 free_rqe_delta;
106 u32 db_sq_offset; 104 u32 db_sq_offset;
107 u32 db_rq_offset; 105 u32 db_rq_offset;
108 u32 db_shift; 106 u32 db_shift;
@@ -126,8 +124,7 @@ struct ocrdma_create_srq_uresp {
126 u32 db_rq_offset; 124 u32 db_rq_offset;
127 u32 db_shift; 125 u32 db_shift;
128 126
129 u32 free_rqe_delta; 127 u64 rsvd2;
130 u32 rsvd2;
131 u64 rsvd3; 128 u64 rsvd3;
132} __packed; 129} __packed;
133 130
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 9b204b1ba336..9343a1522977 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -732,7 +732,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
732 break; 732 break;
733 case OCRDMA_SRQ_LIMIT_EVENT: 733 case OCRDMA_SRQ_LIMIT_EVENT:
734 ib_evt.element.srq = &qp->srq->ibsrq; 734 ib_evt.element.srq = &qp->srq->ibsrq;
735 ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED; 735 ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED;
736 srq_event = 1; 736 srq_event = 1;
737 qp_event = 0; 737 qp_event = 0;
738 break; 738 break;
@@ -1990,19 +1990,12 @@ static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp,
1990 max_wqe_allocated = 1 << max_wqe_allocated; 1990 max_wqe_allocated = 1 << max_wqe_allocated;
1991 max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe); 1991 max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe);
1992 1992
1993 if (qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
1994 qp->sq.free_delta = 0;
1995 qp->rq.free_delta = 1;
1996 } else
1997 qp->sq.free_delta = 1;
1998
1999 qp->sq.max_cnt = max_wqe_allocated; 1993 qp->sq.max_cnt = max_wqe_allocated;
2000 qp->sq.max_wqe_idx = max_wqe_allocated - 1; 1994 qp->sq.max_wqe_idx = max_wqe_allocated - 1;
2001 1995
2002 if (!attrs->srq) { 1996 if (!attrs->srq) {
2003 qp->rq.max_cnt = max_rqe_allocated; 1997 qp->rq.max_cnt = max_rqe_allocated;
2004 qp->rq.max_wqe_idx = max_rqe_allocated - 1; 1998 qp->rq.max_wqe_idx = max_rqe_allocated - 1;
2005 qp->rq.free_delta = 1;
2006 } 1999 }
2007} 2000}
2008 2001
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index a20d16eaae71..04fef3de6d75 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -26,7 +26,6 @@
26 *******************************************************************/ 26 *******************************************************************/
27 27
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/version.h>
30#include <linux/idr.h> 29#include <linux/idr.h>
31#include <rdma/ib_verbs.h> 30#include <rdma/ib_verbs.h>
32#include <rdma/ib_user_verbs.h> 31#include <rdma/ib_user_verbs.h>
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e9f74d1b48f6..d16d172b6b6b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -940,8 +940,6 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
940 uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; 940 uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
941 uresp.db_shift = 16; 941 uresp.db_shift = 16;
942 } 942 }
943 uresp.free_wqe_delta = qp->sq.free_delta;
944 uresp.free_rqe_delta = qp->rq.free_delta;
945 943
946 if (qp->dpp_enabled) { 944 if (qp->dpp_enabled) {
947 uresp.dpp_credit = dpp_credit_lmt; 945 uresp.dpp_credit = dpp_credit_lmt;
@@ -1307,8 +1305,6 @@ static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1307 free_cnt = (q->max_cnt - q->head) + q->tail; 1305 free_cnt = (q->max_cnt - q->head) + q->tail;
1308 else 1306 else
1309 free_cnt = q->tail - q->head; 1307 free_cnt = q->tail - q->head;
1310 if (q->free_delta)
1311 free_cnt -= q->free_delta;
1312 return free_cnt; 1308 return free_cnt;
1313} 1309}
1314 1310
@@ -1501,7 +1497,6 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
1501 (srq->pd->id * srq->dev->nic_info.db_page_size); 1497 (srq->pd->id * srq->dev->nic_info.db_page_size);
1502 uresp.db_page_size = srq->dev->nic_info.db_page_size; 1498 uresp.db_page_size = srq->dev->nic_info.db_page_size;
1503 uresp.num_rqe_allocated = srq->rq.max_cnt; 1499 uresp.num_rqe_allocated = srq->rq.max_cnt;
1504 uresp.free_rqe_delta = 1;
1505 if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { 1500 if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
1506 uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; 1501 uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
1507 uresp.db_shift = 24; 1502 uresp.db_shift = 24;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index e6483439f25f..633f03d80274 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -28,7 +28,6 @@
28#ifndef __OCRDMA_VERBS_H__ 28#ifndef __OCRDMA_VERBS_H__
29#define __OCRDMA_VERBS_H__ 29#define __OCRDMA_VERBS_H__
30 30
31#include <linux/version.h>
32int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, 31int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
33 struct ib_send_wr **bad_wr); 32 struct ib_send_wr **bad_wr);
34int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, 33int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d90a421e9cac..a2e418cba0ff 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -547,26 +547,12 @@ static void iommu_poll_events(struct amd_iommu *iommu)
547 spin_unlock_irqrestore(&iommu->lock, flags); 547 spin_unlock_irqrestore(&iommu->lock, flags);
548} 548}
549 549
550static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) 550static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
551{ 551{
552 struct amd_iommu_fault fault; 552 struct amd_iommu_fault fault;
553 volatile u64 *raw;
554 int i;
555 553
556 INC_STATS_COUNTER(pri_requests); 554 INC_STATS_COUNTER(pri_requests);
557 555
558 raw = (u64 *)(iommu->ppr_log + head);
559
560 /*
561 * Hardware bug: Interrupt may arrive before the entry is written to
562 * memory. If this happens we need to wait for the entry to arrive.
563 */
564 for (i = 0; i < LOOP_TIMEOUT; ++i) {
565 if (PPR_REQ_TYPE(raw[0]) != 0)
566 break;
567 udelay(1);
568 }
569
570 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { 556 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
571 pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); 557 pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
572 return; 558 return;
@@ -578,12 +564,6 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
578 fault.tag = PPR_TAG(raw[0]); 564 fault.tag = PPR_TAG(raw[0]);
579 fault.flags = PPR_FLAGS(raw[0]); 565 fault.flags = PPR_FLAGS(raw[0]);
580 566
581 /*
582 * To detect the hardware bug we need to clear the entry
583 * to back to zero.
584 */
585 raw[0] = raw[1] = 0;
586
587 atomic_notifier_call_chain(&ppr_notifier, 0, &fault); 567 atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
588} 568}
589 569
@@ -595,25 +575,62 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
595 if (iommu->ppr_log == NULL) 575 if (iommu->ppr_log == NULL)
596 return; 576 return;
597 577
578 /* enable ppr interrupts again */
579 writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
580
598 spin_lock_irqsave(&iommu->lock, flags); 581 spin_lock_irqsave(&iommu->lock, flags);
599 582
600 head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 583 head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
601 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 584 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
602 585
603 while (head != tail) { 586 while (head != tail) {
587 volatile u64 *raw;
588 u64 entry[2];
589 int i;
604 590
605 /* Handle PPR entry */ 591 raw = (u64 *)(iommu->ppr_log + head);
606 iommu_handle_ppr_entry(iommu, head); 592
593 /*
594 * Hardware bug: Interrupt may arrive before the entry is
595 * written to memory. If this happens we need to wait for the
596 * entry to arrive.
597 */
598 for (i = 0; i < LOOP_TIMEOUT; ++i) {
599 if (PPR_REQ_TYPE(raw[0]) != 0)
600 break;
601 udelay(1);
602 }
603
604 /* Avoid memcpy function-call overhead */
605 entry[0] = raw[0];
606 entry[1] = raw[1];
607 607
608 /* Update and refresh ring-buffer state*/ 608 /*
609 * To detect the hardware bug we need to clear the entry
610 * back to zero.
611 */
612 raw[0] = raw[1] = 0UL;
613
614 /* Update head pointer of hardware ring-buffer */
609 head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; 615 head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
610 writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 616 writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
617
618 /*
619 * Release iommu->lock because ppr-handling might need to
 620	 * re-acquire it
621 */
622 spin_unlock_irqrestore(&iommu->lock, flags);
623
624 /* Handle PPR entry */
625 iommu_handle_ppr_entry(iommu, entry);
626
627 spin_lock_irqsave(&iommu->lock, flags);
628
629 /* Refresh ring-buffer information */
630 head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
611 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 631 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
612 } 632 }
613 633
614 /* enable ppr interrupts again */
615 writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
616
617 spin_unlock_irqrestore(&iommu->lock, flags); 634 spin_unlock_irqrestore(&iommu->lock, flags);
618} 635}
619 636
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index c56790375e0f..542024ba6dba 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1029,6 +1029,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1029 if (!iommu->dev) 1029 if (!iommu->dev)
1030 return 1; 1030 return 1;
1031 1031
1032 iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number,
1033 PCI_DEVFN(0, 0));
1034
1032 iommu->cap_ptr = h->cap_ptr; 1035 iommu->cap_ptr = h->cap_ptr;
1033 iommu->pci_seg = h->pci_seg; 1036 iommu->pci_seg = h->pci_seg;
1034 iommu->mmio_phys = h->mmio_phys; 1037 iommu->mmio_phys = h->mmio_phys;
@@ -1323,20 +1326,16 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1323{ 1326{
1324 int i, j; 1327 int i, j;
1325 u32 ioc_feature_control; 1328 u32 ioc_feature_control;
1326 struct pci_dev *pdev = NULL; 1329 struct pci_dev *pdev = iommu->root_pdev;
1327 1330
1328 /* RD890 BIOSes may not have completely reconfigured the iommu */ 1331 /* RD890 BIOSes may not have completely reconfigured the iommu */
1329 if (!is_rd890_iommu(iommu->dev)) 1332 if (!is_rd890_iommu(iommu->dev) || !pdev)
1330 return; 1333 return;
1331 1334
1332 /* 1335 /*
1333 * First, we need to ensure that the iommu is enabled. This is 1336 * First, we need to ensure that the iommu is enabled. This is
1334 * controlled by a register in the northbridge 1337 * controlled by a register in the northbridge
1335 */ 1338 */
1336 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1337
1338 if (!pdev)
1339 return;
1340 1339
1341 /* Select Northbridge indirect register 0x75 and enable writing */ 1340 /* Select Northbridge indirect register 0x75 and enable writing */
1342 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 1341 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
@@ -1346,8 +1345,6 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1346 if (!(ioc_feature_control & 0x1)) 1345 if (!(ioc_feature_control & 0x1))
1347 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 1346 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1348 1347
1349 pci_dev_put(pdev);
1350
1351 /* Restore the iommu BAR */ 1348 /* Restore the iommu BAR */
1352 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 1349 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1353 iommu->stored_addr_lo); 1350 iommu->stored_addr_lo);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 2452f3b71736..24355559a2ad 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -481,6 +481,9 @@ struct amd_iommu {
481 /* Pointer to PCI device of this IOMMU */ 481 /* Pointer to PCI device of this IOMMU */
482 struct pci_dev *dev; 482 struct pci_dev *dev;
483 483
484 /* Cache pdev to root device for resume quirks */
485 struct pci_dev *root_pdev;
486
484 /* physical address of MMIO space */ 487 /* physical address of MMIO space */
485 u64 mmio_phys; 488 u64 mmio_phys;
486 /* virtual address of MMIO space */ 489 /* virtual address of MMIO space */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 835de7168cd3..a9c7981ddd24 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2550,6 +2550,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2550 err = -EINVAL; 2550 err = -EINVAL;
2551 spin_lock_init(&conf->device_lock); 2551 spin_lock_init(&conf->device_lock);
2552 rdev_for_each(rdev, mddev) { 2552 rdev_for_each(rdev, mddev) {
2553 struct request_queue *q;
2553 int disk_idx = rdev->raid_disk; 2554 int disk_idx = rdev->raid_disk;
2554 if (disk_idx >= mddev->raid_disks 2555 if (disk_idx >= mddev->raid_disks
2555 || disk_idx < 0) 2556 || disk_idx < 0)
@@ -2562,6 +2563,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2562 if (disk->rdev) 2563 if (disk->rdev)
2563 goto abort; 2564 goto abort;
2564 disk->rdev = rdev; 2565 disk->rdev = rdev;
2566 q = bdev_get_queue(rdev->bdev);
2567 if (q->merge_bvec_fn)
2568 mddev->merge_check_needed = 1;
2565 2569
2566 disk->head_position = 0; 2570 disk->head_position = 0;
2567 } 2571 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 987db37cb875..99ae6068e456 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3475,6 +3475,7 @@ static int run(struct mddev *mddev)
3475 3475
3476 rdev_for_each(rdev, mddev) { 3476 rdev_for_each(rdev, mddev) {
3477 long long diff; 3477 long long diff;
3478 struct request_queue *q;
3478 3479
3479 disk_idx = rdev->raid_disk; 3480 disk_idx = rdev->raid_disk;
3480 if (disk_idx < 0) 3481 if (disk_idx < 0)
@@ -3493,6 +3494,9 @@ static int run(struct mddev *mddev)
3493 goto out_free_conf; 3494 goto out_free_conf;
3494 disk->rdev = rdev; 3495 disk->rdev = rdev;
3495 } 3496 }
3497 q = bdev_get_queue(rdev->bdev);
3498 if (q->merge_bvec_fn)
3499 mddev->merge_check_needed = 1;
3496 diff = (rdev->new_data_offset - rdev->data_offset); 3500 diff = (rdev->new_data_offset - rdev->data_offset);
3497 if (!mddev->reshape_backwards) 3501 if (!mddev->reshape_backwards)
3498 diff = -diff; 3502 diff = -diff;
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 9f957c2d48e9..09d4f8d9d592 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -264,6 +264,9 @@ static struct dentry *dfs_rootdir;
264 */ 264 */
265int ubi_debugfs_init(void) 265int ubi_debugfs_init(void)
266{ 266{
 267	if (!IS_ENABLED(CONFIG_DEBUG_FS))
268 return 0;
269
267 dfs_rootdir = debugfs_create_dir("ubi", NULL); 270 dfs_rootdir = debugfs_create_dir("ubi", NULL);
268 if (IS_ERR_OR_NULL(dfs_rootdir)) { 271 if (IS_ERR_OR_NULL(dfs_rootdir)) {
269 int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); 272 int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir);
@@ -281,7 +284,8 @@ int ubi_debugfs_init(void)
281 */ 284 */
282void ubi_debugfs_exit(void) 285void ubi_debugfs_exit(void)
283{ 286{
 284	debugfs_remove(dfs_rootdir); 287	if (IS_ENABLED(CONFIG_DEBUG_FS))
288 debugfs_remove(dfs_rootdir);
285} 289}
286 290
287/* Read an UBI debugfs file */ 291/* Read an UBI debugfs file */
@@ -403,6 +407,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
403 struct dentry *dent; 407 struct dentry *dent;
404 struct ubi_debug_info *d = ubi->dbg; 408 struct ubi_debug_info *d = ubi->dbg;
405 409
 410	if (!IS_ENABLED(CONFIG_DEBUG_FS))
411 return 0;
412
406 n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, 413 n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME,
407 ubi->ubi_num); 414 ubi->ubi_num);
408 if (n == UBI_DFS_DIR_LEN) { 415 if (n == UBI_DFS_DIR_LEN) {
@@ -470,5 +477,6 @@ out:
470 */ 477 */
471void ubi_debugfs_exit_dev(struct ubi_device *ubi) 478void ubi_debugfs_exit_dev(struct ubi_device *ubi)
472{ 479{
 473	debugfs_remove_recursive(ubi->dbg->dfs_dir); 480	if (IS_ENABLED(CONFIG_DEBUG_FS))
481 debugfs_remove_recursive(ubi->dbg->dfs_dir);
474} 482}
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 9df100a4ec38..b6be644e7b85 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1262,11 +1262,11 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
1262 dbg_wl("flush pending work for LEB %d:%d (%d pending works)", 1262 dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
1263 vol_id, lnum, ubi->works_count); 1263 vol_id, lnum, ubi->works_count);
1264 1264
1265 down_write(&ubi->work_sem);
1266 while (found) { 1265 while (found) {
1267 struct ubi_work *wrk; 1266 struct ubi_work *wrk;
1268 found = 0; 1267 found = 0;
1269 1268
1269 down_read(&ubi->work_sem);
1270 spin_lock(&ubi->wl_lock); 1270 spin_lock(&ubi->wl_lock);
1271 list_for_each_entry(wrk, &ubi->works, list) { 1271 list_for_each_entry(wrk, &ubi->works, list) {
1272 if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && 1272 if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
@@ -1277,18 +1277,27 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
1277 spin_unlock(&ubi->wl_lock); 1277 spin_unlock(&ubi->wl_lock);
1278 1278
1279 err = wrk->func(ubi, wrk, 0); 1279 err = wrk->func(ubi, wrk, 0);
1280 if (err) 1280 if (err) {
1281 goto out; 1281 up_read(&ubi->work_sem);
1282 return err;
1283 }
1284
1282 spin_lock(&ubi->wl_lock); 1285 spin_lock(&ubi->wl_lock);
1283 found = 1; 1286 found = 1;
1284 break; 1287 break;
1285 } 1288 }
1286 } 1289 }
1287 spin_unlock(&ubi->wl_lock); 1290 spin_unlock(&ubi->wl_lock);
1291 up_read(&ubi->work_sem);
1288 } 1292 }
1289 1293
1290out: 1294 /*
 1295	 * Make sure all the work that has been done in parallel is
 1296	 * finished.
1297 */
1298 down_write(&ubi->work_sem);
1291 up_write(&ubi->work_sem); 1299 up_write(&ubi->work_sem);
1300
1292 return err; 1301 return err;
1293} 1302}
1294 1303
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 1fe2c7a8b40c..a8fb52992c64 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -697,10 +697,10 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
697 if (slave != dev->caps.function) 697 if (slave != dev->caps.function)
698 memset(inbox->buf, 0, 256); 698 memset(inbox->buf, 0, 256);
699 if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { 699 if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
700 *(u8 *) inbox->buf = !!reset_qkey_viols << 6; 700 *(u8 *) inbox->buf |= !!reset_qkey_viols << 6;
701 ((__be32 *) inbox->buf)[2] = agg_cap_mask; 701 ((__be32 *) inbox->buf)[2] = agg_cap_mask;
702 } else { 702 } else {
703 ((u8 *) inbox->buf)[3] = !!reset_qkey_viols; 703 ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols;
704 ((__be32 *) inbox->buf)[1] = agg_cap_mask; 704 ((__be32 *) inbox->buf)[1] = agg_cap_mask;
705 } 705 }
706 706
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 639db4d0aa76..2fd9d36acd15 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * (C) 2009 - Peter Feuerer peter (a) piie.net 6 * (C) 2009 - Peter Feuerer peter (a) piie.net
7 * http://piie.net 7 * http://piie.net
8 * 2009 Borislav Petkov <petkovbb@gmail.com> 8 * 2009 Borislav Petkov bp (a) alien8.de
9 * 9 *
10 * Inspired by and many thanks to: 10 * Inspired by and many thanks to:
11 * o acerfand - Rachel Greenham 11 * o acerfand - Rachel Greenham
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 7d5f56edb8ef..4267789ca995 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -910,14 +910,17 @@ static inline int cmos_poweroff(struct device *dev)
910 910
911static u32 rtc_handler(void *context) 911static u32 rtc_handler(void *context)
912{ 912{
913 struct device *dev = context;
914
915 pm_wakeup_event(dev, 0);
913 acpi_clear_event(ACPI_EVENT_RTC); 916 acpi_clear_event(ACPI_EVENT_RTC);
914 acpi_disable_event(ACPI_EVENT_RTC, 0); 917 acpi_disable_event(ACPI_EVENT_RTC, 0);
915 return ACPI_INTERRUPT_HANDLED; 918 return ACPI_INTERRUPT_HANDLED;
916} 919}
917 920
918static inline void rtc_wake_setup(void) 921static inline void rtc_wake_setup(struct device *dev)
919{ 922{
920 acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, NULL); 923 acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
921 /* 924 /*
922 * After the RTC handler is installed, the Fixed_RTC event should 925 * After the RTC handler is installed, the Fixed_RTC event should
923 * be disabled. Only when the RTC alarm is set will it be enabled. 926 * be disabled. Only when the RTC alarm is set will it be enabled.
@@ -950,7 +953,7 @@ cmos_wake_setup(struct device *dev)
950 if (acpi_disabled) 953 if (acpi_disabled)
951 return; 954 return;
952 955
953 rtc_wake_setup(); 956 rtc_wake_setup(dev);
954 acpi_rtc_info.wake_on = rtc_wake_on; 957 acpi_rtc_info.wake_on = rtc_wake_on;
955 acpi_rtc_info.wake_off = rtc_wake_off; 958 acpi_rtc_info.wake_off = rtc_wake_off;
956 959
diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c
index 4e7ef0e6b79c..d46764b5aaba 100644
--- a/drivers/staging/ramster/zcache-main.c
+++ b/drivers/staging/ramster/zcache-main.c
@@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
3002 return oid; 3002 return oid;
3003} 3003}
3004 3004
3005static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, 3005static int zcache_frontswap_store(unsigned type, pgoff_t offset,
3006 struct page *page) 3006 struct page *page)
3007{ 3007{
3008 u64 ind64 = (u64)offset; 3008 u64 ind64 = (u64)offset;
@@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
3025 3025
3026/* returns 0 if the page was successfully gotten from frontswap, -1 if 3026/* returns 0 if the page was successfully gotten from frontswap, -1 if
3027 * was not present (should never happen!) */ 3027 * was not present (should never happen!) */
3028static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, 3028static int zcache_frontswap_load(unsigned type, pgoff_t offset,
3029 struct page *page) 3029 struct page *page)
3030{ 3030{
3031 u64 ind64 = (u64)offset; 3031 u64 ind64 = (u64)offset;
@@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored)
3080} 3080}
3081 3081
3082static struct frontswap_ops zcache_frontswap_ops = { 3082static struct frontswap_ops zcache_frontswap_ops = {
3083 .put_page = zcache_frontswap_put_page, 3083 .store = zcache_frontswap_store,
3084 .get_page = zcache_frontswap_get_page, 3084 .load = zcache_frontswap_load,
3085 .invalidate_page = zcache_frontswap_flush_page, 3085 .invalidate_page = zcache_frontswap_flush_page,
3086 .invalidate_area = zcache_frontswap_flush_area, 3086 .invalidate_area = zcache_frontswap_flush_area,
3087 .init = zcache_frontswap_init 3087 .init = zcache_frontswap_init
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 2734dacacbaf..784c796b9848 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1;
  * Swizzling increases objects per swaptype, increasing tmem concurrency
  * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
  * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
- * frontswap_get_page(), but has side-effects. Hence using 8.
+ * frontswap_load(), but has side-effects. Hence using 8.
  */
 #define SWIZ_BITS		8
 #define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
@@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
 	return oid;
 }
 
-static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_store(unsigned type, pgoff_t offset,
 				struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
 
 /* returns 0 if the page was successfully gotten from frontswap, -1 if
  * was not present (should never happen!) */
-static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_load(unsigned type, pgoff_t offset,
 				struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored)
 }
 
 static struct frontswap_ops zcache_frontswap_ops = {
-	.put_page = zcache_frontswap_put_page,
-	.get_page = zcache_frontswap_get_page,
+	.store = zcache_frontswap_store,
+	.load = zcache_frontswap_load,
 	.invalidate_page = zcache_frontswap_flush_page,
 	.invalidate_area = zcache_frontswap_flush_area,
 	.init = zcache_frontswap_init
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 37c609898f84..7e6136e2ce81 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -587,14 +587,14 @@ static void sbp_management_request_logout(
 {
 	struct sbp_tport *tport = agent->tport;
 	struct sbp_tpg *tpg = tport->tpg;
-	int login_id;
+	int id;
 	struct sbp_login_descriptor *login;
 
-	login_id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc));
+	id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc));
 
-	login = sbp_login_find_by_id(tpg, login_id);
+	login = sbp_login_find_by_id(tpg, id);
 	if (!login) {
-		pr_warn("cannot find login: %d\n", login_id);
+		pr_warn("cannot find login: %d\n", id);
 
 		req->status.status = cpu_to_be32(
 			STATUS_BLOCK_RESP(STATUS_RESP_REQUEST_COMPLETE) |
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 686dba189f8e..9f99d0404908 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -133,16 +133,11 @@ static struct se_device *fd_create_virtdevice(
 		ret = PTR_ERR(dev_p);
 		goto fail;
 	}
-
-	/* O_DIRECT too? */
-	flags = O_RDWR | O_CREAT | O_LARGEFILE;
-
 	/*
-	 * If fd_buffered_io=1 has not been set explicitly (the default),
-	 * use O_SYNC to force FILEIO writes to disk.
+	 * Use O_DSYNC by default instead of O_SYNC to forgo syncing
+	 * of pure timestamp updates.
 	 */
-	if (!(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO))
-		flags |= O_SYNC;
+	flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
 
 	file = filp_open(dev_p, flags, 0600);
 	if (IS_ERR(file)) {
@@ -380,23 +375,6 @@ static void fd_emulate_sync_cache(struct se_cmd *cmd)
 	}
 }
 
-static void fd_emulate_write_fua(struct se_cmd *cmd)
-{
-	struct se_device *dev = cmd->se_dev;
-	struct fd_dev *fd_dev = dev->dev_ptr;
-	loff_t start = cmd->t_task_lba *
-		dev->se_sub_dev->se_dev_attrib.block_size;
-	loff_t end = start + cmd->data_length;
-	int ret;
-
-	pr_debug("FILEIO: FUA WRITE LBA: %llu, bytes: %u\n",
-		cmd->t_task_lba, cmd->data_length);
-
-	ret = vfs_fsync_range(fd_dev->fd_file, start, end, 1);
-	if (ret != 0)
-		pr_err("FILEIO: vfs_fsync_range() failed: %d\n", ret);
-}
-
 static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
 		u32 sgl_nents, enum dma_data_direction data_direction)
 {
@@ -411,19 +389,21 @@ static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
 		ret = fd_do_readv(cmd, sgl, sgl_nents);
 	} else {
 		ret = fd_do_writev(cmd, sgl, sgl_nents);
-
+		/*
+		 * Perform implict vfs_fsync_range() for fd_do_writev() ops
+		 * for SCSI WRITEs with Forced Unit Access (FUA) set.
+		 * Allow this to happen independent of WCE=0 setting.
+		 */
 		if (ret > 0 &&
-		    dev->se_sub_dev->se_dev_attrib.emulate_write_cache > 0 &&
 		    dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 &&
 		    (cmd->se_cmd_flags & SCF_FUA)) {
-			/*
-			 * We might need to be a bit smarter here
-			 * and return some sense data to let the initiator
-			 * know the FUA WRITE cache sync failed..?
-			 */
-			fd_emulate_write_fua(cmd);
-		}
+			struct fd_dev *fd_dev = dev->dev_ptr;
+			loff_t start = cmd->t_task_lba *
+				dev->se_sub_dev->se_dev_attrib.block_size;
+			loff_t end = start + cmd->data_length;
 
+			vfs_fsync_range(fd_dev->fd_file, start, end, 1);
+		}
 	}
 
 	if (ret < 0) {
@@ -442,7 +422,6 @@ enum {
 static match_table_t tokens = {
 	{Opt_fd_dev_name, "fd_dev_name=%s"},
 	{Opt_fd_dev_size, "fd_dev_size=%s"},
-	{Opt_fd_buffered_io, "fd_buffered_io=%d"},
 	{Opt_err, NULL}
 };
 
@@ -454,7 +433,7 @@ static ssize_t fd_set_configfs_dev_params(
 	struct fd_dev *fd_dev = se_dev->se_dev_su_ptr;
 	char *orig, *ptr, *arg_p, *opts;
 	substring_t args[MAX_OPT_ARGS];
-	int ret = 0, arg, token;
+	int ret = 0, token;
 
 	opts = kstrdup(page, GFP_KERNEL);
 	if (!opts)
@@ -498,19 +477,6 @@ static ssize_t fd_set_configfs_dev_params(
 				" bytes\n", fd_dev->fd_dev_size);
 			fd_dev->fbd_flags |= FBDF_HAS_SIZE;
 			break;
-		case Opt_fd_buffered_io:
-			match_int(args, &arg);
-			if (arg != 1) {
-				pr_err("bogus fd_buffered_io=%d value\n", arg);
-				ret = -EINVAL;
-				goto out;
-			}
-
-			pr_debug("FILEIO: Using buffered I/O"
-				" operations for struct fd_dev\n");
-
-			fd_dev->fbd_flags |= FDBD_USE_BUFFERED_IO;
-			break;
 		default:
 			break;
 		}
@@ -542,10 +508,8 @@ static ssize_t fd_show_configfs_dev_params(
 	ssize_t bl = 0;
 
 	bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
-	bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n",
-		fd_dev->fd_dev_name, fd_dev->fd_dev_size,
-		(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO) ?
-		"Buffered" : "Synchronous");
+	bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n",
+		fd_dev->fd_dev_name, fd_dev->fd_dev_size);
 	return bl;
 }
 
diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h
index fbd59ef7d8be..70ce7fd7111d 100644
--- a/drivers/target/target_core_file.h
+++ b/drivers/target/target_core_file.h
@@ -14,7 +14,6 @@
 
 #define FBDF_HAS_PATH		0x01
 #define FBDF_HAS_SIZE		0x02
-#define FDBD_USE_BUFFERED_IO	0x04
 
 struct fd_dev {
 	u32		fbd_flags;
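
The FILEIO changes above drop the fd_buffered_io option, open the backing file with O_DSYNC, and flush only the written range when a WRITE carries FUA. A rough userspace sketch of the same pattern follows; the path and sizes are invented for the demo, and sync_file_range() stands in for vfs_fsync_range(), which has no exact userspace counterpart.

/* Sketch: O_DSYNC writes plus a ranged flush for an FUA-style write. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/tmp/fileio-demo.img";	/* hypothetical backing file */
	char buf[4096] = { 0 };
	off_t off = 8 * 4096;				/* pretend LBA * block_size */

	int fd = open(path, O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC, 0600);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	if (pwrite(fd, buf, sizeof(buf), off) != (ssize_t)sizeof(buf)) {
		perror("pwrite");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Flush just the range we wrote, like vfs_fsync_range(file, start, end, 1). */
	if (sync_file_range(fd, off, sizeof(buf),
			    SYNC_FILE_RANGE_WAIT_BEFORE |
			    SYNC_FILE_RANGE_WRITE |
			    SYNC_FILE_RANGE_WAIT_AFTER) != 0)
		perror("sync_file_range");

	close(fd);
	return 0;
}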
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index dcb79521e6c8..89f264c67420 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
 }
 
 /* returns 0 if the page was successfully put into frontswap, -1 if not */
-static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+static int tmem_frontswap_store(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
  * returns 0 if the page was successfully gotten from frontswap, -1 if
  * was not present (should never happen!)
  */
-static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+static int tmem_frontswap_load(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -362,8 +362,8 @@ static int __init no_frontswap(char *s)
 __setup("nofrontswap", no_frontswap);
 
 static struct frontswap_ops __initdata tmem_frontswap_ops = {
-	.put_page = tmem_frontswap_put_page,
-	.get_page = tmem_frontswap_get_page,
+	.store = tmem_frontswap_store,
+	.load = tmem_frontswap_load,
 	.invalidate_page = tmem_frontswap_flush_page,
 	.invalidate_area = tmem_frontswap_flush_area,
 	.init = tmem_frontswap_init
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 20350a93ed99..6df0cbe1cbc9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -174,6 +174,7 @@ struct smb_version_operations {
 	void (*add_credits)(struct TCP_Server_Info *, const unsigned int);
 	void (*set_credits)(struct TCP_Server_Info *, const int);
 	int * (*get_credits_field)(struct TCP_Server_Info *);
+	__u64 (*get_next_mid)(struct TCP_Server_Info *);
 	/* data offset from read response message */
 	unsigned int (*read_data_offset)(char *);
 	/* data length from read response message */
@@ -399,6 +400,12 @@ set_credits(struct TCP_Server_Info *server, const int val)
 	server->ops->set_credits(server, val);
 }
 
+static inline __u64
+get_next_mid(struct TCP_Server_Info *server)
+{
+	return server->ops->get_next_mid(server);
+}
+
 /*
  * Macros to allow the TCP_Server_Info->net field and related code to drop out
  * when CONFIG_NET_NS isn't set.
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 5ec21ecf7980..0a6cbfe2761e 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -114,7 +114,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
 			void **request_buf);
 extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
 			  const struct nls_table *nls_cp);
-extern __u64 GetNextMid(struct TCP_Server_Info *server);
 extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b5ad716b2642..5b400730c213 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -268,7 +268,7 @@ small_smb_init_no_tc(const int smb_command, const int wct,
 		return rc;
 
 	buffer = (struct smb_hdr *)*request_buf;
-	buffer->Mid = GetNextMid(ses->server);
+	buffer->Mid = get_next_mid(ses->server);
 	if (ses->capabilities & CAP_UNICODE)
 		buffer->Flags2 |= SMBFLG2_UNICODE;
 	if (ses->capabilities & CAP_STATUS32)
@@ -402,7 +402,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
 
 	cFYI(1, "secFlags 0x%x", secFlags);
 
-	pSMB->hdr.Mid = GetNextMid(server);
+	pSMB->hdr.Mid = get_next_mid(server);
 	pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
 
 	if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
@@ -782,7 +782,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses)
 		return rc;
 	}
 
-	pSMB->hdr.Mid = GetNextMid(ses->server);
+	pSMB->hdr.Mid = get_next_mid(ses->server);
 
 	if (ses->server->sec_mode &
 		   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
@@ -4762,7 +4762,7 @@ getDFSRetry:
 
 	/* server pointer checked in called function,
 	   but should never be null here anyway */
-	pSMB->hdr.Mid = GetNextMid(ses->server);
+	pSMB->hdr.Mid = get_next_mid(ses->server);
 	pSMB->hdr.Tid = ses->ipc_tid;
 	pSMB->hdr.Uid = ses->Suid;
 	if (ses->capabilities & CAP_STATUS32)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccafdedd0dbc..78db68a5cf44 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1058,13 +1058,15 @@ cifs_demultiplex_thread(void *p)
 			if (mid_entry != NULL) {
 				if (!mid_entry->multiRsp || mid_entry->multiEnd)
 					mid_entry->callback(mid_entry);
-			} else if (!server->ops->is_oplock_break(buf, server)) {
+			} else if (!server->ops->is_oplock_break ||
+				   !server->ops->is_oplock_break(buf, server)) {
 				cERROR(1, "No task to wake, unknown frame received! "
 					   "NumMids %d", atomic_read(&midCount));
 				cifs_dump_mem("Received Data is: ", buf,
 					      HEADER_SIZE(server));
 #ifdef CONFIG_CIFS_DEBUG2
-				server->ops->dump_detail(buf);
+				if (server->ops->dump_detail)
+					server->ops->dump_detail(buf);
 				cifs_dump_mids(server);
 #endif /* CIFS_DEBUG2 */
 
@@ -3938,7 +3940,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
 	header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
 			NULL /*no tid */ , 4 /*wct */ );
 
-	smb_buffer->Mid = GetNextMid(ses->server);
+	smb_buffer->Mid = get_next_mid(ses->server);
 	smb_buffer->Uid = ses->Suid;
 	pSMB = (TCONX_REQ *) smb_buffer;
 	pSMBr = (TCONX_RSP *) smb_buffer_response;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 253170dfa716..513adbc211d7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -876,7 +876,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	struct cifsLockInfo *li, *tmp;
 	struct cifs_tcon *tcon;
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
-	unsigned int num, max_num;
+	unsigned int num, max_num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;
 	int types[] = {LOCKING_ANDX_LARGE_FILES,
 		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
@@ -892,8 +892,19 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 		return rc;
 	}
 
-	max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
-		  sizeof(LOCKING_ANDX_RANGE);
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tcon->ses->server->maxBuf;
+	if (!max_buf) {
+		mutex_unlock(&cinode->lock_mutex);
+		FreeXid(xid);
+		return -EINVAL;
+	}
+
+	max_num = (max_buf - sizeof(struct smb_hdr)) /
+		  sizeof(LOCKING_ANDX_RANGE);
 	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf) {
 		mutex_unlock(&cinode->lock_mutex);
@@ -1218,7 +1229,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
 	int types[] = {LOCKING_ANDX_LARGE_FILES,
 		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
 	unsigned int i;
-	unsigned int max_num, num;
+	unsigned int max_num, num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
@@ -1228,8 +1239,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
 
 	INIT_LIST_HEAD(&tmp_llist);
 
-	max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
-		  sizeof(LOCKING_ANDX_RANGE);
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tcon->ses->server->maxBuf;
+	if (!max_buf)
+		return -EINVAL;
+
+	max_num = (max_buf - sizeof(struct smb_hdr)) /
+		  sizeof(LOCKING_ANDX_RANGE);
 	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -1247,46 +1266,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1247 continue; 1266 continue;
1248 if (types[i] != li->type) 1267 if (types[i] != li->type)
1249 continue; 1268 continue;
1250 if (!cinode->can_cache_brlcks) { 1269 if (cinode->can_cache_brlcks) {
1251 cur->Pid = cpu_to_le16(li->pid);
1252 cur->LengthLow = cpu_to_le32((u32)li->length);
1253 cur->LengthHigh =
1254 cpu_to_le32((u32)(li->length>>32));
1255 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1256 cur->OffsetHigh =
1257 cpu_to_le32((u32)(li->offset>>32));
1258 /*
1259 * We need to save a lock here to let us add
1260 * it again to the file's list if the unlock
1261 * range request fails on the server.
1262 */
1263 list_move(&li->llist, &tmp_llist);
1264 if (++num == max_num) {
1265 stored_rc = cifs_lockv(xid, tcon,
1266 cfile->netfid,
1267 li->type, num,
1268 0, buf);
1269 if (stored_rc) {
1270 /*
1271 * We failed on the unlock range
1272 * request - add all locks from
1273 * the tmp list to the head of
1274 * the file's list.
1275 */
1276 cifs_move_llist(&tmp_llist,
1277 &cfile->llist);
1278 rc = stored_rc;
1279 } else
1280 /*
1281 * The unlock range request
1282 * succeed - free the tmp list.
1283 */
1284 cifs_free_llist(&tmp_llist);
1285 cur = buf;
1286 num = 0;
1287 } else
1288 cur++;
1289 } else {
1290 /* 1270 /*
1291 * We can cache brlock requests - simply remove 1271 * We can cache brlock requests - simply remove
1292 * a lock from the file's list. 1272 * a lock from the file's list.
@@ -1294,7 +1274,41 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1294 list_del(&li->llist); 1274 list_del(&li->llist);
1295 cifs_del_lock_waiters(li); 1275 cifs_del_lock_waiters(li);
1296 kfree(li); 1276 kfree(li);
1277 continue;
1297 } 1278 }
1279 cur->Pid = cpu_to_le16(li->pid);
1280 cur->LengthLow = cpu_to_le32((u32)li->length);
1281 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1282 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1283 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1284 /*
1285 * We need to save a lock here to let us add it again to
1286 * the file's list if the unlock range request fails on
1287 * the server.
1288 */
1289 list_move(&li->llist, &tmp_llist);
1290 if (++num == max_num) {
1291 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1292 li->type, num, 0, buf);
1293 if (stored_rc) {
1294 /*
1295 * We failed on the unlock range
1296 * request - add all locks from the tmp
1297 * list to the head of the file's list.
1298 */
1299 cifs_move_llist(&tmp_llist,
1300 &cfile->llist);
1301 rc = stored_rc;
1302 } else
1303 /*
1304 * The unlock range request succeed -
1305 * free the tmp list.
1306 */
1307 cifs_free_llist(&tmp_llist);
1308 cur = buf;
1309 num = 0;
1310 } else
1311 cur++;
1298 } 1312 }
1299 if (num) { 1313 if (num) {
1300 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1314 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index e2552d2b2e42..557506ae1e2a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -212,93 +212,6 @@ cifs_small_buf_release(void *buf_to_free)
212 return; 212 return;
213} 213}
214 214
215/*
216 * Find a free multiplex id (SMB mid). Otherwise there could be
217 * mid collisions which might cause problems, demultiplexing the
218 * wrong response to this request. Multiplex ids could collide if
219 * one of a series requests takes much longer than the others, or
220 * if a very large number of long lived requests (byte range
221 * locks or FindNotify requests) are pending. No more than
222 * 64K-1 requests can be outstanding at one time. If no
223 * mids are available, return zero. A future optimization
224 * could make the combination of mids and uid the key we use
225 * to demultiplex on (rather than mid alone).
226 * In addition to the above check, the cifs demultiplex
227 * code already used the command code as a secondary
228 * check of the frame and if signing is negotiated the
229 * response would be discarded if the mid were the same
230 * but the signature was wrong. Since the mid is not put in the
231 * pending queue until later (when it is about to be dispatched)
232 * we do have to limit the number of outstanding requests
233 * to somewhat less than 64K-1 although it is hard to imagine
234 * so many threads being in the vfs at one time.
235 */
236__u64 GetNextMid(struct TCP_Server_Info *server)
237{
238 __u64 mid = 0;
239 __u16 last_mid, cur_mid;
240 bool collision;
241
242 spin_lock(&GlobalMid_Lock);
243
244 /* mid is 16 bit only for CIFS/SMB */
245 cur_mid = (__u16)((server->CurrentMid) & 0xffff);
246 /* we do not want to loop forever */
247 last_mid = cur_mid;
248 cur_mid++;
249
250 /*
251 * This nested loop looks more expensive than it is.
252 * In practice the list of pending requests is short,
253 * fewer than 50, and the mids are likely to be unique
254 * on the first pass through the loop unless some request
255 * takes longer than the 64 thousand requests before it
256 * (and it would also have to have been a request that
257 * did not time out).
258 */
259 while (cur_mid != last_mid) {
260 struct mid_q_entry *mid_entry;
261 unsigned int num_mids;
262
263 collision = false;
264 if (cur_mid == 0)
265 cur_mid++;
266
267 num_mids = 0;
268 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
269 ++num_mids;
270 if (mid_entry->mid == cur_mid &&
271 mid_entry->mid_state == MID_REQUEST_SUBMITTED) {
272 /* This mid is in use, try a different one */
273 collision = true;
274 break;
275 }
276 }
277
278 /*
279 * if we have more than 32k mids in the list, then something
280 * is very wrong. Possibly a local user is trying to DoS the
281 * box by issuing long-running calls and SIGKILL'ing them. If
282 * we get to 2^16 mids then we're in big trouble as this
283 * function could loop forever.
284 *
285 * Go ahead and assign out the mid in this situation, but force
286 * an eventual reconnect to clean out the pending_mid_q.
287 */
288 if (num_mids > 32768)
289 server->tcpStatus = CifsNeedReconnect;
290
291 if (!collision) {
292 mid = (__u64)cur_mid;
293 server->CurrentMid = mid;
294 break;
295 }
296 cur_mid++;
297 }
298 spin_unlock(&GlobalMid_Lock);
299 return mid;
300}
301
302/* NB: MID can not be set if treeCon not passed in, in that 215/* NB: MID can not be set if treeCon not passed in, in that
303 case it is responsbility of caller to set the mid */ 216 case it is responsbility of caller to set the mid */
304void 217void
@@ -334,7 +247,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
334 247
335 /* Uid is not converted */ 248 /* Uid is not converted */
336 buffer->Uid = treeCon->ses->Suid; 249 buffer->Uid = treeCon->ses->Suid;
337 buffer->Mid = GetNextMid(treeCon->ses->server); 250 buffer->Mid = get_next_mid(treeCon->ses->server);
338 } 251 }
339 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) 252 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS)
340 buffer->Flags2 |= SMBFLG2_DFS; 253 buffer->Flags2 |= SMBFLG2_DFS;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d9d615fbed3f..6dec38f5522d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -125,6 +125,94 @@ cifs_get_credits_field(struct TCP_Server_Info *server)
125 return &server->credits; 125 return &server->credits;
126} 126}
127 127
128/*
129 * Find a free multiplex id (SMB mid). Otherwise there could be
130 * mid collisions which might cause problems, demultiplexing the
131 * wrong response to this request. Multiplex ids could collide if
132 * one of a series requests takes much longer than the others, or
133 * if a very large number of long lived requests (byte range
134 * locks or FindNotify requests) are pending. No more than
135 * 64K-1 requests can be outstanding at one time. If no
136 * mids are available, return zero. A future optimization
137 * could make the combination of mids and uid the key we use
138 * to demultiplex on (rather than mid alone).
139 * In addition to the above check, the cifs demultiplex
140 * code already used the command code as a secondary
141 * check of the frame and if signing is negotiated the
142 * response would be discarded if the mid were the same
143 * but the signature was wrong. Since the mid is not put in the
144 * pending queue until later (when it is about to be dispatched)
145 * we do have to limit the number of outstanding requests
146 * to somewhat less than 64K-1 although it is hard to imagine
147 * so many threads being in the vfs at one time.
148 */
149static __u64
150cifs_get_next_mid(struct TCP_Server_Info *server)
151{
152 __u64 mid = 0;
153 __u16 last_mid, cur_mid;
154 bool collision;
155
156 spin_lock(&GlobalMid_Lock);
157
158 /* mid is 16 bit only for CIFS/SMB */
159 cur_mid = (__u16)((server->CurrentMid) & 0xffff);
160 /* we do not want to loop forever */
161 last_mid = cur_mid;
162 cur_mid++;
163
164 /*
165 * This nested loop looks more expensive than it is.
166 * In practice the list of pending requests is short,
167 * fewer than 50, and the mids are likely to be unique
168 * on the first pass through the loop unless some request
169 * takes longer than the 64 thousand requests before it
170 * (and it would also have to have been a request that
171 * did not time out).
172 */
173 while (cur_mid != last_mid) {
174 struct mid_q_entry *mid_entry;
175 unsigned int num_mids;
176
177 collision = false;
178 if (cur_mid == 0)
179 cur_mid++;
180
181 num_mids = 0;
182 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
183 ++num_mids;
184 if (mid_entry->mid == cur_mid &&
185 mid_entry->mid_state == MID_REQUEST_SUBMITTED) {
186 /* This mid is in use, try a different one */
187 collision = true;
188 break;
189 }
190 }
191
192 /*
193 * if we have more than 32k mids in the list, then something
194 * is very wrong. Possibly a local user is trying to DoS the
195 * box by issuing long-running calls and SIGKILL'ing them. If
196 * we get to 2^16 mids then we're in big trouble as this
197 * function could loop forever.
198 *
199 * Go ahead and assign out the mid in this situation, but force
200 * an eventual reconnect to clean out the pending_mid_q.
201 */
202 if (num_mids > 32768)
203 server->tcpStatus = CifsNeedReconnect;
204
205 if (!collision) {
206 mid = (__u64)cur_mid;
207 server->CurrentMid = mid;
208 break;
209 }
210 cur_mid++;
211 }
212 spin_unlock(&GlobalMid_Lock);
213 return mid;
214}
215
128struct smb_version_operations smb1_operations = { 216struct smb_version_operations smb1_operations = {
129 .send_cancel = send_nt_cancel, 217 .send_cancel = send_nt_cancel,
130 .compare_fids = cifs_compare_fids, 218 .compare_fids = cifs_compare_fids,
@@ -133,6 +221,7 @@ struct smb_version_operations smb1_operations = {
133 .add_credits = cifs_add_credits, 221 .add_credits = cifs_add_credits,
134 .set_credits = cifs_set_credits, 222 .set_credits = cifs_set_credits,
135 .get_credits_field = cifs_get_credits_field, 223 .get_credits_field = cifs_get_credits_field,
224 .get_next_mid = cifs_get_next_mid,
136 .read_data_offset = cifs_read_data_offset, 225 .read_data_offset = cifs_read_data_offset,
137 .read_data_length = cifs_read_data_length, 226 .read_data_length = cifs_read_data_length,
138 .map_error = map_smb_to_linux_error, 227 .map_error = map_smb_to_linux_error,
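
The mid allocator that moves from misc.c into smb1ops.c above scans the 16-bit mid space for a value not already tied to a pending request, starting just after the last mid handed out and giving up after a full lap. A compact, standalone sketch of the same wrap-around search follows; the in_use[] bitmap is an assumption for the demo, standing in for the kernel's walk of pending_mid_q under GlobalMid_Lock.

/* Sketch: wrap-around 16-bit id allocation that skips 0 and busy ids. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool in_use[65536];		/* demo stand-in for pending_mid_q */
static uint16_t current_mid;

static uint16_t get_next_mid(void)
{
	uint16_t last = current_mid;	/* remember where we started */
	uint16_t cur = (uint16_t)(current_mid + 1);

	while (cur != last) {
		if (cur == 0) {		/* mid 0 is never handed out */
			cur++;
			continue;
		}
		if (!in_use[cur]) {
			current_mid = cur;
			in_use[cur] = true;
			return cur;
		}
		cur++;			/* uint16_t wraps naturally at 65535 */
	}
	return 0;			/* exhausted: no free mid */
}

int main(void)
{
	in_use[1] = true;		/* pretend mid 1 is still pending */
	printf("next mid: %u\n", get_next_mid());	/* prints 2 */
	printf("next mid: %u\n", get_next_mid());	/* prints 3 */
	return 0;
}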
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1b36ffe6a47b..3097ee58fd7d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -779,7 +779,7 @@ send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
 
 	pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
 	pSMB->Timeout = 0;
-	pSMB->hdr.Mid = GetNextMid(ses->server);
+	pSMB->hdr.Mid = get_next_mid(ses->server);
 
 	return SendReceive(xid, ses, in_buf, out_buf,
 			&bytes_returned, 0);
diff --git a/fs/dcache.c b/fs/dcache.c
index 85c9e2bff8e6..40469044088d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -683,6 +683,8 @@ EXPORT_SYMBOL(dget_parent);
 /**
  * d_find_alias - grab a hashed alias of inode
  * @inode: inode in question
+ * @want_discon:  flag, used by d_splice_alias, to request
+ *          that only a DISCONNECTED alias be returned.
  *
  * If inode has a hashed alias, or is a directory and has any alias,
  * acquire the reference to alias and return it. Otherwise return NULL.
@@ -691,9 +693,10 @@ EXPORT_SYMBOL(dget_parent);
  * of a filesystem.
  *
  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
- * any other hashed alias over that.
+ * any other hashed alias over that one unless @want_discon is set,
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
  */
-static struct dentry *__d_find_alias(struct inode *inode)
+static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 {
 	struct dentry *alias, *discon_alias;
 
@@ -705,7 +708,7 @@ again:
 		if (IS_ROOT(alias) &&
 		    (alias->d_flags & DCACHE_DISCONNECTED)) {
 			discon_alias = alias;
-		} else {
+		} else if (!want_discon) {
 			__dget_dlock(alias);
 			spin_unlock(&alias->d_lock);
 			return alias;
@@ -736,7 +739,7 @@ struct dentry *d_find_alias(struct inode *inode)
 
 	if (!list_empty(&inode->i_dentry)) {
 		spin_lock(&inode->i_lock);
-		de = __d_find_alias(inode);
+		de = __d_find_alias(inode, 0);
 		spin_unlock(&inode->i_lock);
 	}
 	return de;
@@ -1647,8 +1650,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 
 	if (inode && S_ISDIR(inode->i_mode)) {
 		spin_lock(&inode->i_lock);
-		new = __d_find_any_alias(inode);
+		new = __d_find_alias(inode, 1);
 		if (new) {
+			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
 			spin_unlock(&inode->i_lock);
 			security_d_instantiate(new, inode);
 			d_move(new, dentry);
@@ -2478,7 +2482,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 	struct dentry *alias;
 
 	/* Does an aliased dentry already exist? */
-	alias = __d_find_alias(inode);
+	alias = __d_find_alias(inode, 0);
 	if (alias) {
 		actual = alias;
 		write_seqlock(&rename_lock);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 99b6324290db..cee7812cc3cf 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -90,8 +90,8 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb,
 	 * unusual file system layouts.
 	 */
 	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
-		block_cluster = EXT4_B2C(sbi, (start -
-					      ext4_block_bitmap(sb, gdp)));
+		block_cluster = EXT4_B2C(sbi,
+					 ext4_block_bitmap(sb, gdp) - start);
 		if (block_cluster < num_clusters)
 			block_cluster = -1;
 		else if (block_cluster == num_clusters) {
@@ -102,7 +102,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb,
 
 	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
 		inode_cluster = EXT4_B2C(sbi,
					 ext4_inode_bitmap(sb, gdp) - start);
 		if (inode_cluster < num_clusters)
 			inode_cluster = -1;
 		else if (inode_cluster == num_clusters) {
@@ -114,7 +114,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb,
 	itbl_blk = ext4_inode_table(sb, gdp);
 	for (i = 0; i < sbi->s_itb_per_group; i++) {
 		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
-			c = EXT4_B2C(sbi, start - itbl_blk + i);
+			c = EXT4_B2C(sbi, itbl_blk + i - start);
 			if ((c < num_clusters) || (c == inode_cluster) ||
 			    (c == block_cluster) || (c == itbl_cluster))
 				continue;
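
The balloc fix above swaps the operands fed to EXT4_B2C(): the cluster index has to come from (block - group_start), and with unsigned block numbers the reversed subtraction silently wraps to an enormous value instead of going negative. A tiny standalone demo of that failure mode follows; the block numbers and cluster size are invented, and B2C() just mirrors the "shift right by cluster bits" conversion.

/* Demo: why the operand order in a block-to-cluster conversion matters. */
#include <inttypes.h>
#include <stdio.h>

#define CLUSTER_BITS 4			/* 16 blocks per cluster, demo value */
#define B2C(blk) ((blk) >> CLUSTER_BITS)

int main(void)
{
	uint64_t start = 32768;		/* first block of the group */
	uint64_t bitmap_blk = 32771;	/* block bitmap lives 3 blocks in */

	printf("correct:  cluster %" PRIu64 "\n", B2C(bitmap_blk - start));
	printf("reversed: cluster %" PRIu64 "\n", B2C(start - bitmap_blk));
	return 0;
}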
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 8ad112ae0ade..e34deac3f366 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -123,7 +123,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		else
 			ext4_clear_inode_flag(inode, i);
 	}
-	ei->i_flags = flags;
 
 	ext4_set_inode_flags(inode);
 	inode->i_ctime = ext4_current_time(inode);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 42593c587d48..03ff5b1eba93 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -75,19 +75,13 @@ static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
 				   unsigned global_limit)
 {
 	unsigned long t;
-	char tmp[32];
 	unsigned limit = (1 << 16) - 1;
 	int err;
 
-	if (*ppos || count >= sizeof(tmp) - 1)
-		return -EINVAL;
-
-	if (copy_from_user(tmp, buf, count))
+	if (*ppos)
 		return -EINVAL;
 
-	tmp[count] = '\0';
-
-	err = strict_strtoul(tmp, 0, &t);
+	err = kstrtoul_from_user(buf, count, 0, &t);
 	if (err)
 		return err;
 
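
The fuse control-file change above collapses a copy_from_user() plus strict_strtoul() pair into a single kstrtoul_from_user() call. A userspace analogue of the strict parse it relies on is sketched below: reject empty input, trailing junk and out-of-range values rather than accepting them silently. Names here are made up for the demo.

/* Sketch: strict string-to-unsigned-long parsing with strtoul. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_ulong(const char *s, unsigned long *out)
{
	char *end;

	errno = 0;
	*out = strtoul(s, &end, 0);
	if (errno == ERANGE)
		return -ERANGE;			/* value did not fit */
	if (end == s || (*end != '\0' && *end != '\n'))
		return -EINVAL;			/* empty or trailing garbage */
	return 0;
}

int main(void)
{
	unsigned long val = 0;

	printf("\"42\\n\"  -> %d (val %lu)\n", parse_ulong("42\n", &val), val);
	printf("\"42abc\" -> %d\n", parse_ulong("42abc", &val));
	return 0;
}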
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df5ac048dc74..334e0b18a014 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -775,6 +775,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 			  struct kstat *stat)
 {
+	unsigned int blkbits;
+
 	stat->dev = inode->i_sb->s_dev;
 	stat->ino = attr->ino;
 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -790,7 +792,13 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 	stat->ctime.tv_nsec = attr->ctimensec;
 	stat->size = attr->size;
 	stat->blocks = attr->blocks;
-	stat->blksize = (1 << inode->i_blkbits);
+
+	if (attr->blksize != 0)
+		blkbits = ilog2(attr->blksize);
+	else
+		blkbits = inode->i_sb->s_blocksize_bits;
+
+	stat->blksize = 1 << blkbits;
 }
 
 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
@@ -863,6 +871,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 		if (stat) {
 			generic_fillattr(inode, stat);
 			stat->mode = fi->orig_i_mode;
+			stat->ino = fi->orig_ino;
 		}
 	}
 
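
The fuse_fillattr() change above derives stat->blksize from the server-supplied attr->blksize when present, falling back to the superblock's block-size bits otherwise. A standalone sketch of that selection logic follows; my_ilog2() is a local helper approximating the kernel's ilog2() for the demo.

/* Sketch: pick a stat blksize from a server hint with a superblock fallback. */
#include <stdio.h>

static unsigned int my_ilog2(unsigned int n)
{
	unsigned int bits = 0;

	while (n >>= 1)			/* floor(log2(n)) for n > 0 */
		bits++;
	return bits;
}

static unsigned int pick_blksize(unsigned int attr_blksize,
				 unsigned int sb_blocksize_bits)
{
	unsigned int blkbits;

	if (attr_blksize != 0)
		blkbits = my_ilog2(attr_blksize);
	else
		blkbits = sb_blocksize_bits;
	return 1U << blkbits;
}

int main(void)
{
	printf("server says 4096 -> %u\n", pick_blksize(4096, 12));
	printf("server says 0    -> %u (fallback)\n", pick_blksize(0, 12));
	return 0;
}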
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9562109d3a87..b321a688cde7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2173,6 +2173,44 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2173 return ret; 2173 return ret;
2174} 2174}
2175 2175
2176long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2177 loff_t length)
2178{
2179 struct fuse_file *ff = file->private_data;
2180 struct fuse_conn *fc = ff->fc;
2181 struct fuse_req *req;
2182 struct fuse_fallocate_in inarg = {
2183 .fh = ff->fh,
2184 .offset = offset,
2185 .length = length,
2186 .mode = mode
2187 };
2188 int err;
2189
2190 if (fc->no_fallocate)
2191 return -EOPNOTSUPP;
2192
2193 req = fuse_get_req(fc);
2194 if (IS_ERR(req))
2195 return PTR_ERR(req);
2196
2197 req->in.h.opcode = FUSE_FALLOCATE;
2198 req->in.h.nodeid = ff->nodeid;
2199 req->in.numargs = 1;
2200 req->in.args[0].size = sizeof(inarg);
2201 req->in.args[0].value = &inarg;
2202 fuse_request_send(fc, req);
2203 err = req->out.h.error;
2204 if (err == -ENOSYS) {
2205 fc->no_fallocate = 1;
2206 err = -EOPNOTSUPP;
2207 }
2208 fuse_put_request(fc, req);
2209
2210 return err;
2211}
2212EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2213
2176static const struct file_operations fuse_file_operations = { 2214static const struct file_operations fuse_file_operations = {
2177 .llseek = fuse_file_llseek, 2215 .llseek = fuse_file_llseek,
2178 .read = do_sync_read, 2216 .read = do_sync_read,
@@ -2190,6 +2228,7 @@ static const struct file_operations fuse_file_operations = {
2190 .unlocked_ioctl = fuse_file_ioctl, 2228 .unlocked_ioctl = fuse_file_ioctl,
2191 .compat_ioctl = fuse_file_compat_ioctl, 2229 .compat_ioctl = fuse_file_compat_ioctl,
2192 .poll = fuse_file_poll, 2230 .poll = fuse_file_poll,
2231 .fallocate = fuse_file_fallocate,
2193}; 2232};
2194 2233
2195static const struct file_operations fuse_direct_io_file_operations = { 2234static const struct file_operations fuse_direct_io_file_operations = {
@@ -2206,6 +2245,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
2206 .unlocked_ioctl = fuse_file_ioctl, 2245 .unlocked_ioctl = fuse_file_ioctl,
2207 .compat_ioctl = fuse_file_compat_ioctl, 2246 .compat_ioctl = fuse_file_compat_ioctl,
2208 .poll = fuse_file_poll, 2247 .poll = fuse_file_poll,
2248 .fallocate = fuse_file_fallocate,
2209 /* no splice_read */ 2249 /* no splice_read */
2210}; 2250};
2211 2251
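
The new .fallocate hook wired up above is what ends up servicing an ordinary fallocate(2) call from userspace; servers that predate FUSE_FALLOCATE make the kernel report EOPNOTSUPP, so callers should be ready to fall back. A minimal userspace sketch, with a placeholder path invented for the demo:

/* Sketch: preallocating space with fallocate(2) and handling EOPNOTSUPP. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/fallocate-demo", O_RDWR | O_CREAT, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Preallocate 1 MiB starting at offset 0 (mode 0 = default allocation). */
	if (fallocate(fd, 0, 0, 1 << 20) != 0) {
		if (errno == EOPNOTSUPP)
			fprintf(stderr, "filesystem does not support fallocate\n");
		else
			perror("fallocate");
	}

	close(fd);
	return 0;
}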
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 572cefc78012..771fb6322c07 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -82,6 +82,9 @@ struct fuse_inode {
 	    preserve the original mode */
 	umode_t orig_i_mode;
 
+	/** 64 bit inode number */
+	u64 orig_ino;
+
 	/** Version of last attribute change */
 	u64 attr_version;
 
@@ -478,6 +481,9 @@ struct fuse_conn {
 	/** Are BSD file locking primitives not implemented by fs? */
 	unsigned no_flock:1;
 
+	/** Is fallocate not implemented by fs? */
+	unsigned no_fallocate:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 42678a33b7bb..1cd61652018c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi->nlookup = 0;
 	fi->attr_version = 0;
 	fi->writectr = 0;
+	fi->orig_ino = 0;
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
 	INIT_LIST_HEAD(&fi->writepages);
@@ -139,6 +140,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+	ino_t ino = (ino_t) ino64;
+	if (sizeof(ino_t) < sizeof(u64))
+		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+	return ino;
+}
+
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 				   u64 attr_valid)
 {
@@ -148,7 +161,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 	fi->attr_version = ++fc->attr_version;
 	fi->i_time = attr_valid;
 
-	inode->i_ino = attr->ino;
+	inode->i_ino = fuse_squash_ino(attr->ino);
 	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	set_nlink(inode, attr->nlink);
 	inode->i_uid = attr->uid;
@@ -174,6 +187,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 	fi->orig_i_mode = inode->i_mode;
 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
 		inode->i_mode &= ~S_ISVTX;
+
+	fi->orig_ino = attr->ino;
 }
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
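
The fuse_squash_ino() helper added above folds a 64-bit inode number into a narrower ino_t by XOR-ing the high half onto the low half, so 32-bit userspace still sees stable, well-spread inode numbers. A standalone version of the same fold is sketched below, with an explicit 32-bit target type and an invented sample value for the demo.

/* Sketch: squash a 64-bit inode number into 32 bits by folding the high half. */
#include <inttypes.h>
#include <stdio.h>

static uint32_t squash_ino(uint64_t ino64)
{
	uint32_t ino = (uint32_t)ino64;

	/* Mirrors: ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8 */
	if (sizeof(uint32_t) < sizeof(uint64_t))
		ino ^= (uint32_t)(ino64 >> 32);
	return ino;
}

int main(void)
{
	uint64_t sample = 0x123456789abcdef0ULL;	/* invented inode number */

	printf("64-bit ino 0x%" PRIx64 " -> 32-bit 0x%" PRIx32 "\n",
	       sample, squash_ino(sample));
	return 0;
}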
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 616f41a7cde6..437195f204e1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1803,7 +1803,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
-		unsigned i_mode, f_mode = file->f_mode;
+		unsigned f_mode = file->f_mode;
 
 		rcu_read_unlock();
 		put_files_struct(files);
@@ -1819,12 +1819,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 			inode->i_gid = GLOBAL_ROOT_GID;
 		}
 
-		i_mode = S_IFLNK;
-		if (f_mode & FMODE_READ)
-			i_mode |= S_IRUSR | S_IXUSR;
-		if (f_mode & FMODE_WRITE)
-			i_mode |= S_IWUSR | S_IXUSR;
-		inode->i_mode = i_mode;
+		if (S_ISLNK(inode->i_mode)) {
+			unsigned i_mode = S_IFLNK;
+			if (f_mode & FMODE_READ)
+				i_mode |= S_IRUSR | S_IXUSR;
+			if (f_mode & FMODE_WRITE)
+				i_mode |= S_IWUSR | S_IXUSR;
+			inode->i_mode = i_mode;
+		}
 
 		security_task_to_inode(task, inode);
 		put_task_struct(task);
@@ -1859,6 +1861,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
 	ei = PROC_I(inode);
 	ei->fd = fd;
 
+	inode->i_mode = S_IFLNK;
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
 	ei->op.proc_get_link = proc_fd_link;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 685a83756b2b..84a7e6f3c046 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2918,6 +2918,9 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
 	struct dentry *dent;
 	struct ubifs_debug_info *d = c->dbg;
 
+	if (!IS_ENABLED(DEBUG_FS))
+		return 0;
+
 	n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
 		     c->vi.ubi_num, c->vi.vol_id);
 	if (n == UBIFS_DFS_DIR_LEN) {
@@ -3010,7 +3013,8 @@ out:
  */
 void dbg_debugfs_exit_fs(struct ubifs_info *c)
 {
-	debugfs_remove_recursive(c->dbg->dfs_dir);
+	if (IS_ENABLED(DEBUG_FS))
+		debugfs_remove_recursive(c->dbg->dfs_dir);
 }
 
 struct ubifs_global_debug_info ubifs_dbg;
@@ -3095,6 +3099,9 @@ int dbg_debugfs_init(void)
 	const char *fname;
 	struct dentry *dent;
 
+	if (!IS_ENABLED(DEBUG_FS))
+		return 0;
+
 	fname = "ubifs";
 	dent = debugfs_create_dir(fname, NULL);
 	if (IS_ERR_OR_NULL(dent))
@@ -3159,7 +3166,8 @@ out:
  */
 void dbg_debugfs_exit(void)
 {
-	debugfs_remove_recursive(dfs_rootdir);
+	if (IS_ENABLED(DEBUG_FS))
+		debugfs_remove_recursive(dfs_rootdir);
 }
 
 /**
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index b0d62820ada1..9e6e1c6eb60a 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -440,8 +440,8 @@ static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
 
 #else	/* CONFIG_ACPI */
 
-static int register_acpi_bus_type(struct acpi_bus_type *bus) { return 0; }
-static int unregister_acpi_bus_type(struct acpi_bus_type *bus) { return 0; }
+static inline int register_acpi_bus_type(void *bus) { return 0; }
+static inline int unregister_acpi_bus_type(void *bus) { return 0; }
 
 #endif	/* CONFIG_ACPI */
 
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 58d0bdab68dd..81368ab6c611 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -181,6 +181,7 @@
181 {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 181 {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
182 {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 182 {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
183 {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 183 {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
184 {0x1002, 0x674A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
184 {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 185 {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
185 {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 186 {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
186 {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 187 {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
@@ -198,6 +199,7 @@
198 {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 199 {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
199 {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 200 {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
200 {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 201 {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
202 {0x1002, 0x6771, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
201 {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 203 {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
202 {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 204 {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
203 {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 205 {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
@@ -229,10 +231,11 @@
229 {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 231 {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
230 {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 232 {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
231 {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 233 {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
234 {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
232 {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 235 {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
233 {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 236 {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
234 {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 237 {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
235 {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 238 {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
236 {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 239 {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
237 {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 240 {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
238 {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 241 {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
@@ -531,6 +534,7 @@
531 {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 534 {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
532 {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ 535 {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
533 {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ 536 {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
537 {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
534 {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 538 {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
535 {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 539 {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
536 {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 540 {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
@@ -550,6 +554,7 @@
550 {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 554 {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
551 {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 555 {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
552 {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 556 {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
557 {0x1002, 0x980A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
553 {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 558 {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
554 {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 559 {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
555 {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 560 {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
@@ -561,11 +566,19 @@
561 {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 566 {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
562 {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 567 {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
563 {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 568 {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
569 {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
570 {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
571 {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
572 {0x1002, 0x9918, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
573 {0x1002, 0x9919, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
564 {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 574 {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
565 {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 575 {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
566 {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 576 {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
567 {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 577 {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
568 {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 578 {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
579 {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
580 {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
581 {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
569 {0, 0, 0} 582 {0, 0, 0}
570 583
571#define r128_PCI_IDS \ 584#define r128_PCI_IDS \
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index b6d7ce92eadd..68733587e700 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -64,6 +64,7 @@ struct drm_exynos_gem_map_off {
64 * A structure for mapping buffer. 64 * A structure for mapping buffer.
65 * 65 *
66 * @handle: a handle to gem object created. 66 * @handle: a handle to gem object created.
67 * @pad: just padding to be 64-bit aligned.
67 * @size: memory size to be mapped. 68 * @size: memory size to be mapped.
68 * @mapped: having user virtual address mmaped. 69 * @mapped: having user virtual address mmaped.
69 * - this variable would be filled by exynos gem module 70 * - this variable would be filled by exynos gem module
@@ -72,7 +73,8 @@ struct drm_exynos_gem_map_off {
72 */ 73 */
73struct drm_exynos_gem_mmap { 74struct drm_exynos_gem_mmap {
74 unsigned int handle; 75 unsigned int handle;
75 unsigned int size; 76 unsigned int pad;
77 uint64_t size;
76 uint64_t mapped; 78 uint64_t mapped;
77}; 79};
78 80
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 81e803e90aa4..acba894374a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch,
132 struct clock_event_device *evt); 132 struct clock_event_device *evt);
133extern void clockevents_register_device(struct clock_event_device *dev); 133extern void clockevents_register_device(struct clock_event_device *dev);
134 134
135extern void clockevents_config(struct clock_event_device *dev, u32 freq);
135extern void clockevents_config_and_register(struct clock_event_device *dev, 136extern void clockevents_config_and_register(struct clock_event_device *dev,
136 u32 freq, unsigned long min_delta, 137 u32 freq, unsigned long min_delta,
137 unsigned long max_delta); 138 unsigned long max_delta);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e988037abd2a..51a90b7f2d60 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,8 +1,6 @@
1#ifndef _LINUX_COMPACTION_H 1#ifndef _LINUX_COMPACTION_H
2#define _LINUX_COMPACTION_H 2#define _LINUX_COMPACTION_H
3 3
4#include <linux/node.h>
5
6/* Return values for compact_zone() and try_to_compact_pages() */ 4/* Return values for compact_zone() and try_to_compact_pages() */
7/* compaction didn't start as it was not possible or direct reclaim was more suitable */ 5/* compaction didn't start as it was not possible or direct reclaim was more suitable */
8#define COMPACT_SKIPPED 0 6#define COMPACT_SKIPPED 0
@@ -13,23 +11,6 @@
13/* The full zone was compacted */ 11/* The full zone was compacted */
14#define COMPACT_COMPLETE 3 12#define COMPACT_COMPLETE 3
15 13
16/*
17 * compaction supports three modes
18 *
19 * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
20 * MIGRATE_MOVABLE pageblocks as migration sources and targets.
21 * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
22 * MIGRATE_MOVABLE pageblocks as migration sources.
23 * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
24 * targets and convers them to MIGRATE_MOVABLE if possible
25 * COMPACT_SYNC uses synchronous migration and scans all pageblocks
26 */
27enum compact_mode {
28 COMPACT_ASYNC_MOVABLE,
29 COMPACT_ASYNC_UNMOVABLE,
30 COMPACT_SYNC,
31};
32
33#ifdef CONFIG_COMPACTION 14#ifdef CONFIG_COMPACTION
34extern int sysctl_compact_memory; 15extern int sysctl_compact_memory;
35extern int sysctl_compaction_handler(struct ctl_table *table, int write, 16extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
new file mode 100644
index 000000000000..0e4e2eec5c1d
--- /dev/null
+++ b/include/linux/frontswap.h
@@ -0,0 +1,127 @@
1#ifndef _LINUX_FRONTSWAP_H
2#define _LINUX_FRONTSWAP_H
3
4#include <linux/swap.h>
5#include <linux/mm.h>
6#include <linux/bitops.h>
7
8struct frontswap_ops {
9 void (*init)(unsigned);
10 int (*store)(unsigned, pgoff_t, struct page *);
11 int (*load)(unsigned, pgoff_t, struct page *);
12 void (*invalidate_page)(unsigned, pgoff_t);
13 void (*invalidate_area)(unsigned);
14};
15
16extern bool frontswap_enabled;
17extern struct frontswap_ops
18 frontswap_register_ops(struct frontswap_ops *ops);
19extern void frontswap_shrink(unsigned long);
20extern unsigned long frontswap_curr_pages(void);
21extern void frontswap_writethrough(bool);
22
23extern void __frontswap_init(unsigned type);
24extern int __frontswap_store(struct page *page);
25extern int __frontswap_load(struct page *page);
26extern void __frontswap_invalidate_page(unsigned, pgoff_t);
27extern void __frontswap_invalidate_area(unsigned);
28
29#ifdef CONFIG_FRONTSWAP
30
31static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
32{
33 bool ret = false;
34
35 if (frontswap_enabled && sis->frontswap_map)
36 ret = test_bit(offset, sis->frontswap_map);
37 return ret;
38}
39
40static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
41{
42 if (frontswap_enabled && sis->frontswap_map)
43 set_bit(offset, sis->frontswap_map);
44}
45
46static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
47{
48 if (frontswap_enabled && sis->frontswap_map)
49 clear_bit(offset, sis->frontswap_map);
50}
51
52static inline void frontswap_map_set(struct swap_info_struct *p,
53 unsigned long *map)
54{
55 p->frontswap_map = map;
56}
57
58static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
59{
60 return p->frontswap_map;
61}
62#else
63/* all inline routines become no-ops and all externs are ignored */
64
65#define frontswap_enabled (0)
66
67static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
68{
69 return false;
70}
71
72static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
73{
74}
75
76static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
77{
78}
79
80static inline void frontswap_map_set(struct swap_info_struct *p,
81 unsigned long *map)
82{
83}
84
85static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
86{
87 return NULL;
88}
89#endif
90
91static inline int frontswap_store(struct page *page)
92{
93 int ret = -1;
94
95 if (frontswap_enabled)
96 ret = __frontswap_store(page);
97 return ret;
98}
99
100static inline int frontswap_load(struct page *page)
101{
102 int ret = -1;
103
104 if (frontswap_enabled)
105 ret = __frontswap_load(page);
106 return ret;
107}
108
109static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
110{
111 if (frontswap_enabled)
112 __frontswap_invalidate_page(type, offset);
113}
114
115static inline void frontswap_invalidate_area(unsigned type)
116{
117 if (frontswap_enabled)
118 __frontswap_invalidate_area(type);
119}
120
121static inline void frontswap_init(unsigned type)
122{
123 if (frontswap_enabled)
124 __frontswap_init(type);
125}
126
127#endif /* _LINUX_FRONTSWAP_H */
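
The new frontswap.h above defines the whole backend contract: a backend fills in struct frontswap_ops and hands it to frontswap_register_ops(), which returns the previously registered ops by value. A minimal registration sketch follows; the "example" backend and its no-op callbacks are hypothetical, only the ops structure and the registration call come from the header.

#include <linux/module.h>
#include <linux/frontswap.h>

static void example_init(unsigned type)
{
	/* allocate per-swap-area state for swap device 'type' */
}

static int example_store(unsigned type, pgoff_t offset, struct page *page)
{
	/* keep a copy of the page; return 0 on success, -1 to refuse it */
	return -1;
}

static int example_load(unsigned type, pgoff_t offset, struct page *page)
{
	/* fill 'page' from the stored copy; -1 means "not present" */
	return -1;
}

static void example_invalidate_page(unsigned type, pgoff_t offset)
{
	/* drop a single stored page */
}

static void example_invalidate_area(unsigned type)
{
	/* drop everything stored for swap device 'type' */
}

static struct frontswap_ops example_ops = {
	.init			= example_init,
	.store			= example_store,
	.load			= example_load,
	.invalidate_page	= example_invalidate_page,
	.invalidate_area	= example_invalidate_area,
};

static int __init example_frontswap_start(void)
{
	/* the old ops come back by value, in case a backend wants to chain */
	struct frontswap_ops old = frontswap_register_ops(&example_ops);

	(void)old;
	return 0;
}
module_init(example_frontswap_start);
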
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51978ed43e97..17fd887c798f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -802,13 +802,14 @@ struct inode {
802 unsigned int __i_nlink; 802 unsigned int __i_nlink;
803 }; 803 };
804 dev_t i_rdev; 804 dev_t i_rdev;
805 loff_t i_size;
805 struct timespec i_atime; 806 struct timespec i_atime;
806 struct timespec i_mtime; 807 struct timespec i_mtime;
807 struct timespec i_ctime; 808 struct timespec i_ctime;
808 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 809 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
809 unsigned short i_bytes; 810 unsigned short i_bytes;
811 unsigned int i_blkbits;
810 blkcnt_t i_blocks; 812 blkcnt_t i_blocks;
811 loff_t i_size;
812 813
813#ifdef __NEED_I_SIZE_ORDERED 814#ifdef __NEED_I_SIZE_ORDERED
814 seqcount_t i_size_seqcount; 815 seqcount_t i_size_seqcount;
@@ -828,9 +829,8 @@ struct inode {
828 struct list_head i_dentry; 829 struct list_head i_dentry;
829 struct rcu_head i_rcu; 830 struct rcu_head i_rcu;
830 }; 831 };
831 atomic_t i_count;
832 unsigned int i_blkbits;
833 u64 i_version; 832 u64 i_version;
833 atomic_t i_count;
834 atomic_t i_dio_count; 834 atomic_t i_dio_count;
835 atomic_t i_writecount; 835 atomic_t i_writecount;
836 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 836 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8f2ab8fef929..9303348965fb 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -54,6 +54,9 @@
54 * 7.18 54 * 7.18
55 * - add FUSE_IOCTL_DIR flag 55 * - add FUSE_IOCTL_DIR flag
56 * - add FUSE_NOTIFY_DELETE 56 * - add FUSE_NOTIFY_DELETE
57 *
58 * 7.19
59 * - add FUSE_FALLOCATE
57 */ 60 */
58 61
59#ifndef _LINUX_FUSE_H 62#ifndef _LINUX_FUSE_H
@@ -85,7 +88,7 @@
85#define FUSE_KERNEL_VERSION 7 88#define FUSE_KERNEL_VERSION 7
86 89
87/** Minor version number of this interface */ 90/** Minor version number of this interface */
88#define FUSE_KERNEL_MINOR_VERSION 18 91#define FUSE_KERNEL_MINOR_VERSION 19
89 92
90/** The node ID of the root inode */ 93/** The node ID of the root inode */
91#define FUSE_ROOT_ID 1 94#define FUSE_ROOT_ID 1
@@ -278,6 +281,7 @@ enum fuse_opcode {
278 FUSE_POLL = 40, 281 FUSE_POLL = 40,
279 FUSE_NOTIFY_REPLY = 41, 282 FUSE_NOTIFY_REPLY = 41,
280 FUSE_BATCH_FORGET = 42, 283 FUSE_BATCH_FORGET = 42,
284 FUSE_FALLOCATE = 43,
281 285
282 /* CUSE specific operations */ 286 /* CUSE specific operations */
283 CUSE_INIT = 4096, 287 CUSE_INIT = 4096,
@@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out {
571 __u64 kh; 575 __u64 kh;
572}; 576};
573 577
578struct fuse_fallocate_in {
579 __u64 fh;
580 __u64 offset;
581 __u64 length;
582 __u32 mode;
583 __u32 padding;
584};
585
574struct fuse_in_header { 586struct fuse_in_header {
575 __u32 len; 587 __u32 len;
576 __u32 opcode; 588 __u32 opcode;
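
FUSE_FALLOCATE requests arrive on /dev/fuse as a fuse_in_header followed by the new fuse_fallocate_in. A rough userspace-daemon sketch of the decode is below; handle_fallocate() is a hypothetical handler, and real servers normally let libfuse do this framing.

#include <linux/fuse.h>
#include <stdint.h>
#include <string.h>

/* hypothetical handler implemented elsewhere in the daemon */
extern void handle_fallocate(uint64_t fh, uint64_t offset,
			     uint64_t length, uint32_t mode);

/* 'buf' holds one complete request read() from /dev/fuse */
static void dispatch_request(const char *buf)
{
	struct fuse_in_header hdr;

	memcpy(&hdr, buf, sizeof(hdr));
	if (hdr.opcode == FUSE_FALLOCATE) {
		struct fuse_fallocate_in in;

		memcpy(&in, buf + sizeof(hdr), sizeof(in));
		/* in.fh, in.offset, in.length and in.mode describe the range */
		handle_fallocate(in.fh, in.offset, in.length, in.mode);
	}
}
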
diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h
new file mode 100644
index 000000000000..a65c86429e84
--- /dev/null
+++ b/include/linux/i2c-mux-pinctrl.h
@@ -0,0 +1,41 @@
1/*
2 * i2c-mux-pinctrl platform data
3 *
4 * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _LINUX_I2C_MUX_PINCTRL_H
20#define _LINUX_I2C_MUX_PINCTRL_H
21
22/**
23 * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl
24 * @parent_bus_num: Parent I2C bus number
25 * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic.
26 * @bus_count: Number of child busses. Also the number of elements in
27 * @pinctrl_states
28 * @pinctrl_states: The names of the pinctrl state to select for each child bus
29 * @pinctrl_state_idle: The pinctrl state to select when no child bus is being
30 * accessed. If NULL, the most recently used pinctrl state will be left
31 * selected.
32 */
33struct i2c_mux_pinctrl_platform_data {
34 int parent_bus_num;
35 int base_bus_num;
36 int bus_count;
37 const char **pinctrl_states;
38 const char *pinctrl_state_idle;
39};
40
41#endif
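
For non-DT boards this header is consumed as ordinary platform data. A sketch of what a board file might register is shown below; the bus numbers, pinctrl state names and the "i2c-mux-pinctrl" device name are illustrative assumptions, not taken from the header itself.

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/i2c-mux-pinctrl.h>

static const char *i2c_mux_states[] = { "mux-bus-0", "mux-bus-1" };

static struct i2c_mux_pinctrl_platform_data i2c_mux_pdata = {
	.parent_bus_num		= 0,
	.base_bus_num		= 0,	/* 0 = let the core pick child bus numbers */
	.bus_count		= ARRAY_SIZE(i2c_mux_states),
	.pinctrl_states		= i2c_mux_states,
	.pinctrl_state_idle	= "idle",	/* NULL keeps the last state selected */
};

static struct platform_device i2c_mux_device = {
	.name	= "i2c-mux-pinctrl",	/* assumed platform driver name */
	.id	= 0,
	.dev	= { .platform_data = &i2c_mux_pdata },
};
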
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e4baff5f7ff4..9e65eff6af3b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,6 +149,7 @@ extern struct cred init_cred;
149 .normal_prio = MAX_PRIO-20, \ 149 .normal_prio = MAX_PRIO-20, \
150 .policy = SCHED_NORMAL, \ 150 .policy = SCHED_NORMAL, \
151 .cpus_allowed = CPU_MASK_ALL, \ 151 .cpus_allowed = CPU_MASK_ALL, \
152 .nr_cpus_allowed= NR_CPUS, \
152 .mm = NULL, \ 153 .mm = NULL, \
153 .active_mm = &init_mm, \ 154 .active_mm = &init_mm, \
154 .se = { \ 155 .se = { \
@@ -157,7 +158,6 @@ extern struct cred init_cred;
157 .rt = { \ 158 .rt = { \
158 .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ 159 .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
159 .time_slice = RR_TIMESLICE, \ 160 .time_slice = RR_TIMESLICE, \
160 .nr_cpus_allowed = NR_CPUS, \
161 }, \ 161 }, \
162 .tasks = LIST_HEAD_INIT(tsk.tasks), \ 162 .tasks = LIST_HEAD_INIT(tsk.tasks), \
163 INIT_PUSHABLE_TASKS(tsk) \ 163 INIT_PUSHABLE_TASKS(tsk) \
diff --git a/include/linux/mfd/abx500/ab8500-codec.h b/include/linux/mfd/abx500/ab8500-codec.h
new file mode 100644
index 000000000000..dc6529202cdd
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-codec.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (C) ST-Ericsson SA 2012
3 *
4 * Author: Ola Lilja <ola.o.lilja@stericsson.com>
5 * for ST-Ericsson.
6 *
7 * License terms:
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as published
11 * by the Free Software Foundation.
12 */
13
14#ifndef AB8500_CORE_CODEC_H
15#define AB8500_CORE_CODEC_H
16
17/* Mic-types */
18enum amic_type {
19 AMIC_TYPE_SINGLE_ENDED,
20 AMIC_TYPE_DIFFERENTIAL
21};
22
23/* Mic-biases */
24enum amic_micbias {
25 AMIC_MICBIAS_VAMIC1,
26 AMIC_MICBIAS_VAMIC2
27};
28
29/* Bias-voltage */
30enum ear_cm_voltage {
31 EAR_CMV_0_95V,
32 EAR_CMV_1_10V,
33 EAR_CMV_1_27V,
34 EAR_CMV_1_58V
35};
36
37/* Analog microphone settings */
38struct amic_settings {
39 enum amic_type mic1_type;
40 enum amic_type mic2_type;
41 enum amic_micbias mic1a_micbias;
42 enum amic_micbias mic1b_micbias;
43 enum amic_micbias mic2_micbias;
44};
45
46/* Platform data structure for the audio-parts of the AB8500 */
47struct ab8500_codec_platform_data {
48 struct amic_settings amics;
49 enum ear_cm_voltage ear_cmv;
50};
51
52#endif
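
These enums and structs are plain platform data for the AB8500 audio parts; a board would fill them in roughly as in the sketch below (the chosen mic types and bias voltage are purely illustrative) and hook the result into the new .codec field of ab8500_platform_data further down in this diff.

#include <linux/mfd/abx500/ab8500-codec.h>

static struct ab8500_codec_platform_data ab8500_codec_pdata = {
	.amics = {
		.mic1_type	= AMIC_TYPE_DIFFERENTIAL,
		.mic2_type	= AMIC_TYPE_SINGLE_ENDED,
		.mic1a_micbias	= AMIC_MICBIAS_VAMIC1,
		.mic1b_micbias	= AMIC_MICBIAS_VAMIC1,
		.mic2_micbias	= AMIC_MICBIAS_VAMIC2,
	},
	.ear_cmv = EAR_CMV_0_95V,	/* illustrative common-mode voltage */
};
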
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 91dd3ef63e99..bc9b84b60ec6 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -266,6 +266,7 @@ struct ab8500 {
266struct regulator_reg_init; 266struct regulator_reg_init;
267struct regulator_init_data; 267struct regulator_init_data;
268struct ab8500_gpio_platform_data; 268struct ab8500_gpio_platform_data;
269struct ab8500_codec_platform_data;
269 270
270/** 271/**
271 * struct ab8500_platform_data - AB8500 platform data 272 * struct ab8500_platform_data - AB8500 platform data
@@ -284,6 +285,7 @@ struct ab8500_platform_data {
284 int num_regulator; 285 int num_regulator;
285 struct regulator_init_data *regulator; 286 struct regulator_init_data *regulator;
286 struct ab8500_gpio_platform_data *gpio; 287 struct ab8500_gpio_platform_data *gpio;
288 struct ab8500_codec_platform_data *codec;
287}; 289};
288 290
289extern int __devinit ab8500_init(struct ab8500 *ab8500, 291extern int __devinit ab8500_init(struct ab8500 *ab8500,
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 1b14d25162cb..d6a58065c09c 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -128,7 +128,7 @@ struct kparam_array
128 * The ops can have NULL set or get functions. 128 * The ops can have NULL set or get functions.
129 */ 129 */
130#define module_param_cb(name, ops, arg, perm) \ 130#define module_param_cb(name, ops, arg, perm) \
131 __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) 131 __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1)
132 132
133/** 133/**
134 * <level>_param_cb - general callback for a module/cmdline parameter 134 * <level>_param_cb - general callback for a module/cmdline parameter
@@ -192,7 +192,7 @@ struct kparam_array
192 { (void *)set, (void *)get }; \ 192 { (void *)set, (void *)get }; \
193 __module_param_call(MODULE_PARAM_PREFIX, \ 193 __module_param_call(MODULE_PARAM_PREFIX, \
194 name, &__param_ops_##name, arg, \ 194 name, &__param_ops_##name, arg, \
195 (perm) + sizeof(__check_old_set_param(set))*0, 0) 195 (perm) + sizeof(__check_old_set_param(set))*0, -1)
196 196
197/* We don't get oldget: it's often a new-style param_get_uint, etc. */ 197/* We don't get oldget: it's often a new-style param_get_uint, etc. */
198static inline int 198static inline int
@@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void)
272 */ 272 */
273#define core_param(name, var, type, perm) \ 273#define core_param(name, var, type, perm) \
274 param_check_##type(name, &(var)); \ 274 param_check_##type(name, &(var)); \
275 __module_param_call("", name, &param_ops_##type, &var, perm, 0) 275 __module_param_call("", name, &param_ops_##type, &var, perm, -1)
276#endif /* !MODULE */ 276#endif /* !MODULE */
277 277
278/** 278/**
@@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void)
290 = { len, string }; \ 290 = { len, string }; \
291 __module_param_call(MODULE_PARAM_PREFIX, name, \ 291 __module_param_call(MODULE_PARAM_PREFIX, name, \
292 &param_ops_string, \ 292 &param_ops_string, \
293 .str = &__param_string_##name, perm, 0); \ 293 .str = &__param_string_##name, perm, -1); \
294 __MODULE_PARM_TYPE(name, "string") 294 __MODULE_PARM_TYPE(name, "string")
295 295
296/** 296/**
@@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
432 __module_param_call(MODULE_PARAM_PREFIX, name, \ 432 __module_param_call(MODULE_PARAM_PREFIX, name, \
433 &param_array_ops, \ 433 &param_array_ops, \
434 .arr = &__param_arr_##name, \ 434 .arr = &__param_arr_##name, \
435 perm, 0); \ 435 perm, -1); \
436 __MODULE_PARM_TYPE(name, "array of " #type) 436 __MODULE_PARM_TYPE(name, "array of " #type)
437 437
438extern struct kernel_param_ops param_array_ops; 438extern struct kernel_param_ops param_array_ops;
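
The trailing argument of __module_param_call() is the initcall level; flipping it from 0 to -1 for ordinary parameters (together with the parse_args() change in init/main.c later in this diff) appears intended to keep plain parameters out of the per-level initcall passes. For context, the module_param_cb() interface touched here is typically used as in this sketch, with module and parameter names made up:

#include <linux/module.h>
#include <linux/moduleparam.h>

static int threshold = 10;

static int threshold_set(const char *val, const struct kernel_param *kp)
{
	/* validate or post-process before storing via the stock int handler */
	return param_set_int(val, kp);
}

static const struct kernel_param_ops threshold_ops = {
	.set = threshold_set,
	.get = param_get_int,
};

module_param_cb(threshold, &threshold_ops, &threshold, 0644);
MODULE_PARM_DESC(threshold, "example threshold");
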
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..45db49f64bb4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,6 +555,8 @@ enum perf_event_type {
555 PERF_RECORD_MAX, /* non-ABI */ 555 PERF_RECORD_MAX, /* non-ABI */
556}; 556};
557 557
558#define PERF_MAX_STACK_DEPTH 127
559
558enum perf_callchain_context { 560enum perf_callchain_context {
559 PERF_CONTEXT_HV = (__u64)-32, 561 PERF_CONTEXT_HV = (__u64)-32,
560 PERF_CONTEXT_KERNEL = (__u64)-128, 562 PERF_CONTEXT_KERNEL = (__u64)-128,
@@ -609,8 +611,6 @@ struct perf_guest_info_callbacks {
609#include <linux/sysfs.h> 611#include <linux/sysfs.h>
610#include <asm/local.h> 612#include <asm/local.h>
611 613
612#define PERF_MAX_STACK_DEPTH 255
613
614struct perf_callchain_entry { 614struct perf_callchain_entry {
615 __u64 nr; 615 __u64 nr;
616 __u64 ip[PERF_MAX_STACK_DEPTH]; 616 __u64 ip[PERF_MAX_STACK_DEPTH];
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 711e0a30aacc..3988012255dc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -127,8 +127,8 @@
127#define PR_SET_PTRACER 0x59616d61 127#define PR_SET_PTRACER 0x59616d61
128# define PR_SET_PTRACER_ANY ((unsigned long)-1) 128# define PR_SET_PTRACER_ANY ((unsigned long)-1)
129 129
130#define PR_SET_CHILD_SUBREAPER 36 130#define PR_SET_CHILD_SUBREAPER 36
131#define PR_GET_CHILD_SUBREAPER 37 131#define PR_GET_CHILD_SUBREAPER 37
132 132
133/* 133/*
134 * If no_new_privs is set, then operations that grant new privileges (i.e. 134 * If no_new_privs is set, then operations that grant new privileges (i.e.
@@ -142,7 +142,9 @@
142 * asking selinux for a specific new context (e.g. with runcon) will result 142 * asking selinux for a specific new context (e.g. with runcon) will result
143 * in execve returning -EPERM. 143 * in execve returning -EPERM.
144 */ 144 */
145#define PR_SET_NO_NEW_PRIVS 38 145#define PR_SET_NO_NEW_PRIVS 38
146#define PR_GET_NO_NEW_PRIVS 39 146#define PR_GET_NO_NEW_PRIVS 39
147
148#define PR_GET_TID_ADDRESS 40
147 149
148#endif /* _LINUX_PRCTL_H */ 150#endif /* _LINUX_PRCTL_H */
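
PR_GET_TID_ADDRESS reads back the clear_child_tid pointer previously set with set_tid_address(). From userspace the call would look roughly like the sketch below; the kernel stores the pointer through arg2, and the option appears to be intended for checkpoint/restore-enabled kernels, so treat the details as a best-effort reading of this hunk.

#include <sys/prctl.h>
#include <stdio.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40
#endif

int main(void)
{
	int *tid_addr = NULL;

	if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0) == 0)
		printf("clear_child_tid is at %p\n", (void *)tid_addr);
	else
		perror("PR_GET_TID_ADDRESS");
	return 0;
}
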
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0d04cd69ab9b..ffc444c38b0a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
368 iter->index++; 368 iter->index++;
369 if (likely(*slot)) 369 if (likely(*slot))
370 return slot; 370 return slot;
371 if (flags & RADIX_TREE_ITER_CONTIG) 371 if (flags & RADIX_TREE_ITER_CONTIG) {
372 /* forbid switching to the next chunk */
373 iter->next_index = 0;
372 break; 374 break;
375 }
373 } 376 }
374 } 377 }
375 return NULL; 378 return NULL;
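
The hunk above makes contiguous iteration stop for good at the first hole: zeroing iter->next_index keeps the iterator from wandering into the next chunk. A caller of the contiguous iterator looks roughly like the sketch below; it is written from memory of the 3.4-era iterator API, so the macro spelling should be double-checked against radix-tree.h.

#include <linux/radix-tree.h>

static void walk_contig(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	void **slot;

	rcu_read_lock();
	radix_tree_for_each_contig(slot, root, &iter, 0) {
		void *item = radix_tree_deref_slot(slot);

		/* iteration ends at the first missing index (RADIX_TREE_ITER_CONTIG) */
		if (!item)
			break;
	}
	rcu_read_unlock();
}
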
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f34437e835a7..4059c0f33f07 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
145 145
146 146
147extern void calc_global_load(unsigned long ticks); 147extern void calc_global_load(unsigned long ticks);
148extern void update_cpu_load_nohz(void);
148 149
149extern unsigned long get_parent_ip(unsigned long addr); 150extern unsigned long get_parent_ip(unsigned long addr);
150 151
@@ -438,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
438 /* leave room for more dump flags */ 439 /* leave room for more dump flags */
439#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ 440#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
440#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ 441#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
442#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
441 443
442#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) 444#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
443 445
@@ -875,6 +877,8 @@ struct sched_group_power {
875 * Number of busy cpus in this group. 877 * Number of busy cpus in this group.
876 */ 878 */
877 atomic_t nr_busy_cpus; 879 atomic_t nr_busy_cpus;
880
881 unsigned long cpumask[0]; /* iteration mask */
878}; 882};
879 883
880struct sched_group { 884struct sched_group {
@@ -899,6 +903,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
899 return to_cpumask(sg->cpumask); 903 return to_cpumask(sg->cpumask);
900} 904}
901 905
906/*
907 * cpumask masking which cpus in the group are allowed to iterate up the domain
908 * tree.
909 */
910static inline struct cpumask *sched_group_mask(struct sched_group *sg)
911{
912 return to_cpumask(sg->sgp->cpumask);
913}
914
902/** 915/**
903 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. 916 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
904 * @group: The group whose first cpu is to be returned. 917 * @group: The group whose first cpu is to be returned.
@@ -1187,7 +1200,6 @@ struct sched_rt_entity {
1187 struct list_head run_list; 1200 struct list_head run_list;
1188 unsigned long timeout; 1201 unsigned long timeout;
1189 unsigned int time_slice; 1202 unsigned int time_slice;
1190 int nr_cpus_allowed;
1191 1203
1192 struct sched_rt_entity *back; 1204 struct sched_rt_entity *back;
1193#ifdef CONFIG_RT_GROUP_SCHED 1205#ifdef CONFIG_RT_GROUP_SCHED
@@ -1252,6 +1264,7 @@ struct task_struct {
1252#endif 1264#endif
1253 1265
1254 unsigned int policy; 1266 unsigned int policy;
1267 int nr_cpus_allowed;
1255 cpumask_t cpus_allowed; 1268 cpumask_t cpus_allowed;
1256 1269
1257#ifdef CONFIG_PREEMPT_RCU 1270#ifdef CONFIG_PREEMPT_RCU
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b6661933e252..c84ec68eaec9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -197,6 +197,10 @@ struct swap_info_struct {
197 struct block_device *bdev; /* swap device or bdev of swap file */ 197 struct block_device *bdev; /* swap device or bdev of swap file */
198 struct file *swap_file; /* seldom referenced */ 198 struct file *swap_file; /* seldom referenced */
199 unsigned int old_block_size; /* seldom referenced */ 199 unsigned int old_block_size; /* seldom referenced */
200#ifdef CONFIG_FRONTSWAP
201 unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
202 atomic_t frontswap_pages; /* frontswap pages in-use counter */
203#endif
200}; 204};
201 205
202struct swap_list_t { 206struct swap_list_t {
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
new file mode 100644
index 000000000000..e282624e8c10
--- /dev/null
+++ b/include/linux/swapfile.h
@@ -0,0 +1,13 @@
1#ifndef _LINUX_SWAPFILE_H
2#define _LINUX_SWAPFILE_H
3
4/*
5 * these were static in swapfile.c but frontswap.c needs them and we don't
6 * want to expose them to the dozens of source files that include swap.h
7 */
8extern spinlock_t swap_lock;
9extern struct swap_list_t swap_list;
10extern struct swap_info_struct *swap_info[];
11extern int try_to_unuse(unsigned int, bool, unsigned long);
12
13#endif /* _LINUX_SWAPFILE_H */
diff --git a/init/main.c b/init/main.c
index 1ca6b32c4828..b5cc0a7c4708 100644
--- a/init/main.c
+++ b/init/main.c
@@ -508,7 +508,7 @@ asmlinkage void __init start_kernel(void)
508 parse_early_param(); 508 parse_early_param();
509 parse_args("Booting kernel", static_command_line, __start___param, 509 parse_args("Booting kernel", static_command_line, __start___param,
510 __stop___param - __start___param, 510 __stop___param - __start___param,
511 0, 0, &unknown_bootoption); 511 -1, -1, &unknown_bootoption);
512 512
513 jump_label_init(); 513 jump_label_init();
514 514
@@ -755,13 +755,8 @@ static void __init do_initcalls(void)
755{ 755{
756 int level; 756 int level;
757 757
758 for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) { 758 for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
759 pr_info("initlevel:%d=%s, %d registered initcalls\n",
760 level, initcall_level_names[level],
761 (int) (initcall_levels[level+1]
762 - initcall_levels[level]));
763 do_initcall_level(level); 759 do_initcall_level(level);
764 }
765} 760}
766 761
767/* 762/*
diff --git a/ipc/shm.c b/ipc/shm.c
index 5e2cbfdab6fc..41c1285d697a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
393 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 393 return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
394} 394}
395 395
396static long shm_fallocate(struct file *file, int mode, loff_t offset,
397 loff_t len)
398{
399 struct shm_file_data *sfd = shm_file_data(file);
400
401 if (!sfd->file->f_op->fallocate)
402 return -EOPNOTSUPP;
403 return sfd->file->f_op->fallocate(file, mode, offset, len);
404}
405
396static unsigned long shm_get_unmapped_area(struct file *file, 406static unsigned long shm_get_unmapped_area(struct file *file,
397 unsigned long addr, unsigned long len, unsigned long pgoff, 407 unsigned long addr, unsigned long len, unsigned long pgoff,
398 unsigned long flags) 408 unsigned long flags)
@@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = {
410 .get_unmapped_area = shm_get_unmapped_area, 420 .get_unmapped_area = shm_get_unmapped_area,
411#endif 421#endif
412 .llseek = noop_llseek, 422 .llseek = noop_llseek,
423 .fallocate = shm_fallocate,
413}; 424};
414 425
415static const struct file_operations shm_file_operations_huge = { 426static const struct file_operations shm_file_operations_huge = {
@@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = {
418 .release = shm_release, 429 .release = shm_release,
419 .get_unmapped_area = shm_get_unmapped_area, 430 .get_unmapped_area = shm_get_unmapped_area,
420 .llseek = noop_llseek, 431 .llseek = noop_llseek,
432 .fallocate = shm_fallocate,
421}; 433};
422 434
423int is_file_shm_hugepages(struct file *file) 435int is_file_shm_hugepages(struct file *file)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f3527d6184a..72fcd3069a90 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
896 mutex_unlock(&cgroup_mutex); 896 mutex_unlock(&cgroup_mutex);
897 897
898 /* 898 /*
899 * Drop the active superblock reference that we took when we 899 * We want to drop the active superblock reference from the
900 * created the cgroup 900 * cgroup creation after all the dentry refs are gone -
901 * kill_sb gets mighty unhappy otherwise. Mark
902 * dentry->d_fsdata with cgroup_diput() to tell
903 * cgroup_d_release() to call deactivate_super().
901 */ 904 */
902 deactivate_super(cgrp->root->sb); 905 dentry->d_fsdata = cgroup_diput;
903 906
904 /* 907 /*
905 * if we're getting rid of the cgroup, refcount should ensure 908 * if we're getting rid of the cgroup, refcount should ensure
@@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d)
925 return 1; 928 return 1;
926} 929}
927 930
931static void cgroup_d_release(struct dentry *dentry)
932{
933 /* did cgroup_diput() tell me to deactivate super? */
934 if (dentry->d_fsdata == cgroup_diput)
935 deactivate_super(dentry->d_sb);
936}
937
928static void remove_dir(struct dentry *d) 938static void remove_dir(struct dentry *d)
929{ 939{
930 struct dentry *parent = dget(d->d_parent); 940 struct dentry *parent = dget(d->d_parent);
@@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
1532 static const struct dentry_operations cgroup_dops = { 1542 static const struct dentry_operations cgroup_dops = {
1533 .d_iput = cgroup_diput, 1543 .d_iput = cgroup_diput,
1534 .d_delete = cgroup_delete, 1544 .d_delete = cgroup_delete,
1545 .d_release = cgroup_d_release,
1535 }; 1546 };
1536 1547
1537 struct inode *inode = 1548 struct inode *inode =
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5b06cbbf6931..f85c0154b333 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event,
3181 event = event->group_leader; 3181 event = event->group_leader;
3182 3182
3183 perf_event_for_each_child(event, func); 3183 perf_event_for_each_child(event, func);
3184 func(event);
3185 list_for_each_entry(sibling, &event->sibling_list, group_entry) 3184 list_for_each_entry(sibling, &event->sibling_list, group_entry)
3186 perf_event_for_each_child(sibling, func); 3185 perf_event_for_each_child(sibling, func);
3187 mutex_unlock(&ctx->mutex); 3186 mutex_unlock(&ctx->mutex);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index fc275e4f629b..eebd6d5cfb44 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -275,8 +275,10 @@ void handle_nested_irq(unsigned int irq)
275 kstat_incr_irqs_this_cpu(irq, desc); 275 kstat_incr_irqs_this_cpu(irq, desc);
276 276
277 action = desc->action; 277 action = desc->action;
278 if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) 278 if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
279 desc->istate |= IRQS_PENDING;
279 goto out_unlock; 280 goto out_unlock;
281 }
280 282
281 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); 283 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
282 raw_spin_unlock_irq(&desc->lock); 284 raw_spin_unlock_irq(&desc->lock);
@@ -324,8 +326,10 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
324 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); 326 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
325 kstat_incr_irqs_this_cpu(irq, desc); 327 kstat_incr_irqs_this_cpu(irq, desc);
326 328
327 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) 329 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
330 desc->istate |= IRQS_PENDING;
328 goto out_unlock; 331 goto out_unlock;
332 }
329 333
330 handle_irq_event(desc); 334 handle_irq_event(desc);
331 335
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 8e5c56b3b7d9..001fa5bab490 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -101,6 +101,9 @@ extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
101 101
102extern void irq_set_thread_affinity(struct irq_desc *desc); 102extern void irq_set_thread_affinity(struct irq_desc *desc);
103 103
104extern int irq_do_set_affinity(struct irq_data *data,
105 const struct cpumask *dest, bool force);
106
104/* Inline functions for support of irq chips on slow busses */ 107/* Inline functions for support of irq chips on slow busses */
105static inline void chip_bus_lock(struct irq_desc *desc) 108static inline void chip_bus_lock(struct irq_desc *desc)
106{ 109{
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ea0c6c2ae6f7..8c548232ba39 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -142,6 +142,25 @@ static inline void
142irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } 142irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
143#endif 143#endif
144 144
145int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
146 bool force)
147{
148 struct irq_desc *desc = irq_data_to_desc(data);
149 struct irq_chip *chip = irq_data_get_irq_chip(data);
150 int ret;
151
152 ret = chip->irq_set_affinity(data, mask, false);
153 switch (ret) {
154 case IRQ_SET_MASK_OK:
155 cpumask_copy(data->affinity, mask);
156 case IRQ_SET_MASK_OK_NOCOPY:
157 irq_set_thread_affinity(desc);
158 ret = 0;
159 }
160
161 return ret;
162}
163
145int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) 164int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
146{ 165{
147 struct irq_chip *chip = irq_data_get_irq_chip(data); 166 struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -152,14 +171,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
152 return -EINVAL; 171 return -EINVAL;
153 172
154 if (irq_can_move_pcntxt(data)) { 173 if (irq_can_move_pcntxt(data)) {
155 ret = chip->irq_set_affinity(data, mask, false); 174 ret = irq_do_set_affinity(data, mask, false);
156 switch (ret) {
157 case IRQ_SET_MASK_OK:
158 cpumask_copy(data->affinity, mask);
159 case IRQ_SET_MASK_OK_NOCOPY:
160 irq_set_thread_affinity(desc);
161 ret = 0;
162 }
163 } else { 175 } else {
164 irqd_set_move_pending(data); 176 irqd_set_move_pending(data);
165 irq_copy_pending(desc, mask); 177 irq_copy_pending(desc, mask);
@@ -283,9 +295,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
283static int 295static int
284setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) 296setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
285{ 297{
286 struct irq_chip *chip = irq_desc_get_chip(desc);
287 struct cpumask *set = irq_default_affinity; 298 struct cpumask *set = irq_default_affinity;
288 int ret, node = desc->irq_data.node; 299 int node = desc->irq_data.node;
289 300
290 /* Excludes PER_CPU and NO_BALANCE interrupts */ 301 /* Excludes PER_CPU and NO_BALANCE interrupts */
291 if (!irq_can_set_affinity(irq)) 302 if (!irq_can_set_affinity(irq))
@@ -311,13 +322,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
311 if (cpumask_intersects(mask, nodemask)) 322 if (cpumask_intersects(mask, nodemask))
312 cpumask_and(mask, mask, nodemask); 323 cpumask_and(mask, mask, nodemask);
313 } 324 }
314 ret = chip->irq_set_affinity(&desc->irq_data, mask, false); 325 irq_do_set_affinity(&desc->irq_data, mask, false);
315 switch (ret) {
316 case IRQ_SET_MASK_OK:
317 cpumask_copy(desc->irq_data.affinity, mask);
318 case IRQ_SET_MASK_OK_NOCOPY:
319 irq_set_thread_affinity(desc);
320 }
321 return 0; 326 return 0;
322} 327}
323#else 328#else
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index c3c89751b327..ca3f4aaff707 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -42,17 +42,8 @@ void irq_move_masked_irq(struct irq_data *idata)
42 * For correct operation this depends on the caller 42 * For correct operation this depends on the caller
43 * masking the irqs. 43 * masking the irqs.
44 */ 44 */
45 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) 45 if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)
46 < nr_cpu_ids)) { 46 irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false);
47 int ret = chip->irq_set_affinity(&desc->irq_data,
48 desc->pending_mask, false);
49 switch (ret) {
50 case IRQ_SET_MASK_OK:
51 cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
52 case IRQ_SET_MASK_OK_NOCOPY:
53 irq_set_thread_affinity(desc);
54 }
55 }
56 47
57 cpumask_clear(desc->pending_mask); 48 cpumask_clear(desc->pending_mask);
58} 49}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..d5594a4268d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
142#define SCHED_FEAT(name, enabled) \ 142#define SCHED_FEAT(name, enabled) \
143 #name , 143 #name ,
144 144
145static __read_mostly char *sched_feat_names[] = { 145static const char * const sched_feat_names[] = {
146#include "features.h" 146#include "features.h"
147 NULL
148}; 147};
149 148
150#undef SCHED_FEAT 149#undef SCHED_FEAT
@@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
2517 sched_avg_update(this_rq); 2516 sched_avg_update(this_rq);
2518} 2517}
2519 2518
2519#ifdef CONFIG_NO_HZ
2520/*
2521 * There is no sane way to deal with nohz on smp when using jiffies because the
2522 * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
2523 * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
2524 *
2525 * Therefore we cannot use the delta approach from the regular tick since that
2526 * would seriously skew the load calculation. However we'll make do for those
2527 * updates happening while idle (nohz_idle_balance) or coming out of idle
2528 * (tick_nohz_idle_exit).
2529 *
2530 * This means we might still be one tick off for nohz periods.
2531 */
2532
2520/* 2533/*
2521 * Called from nohz_idle_balance() to update the load ratings before doing the 2534 * Called from nohz_idle_balance() to update the load ratings before doing the
2522 * idle balance. 2535 * idle balance.
2523 */ 2536 */
2524void update_idle_cpu_load(struct rq *this_rq) 2537void update_idle_cpu_load(struct rq *this_rq)
2525{ 2538{
2526 unsigned long curr_jiffies = jiffies; 2539 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2527 unsigned long load = this_rq->load.weight; 2540 unsigned long load = this_rq->load.weight;
2528 unsigned long pending_updates; 2541 unsigned long pending_updates;
2529 2542
2530 /* 2543 /*
2531 * Bloody broken means of dealing with nohz, but better than nothing.. 2544 * bail if there's load or we're actually up-to-date.
2532 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
2533 * update and see 0 difference the one time and 2 the next, even though
2534 * we ticked at roughtly the same rate.
2535 *
2536 * Hence we only use this from nohz_idle_balance() and skip this
2537 * nonsense when called from the scheduler_tick() since that's
2538 * guaranteed a stable rate.
2539 */ 2545 */
2540 if (load || curr_jiffies == this_rq->last_load_update_tick) 2546 if (load || curr_jiffies == this_rq->last_load_update_tick)
2541 return; 2547 return;
@@ -2547,12 +2553,38 @@ void update_idle_cpu_load(struct rq *this_rq)
2547} 2553}
2548 2554
2549/* 2555/*
2556 * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
2557 */
2558void update_cpu_load_nohz(void)
2559{
2560 struct rq *this_rq = this_rq();
2561 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2562 unsigned long pending_updates;
2563
2564 if (curr_jiffies == this_rq->last_load_update_tick)
2565 return;
2566
2567 raw_spin_lock(&this_rq->lock);
2568 pending_updates = curr_jiffies - this_rq->last_load_update_tick;
2569 if (pending_updates) {
2570 this_rq->last_load_update_tick = curr_jiffies;
2571 /*
2572 * We were idle, this means load 0, the current load might be
2573 * !0 due to remote wakeups and the sort.
2574 */
2575 __update_cpu_load(this_rq, 0, pending_updates);
2576 }
2577 raw_spin_unlock(&this_rq->lock);
2578}
2579#endif /* CONFIG_NO_HZ */
2580
2581/*
2550 * Called from scheduler_tick() 2582 * Called from scheduler_tick()
2551 */ 2583 */
2552static void update_cpu_load_active(struct rq *this_rq) 2584static void update_cpu_load_active(struct rq *this_rq)
2553{ 2585{
2554 /* 2586 /*
2555 * See the mess in update_idle_cpu_load(). 2587 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
2556 */ 2588 */
2557 this_rq->last_load_update_tick = jiffies; 2589 this_rq->last_load_update_tick = jiffies;
2558 __update_cpu_load(this_rq, this_rq->load.weight, 1); 2590 __update_cpu_load(this_rq, this_rq->load.weight, 1);
@@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
4982 p->sched_class->set_cpus_allowed(p, new_mask); 5014 p->sched_class->set_cpus_allowed(p, new_mask);
4983 5015
4984 cpumask_copy(&p->cpus_allowed, new_mask); 5016 cpumask_copy(&p->cpus_allowed, new_mask);
4985 p->rt.nr_cpus_allowed = cpumask_weight(new_mask); 5017 p->nr_cpus_allowed = cpumask_weight(new_mask);
4986} 5018}
4987 5019
4988/* 5020/*
@@ -5524,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
5524 5556
5525#ifdef CONFIG_SCHED_DEBUG 5557#ifdef CONFIG_SCHED_DEBUG
5526 5558
5527static __read_mostly int sched_domain_debug_enabled; 5559static __read_mostly int sched_debug_enabled;
5528 5560
5529static int __init sched_domain_debug_setup(char *str) 5561static int __init sched_debug_setup(char *str)
5530{ 5562{
5531 sched_domain_debug_enabled = 1; 5563 sched_debug_enabled = 1;
5532 5564
5533 return 0; 5565 return 0;
5534} 5566}
5535early_param("sched_debug", sched_domain_debug_setup); 5567early_param("sched_debug", sched_debug_setup);
5568
5569static inline bool sched_debug(void)
5570{
5571 return sched_debug_enabled;
5572}
5536 5573
5537static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 5574static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5538 struct cpumask *groupmask) 5575 struct cpumask *groupmask)
@@ -5572,7 +5609,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5572 break; 5609 break;
5573 } 5610 }
5574 5611
5575 if (!group->sgp->power) { 5612 /*
5613 * Even though we initialize ->power to something semi-sane,
5614 * we leave power_orig unset. This allows us to detect if
5615 * domain iteration is still funny without causing /0 traps.
5616 */
5617 if (!group->sgp->power_orig) {
5576 printk(KERN_CONT "\n"); 5618 printk(KERN_CONT "\n");
5577 printk(KERN_ERR "ERROR: domain->cpu_power not " 5619 printk(KERN_ERR "ERROR: domain->cpu_power not "
5578 "set\n"); 5620 "set\n");
@@ -5620,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5620{ 5662{
5621 int level = 0; 5663 int level = 0;
5622 5664
5623 if (!sched_domain_debug_enabled) 5665 if (!sched_debug_enabled)
5624 return; 5666 return;
5625 5667
5626 if (!sd) { 5668 if (!sd) {
@@ -5641,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5641} 5683}
5642#else /* !CONFIG_SCHED_DEBUG */ 5684#else /* !CONFIG_SCHED_DEBUG */
5643# define sched_domain_debug(sd, cpu) do { } while (0) 5685# define sched_domain_debug(sd, cpu) do { } while (0)
5686static inline bool sched_debug(void)
5687{
5688 return false;
5689}
5644#endif /* CONFIG_SCHED_DEBUG */ 5690#endif /* CONFIG_SCHED_DEBUG */
5645 5691
5646static int sd_degenerate(struct sched_domain *sd) 5692static int sd_degenerate(struct sched_domain *sd)
@@ -5962,6 +6008,44 @@ struct sched_domain_topology_level {
5962 struct sd_data data; 6008 struct sd_data data;
5963}; 6009};
5964 6010
6011/*
6012 * Build an iteration mask that can exclude certain CPUs from the upwards
6013 * domain traversal.
6014 *
6015 * Asymmetric node setups can result in situations where the domain tree is of
6016 * unequal depth, make sure to skip domains that already cover the entire
6017 * range.
6018 *
6019 * In that case build_sched_domains() will have terminated the iteration early
6020 * and our sibling sd spans will be empty. Domains should always include the
6021 * cpu they're built on, so check that.
6022 *
6023 */
6024static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
6025{
6026 const struct cpumask *span = sched_domain_span(sd);
6027 struct sd_data *sdd = sd->private;
6028 struct sched_domain *sibling;
6029 int i;
6030
6031 for_each_cpu(i, span) {
6032 sibling = *per_cpu_ptr(sdd->sd, i);
6033 if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
6034 continue;
6035
6036 cpumask_set_cpu(i, sched_group_mask(sg));
6037 }
6038}
6039
6040/*
6041 * Return the canonical balance cpu for this group, this is the first cpu
6042 * of this group that's also in the iteration mask.
6043 */
6044int group_balance_cpu(struct sched_group *sg)
6045{
6046 return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
6047}
6048
5965static int 6049static int
5966build_overlap_sched_groups(struct sched_domain *sd, int cpu) 6050build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5967{ 6051{
@@ -5980,6 +6064,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5980 if (cpumask_test_cpu(i, covered)) 6064 if (cpumask_test_cpu(i, covered))
5981 continue; 6065 continue;
5982 6066
6067 child = *per_cpu_ptr(sdd->sd, i);
6068
6069 /* See the comment near build_group_mask(). */
6070 if (!cpumask_test_cpu(i, sched_domain_span(child)))
6071 continue;
6072
5983 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), 6073 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
5984 GFP_KERNEL, cpu_to_node(cpu)); 6074 GFP_KERNEL, cpu_to_node(cpu));
5985 6075
@@ -5987,8 +6077,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5987 goto fail; 6077 goto fail;
5988 6078
5989 sg_span = sched_group_cpus(sg); 6079 sg_span = sched_group_cpus(sg);
5990
5991 child = *per_cpu_ptr(sdd->sd, i);
5992 if (child->child) { 6080 if (child->child) {
5993 child = child->child; 6081 child = child->child;
5994 cpumask_copy(sg_span, sched_domain_span(child)); 6082 cpumask_copy(sg_span, sched_domain_span(child));
@@ -5997,10 +6085,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5997 6085
5998 cpumask_or(covered, covered, sg_span); 6086 cpumask_or(covered, covered, sg_span);
5999 6087
6000 sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); 6088 sg->sgp = *per_cpu_ptr(sdd->sgp, i);
6001 atomic_inc(&sg->sgp->ref); 6089 if (atomic_inc_return(&sg->sgp->ref) == 1)
6090 build_group_mask(sd, sg);
6002 6091
6003 if (cpumask_test_cpu(cpu, sg_span)) 6092 /*
6093 * Initialize sgp->power such that even if we mess up the
6094 * domains and no possible iteration will get us here, we won't
6095 * die on a /0 trap.
6096 */
6097 sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
6098
6099 /*
6100 * Make sure the first group of this domain contains the
6101 * canonical balance cpu. Otherwise the sched_domain iteration
6102 * breaks. See update_sg_lb_stats().
6103 */
6104 if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
6105 group_balance_cpu(sg) == cpu)
6004 groups = sg; 6106 groups = sg;
6005 6107
6006 if (!first) 6108 if (!first)
@@ -6074,6 +6176,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
6074 6176
6075 cpumask_clear(sched_group_cpus(sg)); 6177 cpumask_clear(sched_group_cpus(sg));
6076 sg->sgp->power = 0; 6178 sg->sgp->power = 0;
6179 cpumask_setall(sched_group_mask(sg));
6077 6180
6078 for_each_cpu(j, span) { 6181 for_each_cpu(j, span) {
6079 if (get_group(j, sdd, NULL) != group) 6182 if (get_group(j, sdd, NULL) != group)
@@ -6115,7 +6218,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6115 sg = sg->next; 6218 sg = sg->next;
6116 } while (sg != sd->groups); 6219 } while (sg != sd->groups);
6117 6220
6118 if (cpu != group_first_cpu(sg)) 6221 if (cpu != group_balance_cpu(sg))
6119 return; 6222 return;
6120 6223
6121 update_group_power(sd, cpu); 6224 update_group_power(sd, cpu);
@@ -6165,11 +6268,8 @@ int sched_domain_level_max;
6165 6268
6166static int __init setup_relax_domain_level(char *str) 6269static int __init setup_relax_domain_level(char *str)
6167{ 6270{
6168 unsigned long val; 6271 if (kstrtoint(str, 0, &default_relax_domain_level))
6169 6272 pr_warn("Unable to set relax_domain_level\n");
6170 val = simple_strtoul(str, NULL, 0);
6171 if (val < sched_domain_level_max)
6172 default_relax_domain_level = val;
6173 6273
6174 return 1; 6274 return 1;
6175} 6275}
@@ -6279,14 +6379,13 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topol
6279#ifdef CONFIG_NUMA 6379#ifdef CONFIG_NUMA
6280 6380
6281static int sched_domains_numa_levels; 6381static int sched_domains_numa_levels;
6282static int sched_domains_numa_scale;
6283static int *sched_domains_numa_distance; 6382static int *sched_domains_numa_distance;
6284static struct cpumask ***sched_domains_numa_masks; 6383static struct cpumask ***sched_domains_numa_masks;
6285static int sched_domains_curr_level; 6384static int sched_domains_curr_level;
6286 6385
6287static inline int sd_local_flags(int level) 6386static inline int sd_local_flags(int level)
6288{ 6387{
6289 if (sched_domains_numa_distance[level] > REMOTE_DISTANCE) 6388 if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
6290 return 0; 6389 return 0;
6291 6390
6292 return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; 6391 return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
@@ -6344,6 +6443,42 @@ static const struct cpumask *sd_numa_mask(int cpu)
6344 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; 6443 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
6345} 6444}
6346 6445
6446static void sched_numa_warn(const char *str)
6447{
6448 static int done = false;
6449 int i,j;
6450
6451 if (done)
6452 return;
6453
6454 done = true;
6455
6456 printk(KERN_WARNING "ERROR: %s\n\n", str);
6457
6458 for (i = 0; i < nr_node_ids; i++) {
6459 printk(KERN_WARNING " ");
6460 for (j = 0; j < nr_node_ids; j++)
6461 printk(KERN_CONT "%02d ", node_distance(i,j));
6462 printk(KERN_CONT "\n");
6463 }
6464 printk(KERN_WARNING "\n");
6465}
6466
6467static bool find_numa_distance(int distance)
6468{
6469 int i;
6470
6471 if (distance == node_distance(0, 0))
6472 return true;
6473
6474 for (i = 0; i < sched_domains_numa_levels; i++) {
6475 if (sched_domains_numa_distance[i] == distance)
6476 return true;
6477 }
6478
6479 return false;
6480}
6481
6347static void sched_init_numa(void) 6482static void sched_init_numa(void)
6348{ 6483{
6349 int next_distance, curr_distance = node_distance(0, 0); 6484 int next_distance, curr_distance = node_distance(0, 0);
@@ -6351,7 +6486,6 @@ static void sched_init_numa(void)
6351 int level = 0; 6486 int level = 0;
6352 int i, j, k; 6487 int i, j, k;
6353 6488
6354 sched_domains_numa_scale = curr_distance;
6355 sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); 6489 sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
6356 if (!sched_domains_numa_distance) 6490 if (!sched_domains_numa_distance)
6357 return; 6491 return;
@@ -6362,23 +6496,41 @@ static void sched_init_numa(void)
6362 * 6496 *
6363 * Assumes node_distance(0,j) includes all distances in 6497 * Assumes node_distance(0,j) includes all distances in
6364 * node_distance(i,j) in order to avoid cubic time. 6498 * node_distance(i,j) in order to avoid cubic time.
6365 *
6366 * XXX: could be optimized to O(n log n) by using sort()
6367 */ 6499 */
6368 next_distance = curr_distance; 6500 next_distance = curr_distance;
6369 for (i = 0; i < nr_node_ids; i++) { 6501 for (i = 0; i < nr_node_ids; i++) {
6370 for (j = 0; j < nr_node_ids; j++) { 6502 for (j = 0; j < nr_node_ids; j++) {
6371 int distance = node_distance(0, j); 6503 for (k = 0; k < nr_node_ids; k++) {
6372 if (distance > curr_distance && 6504 int distance = node_distance(i, k);
6373 (distance < next_distance || 6505
6374 next_distance == curr_distance)) 6506 if (distance > curr_distance &&
6375 next_distance = distance; 6507 (distance < next_distance ||
6508 next_distance == curr_distance))
6509 next_distance = distance;
6510
6511 /*
6512 * While not a strong assumption it would be nice to know
6513 * about cases where if node A is connected to B, B is not
6514 * equally connected to A.
6515 */
6516 if (sched_debug() && node_distance(k, i) != distance)
6517 sched_numa_warn("Node-distance not symmetric");
6518
6519 if (sched_debug() && i && !find_numa_distance(distance))
6520 sched_numa_warn("Node-0 not representative");
6521 }
6522 if (next_distance != curr_distance) {
6523 sched_domains_numa_distance[level++] = next_distance;
6524 sched_domains_numa_levels = level;
6525 curr_distance = next_distance;
6526 } else break;
6376 } 6527 }
6377 if (next_distance != curr_distance) { 6528
6378 sched_domains_numa_distance[level++] = next_distance; 6529 /*
6379 sched_domains_numa_levels = level; 6530 * In case of sched_debug() we verify the above assumption.
6380 curr_distance = next_distance; 6531 */
6381 } else break; 6532 if (!sched_debug())
6533 break;
6382 } 6534 }
6383 /* 6535 /*
6384 * 'level' contains the number of unique distances, excluding the 6536 * 'level' contains the number of unique distances, excluding the
@@ -6403,7 +6555,7 @@ static void sched_init_numa(void)
6403 return; 6555 return;
6404 6556
6405 for (j = 0; j < nr_node_ids; j++) { 6557 for (j = 0; j < nr_node_ids; j++) {
6406 struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); 6558 struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
6407 if (!mask) 6559 if (!mask)
6408 return; 6560 return;
6409 6561
@@ -6490,7 +6642,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
6490 6642
6491 *per_cpu_ptr(sdd->sg, j) = sg; 6643 *per_cpu_ptr(sdd->sg, j) = sg;
6492 6644
6493 sgp = kzalloc_node(sizeof(struct sched_group_power), 6645 sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
6494 GFP_KERNEL, cpu_to_node(j)); 6646 GFP_KERNEL, cpu_to_node(j));
6495 if (!sgp) 6647 if (!sgp)
6496 return -ENOMEM; 6648 return -ENOMEM;
@@ -6543,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
6543 if (!sd) 6695 if (!sd)
6544 return child; 6696 return child;
6545 6697
6546 set_domain_attribute(sd, attr);
6547 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); 6698 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
6548 if (child) { 6699 if (child) {
6549 sd->level = child->level + 1; 6700 sd->level = child->level + 1;
@@ -6551,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
6551 child->parent = sd; 6702 child->parent = sd;
6552 } 6703 }
6553 sd->child = child; 6704 sd->child = child;
6705 set_domain_attribute(sd, attr);
6554 6706
6555 return sd; 6707 return sd;
6556} 6708}
@@ -6691,7 +6843,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
6691 if (!doms_cur) 6843 if (!doms_cur)
6692 doms_cur = &fallback_doms; 6844 doms_cur = &fallback_doms;
6693 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); 6845 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
6694 dattr_cur = NULL;
6695 err = build_sched_domains(doms_cur[0], NULL); 6846 err = build_sched_domains(doms_cur[0], NULL);
6696 register_sched_domain_sysctl(); 6847 register_sched_domain_sysctl();
6697 6848
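
The sched_init_numa() rework above drops the unused sched_domains_numa_scale, scans the full node-distance matrix instead of only node 0's row, and (under sched_debug()) warns when the table is asymmetric or node 0's row is not representative. What follows is a freestanding sketch of the level-extraction idea only; dist[], NR_NODES and the main() driver are illustrative stand-ins for the kernel's node_distance() and nr_node_ids, not kernel code.

/* Sketch only: extract the unique NUMA distances ("levels") from a small
 * distance matrix, the way the loop above walks curr_distance upwards. */
#include <stdio.h>

#define NR_NODES 4

static const int dist[NR_NODES][NR_NODES] = {
	{ 10, 20, 20, 30 },
	{ 20, 10, 30, 20 },
	{ 20, 30, 10, 20 },
	{ 30, 20, 20, 10 },
};

static int node_distance(int i, int j) { return dist[i][j]; }

int main(void)
{
	int levels[NR_NODES * NR_NODES];
	int curr = node_distance(0, 0);		/* local distance */
	int nlevels = 0;

	for (;;) {
		int next = curr;

		/* find the smallest distance strictly greater than curr */
		for (int i = 0; i < NR_NODES; i++)
			for (int k = 0; k < NR_NODES; k++) {
				int d = node_distance(i, k);

				if (d > curr && (d < next || next == curr))
					next = d;
			}
		if (next == curr)
			break;			/* no larger distance left */
		levels[nlevels++] = next;
		curr = next;
	}

	for (int l = 0; l < nlevels; l++)
		printf("level %d: distance %d\n", l, levels[l]);
	return 0;
}
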
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 940e6d17cf96..c099cc6eebe3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
2703 int want_sd = 1; 2703 int want_sd = 1;
2704 int sync = wake_flags & WF_SYNC; 2704 int sync = wake_flags & WF_SYNC;
2705 2705
2706 if (p->rt.nr_cpus_allowed == 1) 2706 if (p->nr_cpus_allowed == 1)
2707 return prev_cpu; 2707 return prev_cpu;
2708 2708
2709 if (sd_flag & SD_BALANCE_WAKE) { 2709 if (sd_flag & SD_BALANCE_WAKE) {
@@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
3503unsigned long scale_rt_power(int cpu) 3503unsigned long scale_rt_power(int cpu)
3504{ 3504{
3505 struct rq *rq = cpu_rq(cpu); 3505 struct rq *rq = cpu_rq(cpu);
3506 u64 total, available; 3506 u64 total, available, age_stamp, avg;
3507 3507
3508 total = sched_avg_period() + (rq->clock - rq->age_stamp); 3508 /*
3509 * Since we're reading these variables without serialization make sure
3510 * we read them once before doing sanity checks on them.
3511 */
3512 age_stamp = ACCESS_ONCE(rq->age_stamp);
3513 avg = ACCESS_ONCE(rq->rt_avg);
3514
3515 total = sched_avg_period() + (rq->clock - age_stamp);
3509 3516
3510 if (unlikely(total < rq->rt_avg)) { 3517 if (unlikely(total < avg)) {
3511 /* Ensures that power won't end up being negative */ 3518 /* Ensures that power won't end up being negative */
3512 available = 0; 3519 available = 0;
3513 } else { 3520 } else {
3514 available = total - rq->rt_avg; 3521 available = total - avg;
3515 } 3522 }
3516 3523
3517 if (unlikely((s64)total < SCHED_POWER_SCALE)) 3524 if (unlikely((s64)total < SCHED_POWER_SCALE))
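
The scale_rt_power() hunk above snapshots rq->age_stamp and rq->rt_avg exactly once before using them, so the overflow check and the subtraction cannot observe two different values of a field another CPU is updating. Below is a hedged userspace illustration of the same read-once pattern using C11 atomics; the kernel itself uses ACCESS_ONCE(), and the struct and field names here are made up.

/* Illustration of the "snapshot once, then validate" pattern. */
#include <stdatomic.h>
#include <stdint.h>

struct rq_like {
	_Atomic uint64_t age_stamp;
	_Atomic uint64_t rt_avg;
};

static uint64_t available_power(struct rq_like *rq, uint64_t clock,
				uint64_t period)
{
	/* One load per field: every later use sees the same value. */
	uint64_t age_stamp = atomic_load_explicit(&rq->age_stamp,
						  memory_order_relaxed);
	uint64_t avg = atomic_load_explicit(&rq->rt_avg,
					    memory_order_relaxed);
	uint64_t total = period + (clock - age_stamp);

	/* Without the snapshot, rt_avg could grow between the check and
	 * the subtraction and the result could wrap negative. */
	return (total < avg) ? 0 : total - avg;
}
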
@@ -3574,13 +3581,28 @@ void update_group_power(struct sched_domain *sd, int cpu)
3574 3581
3575 power = 0; 3582 power = 0;
3576 3583
3577 group = child->groups; 3584 if (child->flags & SD_OVERLAP) {
3578 do { 3585 /*
3579 power += group->sgp->power; 3586 * SD_OVERLAP domains cannot assume that child groups
3580 group = group->next; 3587 * span the current group.
3581 } while (group != child->groups); 3588 */
3582 3589
3583 sdg->sgp->power = power; 3590 for_each_cpu(cpu, sched_group_cpus(sdg))
3591 power += power_of(cpu);
3592 } else {
3593 /*
3594 * !SD_OVERLAP domains can assume that child groups
3595 * span the current group.
3596 */
3597
3598 group = child->groups;
3599 do {
3600 power += group->sgp->power;
3601 group = group->next;
3602 } while (group != child->groups);
3603 }
3604
3605 sdg->sgp->power_orig = sdg->sgp->power = power;
3584} 3606}
3585 3607
3586/* 3608/*
@@ -3610,7 +3632,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
3610 3632
3611/** 3633/**
3612 * update_sg_lb_stats - Update sched_group's statistics for load balancing. 3634 * update_sg_lb_stats - Update sched_group's statistics for load balancing.
3613 * @sd: The sched_domain whose statistics are to be updated. 3635 * @env: The load balancing environment.
3614 * @group: sched_group whose statistics are to be updated. 3636 * @group: sched_group whose statistics are to be updated.
3615 * @load_idx: Load index of sched_domain of this_cpu for load calc. 3637 * @load_idx: Load index of sched_domain of this_cpu for load calc.
3616 * @local_group: Does group contain this_cpu. 3638 * @local_group: Does group contain this_cpu.
@@ -3630,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
3630 int i; 3652 int i;
3631 3653
3632 if (local_group) 3654 if (local_group)
3633 balance_cpu = group_first_cpu(group); 3655 balance_cpu = group_balance_cpu(group);
3634 3656
3635 /* Tally up the load of all CPUs in the group */ 3657 /* Tally up the load of all CPUs in the group */
3636 max_cpu_load = 0; 3658 max_cpu_load = 0;
@@ -3645,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
3645 3667
3646 /* Bias balancing toward cpus of our domain */ 3668 /* Bias balancing toward cpus of our domain */
3647 if (local_group) { 3669 if (local_group) {
3648 if (idle_cpu(i) && !first_idle_cpu) { 3670 if (idle_cpu(i) && !first_idle_cpu &&
3671 cpumask_test_cpu(i, sched_group_mask(group))) {
3649 first_idle_cpu = 1; 3672 first_idle_cpu = 1;
3650 balance_cpu = i; 3673 balance_cpu = i;
3651 } 3674 }
@@ -3719,11 +3742,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
3719 3742
3720/** 3743/**
3721 * update_sd_pick_busiest - return 1 on busiest group 3744 * update_sd_pick_busiest - return 1 on busiest group
3722 * @sd: sched_domain whose statistics are to be checked 3745 * @env: The load balancing environment.
3723 * @sds: sched_domain statistics 3746 * @sds: sched_domain statistics
3724 * @sg: sched_group candidate to be checked for being the busiest 3747 * @sg: sched_group candidate to be checked for being the busiest
3725 * @sgs: sched_group statistics 3748 * @sgs: sched_group statistics
3726 * @this_cpu: the current cpu
3727 * 3749 *
3728 * Determine if @sg is a busier group than the previously selected 3750 * Determine if @sg is a busier group than the previously selected
3729 * busiest group. 3751 * busiest group.
@@ -3761,9 +3783,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
3761 3783
3762/** 3784/**
3763 * update_sd_lb_stats - Update sched_domain's statistics for load balancing. 3785 * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
3764 * @sd: sched_domain whose statistics are to be updated. 3786 * @env: The load balancing environment.
3765 * @this_cpu: Cpu for which load balance is currently performed.
3766 * @idle: Idle status of this_cpu
3767 * @cpus: Set of cpus considered for load balancing. 3787 * @cpus: Set of cpus considered for load balancing.
3768 * @balance: Should we balance. 3788 * @balance: Should we balance.
3769 * @sds: variable to hold the statistics for this sched_domain. 3789 * @sds: variable to hold the statistics for this sched_domain.
@@ -3852,10 +3872,8 @@ static inline void update_sd_lb_stats(struct lb_env *env,
3852 * Returns 1 when packing is required and a task should be moved to 3872 * Returns 1 when packing is required and a task should be moved to
3853 * this CPU. The amount of the imbalance is returned in *imbalance. 3873 * this CPU. The amount of the imbalance is returned in *imbalance.
3854 * 3874 *
3855 * @sd: The sched_domain whose packing is to be checked. 3875 * @env: The load balancing environment.
3856 * @sds: Statistics of the sched_domain which is to be packed 3876 * @sds: Statistics of the sched_domain which is to be packed
3857 * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
3858 * @imbalance: returns amount of imbalanced due to packing.
3859 */ 3877 */
3860static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) 3878static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
3861{ 3879{
@@ -3881,9 +3899,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
3881 * fix_small_imbalance - Calculate the minor imbalance that exists 3899 * fix_small_imbalance - Calculate the minor imbalance that exists
3882 * amongst the groups of a sched_domain, during 3900 * amongst the groups of a sched_domain, during
3883 * load balancing. 3901 * load balancing.
3902 * @env: The load balancing environment.
3884 * @sds: Statistics of the sched_domain whose imbalance is to be calculated. 3903 * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
3885 * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
3886 * @imbalance: Variable to store the imbalance.
3887 */ 3904 */
3888static inline 3905static inline
3889void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) 3906void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
@@ -4026,11 +4043,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
4026 * Also calculates the amount of weighted load which should be moved 4043 * Also calculates the amount of weighted load which should be moved
4027 * to restore balance. 4044 * to restore balance.
4028 * 4045 *
4029 * @sd: The sched_domain whose busiest group is to be returned. 4046 * @env: The load balancing environment.
4030 * @this_cpu: The cpu for which load balancing is currently being performed.
4031 * @imbalance: Variable which stores amount of weighted load which should
4032 * be moved to restore balance/put a group to idle.
4033 * @idle: The idle status of this_cpu.
4034 * @cpus: The set of CPUs under consideration for load-balancing. 4047 * @cpus: The set of CPUs under consideration for load-balancing.
4035 * @balance: Pointer to a variable indicating if this_cpu 4048 * @balance: Pointer to a variable indicating if this_cpu
4036 * is the appropriate cpu to perform load balancing at this_level. 4049 * is the appropriate cpu to perform load balancing at this_level.
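
In the update_group_power() hunk earlier in this fair.c diff, SD_OVERLAP domains stop assuming that the child groups exactly tile the parent group and instead sum power_of() over the parent's own cpu span. A trimmed sketch of the two summation paths follows; the structures and helpers are simplified stand-ins for struct sched_group and power_of(), not the kernel's definitions.

/* Sketch of the two summation strategies. */
#include <stdbool.h>

struct group {
	struct group *next;		/* circular list of sibling groups */
	unsigned long power;
	const int *cpus;		/* cpumask, simplified to an array */
	int nr_cpus;
};

static unsigned long cpu_power_tab[8] = { 1024, 1024, 1024, 1024,
					  1024, 1024, 1024, 1024 };
static unsigned long power_of_cpu(int cpu) { return cpu_power_tab[cpu]; }

static unsigned long sum_power(const struct group *parent,
			       const struct group *child, bool overlap)
{
	unsigned long power = 0;

	if (overlap) {
		/* SD_OVERLAP: trust only the parent's own cpu span. */
		for (int i = 0; i < parent->nr_cpus; i++)
			power += power_of_cpu(parent->cpus[i]);
	} else {
		/* !SD_OVERLAP: child groups exactly tile the parent. */
		const struct group *g = child;

		do {
			power += g->power;
			g = g->next;
		} while (g != child);
	}
	return power;
}
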
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c5565c3c515f..573e1ca01102 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq)
274 274
275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
276{ 276{
277 struct task_struct *p;
278
277 if (!rt_entity_is_task(rt_se)) 279 if (!rt_entity_is_task(rt_se))
278 return; 280 return;
279 281
282 p = rt_task_of(rt_se);
280 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 283 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
281 284
282 rt_rq->rt_nr_total++; 285 rt_rq->rt_nr_total++;
283 if (rt_se->nr_cpus_allowed > 1) 286 if (p->nr_cpus_allowed > 1)
284 rt_rq->rt_nr_migratory++; 287 rt_rq->rt_nr_migratory++;
285 288
286 update_rt_migration(rt_rq); 289 update_rt_migration(rt_rq);
@@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
288 291
289static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 292static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
290{ 293{
294 struct task_struct *p;
295
291 if (!rt_entity_is_task(rt_se)) 296 if (!rt_entity_is_task(rt_se))
292 return; 297 return;
293 298
299 p = rt_task_of(rt_se);
294 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 300 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
295 301
296 rt_rq->rt_nr_total--; 302 rt_rq->rt_nr_total--;
297 if (rt_se->nr_cpus_allowed > 1) 303 if (p->nr_cpus_allowed > 1)
298 rt_rq->rt_nr_migratory--; 304 rt_rq->rt_nr_migratory--;
299 305
300 update_rt_migration(rt_rq); 306 update_rt_migration(rt_rq);
@@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1161 1167
1162 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); 1168 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
1163 1169
1164 if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) 1170 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1165 enqueue_pushable_task(rq, p); 1171 enqueue_pushable_task(rq, p);
1166 1172
1167 inc_nr_running(rq); 1173 inc_nr_running(rq);
@@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1225 1231
1226 cpu = task_cpu(p); 1232 cpu = task_cpu(p);
1227 1233
1228 if (p->rt.nr_cpus_allowed == 1) 1234 if (p->nr_cpus_allowed == 1)
1229 goto out; 1235 goto out;
1230 1236
1231 /* For anything but wake ups, just return the task_cpu */ 1237 /* For anything but wake ups, just return the task_cpu */
@@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1260 * will have to sort it out. 1266 * will have to sort it out.
1261 */ 1267 */
1262 if (curr && unlikely(rt_task(curr)) && 1268 if (curr && unlikely(rt_task(curr)) &&
1263 (curr->rt.nr_cpus_allowed < 2 || 1269 (curr->nr_cpus_allowed < 2 ||
1264 curr->prio <= p->prio) && 1270 curr->prio <= p->prio) &&
1265 (p->rt.nr_cpus_allowed > 1)) { 1271 (p->nr_cpus_allowed > 1)) {
1266 int target = find_lowest_rq(p); 1272 int target = find_lowest_rq(p);
1267 1273
1268 if (target != -1) 1274 if (target != -1)
@@ -1276,10 +1282,10 @@ out:
1276 1282
1277static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1283static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1278{ 1284{
1279 if (rq->curr->rt.nr_cpus_allowed == 1) 1285 if (rq->curr->nr_cpus_allowed == 1)
1280 return; 1286 return;
1281 1287
1282 if (p->rt.nr_cpus_allowed != 1 1288 if (p->nr_cpus_allowed != 1
1283 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1289 && cpupri_find(&rq->rd->cpupri, p, NULL))
1284 return; 1290 return;
1285 1291
@@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1395 * The previous task needs to be made eligible for pushing 1401 * The previous task needs to be made eligible for pushing
1396 * if it is still active 1402 * if it is still active
1397 */ 1403 */
1398 if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) 1404 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1399 enqueue_pushable_task(rq, p); 1405 enqueue_pushable_task(rq, p);
1400} 1406}
1401 1407
@@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1408{ 1414{
1409 if (!task_running(rq, p) && 1415 if (!task_running(rq, p) &&
1410 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && 1416 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
1411 (p->rt.nr_cpus_allowed > 1)) 1417 (p->nr_cpus_allowed > 1))
1412 return 1; 1418 return 1;
1413 return 0; 1419 return 0;
1414} 1420}
@@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task)
1464 if (unlikely(!lowest_mask)) 1470 if (unlikely(!lowest_mask))
1465 return -1; 1471 return -1;
1466 1472
1467 if (task->rt.nr_cpus_allowed == 1) 1473 if (task->nr_cpus_allowed == 1)
1468 return -1; /* No other targets possible */ 1474 return -1; /* No other targets possible */
1469 1475
1470 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1476 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1556,7 +1562,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1556 task_running(rq, task) || 1562 task_running(rq, task) ||
1557 !task->on_rq)) { 1563 !task->on_rq)) {
1558 1564
1559 raw_spin_unlock(&lowest_rq->lock); 1565 double_unlock_balance(rq, lowest_rq);
1560 lowest_rq = NULL; 1566 lowest_rq = NULL;
1561 break; 1567 break;
1562 } 1568 }
@@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
1586 1592
1587 BUG_ON(rq->cpu != task_cpu(p)); 1593 BUG_ON(rq->cpu != task_cpu(p));
1588 BUG_ON(task_current(rq, p)); 1594 BUG_ON(task_current(rq, p));
1589 BUG_ON(p->rt.nr_cpus_allowed <= 1); 1595 BUG_ON(p->nr_cpus_allowed <= 1);
1590 1596
1591 BUG_ON(!p->on_rq); 1597 BUG_ON(!p->on_rq);
1592 BUG_ON(!rt_task(p)); 1598 BUG_ON(!rt_task(p));
@@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
1793 if (!task_running(rq, p) && 1799 if (!task_running(rq, p) &&
1794 !test_tsk_need_resched(rq->curr) && 1800 !test_tsk_need_resched(rq->curr) &&
1795 has_pushable_tasks(rq) && 1801 has_pushable_tasks(rq) &&
1796 p->rt.nr_cpus_allowed > 1 && 1802 p->nr_cpus_allowed > 1 &&
1797 rt_task(rq->curr) && 1803 rt_task(rq->curr) &&
1798 (rq->curr->rt.nr_cpus_allowed < 2 || 1804 (rq->curr->nr_cpus_allowed < 2 ||
1799 rq->curr->prio <= p->prio)) 1805 rq->curr->prio <= p->prio))
1800 push_rt_tasks(rq); 1806 push_rt_tasks(rq);
1801} 1807}
@@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1817 * Only update if the process changes its state from whether it 1823 * Only update if the process changes its state from whether it
1818 * can migrate or not. 1824 * can migrate or not.
1819 */ 1825 */
1820 if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) 1826 if ((p->nr_cpus_allowed > 1) == (weight > 1))
1821 return; 1827 return;
1822 1828
1823 rq = task_rq(p); 1829 rq = task_rq(p);
@@ -1979,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1979 1985
1980static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 1986static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1981{ 1987{
1988 struct sched_rt_entity *rt_se = &p->rt;
1989
1982 update_curr_rt(rq); 1990 update_curr_rt(rq);
1983 1991
1984 watchdog(rq, p); 1992 watchdog(rq, p);
@@ -1996,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1996 p->rt.time_slice = RR_TIMESLICE; 2004 p->rt.time_slice = RR_TIMESLICE;
1997 2005
1998 /* 2006 /*
1999 * Requeue to the end of queue if we are not the only element 2007 * Requeue to the end of queue if we (and all of our ancestors) are the
2000 * on the queue: 2008 * only element on the queue
2001 */ 2009 */
2002 if (p->rt.run_list.prev != p->rt.run_list.next) { 2010 for_each_sched_rt_entity(rt_se) {
2003 requeue_task_rt(rq, p, 0); 2011 if (rt_se->run_list.prev != rt_se->run_list.next) {
2004 set_tsk_need_resched(p); 2012 requeue_task_rt(rq, p, 0);
2013 set_tsk_need_resched(p);
2014 return;
2015 }
2005 } 2016 }
2006} 2017}
2007 2018
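
The task_tick_rt() change at the end of this rt.c diff walks every sched_rt_entity in the task's group hierarchy and requeues as soon as any level has a sibling queued, which is what restores round-robin behaviour for SCHED_RR tasks inside cgroups. A minimal freestanding sketch of that test on a circular run list, with made-up structure names:

/* In a circular list that contains only one element, prev == next; any
 * sibling breaks that equality.  The patched loop applies this test at
 * every level of the hierarchy. */
#include <stdbool.h>
#include <stddef.h>

struct list_node { struct list_node *prev, *next; };

struct entity {
	struct entity *parent;		/* NULL at the top level */
	struct list_node run_list;	/* circular run list at this level */
};

static bool needs_requeue(const struct entity *se)
{
	for (; se; se = se->parent)
		if (se->run_list.prev != se->run_list.next)
			return true;	/* some level has a sibling queued */
	return false;
}
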
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccfd24ce..6d52cea7f33d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
526DECLARE_PER_CPU(struct sched_domain *, sd_llc); 526DECLARE_PER_CPU(struct sched_domain *, sd_llc);
527DECLARE_PER_CPU(int, sd_llc_id); 527DECLARE_PER_CPU(int, sd_llc_id);
528 528
529extern int group_balance_cpu(struct sched_group *sg);
530
529#endif /* CONFIG_SMP */ 531#endif /* CONFIG_SMP */
530 532
531#include "stats.h" 533#include "stats.h"
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index e1a797e028a3..98f60c5caa1b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -31,6 +31,12 @@ void __init idle_thread_set_boot_cpu(void)
31 per_cpu(idle_threads, smp_processor_id()) = current; 31 per_cpu(idle_threads, smp_processor_id()) = current;
32} 32}
33 33
34/**
35 * idle_init - Initialize the idle thread for a cpu
36 * @cpu: The cpu for which the idle thread should be initialized
37 *
38 * Creates the thread if it does not exist.
39 */
34static inline void idle_init(unsigned int cpu) 40static inline void idle_init(unsigned int cpu)
35{ 41{
36 struct task_struct *tsk = per_cpu(idle_threads, cpu); 42 struct task_struct *tsk = per_cpu(idle_threads, cpu);
@@ -45,17 +51,16 @@ static inline void idle_init(unsigned int cpu)
45} 51}
46 52
47/** 53/**
48 * idle_thread_init - Initialize the idle thread for a cpu 54 * idle_threads_init - Initialize idle threads for all cpus
49 * @cpu: The cpu for which the idle thread should be initialized
50 *
51 * Creates the thread if it does not exist.
52 */ 55 */
53void __init idle_threads_init(void) 56void __init idle_threads_init(void)
54{ 57{
55 unsigned int cpu; 58 unsigned int cpu, boot_cpu;
59
60 boot_cpu = smp_processor_id();
56 61
57 for_each_possible_cpu(cpu) { 62 for_each_possible_cpu(cpu) {
58 if (cpu != smp_processor_id()) 63 if (cpu != boot_cpu)
59 idle_init(cpu); 64 idle_init(cpu);
60 } 65 }
61} 66}
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
1786} 1786}
1787 1787
1788#ifdef CONFIG_CHECKPOINT_RESTORE 1788#ifdef CONFIG_CHECKPOINT_RESTORE
1789static bool vma_flags_mismatch(struct vm_area_struct *vma,
1790 unsigned long required,
1791 unsigned long banned)
1792{
1793 return (vma->vm_flags & required) != required ||
1794 (vma->vm_flags & banned);
1795}
1796
1797static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1789static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1798{ 1790{
1791 struct vm_area_struct *vma;
1799 struct file *exe_file; 1792 struct file *exe_file;
1800 struct dentry *dentry; 1793 struct dentry *dentry;
1801 int err; 1794 int err;
1802 1795
1803 /*
1804 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
1805 * remain. So perform a quick test first.
1806 */
1807 if (mm->num_exe_file_vmas)
1808 return -EBUSY;
1809
1810 exe_file = fget(fd); 1796 exe_file = fget(fd);
1811 if (!exe_file) 1797 if (!exe_file)
1812 return -EBADF; 1798 return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1827 if (err) 1813 if (err)
1828 goto exit; 1814 goto exit;
1829 1815
1816 down_write(&mm->mmap_sem);
1817
1818 /*
1819 * Forbid mm->exe_file change if there are mapped other files.
1820 */
1821 err = -EBUSY;
1822 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1823 if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
1824 &exe_file->f_path))
1825 goto exit_unlock;
1826 }
1827
1830 /* 1828 /*
1831 * The symlink can be changed only once, just to disallow arbitrary 1829 * The symlink can be changed only once, just to disallow arbitrary
1832 * transitions malicious software might bring in. This means one 1830 * transitions malicious software might bring in. This means one
1833 * could make a snapshot over all processes running and monitor 1831 * could make a snapshot over all processes running and monitor
1834 * /proc/pid/exe changes to notice unusual activity if needed. 1832 * /proc/pid/exe changes to notice unusual activity if needed.
1835 */ 1833 */
1836 down_write(&mm->mmap_sem); 1834 err = -EPERM;
1837 if (likely(!mm->exe_file)) 1835 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
1838 set_mm_exe_file(mm, exe_file); 1836 goto exit_unlock;
1839 else 1837
1840 err = -EBUSY; 1838 set_mm_exe_file(mm, exe_file);
1839exit_unlock:
1841 up_write(&mm->mmap_sem); 1840 up_write(&mm->mmap_sem);
1842 1841
1843exit: 1842exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
1862 if (opt == PR_SET_MM_EXE_FILE) 1861 if (opt == PR_SET_MM_EXE_FILE)
1863 return prctl_set_mm_exe_file(mm, (unsigned int)addr); 1862 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
1864 1863
1865 if (addr >= TASK_SIZE) 1864 if (addr >= TASK_SIZE || addr < mmap_min_addr)
1866 return -EINVAL; 1865 return -EINVAL;
1867 1866
1868 error = -EINVAL; 1867 error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
1924 error = -EFAULT; 1923 error = -EFAULT;
1925 goto out; 1924 goto out;
1926 } 1925 }
1927#ifdef CONFIG_STACK_GROWSUP
1928 if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
1929#else
1930 if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
1931#endif
1932 goto out;
1933 if (opt == PR_SET_MM_START_STACK) 1926 if (opt == PR_SET_MM_START_STACK)
1934 mm->start_stack = addr; 1927 mm->start_stack = addr;
1935 else if (opt == PR_SET_MM_ARG_START) 1928 else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
1981 up_read(&mm->mmap_sem); 1974 up_read(&mm->mmap_sem);
1982 return error; 1975 return error;
1983} 1976}
1977
1978static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
1979{
1980 return put_user(me->clear_child_tid, tid_addr);
1981}
1982
1984#else /* CONFIG_CHECKPOINT_RESTORE */ 1983#else /* CONFIG_CHECKPOINT_RESTORE */
1985static int prctl_set_mm(int opt, unsigned long addr, 1984static int prctl_set_mm(int opt, unsigned long addr,
1986 unsigned long arg4, unsigned long arg5) 1985 unsigned long arg4, unsigned long arg5)
1987{ 1986{
1988 return -EINVAL; 1987 return -EINVAL;
1989} 1988}
1989static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
1990{
1991 return -EINVAL;
1992}
1990#endif 1993#endif
1991 1994
1992SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 1995SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2124 else 2127 else
2125 return -EINVAL; 2128 return -EINVAL;
2126 break; 2129 break;
2130 case PR_GET_TID_ADDRESS:
2131 error = prctl_get_tid_address(me, (int __user **)arg2);
2132 break;
2127 default: 2133 default:
2128 return -EINVAL; 2134 return -EINVAL;
2129 } 2135 }
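
The sys.c changes add PR_GET_TID_ADDRESS so checkpoint/restore tools can read back the clear_child_tid pointer established by set_tid_address(2). A possible userspace usage sketch follows; it assumes a kernel built with CONFIG_CHECKPOINT_RESTORE=y and carrying this patch, and defines PR_GET_TID_ADDRESS by hand in case the installed headers predate it.

/* Usage sketch only; fails with EINVAL on kernels without this patch. */
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40	/* value used by this series, if missing */
#endif

int main(void)
{
	int *tid_addr = NULL;

	if (prctl(PR_GET_TID_ADDRESS, &tid_addr, 0, 0, 0) != 0) {
		perror("prctl(PR_GET_TID_ADDRESS)");
		return 1;
	}
	printf("clear_child_tid = %p\n", (void *)tid_addr);
	return 0;
}
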
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9cd928f7a7c6..7e1ce012a851 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev)
297} 297}
298EXPORT_SYMBOL_GPL(clockevents_register_device); 298EXPORT_SYMBOL_GPL(clockevents_register_device);
299 299
300static void clockevents_config(struct clock_event_device *dev, 300void clockevents_config(struct clock_event_device *dev, u32 freq)
301 u32 freq)
302{ 301{
303 u64 sec; 302 u64 sec;
304 303
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..da70c6db496c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void)
576 /* Update jiffies first */ 576 /* Update jiffies first */
577 select_nohz_load_balancer(0); 577 select_nohz_load_balancer(0);
578 tick_do_update_jiffies64(now); 578 tick_do_update_jiffies64(now);
579 update_cpu_load_nohz();
579 580
580#ifndef CONFIG_VIRT_CPU_ACCOUNTING 581#ifndef CONFIG_VIRT_CPU_ACCOUNTING
581 /* 582 /*
@@ -814,6 +815,16 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
814 return HRTIMER_RESTART; 815 return HRTIMER_RESTART;
815} 816}
816 817
818static int sched_skew_tick;
819
820static int __init skew_tick(char *str)
821{
822 get_option(&str, &sched_skew_tick);
823
824 return 0;
825}
826early_param("skew_tick", skew_tick);
827
817/** 828/**
818 * tick_setup_sched_timer - setup the tick emulation timer 829 * tick_setup_sched_timer - setup the tick emulation timer
819 */ 830 */
@@ -831,6 +842,14 @@ void tick_setup_sched_timer(void)
831 /* Get the next period (per cpu) */ 842 /* Get the next period (per cpu) */
832 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 843 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
833 844
845 /* Offset the tick to avert xtime_lock contention. */
846 if (sched_skew_tick) {
847 u64 offset = ktime_to_ns(tick_period) >> 1;
848 do_div(offset, num_possible_cpus());
849 offset *= smp_processor_id();
850 hrtimer_add_expires_ns(&ts->sched_timer, offset);
851 }
852
834 for (;;) { 853 for (;;) {
835 hrtimer_forward(&ts->sched_timer, now, tick_period); 854 hrtimer_forward(&ts->sched_timer, now, tick_period);
836 hrtimer_start_expires(&ts->sched_timer, 855 hrtimer_start_expires(&ts->sched_timer,
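
The skew_tick= parameter introduced above staggers each CPU's periodic tick by a fraction of the tick period so that all CPUs do not contend on xtime_lock at the same instant: the offset is half a tick period divided by the number of possible CPUs, multiplied by the CPU number. A small standalone sketch of that arithmetic, with HZ and the CPU count chosen arbitrarily for illustration:

/* Per-cpu offset: (tick_period / 2 / num_possible_cpus) * cpu. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tick_period_ns = 1000000000ull / 250;	/* HZ=250 example */
	unsigned int num_possible_cpus = 8;

	for (unsigned int cpu = 0; cpu < num_possible_cpus; cpu++) {
		uint64_t offset = (tick_period_ns >> 1) / num_possible_cpus;

		offset *= cpu;
		printf("cpu %u: tick skewed by %llu ns\n",
		       cpu, (unsigned long long)offset);
	}
	return 0;
}
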
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6e46cacf5969..6f46a00a1e8a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
962 timekeeper.xtime.tv_sec++; 962 timekeeper.xtime.tv_sec++;
963 leap = second_overflow(timekeeper.xtime.tv_sec); 963 leap = second_overflow(timekeeper.xtime.tv_sec);
964 timekeeper.xtime.tv_sec += leap; 964 timekeeper.xtime.tv_sec += leap;
965 timekeeper.wall_to_monotonic.tv_sec -= leap;
965 } 966 }
966 967
967 /* Accumulate raw time */ 968 /* Accumulate raw time */
@@ -1077,6 +1078,7 @@ static void update_wall_time(void)
1077 timekeeper.xtime.tv_sec++; 1078 timekeeper.xtime.tv_sec++;
1078 leap = second_overflow(timekeeper.xtime.tv_sec); 1079 leap = second_overflow(timekeeper.xtime.tv_sec);
1079 timekeeper.xtime.tv_sec += leap; 1080 timekeeper.xtime.tv_sec += leap;
1081 timekeeper.wall_to_monotonic.tv_sec -= leap;
1080 } 1082 }
1081 1083
1082 timekeeping_update(false); 1084 timekeeping_update(false);
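
These timekeeping hunks matter because CLOCK_MONOTONIC is derived from xtime plus wall_to_monotonic. Worked example for an inserted leap second, where second_overflow() returns leap = -1: xtime.tv_sec += leap steps CLOCK_REALTIME back one second (23:59:59 repeats), and the newly added wall_to_monotonic.tv_sec -= leap steps the offset forward one second, so their sum, and therefore CLOCK_MONOTONIC and every timer based on it, stays continuous. Without the second adjustment the monotonic clocks would jump backwards together with wall time.
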
diff --git a/lib/btree.c b/lib/btree.c
index e5ec1e9c1aa5..f9a484676cb6 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
319 319
320 if (head->height == 0) 320 if (head->height == 0)
321 return NULL; 321 return NULL;
322retry:
323 longcpy(key, __key, geo->keylen); 322 longcpy(key, __key, geo->keylen);
323retry:
324 dec_key(geo, key); 324 dec_key(geo, key);
325 325
326 node = head->node; 326 node = head->node;
@@ -351,7 +351,7 @@ retry:
351 } 351 }
352miss: 352miss:
353 if (retry_key) { 353 if (retry_key) {
354 __key = retry_key; 354 longcpy(key, retry_key, geo->keylen);
355 retry_key = NULL; 355 retry_key = NULL;
356 goto retry; 356 goto retry;
357 } 357 }
@@ -509,6 +509,7 @@ retry:
509int btree_insert(struct btree_head *head, struct btree_geo *geo, 509int btree_insert(struct btree_head *head, struct btree_geo *geo,
510 unsigned long *key, void *val, gfp_t gfp) 510 unsigned long *key, void *val, gfp_t gfp)
511{ 511{
512 BUG_ON(!val);
512 return btree_insert_level(head, geo, key, val, 1, gfp); 513 return btree_insert_level(head, geo, key, val, 1, gfp);
513} 514}
514EXPORT_SYMBOL_GPL(btree_insert); 515EXPORT_SYMBOL_GPL(btree_insert);
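
The btree_get_prev() fix above moves the initial longcpy() ahead of the retry label and, on a miss, refreshes the scratch key from retry_key rather than re-pointing __key at memory inside a tree node; this appears aimed at keeping __key pointing at the caller's buffer so later writes through it cannot land inside the tree. A generic userspace illustration of that hazard, with memcpy() standing in for longcpy() and all names made up:

/* If an output pointer is re-pointed at internal storage, a later
 * write-back through it clobbers that storage.  Keep a scratch copy. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned long node_key = 50;	/* stands in for a key inside a node */
	unsigned long caller_buf = 100;	/* the caller's __key buffer */
	unsigned long scratch;		/* private working copy */
	unsigned long *out = &caller_buf;

	/* Wrong shape: out = &node_key; ... *out = result;  would
	 * overwrite node_key, i.e. corrupt the internal structure. */

	/* Fixed shape: copy into the scratch buffer, leave 'out' alone. */
	memcpy(&scratch, &node_key, sizeof(scratch));
	scratch--;			/* dec_key() analogue */
	*out = scratch;			/* result goes to the caller only */

	printf("node_key=%lu caller_buf=%lu\n", node_key, caller_buf);
	return 0;
}
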
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d7c878cc006c..e7964296fd50 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
686 * during iterating; it can be zero only at the beginning. 686 * during iterating; it can be zero only at the beginning.
687 * And we cannot overflow iter->next_index in a single step, 687 * And we cannot overflow iter->next_index in a single step,
688 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 688 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
689 *
690 * This condition also used by radix_tree_next_slot() to stop
691 * contiguous iterating, and forbid swithing to the next chunk.
689 */ 692 */
690 index = iter->next_index; 693 index = iter->next_index;
691 if (!index && iter->index) 694 if (!index && iter->index)
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
index 1805a5cc5daa..a95bccb8497d 100644
--- a/lib/raid6/recov.c
+++ b/lib/raid6/recov.c
@@ -22,8 +22,8 @@
22#include <linux/raid/pq.h> 22#include <linux/raid/pq.h>
23 23
24/* Recover two failed data blocks. */ 24/* Recover two failed data blocks. */
25void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, 25static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila,
26 void **ptrs) 26 int failb, void **ptrs)
27{ 27{
28 u8 *p, *q, *dp, *dq; 28 u8 *p, *q, *dp, *dq;
29 u8 px, qx, db; 29 u8 px, qx, db;
@@ -66,7 +66,8 @@ void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,
66} 66}
67 67
68/* Recover failure of one data block plus the P block */ 68/* Recover failure of one data block plus the P block */
69void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) 69static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila,
70 void **ptrs)
70{ 71{
71 u8 *p, *q, *dq; 72 u8 *p, *q, *dq;
72 const u8 *qmul; /* Q multiplier table */ 73 const u8 *qmul; /* Q multiplier table */
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c
index 37ae61930559..ecb710c0b4d9 100644
--- a/lib/raid6/recov_ssse3.c
+++ b/lib/raid6/recov_ssse3.c
@@ -19,8 +19,8 @@ static int raid6_has_ssse3(void)
19 boot_cpu_has(X86_FEATURE_SSSE3); 19 boot_cpu_has(X86_FEATURE_SSSE3);
20} 20}
21 21
22void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, 22static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
23 void **ptrs) 23 int failb, void **ptrs)
24{ 24{
25 u8 *p, *q, *dp, *dq; 25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */ 26 const u8 *pbmul; /* P multiplier table for B data */
@@ -194,7 +194,8 @@ void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb,
194} 194}
195 195
196 196
197void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) 197static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
198 void **ptrs)
198{ 199{
199 u8 *p, *q, *dq; 200 u8 *p, *q, *dq;
200 const u8 *qmul; /* Q multiplier table */ 201 const u8 *qmul; /* Q multiplier table */
diff --git a/mm/Kconfig b/mm/Kconfig
index b2176374b98e..82fed4eb2b6f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -389,3 +389,20 @@ config CLEANCACHE
389 in a negligible performance hit. 389 in a negligible performance hit.
390 390
391 If unsure, say Y to enable cleancache 391 If unsure, say Y to enable cleancache
392
393config FRONTSWAP
394 bool "Enable frontswap to cache swap pages if tmem is present"
395 depends on SWAP
396 default n
397 help
398 Frontswap is so named because it can be thought of as the opposite
399 of a "backing" store for a swap device. The data is stored into
400 "transcendent memory", memory that is not directly accessible or
401 addressable by the kernel and is of unknown and possibly
402 time-varying size. When space in transcendent memory is available,
403 a significant swap I/O reduction may be achieved. When none is
404 available, all frontswap calls are reduced to a single pointer-
405 compare-against-NULL resulting in a negligible performance hit
406 and swap data is stored as normal on the matching swap device.
407
408 If unsure, say Y to enable frontswap.
diff --git a/mm/Makefile b/mm/Makefile
index a156285ce88d..2e2fbbefb99f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
29 29
30obj-$(CONFIG_BOUNCE) += bounce.o 30obj-$(CONFIG_BOUNCE) += bounce.o
31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o 31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
32obj-$(CONFIG_FRONTSWAP) += frontswap.o
32obj-$(CONFIG_HAS_DMA) += dmapool.o 33obj-$(CONFIG_HAS_DMA) += dmapool.o
33obj-$(CONFIG_HUGETLBFS) += hugetlb.o 34obj-$(CONFIG_HUGETLBFS) += hugetlb.o
34obj-$(CONFIG_NUMA) += mempolicy.o 35obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 4ac338af5120..7ea259d82a99 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
236 */ 236 */
237 while (unlikely(too_many_isolated(zone))) { 237 while (unlikely(too_many_isolated(zone))) {
238 /* async migration should just abort */ 238 /* async migration should just abort */
239 if (cc->mode != COMPACT_SYNC) 239 if (!cc->sync)
240 return 0; 240 return 0;
241 241
242 congestion_wait(BLK_RW_ASYNC, HZ/10); 242 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
304 * satisfies the allocation 304 * satisfies the allocation
305 */ 305 */
306 pageblock_nr = low_pfn >> pageblock_order; 306 pageblock_nr = low_pfn >> pageblock_order;
307 if (cc->mode != COMPACT_SYNC && 307 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
308 last_pageblock_nr != pageblock_nr &&
309 !migrate_async_suitable(get_pageblock_migratetype(page))) { 308 !migrate_async_suitable(get_pageblock_migratetype(page))) {
310 low_pfn += pageblock_nr_pages; 309 low_pfn += pageblock_nr_pages;
311 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 310 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
326 continue; 325 continue;
327 } 326 }
328 327
329 if (cc->mode != COMPACT_SYNC) 328 if (!cc->sync)
330 mode |= ISOLATE_ASYNC_MIGRATE; 329 mode |= ISOLATE_ASYNC_MIGRATE;
331 330
332 lruvec = mem_cgroup_page_lruvec(page, zone); 331 lruvec = mem_cgroup_page_lruvec(page, zone);
@@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
361 360
362#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 361#endif /* CONFIG_COMPACTION || CONFIG_CMA */
363#ifdef CONFIG_COMPACTION 362#ifdef CONFIG_COMPACTION
364/*
365 * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
366 * converted to MIGRATE_MOVABLE type, false otherwise.
367 */
368static bool rescue_unmovable_pageblock(struct page *page)
369{
370 unsigned long pfn, start_pfn, end_pfn;
371 struct page *start_page, *end_page;
372
373 pfn = page_to_pfn(page);
374 start_pfn = pfn & ~(pageblock_nr_pages - 1);
375 end_pfn = start_pfn + pageblock_nr_pages;
376
377 start_page = pfn_to_page(start_pfn);
378 end_page = pfn_to_page(end_pfn);
379
380 /* Do not deal with pageblocks that overlap zones */
381 if (page_zone(start_page) != page_zone(end_page))
382 return false;
383
384 for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
385 page++) {
386 if (!pfn_valid_within(pfn))
387 continue;
388
389 if (PageBuddy(page)) {
390 int order = page_order(page);
391
392 pfn += (1 << order) - 1;
393 page += (1 << order) - 1;
394
395 continue;
396 } else if (page_count(page) == 0 || PageLRU(page))
397 continue;
398
399 return false;
400 }
401
402 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
403 move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
404 return true;
405}
406 363
407enum smt_result { 364/* Returns true if the page is within a block suitable for migration to */
408 GOOD_AS_MIGRATION_TARGET, 365static bool suitable_migration_target(struct page *page)
409 FAIL_UNMOVABLE_TARGET,
410 FAIL_BAD_TARGET,
411};
412
413/*
414 * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
415 * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
416 * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
417 */
418static enum smt_result suitable_migration_target(struct page *page,
419 struct compact_control *cc)
420{ 366{
421 367
422 int migratetype = get_pageblock_migratetype(page); 368 int migratetype = get_pageblock_migratetype(page);
423 369
424 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 370 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
425 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 371 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
426 return FAIL_BAD_TARGET; 372 return false;
427 373
428 /* If the page is a large free page, then allow migration */ 374 /* If the page is a large free page, then allow migration */
429 if (PageBuddy(page) && page_order(page) >= pageblock_order) 375 if (PageBuddy(page) && page_order(page) >= pageblock_order)
430 return GOOD_AS_MIGRATION_TARGET; 376 return true;
431 377
432 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 378 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
433 if (cc->mode != COMPACT_ASYNC_UNMOVABLE && 379 if (migrate_async_suitable(migratetype))
434 migrate_async_suitable(migratetype)) 380 return true;
435 return GOOD_AS_MIGRATION_TARGET;
436
437 if (cc->mode == COMPACT_ASYNC_MOVABLE &&
438 migratetype == MIGRATE_UNMOVABLE)
439 return FAIL_UNMOVABLE_TARGET;
440
441 if (cc->mode != COMPACT_ASYNC_MOVABLE &&
442 migratetype == MIGRATE_UNMOVABLE &&
443 rescue_unmovable_pageblock(page))
444 return GOOD_AS_MIGRATION_TARGET;
445 381
446 /* Otherwise skip the block */ 382 /* Otherwise skip the block */
447 return FAIL_BAD_TARGET; 383 return false;
448} 384}
449 385
450/* 386/*
@@ -478,13 +414,6 @@ static void isolate_freepages(struct zone *zone,
478 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; 414 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
479 415
480 /* 416 /*
481 * isolate_freepages() may be called more than once during
482 * compact_zone_order() run and we want only the most recent
483 * count.
484 */
485 cc->nr_pageblocks_skipped = 0;
486
487 /*
488 * Isolate free pages until enough are available to migrate the 417 * Isolate free pages until enough are available to migrate the
489 * pages on cc->migratepages. We stop searching if the migrate 418 * pages on cc->migratepages. We stop searching if the migrate
490 * and free page scanners meet or enough free pages are isolated. 419 * and free page scanners meet or enough free pages are isolated.
@@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone,
492 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; 421 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
493 pfn -= pageblock_nr_pages) { 422 pfn -= pageblock_nr_pages) {
494 unsigned long isolated; 423 unsigned long isolated;
495 enum smt_result ret;
496 424
497 if (!pfn_valid(pfn)) 425 if (!pfn_valid(pfn))
498 continue; 426 continue;
@@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone,
509 continue; 437 continue;
510 438
511 /* Check the block is suitable for migration */ 439 /* Check the block is suitable for migration */
512 ret = suitable_migration_target(page, cc); 440 if (!suitable_migration_target(page))
513 if (ret != GOOD_AS_MIGRATION_TARGET) {
514 if (ret == FAIL_UNMOVABLE_TARGET)
515 cc->nr_pageblocks_skipped++;
516 continue; 441 continue;
517 } 442
518 /* 443 /*
519 * Found a block suitable for isolating free pages from. Now 444 * Found a block suitable for isolating free pages from. Now
520 * we disabled interrupts, double check things are ok and 445 * we disabled interrupts, double check things are ok and
@@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone,
523 */ 448 */
524 isolated = 0; 449 isolated = 0;
525 spin_lock_irqsave(&zone->lock, flags); 450 spin_lock_irqsave(&zone->lock, flags);
526 ret = suitable_migration_target(page, cc); 451 if (suitable_migration_target(page)) {
527 if (ret == GOOD_AS_MIGRATION_TARGET) {
528 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 452 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
529 isolated = isolate_freepages_block(pfn, end_pfn, 453 isolated = isolate_freepages_block(pfn, end_pfn,
530 freelist, false); 454 freelist, false);
531 nr_freepages += isolated; 455 nr_freepages += isolated;
532 } else if (ret == FAIL_UNMOVABLE_TARGET) 456 }
533 cc->nr_pageblocks_skipped++;
534 spin_unlock_irqrestore(&zone->lock, flags); 457 spin_unlock_irqrestore(&zone->lock, flags);
535 458
536 /* 459 /*
@@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
762 685
763 nr_migrate = cc->nr_migratepages; 686 nr_migrate = cc->nr_migratepages;
764 err = migrate_pages(&cc->migratepages, compaction_alloc, 687 err = migrate_pages(&cc->migratepages, compaction_alloc,
765 (unsigned long)&cc->freepages, false, 688 (unsigned long)cc, false,
766 (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT 689 cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
767 : MIGRATE_ASYNC);
768 update_nr_listpages(cc); 690 update_nr_listpages(cc);
769 nr_remaining = cc->nr_migratepages; 691 nr_remaining = cc->nr_migratepages;
770 692
@@ -793,8 +715,7 @@ out:
793 715
794static unsigned long compact_zone_order(struct zone *zone, 716static unsigned long compact_zone_order(struct zone *zone,
795 int order, gfp_t gfp_mask, 717 int order, gfp_t gfp_mask,
796 enum compact_mode mode, 718 bool sync)
797 unsigned long *nr_pageblocks_skipped)
798{ 719{
799 struct compact_control cc = { 720 struct compact_control cc = {
800 .nr_freepages = 0, 721 .nr_freepages = 0,
@@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone,
802 .order = order, 723 .order = order,
803 .migratetype = allocflags_to_migratetype(gfp_mask), 724 .migratetype = allocflags_to_migratetype(gfp_mask),
804 .zone = zone, 725 .zone = zone,
805 .mode = mode, 726 .sync = sync,
806 }; 727 };
807 unsigned long rc;
808
809 INIT_LIST_HEAD(&cc.freepages); 728 INIT_LIST_HEAD(&cc.freepages);
810 INIT_LIST_HEAD(&cc.migratepages); 729 INIT_LIST_HEAD(&cc.migratepages);
811 730
812 rc = compact_zone(zone, &cc); 731 return compact_zone(zone, &cc);
813 *nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
814
815 return rc;
816} 732}
817 733
818int sysctl_extfrag_threshold = 500; 734int sysctl_extfrag_threshold = 500;
@@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
837 struct zoneref *z; 753 struct zoneref *z;
838 struct zone *zone; 754 struct zone *zone;
839 int rc = COMPACT_SKIPPED; 755 int rc = COMPACT_SKIPPED;
840 unsigned long nr_pageblocks_skipped;
841 enum compact_mode mode;
842 756
843 /* 757 /*
844 * Check whether it is worth even starting compaction. The order check is 758 * Check whether it is worth even starting compaction. The order check is
@@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
855 nodemask) { 769 nodemask) {
856 int status; 770 int status;
857 771
858 mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; 772 status = compact_zone_order(zone, order, gfp_mask, sync);
859retry:
860 status = compact_zone_order(zone, order, gfp_mask, mode,
861 &nr_pageblocks_skipped);
862 rc = max(status, rc); 773 rc = max(status, rc);
863 774
864 /* If a normal allocation would succeed, stop compacting */ 775 /* If a normal allocation would succeed, stop compacting */
865 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) 776 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
866 break; 777 break;
867
868 if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
869 if (nr_pageblocks_skipped) {
870 mode = COMPACT_ASYNC_UNMOVABLE;
871 goto retry;
872 }
873 }
874 } 778 }
875 779
876 return rc; 780 return rc;
@@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
904 if (ok && cc->order > zone->compact_order_failed) 808 if (ok && cc->order > zone->compact_order_failed)
905 zone->compact_order_failed = cc->order + 1; 809 zone->compact_order_failed = cc->order + 1;
906 /* Currently async compaction is never deferred. */ 810 /* Currently async compaction is never deferred. */
907 else if (!ok && cc->mode == COMPACT_SYNC) 811 else if (!ok && cc->sync)
908 defer_compaction(zone, cc->order); 812 defer_compaction(zone, cc->order);
909 } 813 }
910 814
@@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
919{ 823{
920 struct compact_control cc = { 824 struct compact_control cc = {
921 .order = order, 825 .order = order,
922 .mode = COMPACT_ASYNC_MOVABLE, 826 .sync = false,
923 }; 827 };
924 828
925 return __compact_pgdat(pgdat, &cc); 829 return __compact_pgdat(pgdat, &cc);
@@ -929,7 +833,7 @@ static int compact_node(int nid)
929{ 833{
930 struct compact_control cc = { 834 struct compact_control cc = {
931 .order = -1, 835 .order = -1,
932 .mode = COMPACT_SYNC, 836 .sync = true,
933 }; 837 };
934 838
935 return __compact_pgdat(NODE_DATA(nid), &cc); 839 return __compact_pgdat(NODE_DATA(nid), &cc);
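
The compaction changes above revert the three-state compact mode (COMPACT_SYNC / COMPACT_ASYNC_MOVABLE / COMPACT_ASYNC_UNMOVABLE) and the pageblock-rescue machinery, going back to a single bool sync in struct compact_control. A trimmed sketch of the caller-visible difference; the structure and order values below are illustrative stand-ins, not the real compact_control:

/* One bool now selects MIGRATE_SYNC_LIGHT vs MIGRATE_ASYNC migration. */
#include <stdbool.h>

struct compact_control_sketch {
	int order;	/* allocation order, or -1 for a full-node run */
	bool sync;	/* synchronous migration? */
};

/* echo 1 > /proc/sys/vm/compact_memory -> compact_node(): full, sync */
static const struct compact_control_sketch manual_run = {
	.order = -1,
	.sync  = true,
};

/* compact_pgdat() keeps background compaction asynchronous */
static const struct compact_control_sketch background_run = {
	.order = 3,	/* example order, whatever the caller needs */
	.sync  = false,
};
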
diff --git a/mm/frontswap.c b/mm/frontswap.c
new file mode 100644
index 000000000000..e25025574a02
--- /dev/null
+++ b/mm/frontswap.c
@@ -0,0 +1,314 @@
1/*
2 * Frontswap frontend
3 *
4 * This code provides the generic "frontend" layer to call a matching
5 * "backend" driver implementation of frontswap. See
6 * Documentation/vm/frontswap.txt for more information.
7 *
8 * Copyright (C) 2009-2012 Oracle Corp. All rights reserved.
9 * Author: Dan Magenheimer
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2.
12 */
13
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/swap.h>
17#include <linux/swapops.h>
18#include <linux/proc_fs.h>
19#include <linux/security.h>
20#include <linux/capability.h>
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/frontswap.h>
25#include <linux/swapfile.h>
26
27/*
28 * frontswap_ops is set by frontswap_register_ops to contain the pointers
29 * to the frontswap "backend" implementation functions.
30 */
31static struct frontswap_ops frontswap_ops __read_mostly;
32
33/*
34 * This global enablement flag reduces overhead on systems where frontswap_ops
35 * has not been registered, so is preferred to the slower alternative: a
36 * function call that checks a non-global.
37 */
38bool frontswap_enabled __read_mostly;
39EXPORT_SYMBOL(frontswap_enabled);
40
41/*
42 * If enabled, frontswap_store will return failure even on success. As
43 * a result, the swap subsystem will always write the page to swap, in
44 * effect converting frontswap into a writethrough cache. In this mode,
45 * there is no direct reduction in swap writes, but a frontswap backend
46 * can unilaterally "reclaim" any pages in use with no data loss, thus
 47 * providing increased control over maximum memory usage due to frontswap.
48 */
49static bool frontswap_writethrough_enabled __read_mostly;
50
51#ifdef CONFIG_DEBUG_FS
52/*
53 * Counters available via /sys/kernel/debug/frontswap (if debugfs is
54 * properly configured). These are for information only so are not protected
55 * against increment races.
56 */
57static u64 frontswap_loads;
58static u64 frontswap_succ_stores;
59static u64 frontswap_failed_stores;
60static u64 frontswap_invalidates;
61
62static inline void inc_frontswap_loads(void) {
63 frontswap_loads++;
64}
65static inline void inc_frontswap_succ_stores(void) {
66 frontswap_succ_stores++;
67}
68static inline void inc_frontswap_failed_stores(void) {
69 frontswap_failed_stores++;
70}
71static inline void inc_frontswap_invalidates(void) {
72 frontswap_invalidates++;
73}
74#else
75static inline void inc_frontswap_loads(void) { }
76static inline void inc_frontswap_succ_stores(void) { }
77static inline void inc_frontswap_failed_stores(void) { }
78static inline void inc_frontswap_invalidates(void) { }
79#endif
80/*
81 * Register operations for frontswap, returning previous thus allowing
82 * detection of multiple backends and possible nesting.
83 */
84struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
85{
86 struct frontswap_ops old = frontswap_ops;
87
88 frontswap_ops = *ops;
89 frontswap_enabled = true;
90 return old;
91}
92EXPORT_SYMBOL(frontswap_register_ops);
93
94/*
95 * Enable/disable frontswap writethrough (see above).
96 */
97void frontswap_writethrough(bool enable)
98{
99 frontswap_writethrough_enabled = enable;
100}
101EXPORT_SYMBOL(frontswap_writethrough);
102
103/*
104 * Called when a swap device is swapon'd.
105 */
106void __frontswap_init(unsigned type)
107{
108 struct swap_info_struct *sis = swap_info[type];
109
110 BUG_ON(sis == NULL);
111 if (sis->frontswap_map == NULL)
112 return;
113 if (frontswap_enabled)
114 (*frontswap_ops.init)(type);
115}
116EXPORT_SYMBOL(__frontswap_init);
117
118/*
119 * "Store" data from a page to frontswap and associate it with the page's
120 * swaptype and offset. Page must be locked and in the swap cache.
121 * If frontswap already contains a page with matching swaptype and
 122 * offset, the frontswap implementation may either overwrite the data and
123 * return success or invalidate the page from frontswap and return failure.
124 */
125int __frontswap_store(struct page *page)
126{
127 int ret = -1, dup = 0;
128 swp_entry_t entry = { .val = page_private(page), };
129 int type = swp_type(entry);
130 struct swap_info_struct *sis = swap_info[type];
131 pgoff_t offset = swp_offset(entry);
132
133 BUG_ON(!PageLocked(page));
134 BUG_ON(sis == NULL);
135 if (frontswap_test(sis, offset))
136 dup = 1;
137 ret = (*frontswap_ops.store)(type, offset, page);
138 if (ret == 0) {
139 frontswap_set(sis, offset);
140 inc_frontswap_succ_stores();
141 if (!dup)
142 atomic_inc(&sis->frontswap_pages);
143 } else if (dup) {
144 /*
 145 * failed dup always results in automatic invalidate of
 146 * the (older) page from frontswap
147 */
148 frontswap_clear(sis, offset);
149 atomic_dec(&sis->frontswap_pages);
150 inc_frontswap_failed_stores();
151 } else
152 inc_frontswap_failed_stores();
153 if (frontswap_writethrough_enabled)
154 /* report failure so swap also writes to swap device */
155 ret = -1;
156 return ret;
157}
158EXPORT_SYMBOL(__frontswap_store);
159
160/*
161 * "Get" data from frontswap associated with swaptype and offset that were
162 * specified when the data was put to frontswap and use it to fill the
163 * specified page with data. Page must be locked and in the swap cache.
164 */
165int __frontswap_load(struct page *page)
166{
167 int ret = -1;
168 swp_entry_t entry = { .val = page_private(page), };
169 int type = swp_type(entry);
170 struct swap_info_struct *sis = swap_info[type];
171 pgoff_t offset = swp_offset(entry);
172
173 BUG_ON(!PageLocked(page));
174 BUG_ON(sis == NULL);
175 if (frontswap_test(sis, offset))
176 ret = (*frontswap_ops.load)(type, offset, page);
177 if (ret == 0)
178 inc_frontswap_loads();
179 return ret;
180}
181EXPORT_SYMBOL(__frontswap_load);
182
183/*
184 * Invalidate any data from frontswap associated with the specified swaptype
185 * and offset so that a subsequent "get" will fail.
186 */
187void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
188{
189 struct swap_info_struct *sis = swap_info[type];
190
191 BUG_ON(sis == NULL);
192 if (frontswap_test(sis, offset)) {
193 (*frontswap_ops.invalidate_page)(type, offset);
194 atomic_dec(&sis->frontswap_pages);
195 frontswap_clear(sis, offset);
196 inc_frontswap_invalidates();
197 }
198}
199EXPORT_SYMBOL(__frontswap_invalidate_page);
200
201/*
202 * Invalidate all data from frontswap associated with all offsets for the
203 * specified swaptype.
204 */
205void __frontswap_invalidate_area(unsigned type)
206{
207 struct swap_info_struct *sis = swap_info[type];
208
209 BUG_ON(sis == NULL);
210 if (sis->frontswap_map == NULL)
211 return;
212 (*frontswap_ops.invalidate_area)(type);
213 atomic_set(&sis->frontswap_pages, 0);
214 memset(sis->frontswap_map, 0, sis->max / sizeof(long));
215}
216EXPORT_SYMBOL(__frontswap_invalidate_area);
217
218/*
219 * Frontswap, like a true swap device, may unnecessarily retain pages
220 * under certain circumstances; "shrink" frontswap is essentially a
221 * "partial swapoff" and works by calling try_to_unuse to attempt to
222 * unuse enough frontswap pages to attempt to -- subject to memory
223 * constraints -- reduce the number of pages in frontswap to the
224 * number given in the parameter target_pages.
225 */
226void frontswap_shrink(unsigned long target_pages)
227{
228 struct swap_info_struct *si = NULL;
229 int si_frontswap_pages;
230 unsigned long total_pages = 0, total_pages_to_unuse;
231 unsigned long pages = 0, pages_to_unuse = 0;
232 int type;
233 bool locked = false;
234
235 /*
236 * we don't want to hold swap_lock while doing a very
237 * lengthy try_to_unuse, but swap_list may change
238 * so restart scan from swap_list.head each time
239 */
240 spin_lock(&swap_lock);
241 locked = true;
242 total_pages = 0;
243 for (type = swap_list.head; type >= 0; type = si->next) {
244 si = swap_info[type];
245 total_pages += atomic_read(&si->frontswap_pages);
246 }
247 if (total_pages <= target_pages)
248 goto out;
249 total_pages_to_unuse = total_pages - target_pages;
250 for (type = swap_list.head; type >= 0; type = si->next) {
251 si = swap_info[type];
252 si_frontswap_pages = atomic_read(&si->frontswap_pages);
253 if (total_pages_to_unuse < si_frontswap_pages)
254 pages = pages_to_unuse = total_pages_to_unuse;
255 else {
256 pages = si_frontswap_pages;
257 pages_to_unuse = 0; /* unuse all */
258 }
259 /* ensure there is enough RAM to fetch pages from frontswap */
260 if (security_vm_enough_memory_mm(current->mm, pages))
261 continue;
262 vm_unacct_memory(pages);
263 break;
264 }
265 if (type < 0)
266 goto out;
267 locked = false;
268 spin_unlock(&swap_lock);
269 try_to_unuse(type, true, pages_to_unuse);
270out:
271 if (locked)
272 spin_unlock(&swap_lock);
273 return;
274}
275EXPORT_SYMBOL(frontswap_shrink);
276
277/*
278 * Count and return the number of frontswap pages across all
279 * swap devices. This is exported so that backend drivers can
280 * determine current usage without reading debugfs.
281 */
282unsigned long frontswap_curr_pages(void)
283{
284 int type;
285 unsigned long totalpages = 0;
286 struct swap_info_struct *si = NULL;
287
288 spin_lock(&swap_lock);
289 for (type = swap_list.head; type >= 0; type = si->next) {
290 si = swap_info[type];
291 totalpages += atomic_read(&si->frontswap_pages);
292 }
293 spin_unlock(&swap_lock);
294 return totalpages;
295}
296EXPORT_SYMBOL(frontswap_curr_pages);
297
298static int __init init_frontswap(void)
299{
300#ifdef CONFIG_DEBUG_FS
301 struct dentry *root = debugfs_create_dir("frontswap", NULL);
302 if (root == NULL)
303 return -ENXIO;
304 debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
305 debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
306 debugfs_create_u64("failed_stores", S_IRUGO, root,
307 &frontswap_failed_stores);
308 debugfs_create_u64("invalidates", S_IRUGO,
309 root, &frontswap_invalidates);
310#endif
311 return 0;
312}
313
314module_init(init_frontswap);
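The two exports above, frontswap_curr_pages() and frontswap_shrink(), are the only knobs a backend gets for policing its own footprint. A hedged sketch of how a backend might use them from a periodic worker follows; only the two frontswap calls come from the listing above, the policy hook is made up.

/*
 * Hedged sketch, not part of this merge: a backend-side reclaim helper.
 * frontswap_curr_pages() and frontswap_shrink() are real symbols (both
 * exported above); my_backend_page_limit() is a hypothetical policy hook.
 */
static void my_backend_reclaim(void)
{
	unsigned long limit = my_backend_page_limit();

	if (frontswap_curr_pages() > limit)
		frontswap_shrink(limit);	/* "partial swapoff" down to limit pages */
}

Note that frontswap_shrink() takes the number of pages that should remain in frontswap, not the number to evict, which is why the same value feeds both the comparison and the call.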
diff --git a/mm/internal.h b/mm/internal.h
index 5cbb78190041..2ba87fbfb75b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page);
94/* 94/*
95 * in mm/page_alloc.c 95 * in mm/page_alloc.c
96 */ 96 */
97extern void set_pageblock_migratetype(struct page *page, int migratetype);
98extern int move_freepages_block(struct zone *zone, struct page *page,
99 int migratetype);
100extern void __free_pages_bootmem(struct page *page, unsigned int order); 97extern void __free_pages_bootmem(struct page *page, unsigned int order);
101extern void prep_compound_page(struct page *page, unsigned long order); 98extern void prep_compound_page(struct page *page, unsigned long order);
102#ifdef CONFIG_MEMORY_FAILURE 99#ifdef CONFIG_MEMORY_FAILURE
@@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page);
104#endif 101#endif
105 102
106#if defined CONFIG_COMPACTION || defined CONFIG_CMA 103#if defined CONFIG_COMPACTION || defined CONFIG_CMA
107#include <linux/compaction.h>
108 104
109/* 105/*
110 * in mm/compaction.c 106 * in mm/compaction.c
@@ -123,14 +119,11 @@ struct compact_control {
123 unsigned long nr_migratepages; /* Number of pages to migrate */ 119 unsigned long nr_migratepages; /* Number of pages to migrate */
124 unsigned long free_pfn; /* isolate_freepages search base */ 120 unsigned long free_pfn; /* isolate_freepages search base */
125 unsigned long migrate_pfn; /* isolate_migratepages search base */ 121 unsigned long migrate_pfn; /* isolate_migratepages search base */
126 enum compact_mode mode; /* Compaction mode */ 122 bool sync; /* Synchronous migration */
127 123
128 int order; /* order a direct compactor needs */ 124 int order; /* order a direct compactor needs */
129 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 125 int migratetype; /* MOVABLE, RECLAIMABLE etc */
130 struct zone *zone; 126 struct zone *zone;
131
132 /* Number of UNMOVABLE destination pageblocks skipped during scan */
133 unsigned long nr_pageblocks_skipped;
134}; 127};
135 128
136unsigned long 129unsigned long
diff --git a/mm/migrate.c b/mm/migrate.c
index ab81d482ae6f..be26d5cbe56b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -436,7 +436,10 @@ void migrate_page_copy(struct page *newpage, struct page *page)
436 * is actually a signal that all of the page has become dirty. 436 * is actually a signal that all of the page has become dirty.
437 * Whereas only part of our page may be dirty. 437 * Whereas only part of our page may be dirty.
438 */ 438 */
439 __set_page_dirty_nobuffers(newpage); 439 if (PageSwapBacked(page))
440 SetPageDirty(newpage);
441 else
442 __set_page_dirty_nobuffers(newpage);
440 } 443 }
441 444
442 mlock_migrate_page(newpage, page); 445 mlock_migrate_page(newpage, page);
diff --git a/mm/nommu.c b/mm/nommu.c
index c4acfbc09972..d4b0c10872de 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1486,7 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1486 1486
1487 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); 1487 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1488 1488
1489 ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1489 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1490 1490
1491 if (file) 1491 if (file)
1492 fput(file); 1492 fput(file);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ed0e19677360..416637f0e924 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -183,7 +183,7 @@ static bool oom_unkillable_task(struct task_struct *p,
183unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, 183unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
184 const nodemask_t *nodemask, unsigned long totalpages) 184 const nodemask_t *nodemask, unsigned long totalpages)
185{ 185{
186 unsigned long points; 186 long points;
187 187
188 if (oom_unkillable_task(p, memcg, nodemask)) 188 if (oom_unkillable_task(p, memcg, nodemask))
189 return 0; 189 return 0;
@@ -223,7 +223,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
223 * Never return 0 for an eligible task regardless of the root bonus and 223 * Never return 0 for an eligible task regardless of the root bonus and
224 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). 224 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
225 */ 225 */
226 return points ? points : 1; 226 return points > 0 ? points : 1;
227} 227}
228 228
229/* 229/*
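The switch from unsigned long to long matters because, per the comment retained in this hunk, the badness score folds in a root-process bonus and oom_score_adj, either of which can push an intermediate value below zero; with an unsigned type that underflow wraps to a huge score. A standalone illustration of the wrap and of the new "points > 0 ? points : 1" clamp (the numbers are made up):

#include <stdio.h>

int main(void)
{
	unsigned long upoints = 10;	/* old type: unsigned long */
	long spoints = 10;		/* new type: long */

	upoints -= 30;	/* bonus larger than the score: wraps to a huge value */
	spoints -= 30;	/* simply goes to -20 */

	printf("unsigned result: %lu\n", upoints);
	printf("signed result:   %ld, reported badness: %ld\n",
	       spoints, spoints > 0 ? spoints : 1);
	return 0;
}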
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6092f331b32e..44030096da63 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
219 219
220int page_group_by_mobility_disabled __read_mostly; 220int page_group_by_mobility_disabled __read_mostly;
221 221
222void set_pageblock_migratetype(struct page *page, int migratetype) 222static void set_pageblock_migratetype(struct page *page, int migratetype)
223{ 223{
224 224
225 if (unlikely(page_group_by_mobility_disabled)) 225 if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
954 return pages_moved; 954 return pages_moved;
955} 955}
956 956
957int move_freepages_block(struct zone *zone, struct page *page, 957static int move_freepages_block(struct zone *zone, struct page *page,
958 int migratetype) 958 int migratetype)
959{ 959{
960 unsigned long start_pfn, end_pfn; 960 unsigned long start_pfn, end_pfn;
961 struct page *start_page, *end_page; 961 struct page *start_page, *end_page;
@@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5651 .nr_migratepages = 0, 5651 .nr_migratepages = 0,
5652 .order = -1, 5652 .order = -1,
5653 .zone = page_zone(pfn_to_page(start)), 5653 .zone = page_zone(pfn_to_page(start)),
5654 .mode = COMPACT_SYNC, 5654 .sync = true,
5655 }; 5655 };
5656 INIT_LIST_HEAD(&cc.migratepages); 5656 INIT_LIST_HEAD(&cc.migratepages);
5657 5657
diff --git a/mm/page_io.c b/mm/page_io.c
index dc76b4d0611e..34f02923744c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -18,6 +18,7 @@
18#include <linux/bio.h> 18#include <linux/bio.h>
19#include <linux/swapops.h> 19#include <linux/swapops.h>
20#include <linux/writeback.h> 20#include <linux/writeback.h>
21#include <linux/frontswap.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22 23
23static struct bio *get_swap_bio(gfp_t gfp_flags, 24static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
98 unlock_page(page); 99 unlock_page(page);
99 goto out; 100 goto out;
100 } 101 }
102 if (frontswap_store(page) == 0) {
103 set_page_writeback(page);
104 unlock_page(page);
105 end_page_writeback(page);
106 goto out;
107 }
101 bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); 108 bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
102 if (bio == NULL) { 109 if (bio == NULL) {
103 set_page_dirty(page); 110 set_page_dirty(page);
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page)
122 129
123 VM_BUG_ON(!PageLocked(page)); 130 VM_BUG_ON(!PageLocked(page));
124 VM_BUG_ON(PageUptodate(page)); 131 VM_BUG_ON(PageUptodate(page));
132 if (frontswap_load(page) == 0) {
133 SetPageUptodate(page);
134 unlock_page(page);
135 goto out;
136 }
125 bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); 137 bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
126 if (bio == NULL) { 138 if (bio == NULL) {
127 unlock_page(page); 139 unlock_page(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 585bd220a21e..a15a466d0d1d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
683 mutex_lock(&shmem_swaplist_mutex); 683 mutex_lock(&shmem_swaplist_mutex);
684 /* 684 /*
685 * We needed to drop mutex to make that restrictive page 685 * We needed to drop mutex to make that restrictive page
686 * allocation; but the inode might already be freed by now, 686 * allocation, but the inode might have been freed while we
687 * and we cannot refer to inode or mapping or info to check. 687 * dropped it: although a racing shmem_evict_inode() cannot
688 * However, we do hold page lock on the PageSwapCache page, 688 * complete without emptying the radix_tree, our page lock
689 * so can check if that still has our reference remaining. 689 * on this swapcache page is not enough to prevent that -
690 * free_swap_and_cache() of our swap entry will only
691 * trylock_page(), removing swap from radix_tree whatever.
692 *
693 * We must not proceed to shmem_add_to_page_cache() if the
694 * inode has been freed, but of course we cannot rely on
695 * inode or mapping or info to check that. However, we can
696 * safely check if our swap entry is still in use (and here
697 * it can't have got reused for another page): if it's still
698 * in use, then the inode cannot have been freed yet, and we
699 * can safely proceed (if it's no longer in use, that tells
700 * nothing about the inode, but we don't need to unuse swap).
690 */ 701 */
691 if (!page_swapcount(*pagep)) 702 if (!page_swapcount(*pagep))
692 error = -ENOENT; 703 error = -ENOENT;
@@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
730 741
731 /* 742 /*
732 * There's a faint possibility that swap page was replaced before 743 * There's a faint possibility that swap page was replaced before
733 * caller locked it: it will come back later with the right page. 744 * caller locked it: caller will come back later with the right page.
734 */ 745 */
735 if (unlikely(!PageSwapCache(page))) 746 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
736 goto out; 747 goto out;
737 748
738 /* 749 /*
@@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
995 newpage = shmem_alloc_page(gfp, info, index); 1006 newpage = shmem_alloc_page(gfp, info, index);
996 if (!newpage) 1007 if (!newpage)
997 return -ENOMEM; 1008 return -ENOMEM;
998 VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
999 1009
1000 *pagep = newpage;
1001 page_cache_get(newpage); 1010 page_cache_get(newpage);
1002 copy_highpage(newpage, oldpage); 1011 copy_highpage(newpage, oldpage);
1012 flush_dcache_page(newpage);
1003 1013
1004 VM_BUG_ON(!PageLocked(oldpage));
1005 __set_page_locked(newpage); 1014 __set_page_locked(newpage);
1006 VM_BUG_ON(!PageUptodate(oldpage));
1007 SetPageUptodate(newpage); 1015 SetPageUptodate(newpage);
1008 VM_BUG_ON(!PageSwapBacked(oldpage));
1009 SetPageSwapBacked(newpage); 1016 SetPageSwapBacked(newpage);
1010 VM_BUG_ON(!swap_index);
1011 set_page_private(newpage, swap_index); 1017 set_page_private(newpage, swap_index);
1012 VM_BUG_ON(!PageSwapCache(oldpage));
1013 SetPageSwapCache(newpage); 1018 SetPageSwapCache(newpage);
1014 1019
1015 /* 1020 /*
@@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1019 spin_lock_irq(&swap_mapping->tree_lock); 1024 spin_lock_irq(&swap_mapping->tree_lock);
1020 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, 1025 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1021 newpage); 1026 newpage);
1022 __inc_zone_page_state(newpage, NR_FILE_PAGES); 1027 if (!error) {
1023 __dec_zone_page_state(oldpage, NR_FILE_PAGES); 1028 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1029 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1030 }
1024 spin_unlock_irq(&swap_mapping->tree_lock); 1031 spin_unlock_irq(&swap_mapping->tree_lock);
1025 BUG_ON(error);
1026 1032
1027 mem_cgroup_replace_page_cache(oldpage, newpage); 1033 if (unlikely(error)) {
1028 lru_cache_add_anon(newpage); 1034 /*
1035 * Is this possible? I think not, now that our callers check
1036 * both PageSwapCache and page_private after getting page lock;
1037 * but be defensive. Reverse old to newpage for clear and free.
1038 */
1039 oldpage = newpage;
1040 } else {
1041 mem_cgroup_replace_page_cache(oldpage, newpage);
1042 lru_cache_add_anon(newpage);
1043 *pagep = newpage;
1044 }
1029 1045
1030 ClearPageSwapCache(oldpage); 1046 ClearPageSwapCache(oldpage);
1031 set_page_private(oldpage, 0); 1047 set_page_private(oldpage, 0);
@@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1033 unlock_page(oldpage); 1049 unlock_page(oldpage);
1034 page_cache_release(oldpage); 1050 page_cache_release(oldpage);
1035 page_cache_release(oldpage); 1051 page_cache_release(oldpage);
1036 return 0; 1052 return error;
1037} 1053}
1038 1054
1039/* 1055/*
@@ -1107,7 +1123,8 @@ repeat:
1107 1123
1108 /* We have to do this with page locked to prevent races */ 1124 /* We have to do this with page locked to prevent races */
1109 lock_page(page); 1125 lock_page(page);
1110 if (!PageSwapCache(page) || page->mapping) { 1126 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1127 page->mapping) {
1111 error = -EEXIST; /* try again */ 1128 error = -EEXIST; /* try again */
1112 goto failed; 1129 goto failed;
1113 } 1130 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 457b10baef59..de5bc51c4a66 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/poll.h> 32#include <linux/poll.h>
33#include <linux/oom.h> 33#include <linux/oom.h>
34#include <linux/frontswap.h>
35#include <linux/swapfile.h>
34 36
35#include <asm/pgtable.h> 37#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
42static void free_swap_count_continuations(struct swap_info_struct *); 44static void free_swap_count_continuations(struct swap_info_struct *);
43static sector_t map_swap_entry(swp_entry_t, struct block_device**); 45static sector_t map_swap_entry(swp_entry_t, struct block_device**);
44 46
45static DEFINE_SPINLOCK(swap_lock); 47DEFINE_SPINLOCK(swap_lock);
46static unsigned int nr_swapfiles; 48static unsigned int nr_swapfiles;
47long nr_swap_pages; 49long nr_swap_pages;
48long total_swap_pages; 50long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
53static const char Bad_offset[] = "Bad swap offset entry "; 55static const char Bad_offset[] = "Bad swap offset entry ";
54static const char Unused_offset[] = "Unused swap offset entry "; 56static const char Unused_offset[] = "Unused swap offset entry ";
55 57
56static struct swap_list_t swap_list = {-1, -1}; 58struct swap_list_t swap_list = {-1, -1};
57 59
58static struct swap_info_struct *swap_info[MAX_SWAPFILES]; 60struct swap_info_struct *swap_info[MAX_SWAPFILES];
59 61
60static DEFINE_MUTEX(swapon_mutex); 62static DEFINE_MUTEX(swapon_mutex);
61 63
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
556 swap_list.next = p->type; 558 swap_list.next = p->type;
557 nr_swap_pages++; 559 nr_swap_pages++;
558 p->inuse_pages--; 560 p->inuse_pages--;
561 frontswap_invalidate_page(p->type, offset);
559 if ((p->flags & SWP_BLKDEV) && 562 if ((p->flags & SWP_BLKDEV) &&
560 disk->fops->swap_slot_free_notify) 563 disk->fops->swap_slot_free_notify)
561 disk->fops->swap_slot_free_notify(p->bdev, offset); 564 disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm,
985} 988}
986 989
987/* 990/*
988 * Scan swap_map from current position to next entry still in use. 991 * Scan swap_map (or frontswap_map if frontswap parameter is true)
992 * from current position to next entry still in use.
989 * Recycle to start on reaching the end, returning 0 when empty. 993 * Recycle to start on reaching the end, returning 0 when empty.
990 */ 994 */
991static unsigned int find_next_to_unuse(struct swap_info_struct *si, 995static unsigned int find_next_to_unuse(struct swap_info_struct *si,
992 unsigned int prev) 996 unsigned int prev, bool frontswap)
993{ 997{
994 unsigned int max = si->max; 998 unsigned int max = si->max;
995 unsigned int i = prev; 999 unsigned int i = prev;
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1015 prev = 0; 1019 prev = 0;
1016 i = 1; 1020 i = 1;
1017 } 1021 }
1022 if (frontswap) {
1023 if (frontswap_test(si, i))
1024 break;
1025 else
1026 continue;
1027 }
1018 count = si->swap_map[i]; 1028 count = si->swap_map[i];
1019 if (count && swap_count(count) != SWAP_MAP_BAD) 1029 if (count && swap_count(count) != SWAP_MAP_BAD)
1020 break; 1030 break;
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1026 * We completely avoid races by reading each swap page in advance, 1036 * We completely avoid races by reading each swap page in advance,
1027 * and then search for the process using it. All the necessary 1037 * and then search for the process using it. All the necessary
1028 * page table adjustments can then be made atomically. 1038 * page table adjustments can then be made atomically.
1039 *
1040 * if the boolean frontswap is true, only unuse pages_to_unuse pages;
1041 * pages_to_unuse==0 means all pages; ignored if frontswap is false
1029 */ 1042 */
1030static int try_to_unuse(unsigned int type) 1043int try_to_unuse(unsigned int type, bool frontswap,
1044 unsigned long pages_to_unuse)
1031{ 1045{
1032 struct swap_info_struct *si = swap_info[type]; 1046 struct swap_info_struct *si = swap_info[type];
1033 struct mm_struct *start_mm; 1047 struct mm_struct *start_mm;
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
1060 * one pass through swap_map is enough, but not necessarily: 1074 * one pass through swap_map is enough, but not necessarily:
1061 * there are races when an instance of an entry might be missed. 1075 * there are races when an instance of an entry might be missed.
1062 */ 1076 */
1063 while ((i = find_next_to_unuse(si, i)) != 0) { 1077 while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
1064 if (signal_pending(current)) { 1078 if (signal_pending(current)) {
1065 retval = -EINTR; 1079 retval = -EINTR;
1066 break; 1080 break;
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type)
1227 * interactive performance. 1241 * interactive performance.
1228 */ 1242 */
1229 cond_resched(); 1243 cond_resched();
1244 if (frontswap && pages_to_unuse > 0) {
1245 if (!--pages_to_unuse)
1246 break;
1247 }
1230 } 1248 }
1231 1249
1232 mmput(start_mm); 1250 mmput(start_mm);
@@ -1486,7 +1504,8 @@ bad_bmap:
1486} 1504}
1487 1505
1488static void enable_swap_info(struct swap_info_struct *p, int prio, 1506static void enable_swap_info(struct swap_info_struct *p, int prio,
1489 unsigned char *swap_map) 1507 unsigned char *swap_map,
1508 unsigned long *frontswap_map)
1490{ 1509{
1491 int i, prev; 1510 int i, prev;
1492 1511
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1496 else 1515 else
1497 p->prio = --least_priority; 1516 p->prio = --least_priority;
1498 p->swap_map = swap_map; 1517 p->swap_map = swap_map;
1518 frontswap_map_set(p, frontswap_map);
1499 p->flags |= SWP_WRITEOK; 1519 p->flags |= SWP_WRITEOK;
1500 nr_swap_pages += p->pages; 1520 nr_swap_pages += p->pages;
1501 total_swap_pages += p->pages; 1521 total_swap_pages += p->pages;
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1512 swap_list.head = swap_list.next = p->type; 1532 swap_list.head = swap_list.next = p->type;
1513 else 1533 else
1514 swap_info[prev]->next = p->type; 1534 swap_info[prev]->next = p->type;
1535 frontswap_init(p->type);
1515 spin_unlock(&swap_lock); 1536 spin_unlock(&swap_lock);
1516} 1537}
1517 1538
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1585 spin_unlock(&swap_lock); 1606 spin_unlock(&swap_lock);
1586 1607
1587 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1608 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1588 err = try_to_unuse(type); 1609 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1589 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1610 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
1590 1611
1591 if (err) { 1612 if (err) {
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1596 * sys_swapoff for this swap_info_struct at this point. 1617 * sys_swapoff for this swap_info_struct at this point.
1597 */ 1618 */
1598 /* re-insert swap space back into swap_list */ 1619 /* re-insert swap space back into swap_list */
1599 enable_swap_info(p, p->prio, p->swap_map); 1620 enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
1600 goto out_dput; 1621 goto out_dput;
1601 } 1622 }
1602 1623
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1622 swap_map = p->swap_map; 1643 swap_map = p->swap_map;
1623 p->swap_map = NULL; 1644 p->swap_map = NULL;
1624 p->flags = 0; 1645 p->flags = 0;
1646 frontswap_invalidate_area(type);
1625 spin_unlock(&swap_lock); 1647 spin_unlock(&swap_lock);
1626 mutex_unlock(&swapon_mutex); 1648 mutex_unlock(&swapon_mutex);
1627 vfree(swap_map); 1649 vfree(swap_map);
1650 vfree(frontswap_map_get(p));
1628 /* Destroy swap account informatin */ 1651 /* Destroy swap account informatin */
1629 swap_cgroup_swapoff(type); 1652 swap_cgroup_swapoff(type);
1630 1653
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1988 sector_t span; 2011 sector_t span;
1989 unsigned long maxpages; 2012 unsigned long maxpages;
1990 unsigned char *swap_map = NULL; 2013 unsigned char *swap_map = NULL;
2014 unsigned long *frontswap_map = NULL;
1991 struct page *page = NULL; 2015 struct page *page = NULL;
1992 struct inode *inode = NULL; 2016 struct inode *inode = NULL;
1993 2017
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2071 error = nr_extents; 2095 error = nr_extents;
2072 goto bad_swap; 2096 goto bad_swap;
2073 } 2097 }
2098 /* frontswap enabled? set up bit-per-page map for frontswap */
2099 if (frontswap_enabled)
2100 frontswap_map = vzalloc(maxpages / sizeof(long));
2074 2101
2075 if (p->bdev) { 2102 if (p->bdev) {
2076 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2103 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2086 if (swap_flags & SWAP_FLAG_PREFER) 2113 if (swap_flags & SWAP_FLAG_PREFER)
2087 prio = 2114 prio =
2088 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2115 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2089 enable_swap_info(p, prio, swap_map); 2116 enable_swap_info(p, prio, swap_map, frontswap_map);
2090 2117
2091 printk(KERN_INFO "Adding %uk swap on %s. " 2118 printk(KERN_INFO "Adding %uk swap on %s. "
2092 "Priority:%d extents:%d across:%lluk %s%s\n", 2119 "Priority:%d extents:%d across:%lluk %s%s%s\n",
2093 p->pages<<(PAGE_SHIFT-10), name, p->prio, 2120 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2094 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2121 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2095 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2122 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2096 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2123 (p->flags & SWP_DISCARDABLE) ? "D" : "",
2124 (frontswap_map) ? "FS" : "");
2097 2125
2098 mutex_unlock(&swapon_mutex); 2126 mutex_unlock(&swapon_mutex);
2099 atomic_inc(&proc_poll_event); 2127 atomic_inc(&proc_poll_event);
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5476bc0a1eac..b4b572e8c100 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,6 @@
1tools/perf 1tools/perf
2tools/scripts
3tools/lib/traceevent
2include/linux/const.h 4include/linux/const.h
3include/linux/perf_event.h 5include/linux/perf_event.h
4include/linux/rbtree.h 6include/linux/rbtree.h
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8c767c6bca91..25249f76329d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
152 152
153 if (symbol_conf.use_callchain) { 153 if (symbol_conf.use_callchain) {
154 err = callchain_append(he->callchain, 154 err = callchain_append(he->callchain,
155 &evsel->hists.callchain_cursor, 155 &callchain_cursor,
156 sample->period); 156 sample->period);
157 if (err) 157 if (err)
158 return err; 158 return err;
@@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
162 * so we don't allocated the extra space needed because the stdio 162 * so we don't allocated the extra space needed because the stdio
163 * code will not use it. 163 * code will not use it.
164 */ 164 */
165 if (al->sym != NULL && use_browser > 0) { 165 if (he->ms.sym != NULL && use_browser > 0) {
166 struct annotation *notes = symbol__annotation(he->ms.sym); 166 struct annotation *notes = symbol__annotation(he->ms.sym);
167 167
168 assert(evsel != NULL); 168 assert(evsel != NULL);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 62ae30d34fa6..262589991ea4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1129,7 +1129,7 @@ static int add_default_attributes(void)
1129 return 0; 1129 return 0;
1130 1130
1131 if (!evsel_list->nr_entries) { 1131 if (!evsel_list->nr_entries) {
1132 if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) 1132 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1133 return -1; 1133 return -1;
1134 } 1134 }
1135 1135
@@ -1139,21 +1139,21 @@ static int add_default_attributes(void)
1139 return 0; 1139 return 0;
1140 1140
1141 /* Append detailed run extra attributes: */ 1141 /* Append detailed run extra attributes: */
1142 if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) 1142 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1143 return -1; 1143 return -1;
1144 1144
1145 if (detailed_run < 2) 1145 if (detailed_run < 2)
1146 return 0; 1146 return 0;
1147 1147
1148 /* Append very detailed run extra attributes: */ 1148 /* Append very detailed run extra attributes: */
1149 if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) 1149 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1150 return -1; 1150 return -1;
1151 1151
1152 if (detailed_run < 3) 1152 if (detailed_run < 3)
1153 return 0; 1153 return 0;
1154 1154
1155 /* Append very, very detailed run extra attributes: */ 1155 /* Append very, very detailed run extra attributes: */
1156 return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); 1156 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1157} 1157}
1158 1158
1159int cmd_stat(int argc, const char **argv, const char *prefix __used) 1159int cmd_stat(int argc, const char **argv, const char *prefix __used)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 871b540293e1..6bb0277b7dfe 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
787 } 787 }
788 788
789 if (symbol_conf.use_callchain) { 789 if (symbol_conf.use_callchain) {
790 err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, 790 err = callchain_append(he->callchain, &callchain_cursor,
791 sample->period); 791 sample->period);
792 if (err) 792 if (err)
793 return; 793 return;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index bd0bb1b1279b..67e5d0cace85 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via
409prctl. When a counter is disabled, it doesn't count or generate 409prctl. When a counter is disabled, it doesn't count or generate
410events but does continue to exist and maintain its count value. 410events but does continue to exist and maintain its count value.
411 411
412An individual counter or counter group can be enabled with 412An individual counter can be enabled with
413 413
414 ioctl(fd, PERF_EVENT_IOC_ENABLE); 414 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
415 415
416or disabled with 416or disabled with
417 417
418 ioctl(fd, PERF_EVENT_IOC_DISABLE); 418 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
419 419
420For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument.
420Enabling or disabling the leader of a group enables or disables the 421Enabling or disabling the leader of a group enables or disables the
421whole group; that is, while the group leader is disabled, none of the 422whole group; that is, while the group leader is disabled, none of the
422counters in the group will count. Enabling or disabling a member of a 423counters in the group will count. Enabling or disabling a member of a
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 4deea6aaf927..34b1c46eaf42 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx,
668 "q/ESC/CTRL+C Exit\n\n" 668 "q/ESC/CTRL+C Exit\n\n"
669 "-> Go to target\n" 669 "-> Go to target\n"
670 "<- Exit\n" 670 "<- Exit\n"
671 "h Cycle thru hottest instructions\n" 671 "H Cycle thru hottest instructions\n"
672 "j Toggle showing jump to target arrows\n" 672 "j Toggle showing jump to target arrows\n"
673 "J Toggle showing number of jump sources on targets\n" 673 "J Toggle showing number of jump sources on targets\n"
674 "n Search next string\n" 674 "n Search next string\n"
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index ad73300f7bac..95264f304179 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -12,7 +12,7 @@ LF='
12# First check if there is a .git to get the version from git describe 12# First check if there is a .git to get the version from git describe
13# otherwise try to get the version from the kernel makefile 13# otherwise try to get the version from the kernel makefile
14if test -d ../../.git -o -f ../../.git && 14if test -d ../../.git -o -f ../../.git &&
15 VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && 15 VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) &&
16 case "$VN" in 16 case "$VN" in
17 *$LF*) (exit 1) ;; 17 *$LF*) (exit 1) ;;
18 v[0-9]*) 18 v[0-9]*)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f7106a8d9a4..3a6bff47614f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,6 +18,8 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21__thread struct callchain_cursor callchain_cursor;
22
21bool ip_callchain__valid(struct ip_callchain *chain, 23bool ip_callchain__valid(struct ip_callchain *chain,
22 const union perf_event *event) 24 const union perf_event *event)
23{ 25{
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7f9c0f1ae3a9..3bdb407f9cd9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -76,6 +76,8 @@ struct callchain_cursor {
76 struct callchain_cursor_node *curr; 76 struct callchain_cursor_node *curr;
77}; 77};
78 78
79extern __thread struct callchain_cursor callchain_cursor;
80
79static inline void callchain_init(struct callchain_root *root) 81static inline void callchain_init(struct callchain_root *root)
80{ 82{
81 INIT_LIST_HEAD(&root->node.siblings); 83 INIT_LIST_HEAD(&root->node.siblings);
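With the per-hists cursor replaced by this single thread-local callchain_cursor, every consumer follows the same lifecycle the session.c and hist.c hunks further down use: reset, append resolved entries, commit, then walk. In outline (perf-internal API, so not standalone; "ip" and "al" stand in for the caller's resolved address info):

	struct callchain_cursor_node *node;

	callchain_cursor_reset(&callchain_cursor);

	/* for each ip resolved from the sample's chain: */
	callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym);

	callchain_cursor_commit(&callchain_cursor);

	while ((node = callchain_cursor_current(&callchain_cursor)) != NULL) {
		/* consume node->map, node->sym, ... */
		callchain_cursor_advance(&callchain_cursor);
	}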
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4ac5f5ae4ce9..7400fb3fc50c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -159,6 +159,17 @@ out_delete_partial_list:
159 return -1; 159 return -1;
160} 160}
161 161
162int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
163 struct perf_event_attr *attrs, size_t nr_attrs)
164{
165 size_t i;
166
167 for (i = 0; i < nr_attrs; i++)
168 event_attr_init(attrs + i);
169
170 return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
171}
172
162static int trace_event__id(const char *evname) 173static int trace_event__id(const char *evname)
163{ 174{
164 char *filename, *colon; 175 char *filename, *colon;
@@ -263,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
263 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { 274 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
264 list_for_each_entry(pos, &evlist->entries, node) { 275 list_for_each_entry(pos, &evlist->entries, node) {
265 for (thread = 0; thread < evlist->threads->nr; thread++) 276 for (thread = 0; thread < evlist->threads->nr; thread++)
266 ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); 277 ioctl(FD(pos, cpu, thread),
278 PERF_EVENT_IOC_DISABLE, 0);
267 } 279 }
268 } 280 }
269} 281}
@@ -276,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist)
276 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { 288 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
277 list_for_each_entry(pos, &evlist->entries, node) { 289 list_for_each_entry(pos, &evlist->entries, node) {
278 for (thread = 0; thread < evlist->threads->nr; thread++) 290 for (thread = 0; thread < evlist->threads->nr; thread++)
279 ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); 291 ioctl(FD(pos, cpu, thread),
292 PERF_EVENT_IOC_ENABLE, 0);
280 } 293 }
281 } 294 }
282} 295}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 58abb63ac13a..989bee9624c2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
54int perf_evlist__add_default(struct perf_evlist *evlist); 54int perf_evlist__add_default(struct perf_evlist *evlist);
55int perf_evlist__add_attrs(struct perf_evlist *evlist, 55int perf_evlist__add_attrs(struct perf_evlist *evlist,
56 struct perf_event_attr *attrs, size_t nr_attrs); 56 struct perf_event_attr *attrs, size_t nr_attrs);
57int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
58 struct perf_event_attr *attrs, size_t nr_attrs);
57int perf_evlist__add_tracepoints(struct perf_evlist *evlist, 59int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
58 const char *tracepoints[], size_t nr_tracepoints); 60 const char *tracepoints[], size_t nr_tracepoints);
59int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, 61int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
@@ -62,6 +64,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
62 64
63#define perf_evlist__add_attrs_array(evlist, array) \ 65#define perf_evlist__add_attrs_array(evlist, array) \
64 perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) 66 perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array))
67#define perf_evlist__add_default_attrs(evlist, array) \
68 __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
65 69
66#define perf_evlist__add_tracepoints_array(evlist, array) \ 70#define perf_evlist__add_tracepoints_array(evlist, array) \
67 perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) 71 perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array))
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 91d19138f3ec..9f6cebd798ee 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
494} 494}
495 495
496static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 496static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
497 struct perf_sample *sample) 497 struct perf_sample *sample,
498 bool swapped)
498{ 499{
499 const u64 *array = event->sample.array; 500 const u64 *array = event->sample.array;
501 union u64_swap u;
500 502
501 array += ((event->header.size - 503 array += ((event->header.size -
502 sizeof(event->header)) / sizeof(u64)) - 1; 504 sizeof(event->header)) / sizeof(u64)) - 1;
503 505
504 if (type & PERF_SAMPLE_CPU) { 506 if (type & PERF_SAMPLE_CPU) {
505 u32 *p = (u32 *)array; 507 u.val64 = *array;
506 sample->cpu = *p; 508 if (swapped) {
509 /* undo swap of u64, then swap on individual u32s */
510 u.val64 = bswap_64(u.val64);
511 u.val32[0] = bswap_32(u.val32[0]);
512 }
513
514 sample->cpu = u.val32[0];
507 array--; 515 array--;
508 } 516 }
509 517
@@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
523 } 531 }
524 532
525 if (type & PERF_SAMPLE_TID) { 533 if (type & PERF_SAMPLE_TID) {
526 u32 *p = (u32 *)array; 534 u.val64 = *array;
527 sample->pid = p[0]; 535 if (swapped) {
528 sample->tid = p[1]; 536 /* undo swap of u64, then swap on individual u32s */
537 u.val64 = bswap_64(u.val64);
538 u.val32[0] = bswap_32(u.val32[0]);
539 u.val32[1] = bswap_32(u.val32[1]);
540 }
541
542 sample->pid = u.val32[0];
543 sample->tid = u.val32[1];
529 } 544 }
530 545
531 return 0; 546 return 0;
@@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
562 if (event->header.type != PERF_RECORD_SAMPLE) { 577 if (event->header.type != PERF_RECORD_SAMPLE) {
563 if (!sample_id_all) 578 if (!sample_id_all)
564 return 0; 579 return 0;
565 return perf_event__parse_id_sample(event, type, data); 580 return perf_event__parse_id_sample(event, type, data, swapped);
566 } 581 }
567 582
568 array = event->sample.array; 583 array = event->sample.array;
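The pattern in this evsel.c hunk recurs whenever two 32-bit fields share one 64-bit slot in a cross-endian perf.data file: the slot has already been byte-swapped as a whole u64 by the generic swap pass, which also exchanges the two halves, so each u32 has to be re-swapped on its own. A standalone illustration of the undo step (the local u64_swap union mirrors the one used in the hunk):

#include <byteswap.h>
#include <stdint.h>
#include <stdio.h>

union u64_swap { uint64_t val64; uint32_t val32[2]; };

int main(void)
{
	/* pid/tid as they read on this host: the producer was opposite-endian,
	 * so each u32 comes back byte-reversed */
	union u64_swap file = { .val32 = { bswap_32(1234), bswap_32(5678) } };

	/* step 1: the reader's generic pass byte-swaps every u64 in the record */
	union u64_swap u = { .val64 = bswap_64(file.val64) };

	/* step 2 (this hunk): undo the u64 swap, then swap each u32 individually */
	u.val64 = bswap_64(u.val64);
	u.val32[0] = bswap_32(u.val32[0]);
	u.val32[1] = bswap_32(u.val32[1]);

	/* prints: pid=1234 tid=5678 */
	printf("pid=%u tid=%u\n", (unsigned)u.val32[0], (unsigned)u.val32[1]);
	return 0;
}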
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1293b5ebea4d..514e2a4b367d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he)
378 * collapse the histogram 378 * collapse the histogram
379 */ 379 */
380 380
381static bool hists__collapse_insert_entry(struct hists *hists, 381static bool hists__collapse_insert_entry(struct hists *hists __used,
382 struct rb_root *root, 382 struct rb_root *root,
383 struct hist_entry *he) 383 struct hist_entry *he)
384{ 384{
@@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists,
397 iter->period += he->period; 397 iter->period += he->period;
398 iter->nr_events += he->nr_events; 398 iter->nr_events += he->nr_events;
399 if (symbol_conf.use_callchain) { 399 if (symbol_conf.use_callchain) {
400 callchain_cursor_reset(&hists->callchain_cursor); 400 callchain_cursor_reset(&callchain_cursor);
401 callchain_merge(&hists->callchain_cursor, iter->callchain, 401 callchain_merge(&callchain_cursor,
402 iter->callchain,
402 he->callchain); 403 he->callchain);
403 } 404 }
404 hist_entry__free(he); 405 hist_entry__free(he);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index cfc64e293f90..34bb556d6219 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -67,8 +67,6 @@ struct hists {
67 struct events_stats stats; 67 struct events_stats stats;
68 u64 event_stream; 68 u64 event_stream;
69 u16 col_len[HISTC_NR_COLS]; 69 u16 col_len[HISTC_NR_COLS];
70 /* Best would be to reuse the session callchain cursor */
71 struct callchain_cursor callchain_cursor;
72}; 70};
73 71
74struct hist_entry *__hists__add_entry(struct hists *self, 72struct hist_entry *__hists__add_entry(struct hists *self,
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 1915de20dcac..3322b8446e89 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -57,6 +57,10 @@ void setup_pager(void)
57 } 57 }
58 if (!pager) 58 if (!pager)
59 pager = getenv("PAGER"); 59 pager = getenv("PAGER");
60 if (!pager) {
61 if (!access("/usr/bin/pager", X_OK))
62 pager = "/usr/bin/pager";
63 }
60 if (!pager) 64 if (!pager)
61 pager = "less"; 65 pager = "less";
62 else if (!*pager || !strcmp(pager, "cat")) 66 else if (!*pager || !strcmp(pager, "cat"))
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 59dccc98b554..0dda25d82d06 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist)
2164 2164
2165error: 2165error:
2166 if (kfd >= 0) { 2166 if (kfd >= 0) {
2167 if (namelist) 2167 strlist__delete(namelist);
2168 strlist__delete(namelist);
2169
2170 close(kfd); 2168 close(kfd);
2171 } 2169 }
2172 2170
2173 if (ufd >= 0) { 2171 if (ufd >= 0) {
2174 if (unamelist) 2172 strlist__delete(unamelist);
2175 strlist__delete(unamelist);
2176
2177 close(ufd); 2173 close(ufd);
2178 } 2174 }
2179 2175
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 93d355d27109..2600916efa83 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
288 return bi; 288 return bi;
289} 289}
290 290
291int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, 291int machine__resolve_callchain(struct machine *self,
292 struct perf_evsel *evsel __used,
292 struct thread *thread, 293 struct thread *thread,
293 struct ip_callchain *chain, 294 struct ip_callchain *chain,
294 struct symbol **parent) 295 struct symbol **parent)
@@ -297,7 +298,12 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
297 unsigned int i; 298 unsigned int i;
298 int err; 299 int err;
299 300
300 callchain_cursor_reset(&evsel->hists.callchain_cursor); 301 callchain_cursor_reset(&callchain_cursor);
302
303 if (chain->nr > PERF_MAX_STACK_DEPTH) {
304 pr_warning("corrupted callchain. skipping...\n");
305 return 0;
306 }
301 307
302 for (i = 0; i < chain->nr; i++) { 308 for (i = 0; i < chain->nr; i++) {
303 u64 ip; 309 u64 ip;
@@ -317,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
317 case PERF_CONTEXT_USER: 323 case PERF_CONTEXT_USER:
318 cpumode = PERF_RECORD_MISC_USER; break; 324 cpumode = PERF_RECORD_MISC_USER; break;
319 default: 325 default:
320 break; 326 pr_debug("invalid callchain context: "
327 "%"PRId64"\n", (s64) ip);
328 /*
329 * It seems the callchain is corrupted.
330 * Discard all.
331 */
332 callchain_cursor_reset(&callchain_cursor);
333 return 0;
321 } 334 }
322 continue; 335 continue;
323 } 336 }
@@ -333,7 +346,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
333 break; 346 break;
334 } 347 }
335 348
336 err = callchain_cursor_append(&evsel->hists.callchain_cursor, 349 err = callchain_cursor_append(&callchain_cursor,
337 ip, al.map, al.sym); 350 ip, al.map, al.sym);
338 if (err) 351 if (err)
339 return err; 352 return err;
@@ -441,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size)
441 } 454 }
442} 455}
443 456
444static void perf_event__all64_swap(union perf_event *event) 457static void swap_sample_id_all(union perf_event *event, void *data)
458{
459 void *end = (void *) event + event->header.size;
460 int size = end - data;
461
462 BUG_ON(size % sizeof(u64));
463 mem_bswap_64(data, size);
464}
465
466static void perf_event__all64_swap(union perf_event *event,
467 bool sample_id_all __used)
445{ 468{
446 struct perf_event_header *hdr = &event->header; 469 struct perf_event_header *hdr = &event->header;
447 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); 470 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
448} 471}
449 472
450static void perf_event__comm_swap(union perf_event *event) 473static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
451{ 474{
452 event->comm.pid = bswap_32(event->comm.pid); 475 event->comm.pid = bswap_32(event->comm.pid);
453 event->comm.tid = bswap_32(event->comm.tid); 476 event->comm.tid = bswap_32(event->comm.tid);
477
478 if (sample_id_all) {
479 void *data = &event->comm.comm;
480
481 data += ALIGN(strlen(data) + 1, sizeof(u64));
482 swap_sample_id_all(event, data);
483 }
454} 484}
455 485
456static void perf_event__mmap_swap(union perf_event *event) 486static void perf_event__mmap_swap(union perf_event *event,
487 bool sample_id_all)
457{ 488{
458 event->mmap.pid = bswap_32(event->mmap.pid); 489 event->mmap.pid = bswap_32(event->mmap.pid);
459 event->mmap.tid = bswap_32(event->mmap.tid); 490 event->mmap.tid = bswap_32(event->mmap.tid);
460 event->mmap.start = bswap_64(event->mmap.start); 491 event->mmap.start = bswap_64(event->mmap.start);
461 event->mmap.len = bswap_64(event->mmap.len); 492 event->mmap.len = bswap_64(event->mmap.len);
462 event->mmap.pgoff = bswap_64(event->mmap.pgoff); 493 event->mmap.pgoff = bswap_64(event->mmap.pgoff);
494
495 if (sample_id_all) {
496 void *data = &event->mmap.filename;
497
498 data += ALIGN(strlen(data) + 1, sizeof(u64));
499 swap_sample_id_all(event, data);
500 }
463} 501}
464 502
465static void perf_event__task_swap(union perf_event *event) 503static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
466{ 504{
467 event->fork.pid = bswap_32(event->fork.pid); 505 event->fork.pid = bswap_32(event->fork.pid);
468 event->fork.tid = bswap_32(event->fork.tid); 506 event->fork.tid = bswap_32(event->fork.tid);
469 event->fork.ppid = bswap_32(event->fork.ppid); 507 event->fork.ppid = bswap_32(event->fork.ppid);
470 event->fork.ptid = bswap_32(event->fork.ptid); 508 event->fork.ptid = bswap_32(event->fork.ptid);
471 event->fork.time = bswap_64(event->fork.time); 509 event->fork.time = bswap_64(event->fork.time);
510
511 if (sample_id_all)
512 swap_sample_id_all(event, &event->fork + 1);
472} 513}
473 514
474static void perf_event__read_swap(union perf_event *event) 515static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
475{ 516{
476 event->read.pid = bswap_32(event->read.pid); 517 event->read.pid = bswap_32(event->read.pid);
477 event->read.tid = bswap_32(event->read.tid); 518 event->read.tid = bswap_32(event->read.tid);
@@ -479,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event)
479 event->read.time_enabled = bswap_64(event->read.time_enabled); 520 event->read.time_enabled = bswap_64(event->read.time_enabled);
480 event->read.time_running = bswap_64(event->read.time_running); 521 event->read.time_running = bswap_64(event->read.time_running);
481 event->read.id = bswap_64(event->read.id); 522 event->read.id = bswap_64(event->read.id);
523
524 if (sample_id_all)
525 swap_sample_id_all(event, &event->read + 1);
482} 526}
483 527
484static u8 revbyte(u8 b) 528static u8 revbyte(u8 b)
@@ -530,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
530 swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); 574 swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
531} 575}
532 576
533static void perf_event__hdr_attr_swap(union perf_event *event) 577static void perf_event__hdr_attr_swap(union perf_event *event,
578 bool sample_id_all __used)
534{ 579{
535 size_t size; 580 size_t size;
536 581
@@ -541,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event)
541 mem_bswap_64(event->attr.id, size); 586 mem_bswap_64(event->attr.id, size);
542} 587}
543 588
544static void perf_event__event_type_swap(union perf_event *event) 589static void perf_event__event_type_swap(union perf_event *event,
590 bool sample_id_all __used)
545{ 591{
546 event->event_type.event_type.event_id = 592 event->event_type.event_type.event_id =
547 bswap_64(event->event_type.event_type.event_id); 593 bswap_64(event->event_type.event_type.event_id);
548} 594}
549 595
550static void perf_event__tracing_data_swap(union perf_event *event) 596static void perf_event__tracing_data_swap(union perf_event *event,
597 bool sample_id_all __used)
551{ 598{
552 event->tracing_data.size = bswap_32(event->tracing_data.size); 599 event->tracing_data.size = bswap_32(event->tracing_data.size);
553} 600}
554 601
555typedef void (*perf_event__swap_op)(union perf_event *event); 602typedef void (*perf_event__swap_op)(union perf_event *event,
603 bool sample_id_all);
556 604
557static perf_event__swap_op perf_event__swap_ops[] = { 605static perf_event__swap_op perf_event__swap_ops[] = {
558 [PERF_RECORD_MMAP] = perf_event__mmap_swap, 606 [PERF_RECORD_MMAP] = perf_event__mmap_swap,
@@ -986,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union
986 } 1034 }
987} 1035}
988 1036
1037static void event_swap(union perf_event *event, bool sample_id_all)
1038{
1039 perf_event__swap_op swap;
1040
1041 swap = perf_event__swap_ops[event->header.type];
1042 if (swap)
1043 swap(event, sample_id_all);
1044}
1045
989static int perf_session__process_event(struct perf_session *session, 1046static int perf_session__process_event(struct perf_session *session,
990 union perf_event *event, 1047 union perf_event *event,
991 struct perf_tool *tool, 1048 struct perf_tool *tool,
@@ -994,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session,
994 struct perf_sample sample; 1051 struct perf_sample sample;
995 int ret; 1052 int ret;
996 1053
997 if (session->header.needs_swap && 1054 if (session->header.needs_swap)
998 perf_event__swap_ops[event->header.type]) 1055 event_swap(event, session->sample_id_all);
999 perf_event__swap_ops[event->header.type](event);
1000 1056
1001 if (event->header.type >= PERF_RECORD_HEADER_MAX) 1057 if (event->header.type >= PERF_RECORD_HEADER_MAX)
1002 return -EINVAL; 1058 return -EINVAL;
@@ -1428,7 +1484,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1428 int print_sym, int print_dso, int print_symoffset) 1484 int print_sym, int print_dso, int print_symoffset)
1429{ 1485{
1430 struct addr_location al; 1486 struct addr_location al;
1431 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
1432 struct callchain_cursor_node *node; 1487 struct callchain_cursor_node *node;
1433 1488
1434 if (perf_event__preprocess_sample(event, machine, &al, sample, 1489 if (perf_event__preprocess_sample(event, machine, &al, sample,
@@ -1446,10 +1501,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1446 error("Failed to resolve callchain. Skipping\n"); 1501 error("Failed to resolve callchain. Skipping\n");
1447 return; 1502 return;
1448 } 1503 }
1449 callchain_cursor_commit(cursor); 1504 callchain_cursor_commit(&callchain_cursor);
1450 1505
1451 while (1) { 1506 while (1) {
1452 node = callchain_cursor_current(cursor); 1507 node = callchain_cursor_current(&callchain_cursor);
1453 if (!node) 1508 if (!node)
1454 break; 1509 break;
1455 1510
@@ -1460,12 +1515,12 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1460 } 1515 }
1461 if (print_dso) { 1516 if (print_dso) {
1462 printf(" ("); 1517 printf(" (");
1463 map__fprintf_dsoname(al.map, stdout); 1518 map__fprintf_dsoname(node->map, stdout);
1464 printf(")"); 1519 printf(")");
1465 } 1520 }
1466 printf("\n"); 1521 printf("\n");
1467 1522
1468 callchain_cursor_advance(cursor); 1523 callchain_cursor_advance(&callchain_cursor);
1469 } 1524 }
1470 1525
1471 } else { 1526 } else {
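The comm/mmap swap helpers above locate the sample_id_all trailer by skipping the event's variable-length string and rounding up to the next u64 boundary, i.e. ALIGN(strlen(data) + 1, sizeof(u64)). The same arithmetic in isolation, for a hypothetical 11-character comm:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char comm[] = "perf-worker";	/* 11 chars + NUL = 12 bytes */
	size_t len = strlen(comm) + 1;		/* include the terminating NUL */
	size_t padded = (len + 7) & ~(size_t)7;	/* ALIGN(len, sizeof(u64)) */

	/* the trailer starts 'padded' bytes after the start of the string */
	printf("len=%zu padded=%zu\n", len, padded);	/* len=12 padded=16 */
	return 0;
}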
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e2ba8858f3e1..3e2e5ea0f03f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -323,6 +323,7 @@ struct dso *dso__new(const char *name)
323 dso->sorted_by_name = 0; 323 dso->sorted_by_name = 0;
324 dso->has_build_id = 0; 324 dso->has_build_id = 0;
325 dso->kernel = DSO_TYPE_USER; 325 dso->kernel = DSO_TYPE_USER;
326 dso->needs_swap = DSO_SWAP__UNSET;
326 INIT_LIST_HEAD(&dso->node); 327 INIT_LIST_HEAD(&dso->node);
327 } 328 }
328 329
@@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
1156 return -1; 1157 return -1;
1157} 1158}
1158 1159
1160static int dso__swap_init(struct dso *dso, unsigned char eidata)
1161{
1162 static unsigned int const endian = 1;
1163
1164 dso->needs_swap = DSO_SWAP__NO;
1165
1166 switch (eidata) {
1167 case ELFDATA2LSB:
1168 /* We are big endian, DSO is little endian. */
1169 if (*(unsigned char const *)&endian != 1)
1170 dso->needs_swap = DSO_SWAP__YES;
1171 break;
1172
1173 case ELFDATA2MSB:
1174 /* We are little endian, DSO is big endian. */
1175 if (*(unsigned char const *)&endian != 0)
1176 dso->needs_swap = DSO_SWAP__YES;
1177 break;
1178
1179 default:
1180 pr_err("unrecognized DSO data encoding %d\n", eidata);
1181 return -EINVAL;
1182 }
1183
1184 return 0;
1185}
1186
1159static int dso__load_sym(struct dso *dso, struct map *map, const char *name, 1187static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1160 int fd, symbol_filter_t filter, int kmodule, 1188 int fd, symbol_filter_t filter, int kmodule,
1161 int want_symtab) 1189 int want_symtab)
@@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1187 goto out_elf_end; 1215 goto out_elf_end;
1188 } 1216 }
1189 1217
1218 if (dso__swap_init(dso, ehdr.e_ident[EI_DATA]))
1219 goto out_elf_end;
1220
1190 /* Always reject images with a mismatched build-id: */ 1221 /* Always reject images with a mismatched build-id: */
1191 if (dso->has_build_id) { 1222 if (dso->has_build_id) {
1192 u8 build_id[BUILD_ID_SIZE]; 1223 u8 build_id[BUILD_ID_SIZE];
@@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1272 if (opdsec && sym.st_shndx == opdidx) { 1303 if (opdsec && sym.st_shndx == opdidx) {
1273 u32 offset = sym.st_value - opdshdr.sh_addr; 1304 u32 offset = sym.st_value - opdshdr.sh_addr;
1274 u64 *opd = opddata->d_buf + offset; 1305 u64 *opd = opddata->d_buf + offset;
1275 sym.st_value = *opd; 1306 sym.st_value = DSO__SWAP(dso, u64, *opd);
1276 sym.st_shndx = elf_addr_to_index(elf, sym.st_value); 1307 sym.st_shndx = elf_addr_to_index(elf, sym.st_value);
1277 } 1308 }
1278 1309
@@ -2786,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
2786 2817
2787struct map *dso__new_map(const char *name) 2818struct map *dso__new_map(const char *name)
2788{ 2819{
2820 struct map *map = NULL;
2789 struct dso *dso = dso__new(name); 2821 struct dso *dso = dso__new(name);
2790 struct map *map = map__new2(0, dso, MAP__FUNCTION); 2822
2823 if (dso)
2824 map = map__new2(0, dso, MAP__FUNCTION);
2791 2825
2792 return map; 2826 return map;
2793} 2827}
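dso__swap_init() above decides whether cross-byte-order fixups are needed by comparing the ELF EI_DATA tag against the host's own byte order, probed by looking at the first byte of the integer constant 1. The probe on its own, as a runnable snippet:

#include <stdio.h>

int main(void)
{
	static const unsigned int one = 1;

	/* Little-endian hosts store the least significant byte first, so the
	 * first byte of 'one' is 1 there and 0 on a big-endian host. */
	if (*(const unsigned char *)&one == 1)
		printf("host is little-endian (matches ELFDATA2LSB)\n");
	else
		printf("host is big-endian (matches ELFDATA2MSB)\n");
	return 0;
}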
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5649d63798cb..af0752b1aca1 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,6 +9,7 @@
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/rbtree.h> 10#include <linux/rbtree.h>
11#include <stdio.h> 11#include <stdio.h>
12#include <byteswap.h>
12 13
13#ifdef HAVE_CPLUS_DEMANGLE 14#ifdef HAVE_CPLUS_DEMANGLE
14extern char *cplus_demangle(const char *, int); 15extern char *cplus_demangle(const char *, int);
@@ -160,11 +161,18 @@ enum dso_kernel_type {
160 DSO_TYPE_GUEST_KERNEL 161 DSO_TYPE_GUEST_KERNEL
161}; 162};
162 163
164enum dso_swap_type {
165 DSO_SWAP__UNSET,
166 DSO_SWAP__NO,
167 DSO_SWAP__YES,
168};
169
163struct dso { 170struct dso {
164 struct list_head node; 171 struct list_head node;
165 struct rb_root symbols[MAP__NR_TYPES]; 172 struct rb_root symbols[MAP__NR_TYPES];
166 struct rb_root symbol_names[MAP__NR_TYPES]; 173 struct rb_root symbol_names[MAP__NR_TYPES];
167 enum dso_kernel_type kernel; 174 enum dso_kernel_type kernel;
175 enum dso_swap_type needs_swap;
168 u8 adjust_symbols:1; 176 u8 adjust_symbols:1;
169 u8 has_build_id:1; 177 u8 has_build_id:1;
170 u8 hit:1; 178 u8 hit:1;
@@ -182,6 +190,28 @@ struct dso {
182 char name[0]; 190 char name[0];
183}; 191};
184 192
193#define DSO__SWAP(dso, type, val) \
194({ \
195 type ____r = val; \
196 BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \
197 if (dso->needs_swap == DSO_SWAP__YES) { \
198 switch (sizeof(____r)) { \
199 case 2: \
200 ____r = bswap_16(val); \
201 break; \
202 case 4: \
203 ____r = bswap_32(val); \
204 break; \
205 case 8: \
206 ____r = bswap_64(val); \
207 break; \
208 default: \
209 BUG_ON(1); \
210 } \
211 } \
212 ____r; \
213})
214
185struct dso *dso__new(const char *name); 215struct dso *dso__new(const char *name);
186void dso__delete(struct dso *dso); 216void dso__delete(struct dso *dso);
187 217
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ab2f682fd44c..16de7ad4850f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -73,8 +73,8 @@ int backwards_count;
73char *progname; 73char *progname;
74 74
75int num_cpus; 75int num_cpus;
76cpu_set_t *cpu_mask; 76cpu_set_t *cpu_present_set, *cpu_mask;
77size_t cpu_mask_size; 77size_t cpu_present_setsize, cpu_mask_size;
78 78
79struct counters { 79struct counters {
80 unsigned long long tsc; /* per thread */ 80 unsigned long long tsc; /* per thread */
@@ -103,6 +103,12 @@ struct timeval tv_even;
103struct timeval tv_odd; 103struct timeval tv_odd;
104struct timeval tv_delta; 104struct timeval tv_delta;
105 105
106int mark_cpu_present(int pkg, int core, int cpu)
107{
108 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
109 return 0;
110}
111
106/* 112/*
107 * cpu_mask_init(ncpus) 113 * cpu_mask_init(ncpus)
108 * 114 *
@@ -118,6 +124,18 @@ void cpu_mask_init(int ncpus)
118 } 124 }
119 cpu_mask_size = CPU_ALLOC_SIZE(ncpus); 125 cpu_mask_size = CPU_ALLOC_SIZE(ncpus);
120 CPU_ZERO_S(cpu_mask_size, cpu_mask); 126 CPU_ZERO_S(cpu_mask_size, cpu_mask);
127
128 /*
129 * Allocate and initialize cpu_present_set
130 */
131 cpu_present_set = CPU_ALLOC(ncpus);
132 if (cpu_present_set == NULL) {
133 perror("CPU_ALLOC");
134 exit(3);
135 }
136 cpu_present_setsize = CPU_ALLOC_SIZE(ncpus);
137 CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
138 for_all_cpus(mark_cpu_present);
121} 139}
122 140
123void cpu_mask_uninit() 141void cpu_mask_uninit()
@@ -125,6 +143,9 @@ void cpu_mask_uninit()
125 CPU_FREE(cpu_mask); 143 CPU_FREE(cpu_mask);
126 cpu_mask = NULL; 144 cpu_mask = NULL;
127 cpu_mask_size = 0; 145 cpu_mask_size = 0;
146 CPU_FREE(cpu_present_set);
147 cpu_present_set = NULL;
148 cpu_present_setsize = 0;
128} 149}
129 150
130int cpu_migrate(int cpu) 151int cpu_migrate(int cpu)
@@ -912,6 +933,8 @@ int is_snb(unsigned int family, unsigned int model)
912 switch (model) { 933 switch (model) {
913 case 0x2A: 934 case 0x2A:
914 case 0x2D: 935 case 0x2D:
936 case 0x3A: /* IVB */
937 case 0x3D: /* IVB Xeon */
915 return 1; 938 return 1;
916 } 939 }
917 return 0; 940 return 0;
@@ -1047,6 +1070,9 @@ int fork_it(char **argv)
1047 int retval; 1070 int retval;
1048 pid_t child_pid; 1071 pid_t child_pid;
1049 get_counters(cnt_even); 1072 get_counters(cnt_even);
1073
1074 /* clear affinity side-effect of get_counters() */
1075 sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
1050 gettimeofday(&tv_even, (struct timezone *)NULL); 1076 gettimeofday(&tv_even, (struct timezone *)NULL);
1051 1077
1052 child_pid = fork(); 1078 child_pid = fork();
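The new cpu_present_set exists so that fork_it() can undo the affinity pinning left behind by get_counters(): the mask is built once with glibc's dynamic cpu_set_t allocators and later handed back to sched_setaffinity(). The allocate/mark/apply sequence, condensed from the hunks above (error handling trimmed; ncpus and cpu stand in for turbostat's own topology bookkeeping):

	cpu_set_t *present = CPU_ALLOC(ncpus);
	size_t setsize = CPU_ALLOC_SIZE(ncpus);

	CPU_ZERO_S(setsize, present);
	CPU_SET_S(cpu, setsize, present);	/* repeated for every present CPU */

	/* later, before fork(): let the current thread run anywhere again */
	sched_setaffinity(0, setsize, present);

	CPU_FREE(present);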
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a6a0365475ed..5afb43114020 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -332,6 +332,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
332 */ 332 */
333 hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) 333 hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
334 if (ei->type == KVM_IRQ_ROUTING_MSI || 334 if (ei->type == KVM_IRQ_ROUTING_MSI ||
335 ue->type == KVM_IRQ_ROUTING_MSI ||
335 ue->u.irqchip.irqchip == ei->irqchip.irqchip) 336 ue->u.irqchip.irqchip == ei->irqchip.irqchip)
336 return r; 337 return r;
337 338