-rw-r--r--  Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt | 93
-rw-r--r--  Documentation/kernel-parameters.txt | 9
-rw-r--r--  Documentation/vm/frontswap.txt | 278
-rw-r--r--  MAINTAINERS | 17
-rw-r--r--  arch/arm/mach-shmobile/Kconfig | 6
-rw-r--r--  arch/avr32/kernel/signal.c | 2
-rw-r--r--  arch/blackfin/kernel/process.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 10
-rw-r--r--  arch/xtensa/include/asm/syscall.h | 4
-rw-r--r--  arch/xtensa/kernel/signal.c | 2
-rw-r--r--  drivers/acpi/bus.c | 88
-rw-r--r--  drivers/acpi/power.c | 2
-rw-r--r--  drivers/acpi/scan.c | 1
-rw-r--r--  drivers/acpi/sleep.c | 49
-rw-r--r--  drivers/clocksource/Makefile | 1
-rw-r--r--  drivers/clocksource/em_sti.c | 406
-rw-r--r--  drivers/gpio/gpio-samsung.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/ni.c | 21
-rw-r--r--  drivers/gpu/drm/radeon/r600.c | 15
-rw-r--r--  drivers/gpu/drm/radeon/r600_audio.c | 5
-rw-r--r--  drivers/gpu/drm/radeon/r600_hdmi.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h | 5
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c | 19
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/rs600.c | 12
-rw-r--r--  drivers/gpu/drm/radeon/rs690.c | 12
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c | 18
-rw-r--r--  drivers/gpu/drm/radeon/si.c | 477
-rw-r--r--  drivers/gpu/drm/radeon/sid.h | 19
-rw-r--r--  drivers/i2c/muxes/Kconfig | 12
-rw-r--r--  drivers/i2c/muxes/Makefile | 1
-rw-r--r--  drivers/i2c/muxes/i2c-mux-pinctrl.c | 279
-rw-r--r--  drivers/rtc/rtc-cmos.c | 9
-rw-r--r--  drivers/staging/ramster/zcache-main.c | 8
-rw-r--r--  drivers/staging/zcache/zcache-main.c | 10
-rw-r--r--  drivers/target/sbp/sbp_target.c | 8
-rw-r--r--  drivers/target/target_core_file.c | 70
-rw-r--r--  drivers/target/target_core_file.h | 1
-rw-r--r--  drivers/xen/tmem.c | 8
-rw-r--r--  fs/cifs/cifsglob.h | 7
-rw-r--r--  fs/cifs/cifsproto.h | 1
-rw-r--r--  fs/cifs/cifssmb.c | 8
-rw-r--r--  fs/cifs/connect.c | 8
-rw-r--r--  fs/cifs/file.c | 106
-rw-r--r--  fs/cifs/misc.c | 89
-rw-r--r--  fs/cifs/smb1ops.c | 89
-rw-r--r--  fs/cifs/transport.c | 2
-rw-r--r--  fs/fuse/control.c | 10
-rw-r--r--  fs/fuse/dir.c | 11
-rw-r--r--  fs/fuse/file.c | 40
-rw-r--r--  fs/fuse/fuse_i.h | 6
-rw-r--r--  fs/fuse/inode.c | 17
-rw-r--r--  fs/proc/base.c | 17
-rw-r--r--  include/drm/drm_pciids.h | 17
-rw-r--r--  include/linux/clockchips.h | 1
-rw-r--r--  include/linux/compaction.h | 19
-rw-r--r--  include/linux/frontswap.h | 127
-rw-r--r--  include/linux/fs.h | 6
-rw-r--r--  include/linux/fuse.h | 14
-rw-r--r--  include/linux/i2c-mux-pinctrl.h | 41
-rw-r--r--  include/linux/init_task.h | 2
-rw-r--r--  include/linux/radix-tree.h | 5
-rw-r--r--  include/linux/sched.h | 3
-rw-r--r--  include/linux/swap.h | 4
-rw-r--r--  include/linux/swapfile.h | 13
-rw-r--r--  kernel/cgroup.c | 17
-rw-r--r--  kernel/irq/chip.c | 8
-rw-r--r--  kernel/irq/internals.h | 3
-rw-r--r--  kernel/irq/manage.c | 39
-rw-r--r--  kernel/irq/migration.c | 13
-rw-r--r--  kernel/sched/core.c | 68
-rw-r--r--  kernel/sched/fair.c | 42
-rw-r--r--  kernel/sched/rt.c | 51
-rw-r--r--  kernel/smpboot.c | 17
-rw-r--r--  kernel/time/clockevents.c | 3
-rw-r--r--  kernel/time/tick-sched.c | 19
-rw-r--r--  lib/radix-tree.c | 3
-rw-r--r--  mm/Kconfig | 17
-rw-r--r--  mm/Makefile | 1
-rw-r--r--  mm/compaction.c | 142
-rw-r--r--  mm/frontswap.c | 314
-rw-r--r--  mm/internal.h | 9
-rw-r--r--  mm/migrate.c | 5
-rw-r--r--  mm/nommu.c | 2
-rw-r--r--  mm/page_alloc.c | 8
-rw-r--r--  mm/page_io.c | 12
-rw-r--r--  mm/swapfile.c | 54
-rw-r--r--  virt/kvm/irq_comm.c | 1
88 files changed, 2578 insertions, 917 deletions
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
new file mode 100644
index 000000000000..ae8af1694e95
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
@@ -0,0 +1,93 @@
1Pinctrl-based I2C Bus Mux
2
3This binding describes an I2C bus multiplexer that uses pin multiplexing to
4route the I2C signals, and represents the pin multiplexing configuration
5using the pinctrl device tree bindings.
6
7 +-----+ +-----+
8 | dev | | dev |
9 +------------------------+ +-----+ +-----+
10 | SoC | | |
11 | /----|------+--------+
12 | +---+ +------+ | child bus A, on first set of pins
13 | |I2C|---|Pinmux| |
14 | +---+ +------+ | child bus B, on second set of pins
15 | \----|------+--------+--------+
16 | | | | |
17 +------------------------+ +-----+ +-----+ +-----+
18 | dev | | dev | | dev |
19 +-----+ +-----+ +-----+
20
21Required properties:
22- compatible: i2c-mux-pinctrl
23- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
24 port is connected to.
25
26Also required are:
27
28* Standard pinctrl properties that specify the pin mux state for each child
29 bus. See ../pinctrl/pinctrl-bindings.txt.
30
31* Standard I2C mux properties. See mux.txt in this directory.
32
33* I2C child bus nodes. See mux.txt in this directory.
34
35For each named state defined in the pinctrl-names property, an I2C child bus
36will be created. I2C child bus numbers are assigned based on the index into
37the pinctrl-names property.
38
39The only exception is that no bus will be created for a state named "idle". If
40such a state is defined, it must be the last entry in pinctrl-names. For
41example:
42
43 pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1
44 pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last)
45 pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last)
46
47Whenever an access is made to a device on a child bus, the relevant pinctrl
48state will be programmed into hardware.
49
50If an idle state is defined, whenever an access is not being made to a device
51on a child bus, the idle pinctrl state will be programmed into hardware.
52
53If an idle state is not defined, the most recently used pinctrl state will be
54left programmed into hardware whenever no access is being made of a device on
55a child bus.
56
57Example:
58
59 i2cmux {
60 compatible = "i2c-mux-pinctrl";
61 #address-cells = <1>;
62 #size-cells = <0>;
63
64 i2c-parent = <&i2c1>;
65
66 pinctrl-names = "ddc", "pta", "idle";
67 pinctrl-0 = <&state_i2cmux_ddc>;
68 pinctrl-1 = <&state_i2cmux_pta>;
69 pinctrl-2 = <&state_i2cmux_idle>;
70
71 i2c@0 {
72 reg = <0>;
73 #address-cells = <1>;
74 #size-cells = <0>;
75
76 eeprom {
77 compatible = "eeprom";
78 reg = <0x50>;
79 };
80 };
81
82 i2c@1 {
83 reg = <1>;
84 #address-cells = <1>;
85 #size-cells = <0>;
86
87 eeprom {
88 compatible = "eeprom";
89 reg = <0x50>;
90 };
91 };
92 };
93
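The driver that implements this binding, drivers/i2c/muxes/i2c-mux-pinctrl.c, is added later in this series and is not shown in this section. As a rough, hypothetical illustration of the behaviour described above (program the child bus's pinctrl state before an access, fall back to the optional "idle" state afterwards), a select/deselect pair might look like the sketch below; all sketch names are made up, and only pinctrl_select_state() plus the usual i2c-mux select/deselect callback shape are taken from existing APIs.

/* Illustrative sketch only -- not the i2c-mux-pinctrl.c added by this series. */
#include <linux/i2c.h>
#include <linux/pinctrl/consumer.h>

struct i2c_mux_pinctrl_sketch {
	struct pinctrl *pinctrl;
	struct pinctrl_state **states;		/* one state per child bus */
	struct pinctrl_state *state_idle;	/* NULL if no "idle" state */
};

/* Called before an access to a device on child bus 'chan': program its pins. */
static int i2c_mux_pinctrl_sketch_select(struct i2c_adapter *adap, void *data,
					 u32 chan)
{
	struct i2c_mux_pinctrl_sketch *mux = data;

	return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
}

/* Called after the access: switch back to the "idle" state, if one exists. */
static int i2c_mux_pinctrl_sketch_deselect(struct i2c_adapter *adap, void *data,
					   u32 chan)
{
	struct i2c_mux_pinctrl_sketch *mux = data;

	if (!mux->state_idle)
		return 0;
	return pinctrl_select_state(mux->pinctrl, mux->state_idle);
}

At probe time, each entry in pinctrl-names would be resolved with pinctrl_lookup_state() and one child adapter registered per non-"idle" state; the real driver's probe logic may differ in detail.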
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c45513d806ab..a92c5ebf373e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2543,6 +2543,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	sched_debug	[KNL] Enables verbose scheduler debug messages.
 
+	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
+			xtime_lock contention on larger systems, and/or RCU lock
+			contention on all systems with CONFIG_MAXSMP set.
+			Format: { "0" | "1" }
+			0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1"
+			1 -- enable.
+			Note: increases power consumption, thus should only be
+			enabled if running jitter sensitive (HPC/RT) workloads.
+
 	security=	[SECURITY] Choose a security module to enable at boot.
 			If this boot parameter is not specified, only the first
 			security module asking for security registration will be
diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt
new file mode 100644
index 000000000000..37067cf455f4
--- /dev/null
+++ b/Documentation/vm/frontswap.txt
@@ -0,0 +1,278 @@
1Frontswap provides a "transcendent memory" interface for swap pages.
2In some environments, dramatic performance savings may be obtained because
3swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
4
5(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
6and the only necessary changes to the core kernel for transcendent memory;
7all other supporting code -- the "backends" -- is implemented as drivers.
8See the LWN.net article "Transcendent memory in a nutshell" for a detailed
9overview of frontswap and related kernel parts:
10https://lwn.net/Articles/454795/ )
11
12Frontswap is so named because it can be thought of as the opposite of
13a "backing" store for a swap device. The storage is assumed to be
14a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
15to the requirements of transcendent memory (such as Xen's "tmem", or
16in-kernel compressed memory, aka "zcache", or future RAM-like devices);
17this pseudo-RAM device is not directly accessible or addressable by the
18kernel and is of unknown and possibly time-varying size. The driver
19links itself to frontswap by calling frontswap_register_ops to set the
20frontswap_ops funcs appropriately and the functions it provides must
21conform to certain policies as follows:
22
23An "init" prepares the device to receive frontswap pages associated
24with the specified swap device number (aka "type"). A "store" will
25copy the page to transcendent memory and associate it with the type and
26offset associated with the page. A "load" will copy the page, if found,
27from transcendent memory into kernel memory, but will NOT remove the page
28from transcendent memory. An "invalidate_page" will remove the page
29from transcendent memory and an "invalidate_area" will remove ALL pages
30associated with the swap type (e.g., like swapoff) and notify the "device"
31to refuse further stores with that swap type.
32
33Once a page is successfully stored, a matching load on the page will normally
34succeed. So when the kernel finds itself in a situation where it needs
35to swap out a page, it first attempts to use frontswap. If the store returns
36success, the data has been successfully saved to transcendent memory and
37a disk write and, if the data is later read back, a disk read are avoided.
38If a store returns failure, transcendent memory has rejected the data, and the
39page can be written to swap as usual.
40
41If a backend chooses, frontswap can be configured as a "writethrough
42cache" by calling frontswap_writethrough(). In this mode, the reduction
43in swap device writes is lost (and also a non-trivial performance advantage)
44in order to allow the backend to arbitrarily "reclaim" space used to
45store frontswap pages to more completely manage its memory usage.
46
47Note that if a page is stored and the page already exists in transcendent memory
48(a "duplicate" store), either the store succeeds and the data is overwritten,
49or the store fails AND the page is invalidated. This ensures stale data may
50never be obtained from frontswap.
51
52If properly configured, monitoring of frontswap is done via debugfs in
53the /sys/kernel/debug/frontswap directory. The effectiveness of
54frontswap can be measured (across all swap devices) with:
55
56failed_stores - how many store attempts have failed
57loads - how many loads were attempted (all should succeed)
58succ_stores - how many store attempts have succeeded
59invalidates - how many invalidates were attempted
60
61A backend implementation may provide additional metrics.
62
63FAQ
64
651) Where's the value?
66
67When a workload starts swapping, performance falls through the floor.
68Frontswap significantly increases performance in many such workloads by
69providing a clean, dynamic interface to read and write swap pages to
70"transcendent memory" that is otherwise not directly addressable to the kernel.
71This interface is ideal when data is transformed to a different form
72and size (such as with compression) or secretly moved (as might be
73useful for write-balancing for some RAM-like devices). Swap pages (and
74evicted page-cache pages) are a great use for this kind of slower-than-RAM-
75but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
76cleancache) interface to transcendent memory provides a nice way to read
77and write -- and indirectly "name" -- the pages.
78
79Frontswap -- and cleancache -- with a fairly small impact on the kernel,
80provides a huge amount of flexibility for more dynamic, flexible RAM
81utilization in various system configurations:
82
83In the single kernel case, aka "zcache", pages are compressed and
84stored in local memory, thus increasing the total anonymous pages
85that can be safely kept in RAM. Zcache essentially trades off CPU
86cycles used in compression/decompression for better memory utilization.
87Benchmarks have shown little or no impact when memory pressure is
88low while providing a significant performance improvement (25%+)
89on some workloads under high memory pressure.
90
91"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
92support for clustered systems. Frontswap pages are locally compressed
93as in zcache, but then "remotified" to another system's RAM. This
94allows RAM to be dynamically load-balanced back-and-forth as needed,
95i.e. when system A is overcommitted, it can swap to system B, and
96vice versa. RAMster can also be configured as a memory server so
97many servers in a cluster can swap, dynamically as needed, to a single
98server configured with a large amount of RAM... without pre-configuring
99how much of the RAM is available for each of the clients!
100
101In the virtual case, the whole point of virtualization is to statistically
102multiplex physical resources across the varying demands of multiple
103virtual machines. This is really hard to do with RAM and efforts to do
104it well with no kernel changes have essentially failed (except in some
105well-publicized special-case workloads).
106Specifically, the Xen Transcendent Memory backend allows otherwise
107"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
108virtual machines, but the pages can be compressed and deduplicated to
109optimize RAM utilization. And when guest OS's are induced to surrender
110underutilized RAM (e.g. with "selfballooning"), sudden unexpected
111memory pressure may result in swapping; frontswap allows those pages
112to be swapped to and from hypervisor RAM (if overall host system memory
113conditions allow), thus mitigating the potentially awful performance impact
114of unplanned swapping.
115
116A KVM implementation is underway and has been RFC'ed to lkml. And,
117using frontswap, investigation is also underway on the use of NVM as
118a memory extension technology.
119
1202) Sure there may be performance advantages in some situations, but
121 what's the space/time overhead of frontswap?
122
123If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
124nothingness and the only overhead is a few extra bytes per swapon'ed
125swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
126registers, there is one extra global variable compared to zero for
127every swap page read or written. If CONFIG_FRONTSWAP is enabled
128AND a frontswap backend registers AND the backend fails every "store"
129request (i.e. provides no memory despite claiming it might),
130CPU overhead is still negligible -- and since every frontswap fail
131precedes a swap page write-to-disk, the system is highly likely
132to be I/O bound and using a small fraction of a percent of a CPU
133will be irrelevant anyway.
134
135As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
136registers, one bit is allocated for every swap page for every swap
137device that is swapon'd. This is added to the EIGHT bits (which
138was sixteen until about 2.6.34) that the kernel already allocates
139for every swap page for every swap device that is swapon'd. (Hugh
140Dickins has observed that frontswap could probably steal one of
141the existing eight bits, but let's worry about that minor optimization
142later.) For very large swap disks (which are rare) on a standard
1434K pagesize, this is 1MB per 32GB swap.
144
145When swap pages are stored in transcendent memory instead of written
146out to disk, there is a side effect that this may create more memory
147pressure that can potentially outweigh the other advantages. A
148backend, such as zcache, must implement policies to carefully (but
149dynamically) manage memory limits to ensure this doesn't happen.
150
1513) OK, how about a quick overview of what this frontswap patch does
152 in terms that a kernel hacker can grok?
153
154Let's assume that a frontswap "backend" has registered during
155kernel initialization; this registration indicates that this
156frontswap backend has access to some "memory" that is not directly
157accessible by the kernel. Exactly how much memory it provides is
158entirely dynamic and random.
159
160Whenever a swap-device is swapon'd frontswap_init() is called,
161passing the swap device number (aka "type") as a parameter.
162This notifies frontswap to expect attempts to "store" swap pages
163associated with that number.
164
165Whenever the swap subsystem is readying a page to write to a swap
166device (c.f swap_writepage()), frontswap_store is called. Frontswap
167consults with the frontswap backend and if the backend says it does NOT
168have room, frontswap_store returns -1 and the kernel swaps the page
169to the swap device as normal. Note that the response from the frontswap
170backend is unpredictable to the kernel; it may choose to never accept a
171page, it could accept every ninth page, or it might accept every
172page. But if the backend does accept a page, the data from the page
173has already been copied and associated with the type and offset,
174and the backend guarantees the persistence of the data. In this case,
175frontswap sets a bit in the "frontswap_map" for the swap device
176corresponding to the page offset on the swap device to which it would
177otherwise have written the data.
178
179When the swap subsystem needs to swap-in a page (swap_readpage()),
180it first calls frontswap_load() which checks the frontswap_map to
181see if the page was earlier accepted by the frontswap backend. If
182it was, the page of data is filled from the frontswap backend and
183the swap-in is complete. If not, the normal swap-in code is
184executed to obtain the page of data from the real swap device.
185
186So every time the frontswap backend accepts a page, a swap device read
187and (potentially) a swap device write are replaced by a "frontswap backend
188store" and (possibly) a "frontswap backend loads", which are presumably much
189faster.
190
1914) Can't frontswap be configured as a "special" swap device that is
192 just higher priority than any real swap device (e.g. like zswap,
193 or maybe swap-over-nbd/NFS)?
194
195No. First, the existing swap subsystem doesn't allow for any kind of
196swap hierarchy. Perhaps it could be rewritten to accommodate a hierarchy,
197but this would require fairly drastic changes. Even if it were
198rewritten, the existing swap subsystem uses the block I/O layer which
199assumes a swap device is fixed size and any page in it is linearly
200addressable. Frontswap barely touches the existing swap subsystem,
201and works around the constraints of the block I/O subsystem to provide
202a great deal of flexibility and dynamicity.
203
204For example, the acceptance of any swap page by the frontswap backend is
205entirely unpredictable. This is critical to the definition of frontswap
206backends because it grants completely dynamic discretion to the
207backend. In zcache, one cannot know a priori how compressible a page is.
208"Poorly" compressible pages can be rejected, and "poorly" can itself be
209defined dynamically depending on current memory constraints.
210
211Further, frontswap is entirely synchronous whereas a real swap
212device is, by definition, asynchronous and uses block I/O. The
213block I/O layer is not only unnecessary, but may perform "optimizations"
214that are inappropriate for a RAM-oriented device including delaying
215the write of some pages for a significant amount of time. Synchrony is
216required to ensure the dynamicity of the backend and to avoid thorny race
217conditions that would unnecessarily and greatly complicate frontswap
218and/or the block I/O subsystem. That said, only the initial "store"
219and "load" operations need be synchronous. A separate asynchronous thread
220is free to manipulate the pages stored by frontswap. For example,
221the "remotification" thread in RAMster uses standard asynchronous
222kernel sockets to move compressed frontswap pages to a remote machine.
223Similarly, a KVM guest-side implementation could do in-guest compression
224and use "batched" hypercalls.
225
226In a virtualized environment, the dynamicity allows the hypervisor
227(or host OS) to do "intelligent overcommit". For example, it can
228choose to accept pages only until host-swapping might be imminent,
229then force guests to do their own swapping.
230
231There is a downside to the transcendent memory specifications for
232frontswap: Since any "store" might fail, there must always be a real
233slot on a real swap device to swap the page. Thus frontswap must be
234implemented as a "shadow" to every swapon'd device with the potential
235capability of holding every page that the swap device might have held
236and the possibility that it might hold no pages at all. This means
237that frontswap cannot contain more pages than the total of swapon'd
238swap devices. For example, if NO swap device is configured on some
239installation, frontswap is useless. Swapless portable devices
240can still use frontswap but a backend for such devices must configure
241some kind of "ghost" swap device and ensure that it is never used.
242
2435) Why this weird definition about "duplicate stores"? If a page
244 has been previously successfully stored, can't it always be
245 successfully overwritten?
246
247Nearly always it can, but no, sometimes it cannot. Consider an example
248where data is compressed and the original 4K page has been compressed
249to 1K. Now an attempt is made to overwrite the page with data that
250is non-compressible and so would take the entire 4K. But the backend
251has no more space. In this case, the store must be rejected. Whenever
252frontswap rejects a store that would overwrite, it also must invalidate
253the old data and ensure that it is no longer accessible. Since the
254swap subsystem then writes the new data to the real swap device,
255this is the correct course of action to ensure coherency.
256
2576) What is frontswap_shrink for?
258
259When the (non-frontswap) swap subsystem swaps out a page to a real
260swap device, that page is only taking up low-value pre-allocated disk
261space. But if frontswap has placed a page in transcendent memory, that
262page may be taking up valuable real estate. The frontswap_shrink
263routine allows code outside of the swap subsystem to force pages out
264of the memory managed by frontswap and back into kernel-addressable memory.
265For example, in RAMster, a "suction driver" thread will attempt
266to "repatriate" pages sent to a remote machine back to the local machine;
267this is driven using the frontswap_shrink mechanism when memory pressure
268subsides.
269
2707) Why does the frontswap patch create the new include file swapfile.h?
271
272The frontswap code depends on some swap-subsystem-internal data
273structures that have, over the years, moved back and forth between
274static and global. This seemed a reasonable compromise: Define
275them as global but declare them in a new include file that isn't
276included by the large number of source files that include swap.h.
277
278Dan Magenheimer, last updated April 9, 2012
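To make the registration and store/load policy above concrete, here is a hedged sketch of the smallest possible backend: one that registers but rejects every store, which the text above explicitly allows (the page then simply goes to the real swap device). The callbacks follow the init/store/load/invalidate_page/invalidate_area operations described in this file; the exact struct frontswap_ops layout and frontswap_register_ops() signature should be taken from include/linux/frontswap.h, added by this same series, and every sketch_* identifier is hypothetical.

#include <linux/module.h>
#include <linux/frontswap.h>

static void sketch_init(unsigned type)
{
	/* Swap device "type" was swapon'd; stores for it may follow. */
}

static int sketch_store(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* refuse: the kernel writes the page to the swap device */
}

static int sketch_load(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* never succeeds, since nothing was ever stored */
}

static void sketch_invalidate_page(unsigned type, pgoff_t offset)
{
}

static void sketch_invalidate_area(unsigned type)
{
}

static struct frontswap_ops sketch_ops = {
	.init			= sketch_init,
	.store			= sketch_store,
	.load			= sketch_load,
	.invalidate_page	= sketch_invalidate_page,
	.invalidate_area	= sketch_invalidate_area,
};

static int __init sketch_frontswap_init(void)
{
	frontswap_register_ops(&sketch_ops);
	return 0;
}
module_init(sketch_frontswap_init);

A real backend (zcache, the Xen tmem driver, RAMster) would replace the store/load bodies with copies into and out of its own pool while keeping the duplicate-store rule above: overwrite on success, invalidate on failure.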
diff --git a/MAINTAINERS b/MAINTAINERS
index 55f0fda602ec..f935a0cef404 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2930,6 +2930,13 @@ F: Documentation/power/freezing-of-tasks.txt
 F:	include/linux/freezer.h
 F:	kernel/freezer.c
 
+FRONTSWAP API
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	mm/frontswap.c
+F:	include/linux/frontswap.h
+
 FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
 M:	David Howells <dhowells@redhat.com>
 L:	linux-cachefs@redhat.com
@@ -4096,6 +4103,8 @@ F: drivers/scsi/53c700*
 LED SUBSYSTEM
 M:	Bryan Wu <bryan.wu@canonical.com>
 M:	Richard Purdie <rpurdie@rpsys.net>
+L:	linux-leds@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git
 S:	Maintained
 F:	drivers/leds/
 F:	include/linux/leds.h
@@ -7291,11 +7300,11 @@ F: Documentation/DocBook/uio-howto.tmpl
 F:	drivers/uio/
 F:	include/linux/uio*.h
 
-UTIL-LINUX-NG PACKAGE
+UTIL-LINUX PACKAGE
 M:	Karel Zak <kzak@redhat.com>
-L:	util-linux-ng@vger.kernel.org
-W:	http://kernel.org/~kzak/util-linux-ng/
-T:	git git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git
+L:	util-linux@vger.kernel.org
+W:	http://en.wikipedia.org/wiki/Util-linux
+T:	git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 S:	Maintained
 
 UVESAFB DRIVER
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index f31383c32f9c..df33909205e2 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -186,6 +186,12 @@ config SH_TIMER_TMU
 	help
 	  This enables build of the TMU timer driver.
 
+config EM_TIMER_STI
+	bool "STI timer driver"
+	default y
+	help
+	  This enables build of the STI timer driver.
+
 endmenu
 
 config SH_CLK_CPG
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index c140f9b41dce..d552a854dacc 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
 	if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
 		syscall = 1;
 
-	if (ti->flags & _TIF_SIGPENDING))
+	if (ti->flags & _TIF_SIGPENDING)
 		do_signal(regs, syscall);
 
 	if (ti->flags & _TIF_NOTIFY_RESUME) {
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 2e3994b20169..62bcea7dcc6d 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
 	unsigned long newsp;
 
 #ifdef __ARCH_SYNC_CORE_DCACHE
-	if (current->rt.nr_cpus_allowed == num_possible_cpus())
+	if (current->nr_cpus_allowed == num_possible_cpus())
 		set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..fd019d78b1f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 /* maps the cpu to the sched domain representing multi-core */
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	/*
-	 * For perf, we return last level cache shared map.
-	 * And for power savings, we return cpu_core_map
-	 */
-	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
-		return cpu_core_mask(cpu);
-	else
-		return cpu_llc_shared_mask(cpu);
+	return cpu_llc_shared_mask(cpu);
 }
 
 static void impress_friends(void)
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 0b9f2e13c781..c1dacca312f3 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
 asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 	struct timespec __user *tsp, const sigset_t __user *sigmask,
 	size_t sigsetsize);
-
-
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
+	size_t sigsetsize);
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index b9f8e5850d3a..efe4e854b3cd 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs)
 	if (ret)
 		return;
 
-	signal_delivered(signr, info, ka, regs, 0);
+	signal_delivered(signr, &info, &ka, regs, 0);
 	if (current->ptrace & PT_SINGLESTEP)
 		task_pt_regs(current)->icountlevel = 1;
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3188da3df8da..adceafda9c17 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -182,41 +182,66 @@ EXPORT_SYMBOL(acpi_bus_get_private_data);
                                 Power Management
    -------------------------------------------------------------------------- */
 
+static const char *state_string(int state)
+{
+	switch (state) {
+	case ACPI_STATE_D0:
+		return "D0";
+	case ACPI_STATE_D1:
+		return "D1";
+	case ACPI_STATE_D2:
+		return "D2";
+	case ACPI_STATE_D3_HOT:
+		return "D3hot";
+	case ACPI_STATE_D3_COLD:
+		return "D3";
+	default:
+		return "(unknown)";
+	}
+}
+
 static int __acpi_bus_get_power(struct acpi_device *device, int *state)
 {
-	int result = 0;
-	acpi_status status = 0;
-	unsigned long long psc = 0;
+	int result = ACPI_STATE_UNKNOWN;
 
 	if (!device || !state)
 		return -EINVAL;
 
-	*state = ACPI_STATE_UNKNOWN;
-
-	if (device->flags.power_manageable) {
-		/*
-		 * Get the device's power state either directly (via _PSC) or
-		 * indirectly (via power resources).
-		 */
-		if (device->power.flags.power_resources) {
-			result = acpi_power_get_inferred_state(device, state);
-			if (result)
-				return result;
-		} else if (device->power.flags.explicit_get) {
-			status = acpi_evaluate_integer(device->handle, "_PSC",
-						       NULL, &psc);
-			if (ACPI_FAILURE(status))
-				return -ENODEV;
-			*state = (int)psc;
-		}
-	} else {
+	if (!device->flags.power_manageable) {
 		/* TBD: Non-recursive algorithm for walking up hierarchy. */
 		*state = device->parent ?
 			device->parent->power.state : ACPI_STATE_D0;
+		goto out;
+	}
+
+	/*
+	 * Get the device's power state either directly (via _PSC) or
+	 * indirectly (via power resources).
+	 */
+	if (device->power.flags.explicit_get) {
+		unsigned long long psc;
+		acpi_status status = acpi_evaluate_integer(device->handle,
+							   "_PSC", NULL, &psc);
+		if (ACPI_FAILURE(status))
+			return -ENODEV;
+
+		result = psc;
+	}
+	/* The test below covers ACPI_STATE_UNKNOWN too. */
+	if (result <= ACPI_STATE_D2) {
+		; /* Do nothing. */
+	} else if (device->power.flags.power_resources) {
+		int error = acpi_power_get_inferred_state(device, &result);
+		if (error)
+			return error;
+	} else if (result == ACPI_STATE_D3_HOT) {
+		result = ACPI_STATE_D3;
 	}
+	*state = result;
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is D%d\n",
-			  device->pnp.bus_id, *state));
+ out:
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n",
+			  device->pnp.bus_id, state_string(*state)));
 
 	return 0;
 }
@@ -234,13 +259,14 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
 	/* Make sure this is a valid target state */
 
 	if (state == device->power.state) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at D%d\n",
-				  state));
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at %s\n",
+				  state_string(state)));
 		return 0;
 	}
 
 	if (!device->power.states[state].flags.valid) {
-		printk(KERN_WARNING PREFIX "Device does not support D%d\n", state);
+		printk(KERN_WARNING PREFIX "Device does not support %s\n",
+			state_string(state));
 		return -ENODEV;
 	}
 	if (device->parent && (state < device->parent->power.state)) {
@@ -294,13 +320,13 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
  end:
 	if (result)
 		printk(KERN_WARNING PREFIX
-		       "Device [%s] failed to transition to D%d\n",
-		       device->pnp.bus_id, state);
+		       "Device [%s] failed to transition to %s\n",
+		       device->pnp.bus_id, state_string(state));
 	else {
 		device->power.state = state;
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Device [%s] transitioned to D%d\n",
-				  device->pnp.bus_id, state));
+				  "Device [%s] transitioned to %s\n",
+				  device->pnp.bus_id, state_string(state)));
 	}
 
 	return result;
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 0500f719f63e..dd6d6a3c6780 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -631,7 +631,7 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
 	 * We know a device's inferred power state when all the resources
 	 * required for a given D-state are 'on'.
 	 */
-	for (i = ACPI_STATE_D0; i < ACPI_STATE_D3_HOT; i++) {
+	for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) {
 		list = &device->power.states[i].resources;
 		if (list->count < 1)
 			continue;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 85cbfdccc97c..c8a1f3b68110 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1567,6 +1567,7 @@ static int acpi_bus_scan_fixed(void)
 						ACPI_BUS_TYPE_POWER_BUTTON,
 						ACPI_STA_DEFAULT,
 						&ops);
+		device_init_wakeup(&device->dev, true);
 	}
 
 	if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) {
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 74ee4ab577b6..88561029cca8 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -57,6 +57,7 @@ MODULE_PARM_DESC(gts, "Enable evaluation of _GTS on suspend.");
 MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".);
 
 static u8 sleep_states[ACPI_S_STATE_COUNT];
+static bool pwr_btn_event_pending;
 
 static void acpi_sleep_tts_switch(u32 acpi_state)
 {
@@ -184,6 +185,14 @@ static int acpi_pm_prepare(void)
 	return error;
 }
 
+static int find_powerf_dev(struct device *dev, void *data)
+{
+	struct acpi_device *device = to_acpi_device(dev);
+	const char *hid = acpi_device_hid(device);
+
+	return !strcmp(hid, ACPI_BUTTON_HID_POWERF);
+}
+
 /**
  * acpi_pm_finish - Instruct the platform to leave a sleep state.
  *
@@ -192,6 +201,7 @@ static int acpi_pm_prepare(void)
  */
 static void acpi_pm_finish(void)
 {
+	struct device *pwr_btn_dev;
 	u32 acpi_state = acpi_target_sleep_state;
 
 	acpi_ec_unblock_transactions();
@@ -209,6 +219,23 @@ static void acpi_pm_finish(void)
 	acpi_set_firmware_waking_vector((acpi_physical_address) 0);
 
 	acpi_target_sleep_state = ACPI_STATE_S0;
+
+	/* If we were woken with the fixed power button, provide a small
+	 * hint to userspace in the form of a wakeup event on the fixed power
+	 * button device (if it can be found).
+	 *
+	 * We delay the event generation til now, as the PM layer requires
+	 * timekeeping to be running before we generate events. */
+	if (!pwr_btn_event_pending)
+		return;
+
+	pwr_btn_event_pending = false;
+	pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL,
+				      find_powerf_dev);
+	if (pwr_btn_dev) {
+		pm_wakeup_event(pwr_btn_dev, 0);
+		put_device(pwr_btn_dev);
+	}
 }
 
 /**
@@ -298,9 +325,23 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
 	/* ACPI 3.0 specs (P62) says that it's the responsibility
 	 * of the OSPM to clear the status bit [ implying that the
 	 * POWER_BUTTON event should not reach userspace ]
+	 *
+	 * However, we do generate a small hint for userspace in the form of
+	 * a wakeup event. We flag this condition for now and generate the
+	 * event later, as we're currently too early in resume to be able to
+	 * generate wakeup events.
 	 */
-	if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3))
-		acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
+	if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) {
+		acpi_event_status pwr_btn_status;
+
+		acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status);
+
+		if (pwr_btn_status & ACPI_EVENT_FLAG_SET) {
+			acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
+			/* Flag for later */
+			pwr_btn_event_pending = true;
+		}
+	}
 
 	/*
 	 * Disable and clear GPE status before interrupt is enabled. Some GPEs
@@ -730,8 +771,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
 	 * can wake the system. _S0W may be valid, too.
 	 */
 	if (acpi_target_sleep_state == ACPI_STATE_S0 ||
-	    (device_may_wakeup(dev) &&
-	     adev->wakeup.sleep_state <= acpi_target_sleep_state)) {
+	    (device_may_wakeup(dev) && adev->wakeup.flags.valid &&
+	     adev->wakeup.sleep_state >= acpi_target_sleep_state)) {
 		acpi_status status;
 
 		acpi_method[3] = 'W';
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 8d81a1d32653..dd3e661a124d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
 obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
+obj-$(CONFIG_EM_TIMER_STI)	+= em_sti.o
 obj-$(CONFIG_CLKBLD_I8253)	+= i8253.o
 obj-$(CONFIG_CLKSRC_MMIO)	+= mmio.o
 obj-$(CONFIG_DW_APB_TIMER)	+= dw_apb_timer.o
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
new file mode 100644
index 000000000000..372051d1bba8
--- /dev/null
+++ b/drivers/clocksource/em_sti.c
@@ -0,0 +1,406 @@
1/*
2 * Emma Mobile Timer Support - STI
3 *
4 * Copyright (C) 2012 Magnus Damm
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/init.h>
21#include <linux/platform_device.h>
22#include <linux/spinlock.h>
23#include <linux/interrupt.h>
24#include <linux/ioport.h>
25#include <linux/io.h>
26#include <linux/clk.h>
27#include <linux/irq.h>
28#include <linux/err.h>
29#include <linux/delay.h>
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/slab.h>
33#include <linux/module.h>
34
35enum { USER_CLOCKSOURCE, USER_CLOCKEVENT, USER_NR };
36
37struct em_sti_priv {
38 void __iomem *base;
39 struct clk *clk;
40 struct platform_device *pdev;
41 unsigned int active[USER_NR];
42 unsigned long rate;
43 raw_spinlock_t lock;
44 struct clock_event_device ced;
45 struct clocksource cs;
46};
47
48#define STI_CONTROL 0x00
49#define STI_COMPA_H 0x10
50#define STI_COMPA_L 0x14
51#define STI_COMPB_H 0x18
52#define STI_COMPB_L 0x1c
53#define STI_COUNT_H 0x20
54#define STI_COUNT_L 0x24
55#define STI_COUNT_RAW_H 0x28
56#define STI_COUNT_RAW_L 0x2c
57#define STI_SET_H 0x30
58#define STI_SET_L 0x34
59#define STI_INTSTATUS 0x40
60#define STI_INTRAWSTATUS 0x44
61#define STI_INTENSET 0x48
62#define STI_INTENCLR 0x4c
63#define STI_INTFFCLR 0x50
64
65static inline unsigned long em_sti_read(struct em_sti_priv *p, int offs)
66{
67 return ioread32(p->base + offs);
68}
69
70static inline void em_sti_write(struct em_sti_priv *p, int offs,
71 unsigned long value)
72{
73 iowrite32(value, p->base + offs);
74}
75
76static int em_sti_enable(struct em_sti_priv *p)
77{
78 int ret;
79
80 /* enable clock */
81 ret = clk_enable(p->clk);
82 if (ret) {
83 dev_err(&p->pdev->dev, "cannot enable clock\n");
84 return ret;
85 }
86
87 /* configure channel, periodic mode and maximum timeout */
88 p->rate = clk_get_rate(p->clk);
89
90 /* reset the counter */
91 em_sti_write(p, STI_SET_H, 0x40000000);
92 em_sti_write(p, STI_SET_L, 0x00000000);
93
94 /* mask and clear pending interrupts */
95 em_sti_write(p, STI_INTENCLR, 3);
96 em_sti_write(p, STI_INTFFCLR, 3);
97
98 /* enable updates of counter registers */
99 em_sti_write(p, STI_CONTROL, 1);
100
101 return 0;
102}
103
104static void em_sti_disable(struct em_sti_priv *p)
105{
106 /* mask interrupts */
107 em_sti_write(p, STI_INTENCLR, 3);
108
109 /* stop clock */
110 clk_disable(p->clk);
111}
112
113static cycle_t em_sti_count(struct em_sti_priv *p)
114{
115 cycle_t ticks;
116 unsigned long flags;
117
118 /* the STI hardware buffers the 48-bit count, but to
119 * break it out into two 32-bit access the registers
120 * must be accessed in a certain order.
121 * Always read STI_COUNT_H before STI_COUNT_L.
122 */
123 raw_spin_lock_irqsave(&p->lock, flags);
124 ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32;
125 ticks |= em_sti_read(p, STI_COUNT_L);
126 raw_spin_unlock_irqrestore(&p->lock, flags);
127
128 return ticks;
129}
130
131static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next)
132{
133 unsigned long flags;
134
135 raw_spin_lock_irqsave(&p->lock, flags);
136
137 /* mask compare A interrupt */
138 em_sti_write(p, STI_INTENCLR, 1);
139
140 /* update compare A value */
141 em_sti_write(p, STI_COMPA_H, next >> 32);
142 em_sti_write(p, STI_COMPA_L, next & 0xffffffff);
143
144 /* clear compare A interrupt source */
145 em_sti_write(p, STI_INTFFCLR, 1);
146
147 /* unmask compare A interrupt */
148 em_sti_write(p, STI_INTENSET, 1);
149
150 raw_spin_unlock_irqrestore(&p->lock, flags);
151
152 return next;
153}
154
155static irqreturn_t em_sti_interrupt(int irq, void *dev_id)
156{
157 struct em_sti_priv *p = dev_id;
158
159 p->ced.event_handler(&p->ced);
160 return IRQ_HANDLED;
161}
162
163static int em_sti_start(struct em_sti_priv *p, unsigned int user)
164{
165 unsigned long flags;
166 int used_before;
167 int ret = 0;
168
169 raw_spin_lock_irqsave(&p->lock, flags);
170 used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
171 if (!used_before)
172 ret = em_sti_enable(p);
173
174 if (!ret)
175 p->active[user] = 1;
176 raw_spin_unlock_irqrestore(&p->lock, flags);
177
178 return ret;
179}
180
181static void em_sti_stop(struct em_sti_priv *p, unsigned int user)
182{
183 unsigned long flags;
184 int used_before, used_after;
185
186 raw_spin_lock_irqsave(&p->lock, flags);
187 used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
188 p->active[user] = 0;
189 used_after = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
190
191 if (used_before && !used_after)
192 em_sti_disable(p);
193 raw_spin_unlock_irqrestore(&p->lock, flags);
194}
195
196static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs)
197{
198 return container_of(cs, struct em_sti_priv, cs);
199}
200
201static cycle_t em_sti_clocksource_read(struct clocksource *cs)
202{
203 return em_sti_count(cs_to_em_sti(cs));
204}
205
206static int em_sti_clocksource_enable(struct clocksource *cs)
207{
208 int ret;
209 struct em_sti_priv *p = cs_to_em_sti(cs);
210
211 ret = em_sti_start(p, USER_CLOCKSOURCE);
212 if (!ret)
213 __clocksource_updatefreq_hz(cs, p->rate);
214 return ret;
215}
216
217static void em_sti_clocksource_disable(struct clocksource *cs)
218{
219 em_sti_stop(cs_to_em_sti(cs), USER_CLOCKSOURCE);
220}
221
222static void em_sti_clocksource_resume(struct clocksource *cs)
223{
224 em_sti_clocksource_enable(cs);
225}
226
227static int em_sti_register_clocksource(struct em_sti_priv *p)
228{
229 struct clocksource *cs = &p->cs;
230
231 memset(cs, 0, sizeof(*cs));
232 cs->name = dev_name(&p->pdev->dev);
233 cs->rating = 200;
234 cs->read = em_sti_clocksource_read;
235 cs->enable = em_sti_clocksource_enable;
236 cs->disable = em_sti_clocksource_disable;
237 cs->suspend = em_sti_clocksource_disable;
238 cs->resume = em_sti_clocksource_resume;
239 cs->mask = CLOCKSOURCE_MASK(48);
240 cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
241
242 dev_info(&p->pdev->dev, "used as clock source\n");
243
244 /* Register with dummy 1 Hz value, gets updated in ->enable() */
245 clocksource_register_hz(cs, 1);
246 return 0;
247}
248
249static struct em_sti_priv *ced_to_em_sti(struct clock_event_device *ced)
250{
251 return container_of(ced, struct em_sti_priv, ced);
252}
253
254static void em_sti_clock_event_mode(enum clock_event_mode mode,
255 struct clock_event_device *ced)
256{
257 struct em_sti_priv *p = ced_to_em_sti(ced);
258
259 /* deal with old setting first */
260 switch (ced->mode) {
261 case CLOCK_EVT_MODE_ONESHOT:
262 em_sti_stop(p, USER_CLOCKEVENT);
263 break;
264 default:
265 break;
266 }
267
268 switch (mode) {
269 case CLOCK_EVT_MODE_ONESHOT:
270 dev_info(&p->pdev->dev, "used for oneshot clock events\n");
271 em_sti_start(p, USER_CLOCKEVENT);
272 clockevents_config(&p->ced, p->rate);
273 break;
274 case CLOCK_EVT_MODE_SHUTDOWN:
275 case CLOCK_EVT_MODE_UNUSED:
276 em_sti_stop(p, USER_CLOCKEVENT);
277 break;
278 default:
279 break;
280 }
281}
282
283static int em_sti_clock_event_next(unsigned long delta,
284 struct clock_event_device *ced)
285{
286 struct em_sti_priv *p = ced_to_em_sti(ced);
287 cycle_t next;
288 int safe;
289
290 next = em_sti_set_next(p, em_sti_count(p) + delta);
291 safe = em_sti_count(p) < (next - 1);
292
293 return !safe;
294}
295
296static void em_sti_register_clockevent(struct em_sti_priv *p)
297{
298 struct clock_event_device *ced = &p->ced;
299
300 memset(ced, 0, sizeof(*ced));
301 ced->name = dev_name(&p->pdev->dev);
302 ced->features = CLOCK_EVT_FEAT_ONESHOT;
303 ced->rating = 200;
304 ced->cpumask = cpumask_of(0);
305 ced->set_next_event = em_sti_clock_event_next;
306 ced->set_mode = em_sti_clock_event_mode;
307
308 dev_info(&p->pdev->dev, "used for clock events\n");
309
310 /* Register with dummy 1 Hz value, gets updated in ->set_mode() */
311 clockevents_config_and_register(ced, 1, 2, 0xffffffff);
312}
313
314static int __devinit em_sti_probe(struct platform_device *pdev)
315{
316 struct em_sti_priv *p;
317 struct resource *res;
318 int irq, ret;
319
320 p = kzalloc(sizeof(*p), GFP_KERNEL);
321 if (p == NULL) {
322 dev_err(&pdev->dev, "failed to allocate driver data\n");
323 ret = -ENOMEM;
324 goto err0;
325 }
326
327 p->pdev = pdev;
328 platform_set_drvdata(pdev, p);
329
330 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
331 if (!res) {
332 dev_err(&pdev->dev, "failed to get I/O memory\n");
333 ret = -EINVAL;
334 goto err0;
335 }
336
337 irq = platform_get_irq(pdev, 0);
338 if (irq < 0) {
339 dev_err(&pdev->dev, "failed to get irq\n");
340 ret = -EINVAL;
341 goto err0;
342 }
343
344 /* map memory, let base point to the STI instance */
345 p->base = ioremap_nocache(res->start, resource_size(res));
346 if (p->base == NULL) {
347 dev_err(&pdev->dev, "failed to remap I/O memory\n");
348 ret = -ENXIO;
349 goto err0;
350 }
351
352 /* get hold of clock */
353 p->clk = clk_get(&pdev->dev, "sclk");
354 if (IS_ERR(p->clk)) {
355 dev_err(&pdev->dev, "cannot get clock\n");
356 ret = PTR_ERR(p->clk);
357 goto err1;
358 }
359
360 if (request_irq(irq, em_sti_interrupt,
361 IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
362 dev_name(&pdev->dev), p)) {
363 dev_err(&pdev->dev, "failed to request low IRQ\n");
364 ret = -ENOENT;
365 goto err2;
366 }
367
368 raw_spin_lock_init(&p->lock);
369 em_sti_register_clockevent(p);
370 em_sti_register_clocksource(p);
371 return 0;
372
373err2:
374 clk_put(p->clk);
375err1:
376 iounmap(p->base);
377err0:
378 kfree(p);
379 return ret;
380}
381
382static int __devexit em_sti_remove(struct platform_device *pdev)
383{
384 return -EBUSY; /* cannot unregister clockevent and clocksource */
385}
386
387static const struct of_device_id em_sti_dt_ids[] __devinitconst = {
388 { .compatible = "renesas,em-sti", },
389 {},
390};
391MODULE_DEVICE_TABLE(of, em_sti_dt_ids);
392
393static struct platform_driver em_sti_device_driver = {
394 .probe = em_sti_probe,
395 .remove = __devexit_p(em_sti_remove),
396 .driver = {
397 .name = "em_sti",
398 .of_match_table = em_sti_dt_ids,
399 }
400};
401
402module_platform_driver(em_sti_device_driver);
403
404MODULE_AUTHOR("Magnus Damm");
405MODULE_DESCRIPTION("Renesas Emma Mobile STI Timer Driver");
406MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c
index 7bb00448e13d..b6453d0e44ad 100644
--- a/drivers/gpio/gpio-samsung.c
+++ b/drivers/gpio/gpio-samsung.c
@@ -2833,7 +2833,7 @@ static __init void exynos5_gpiolib_init(void)
 	}
 
 	/* need to set base address for gpc4 */
-	exonys5_gpios_1[11].base = gpio_base1 + 0x2E0;
+	exynos5_gpios_1[11].base = gpio_base1 + 0x2E0;
 
 	/* need to set base address for gpx */
 	chip = &exynos5_gpios_1[21];
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3df4efa11942..3186522a4458 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -460,15 +460,28 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		rdev->config.cayman.max_pipes_per_simd = 4;
 		rdev->config.cayman.max_tile_pipes = 2;
 		if ((rdev->pdev->device == 0x9900) ||
-		    (rdev->pdev->device == 0x9901)) {
+		    (rdev->pdev->device == 0x9901) ||
+		    (rdev->pdev->device == 0x9905) ||
+		    (rdev->pdev->device == 0x9906) ||
+		    (rdev->pdev->device == 0x9907) ||
+		    (rdev->pdev->device == 0x9908) ||
+		    (rdev->pdev->device == 0x9909) ||
+		    (rdev->pdev->device == 0x9910) ||
+		    (rdev->pdev->device == 0x9917)) {
 			rdev->config.cayman.max_simds_per_se = 6;
 			rdev->config.cayman.max_backends_per_se = 2;
 		} else if ((rdev->pdev->device == 0x9903) ||
-			   (rdev->pdev->device == 0x9904)) {
+			   (rdev->pdev->device == 0x9904) ||
+			   (rdev->pdev->device == 0x990A) ||
+			   (rdev->pdev->device == 0x9913) ||
+			   (rdev->pdev->device == 0x9918)) {
 			rdev->config.cayman.max_simds_per_se = 4;
 			rdev->config.cayman.max_backends_per_se = 2;
-		} else if ((rdev->pdev->device == 0x9990) ||
-			   (rdev->pdev->device == 0x9991)) {
+		} else if ((rdev->pdev->device == 0x9919) ||
+			   (rdev->pdev->device == 0x9990) ||
+			   (rdev->pdev->device == 0x9991) ||
+			   (rdev->pdev->device == 0x9994) ||
+			   (rdev->pdev->device == 0x99A0)) {
 			rdev->config.cayman.max_simds_per_se = 3;
 			rdev->config.cayman.max_backends_per_se = 1;
 		} else {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 45cfcea63507..f30dc95f83b1 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2426,6 +2426,12 @@ int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_audio_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: audio init failed\n");
+		return r;
+	}
+
 	return 0;
 }
 
@@ -2462,12 +2468,6 @@ int r600_resume(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		DRM_ERROR("radeon: audio resume failed\n");
-		return r;
-	}
-
 	return r;
 }
 
@@ -2577,9 +2577,6 @@ int r600_init(struct radeon_device *rdev)
 		rdev->accel_working = false;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r)
-		return r; /* TODO error handling */
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index 7c4fa77f018f..7479a5c503e4 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -192,6 +192,7 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
 	int base_rate = 48000;
 
 	switch (radeon_encoder->encoder_id) {
@@ -217,8 +218,8 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
 		WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10);
 		WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071);
 
-		/* Some magic trigger or src sel? */
-		WREG32_P(0x5ac, 0x01, ~0x77);
+		/* Select DTO source */
+		WREG32(0x5ac, radeon_crtc->crtc_id);
 	} else {
 		switch (dig->dig_encoder) {
 		case 0:
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index 226379e00ac1..969c27529dfe 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -348,7 +348,6 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
348 WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset, 348 WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
349 HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */ 349 HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
350 HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */ 350 HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
351 HDMI0_AUDIO_SEND_MAX_PACKETS | /* send NULL packets if no audio is available */
352 HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */ 351 HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */
353 HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */ 352 HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
354 } 353 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 85dac33e3cce..fefcca55c1eb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1374,9 +1374,9 @@ struct cayman_asic {
1374 1374
1375struct si_asic { 1375struct si_asic {
1376 unsigned max_shader_engines; 1376 unsigned max_shader_engines;
1377 unsigned max_pipes_per_simd;
1378 unsigned max_tile_pipes; 1377 unsigned max_tile_pipes;
-1379	unsigned max_simds_per_se;
+1378	unsigned max_cu_per_sh;
1379 unsigned max_sh_per_se;
1380 unsigned max_backends_per_se; 1380 unsigned max_backends_per_se;
1381 unsigned max_texture_channel_caches; 1381 unsigned max_texture_channel_caches;
1382 unsigned max_gprs; 1382 unsigned max_gprs;
@@ -1387,7 +1387,6 @@ struct si_asic {
1387 unsigned sc_hiz_tile_fifo_size; 1387 unsigned sc_hiz_tile_fifo_size;
1388 unsigned sc_earlyz_tile_fifo_size; 1388 unsigned sc_earlyz_tile_fifo_size;
1389 1389
1390 unsigned num_shader_engines;
1391 unsigned num_tile_pipes; 1390 unsigned num_tile_pipes;
1392 unsigned num_backends_per_se; 1391 unsigned num_backends_per_se;
1393 unsigned backend_disable_mask_per_asic; 1392 unsigned backend_disable_mask_per_asic;
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 79db56e6c2ac..59d44937dd9f 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -476,12 +476,18 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
476 476
477 mutex_lock(&vm->mutex); 477 mutex_lock(&vm->mutex);
478 if (last_pfn > vm->last_pfn) { 478 if (last_pfn > vm->last_pfn) {
-479		/* grow va space 32M by 32M */
-480		unsigned align = ((32 << 20) >> 12) - 1;
+479		/* release mutex and lock in right order */
+480		mutex_unlock(&vm->mutex);
481 radeon_mutex_lock(&rdev->cs_mutex); 481 radeon_mutex_lock(&rdev->cs_mutex);
-482		radeon_vm_unbind_locked(rdev, vm);
+482		mutex_lock(&vm->mutex);
483 /* and check again */
484 if (last_pfn > vm->last_pfn) {
485 /* grow va space 32M by 32M */
486 unsigned align = ((32 << 20) >> 12) - 1;
487 radeon_vm_unbind_locked(rdev, vm);
488 vm->last_pfn = (last_pfn + align) & ~align;
489 }
483 radeon_mutex_unlock(&rdev->cs_mutex); 490 radeon_mutex_unlock(&rdev->cs_mutex);
484 vm->last_pfn = (last_pfn + align) & ~align;
485 } 491 }
486 head = &vm->va; 492 head = &vm->va;
487 last_offset = 0; 493 last_offset = 0;
@@ -595,8 +601,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
595 if (bo_va == NULL) 601 if (bo_va == NULL)
596 return 0; 602 return 0;
597 603
598 mutex_lock(&vm->mutex);
599 radeon_mutex_lock(&rdev->cs_mutex); 604 radeon_mutex_lock(&rdev->cs_mutex);
605 mutex_lock(&vm->mutex);
600 radeon_vm_bo_update_pte(rdev, vm, bo, NULL); 606 radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
601 radeon_mutex_unlock(&rdev->cs_mutex); 607 radeon_mutex_unlock(&rdev->cs_mutex);
602 list_del(&bo_va->vm_list); 608 list_del(&bo_va->vm_list);
@@ -641,9 +647,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
641 struct radeon_bo_va *bo_va, *tmp; 647 struct radeon_bo_va *bo_va, *tmp;
642 int r; 648 int r;
643 649
644 mutex_lock(&vm->mutex);
645
646 radeon_mutex_lock(&rdev->cs_mutex); 650 radeon_mutex_lock(&rdev->cs_mutex);
651 mutex_lock(&vm->mutex);
647 radeon_vm_unbind_locked(rdev, vm); 652 radeon_vm_unbind_locked(rdev, vm);
648 radeon_mutex_unlock(&rdev->cs_mutex); 653 radeon_mutex_unlock(&rdev->cs_mutex);
649 654
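The three radeon_gart.c hunks above converge on one locking rule: rdev->cs_mutex is always taken before vm->mutex, and radeon_vm_bo_add() re-checks last_pfn after dropping and re-taking the locks in that order. The following stand-alone pthread sketch (hypothetical names, not radeon code) shows the same drop/re-take/re-check pattern and why a fixed acquisition order avoids an ABBA deadlock between two locks.

/* build with: cc -pthread lock_order.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;	/* stands in for cs_mutex */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;	/* stands in for vm->mutex */
static int last_pfn = 100;

static void grow_va_space(int new_pfn)
{
	pthread_mutex_lock(&lock_b);
	if (new_pfn > last_pfn) {
		/* drop B, then take A and B in the agreed order */
		pthread_mutex_unlock(&lock_b);
		pthread_mutex_lock(&lock_a);
		pthread_mutex_lock(&lock_b);
		/* state may have changed while B was unlocked: check again */
		if (new_pfn > last_pfn)
			last_pfn = new_pfn;
		pthread_mutex_unlock(&lock_a);
	}
	pthread_mutex_unlock(&lock_b);
}

int main(void)
{
	grow_va_space(200);
	printf("last_pfn = %d\n", last_pfn);
	return 0;
}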
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f1016a5820d1..5c58d7d90cb2 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -273,7 +273,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
273 break; 273 break;
274 case RADEON_INFO_MAX_PIPES: 274 case RADEON_INFO_MAX_PIPES:
275 if (rdev->family >= CHIP_TAHITI) 275 if (rdev->family >= CHIP_TAHITI)
-276			value = rdev->config.si.max_pipes_per_simd;
+276			value = rdev->config.si.max_cu_per_sh;
277 else if (rdev->family >= CHIP_CAYMAN) 277 else if (rdev->family >= CHIP_CAYMAN)
278 value = rdev->config.cayman.max_pipes_per_simd; 278 value = rdev->config.cayman.max_pipes_per_simd;
279 else if (rdev->family >= CHIP_CEDAR) 279 else if (rdev->family >= CHIP_CEDAR)
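With the hunk above, the RADEON_INFO_MAX_PIPES query reports max_cu_per_sh on SI parts instead of the dropped max_pipes_per_simd field. For reference, a user-space caller reaches this switch roughly as follows; this is a hedged sketch (device node and header location vary by system), not part of the patch.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/radeon_drm.h>	/* may live under a libdrm include path instead */

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* assumes card0 is the radeon GPU */
	uint32_t pipes = 0;
	struct drm_radeon_info info = { 0 };

	if (fd < 0)
		return 1;
	info.request = RADEON_INFO_MAX_PIPES;
	info.value = (uintptr_t)&pipes;		/* kernel writes the result through this pointer */
	if (ioctl(fd, DRM_IOCTL_RADEON_INFO, &info) == 0)
		printf("MAX_PIPES: %u\n", pipes);
	close(fd);
	return 0;
}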
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 25f9eef12c42..e95c5e61d4e2 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -908,12 +908,6 @@ static int rs600_startup(struct radeon_device *rdev)
908 return r; 908 return r;
909 } 909 }
910 910
911 r = r600_audio_init(rdev);
912 if (r) {
913 dev_err(rdev->dev, "failed initializing audio\n");
914 return r;
915 }
916
917 r = radeon_ib_pool_start(rdev); 911 r = radeon_ib_pool_start(rdev);
918 if (r) 912 if (r)
919 return r; 913 return r;
@@ -922,6 +916,12 @@ static int rs600_startup(struct radeon_device *rdev)
922 if (r) 916 if (r)
923 return r; 917 return r;
924 918
919 r = r600_audio_init(rdev);
920 if (r) {
921 dev_err(rdev->dev, "failed initializing audio\n");
922 return r;
923 }
924
925 return 0; 925 return 0;
926} 926}
927 927
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 3277ddecfe9f..159b6a43fda0 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -637,12 +637,6 @@ static int rs690_startup(struct radeon_device *rdev)
637 return r; 637 return r;
638 } 638 }
639 639
640 r = r600_audio_init(rdev);
641 if (r) {
642 dev_err(rdev->dev, "failed initializing audio\n");
643 return r;
644 }
645
646 r = radeon_ib_pool_start(rdev); 640 r = radeon_ib_pool_start(rdev);
647 if (r) 641 if (r)
648 return r; 642 return r;
@@ -651,6 +645,12 @@ static int rs690_startup(struct radeon_device *rdev)
651 if (r) 645 if (r)
652 return r; 646 return r;
653 647
648 r = r600_audio_init(rdev);
649 if (r) {
650 dev_err(rdev->dev, "failed initializing audio\n");
651 return r;
652 }
653
654 return 0; 654 return 0;
655} 655}
656 656
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 04ddc365a908..4ad0281fdc37 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -956,6 +956,12 @@ static int rv770_startup(struct radeon_device *rdev)
956 if (r) 956 if (r)
957 return r; 957 return r;
958 958
959 r = r600_audio_init(rdev);
960 if (r) {
961 DRM_ERROR("radeon: audio init failed\n");
962 return r;
963 }
964
959 return 0; 965 return 0;
960} 966}
961 967
@@ -978,12 +984,6 @@ int rv770_resume(struct radeon_device *rdev)
978 return r; 984 return r;
979 } 985 }
980 986
981 r = r600_audio_init(rdev);
982 if (r) {
983 dev_err(rdev->dev, "radeon: audio init failed\n");
984 return r;
985 }
986
987 return r; 987 return r;
988 988
989} 989}
@@ -1092,12 +1092,6 @@ int rv770_init(struct radeon_device *rdev)
1092 rdev->accel_working = false; 1092 rdev->accel_working = false;
1093 } 1093 }
1094 1094
1095 r = r600_audio_init(rdev);
1096 if (r) {
1097 dev_err(rdev->dev, "radeon: audio init failed\n");
1098 return r;
1099 }
1100
1101 return 0; 1095 return 0;
1102} 1096}
1103 1097
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 549732e56ca9..c7b61f16ecfd 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -867,200 +867,6 @@ void dce6_bandwidth_update(struct radeon_device *rdev)
867/* 867/*
868 * Core functions 868 * Core functions
869 */ 869 */
870static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
871 u32 num_tile_pipes,
872 u32 num_backends_per_asic,
873 u32 *backend_disable_mask_per_asic,
874 u32 num_shader_engines)
875{
876 u32 backend_map = 0;
877 u32 enabled_backends_mask = 0;
878 u32 enabled_backends_count = 0;
879 u32 num_backends_per_se;
880 u32 cur_pipe;
881 u32 swizzle_pipe[SI_MAX_PIPES];
882 u32 cur_backend = 0;
883 u32 i;
884 bool force_no_swizzle;
885
886 /* force legal values */
887 if (num_tile_pipes < 1)
888 num_tile_pipes = 1;
889 if (num_tile_pipes > rdev->config.si.max_tile_pipes)
890 num_tile_pipes = rdev->config.si.max_tile_pipes;
891 if (num_shader_engines < 1)
892 num_shader_engines = 1;
893 if (num_shader_engines > rdev->config.si.max_shader_engines)
894 num_shader_engines = rdev->config.si.max_shader_engines;
895 if (num_backends_per_asic < num_shader_engines)
896 num_backends_per_asic = num_shader_engines;
897 if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
898 num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
899
900 /* make sure we have the same number of backends per se */
901 num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
902 /* set up the number of backends per se */
903 num_backends_per_se = num_backends_per_asic / num_shader_engines;
904 if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
905 num_backends_per_se = rdev->config.si.max_backends_per_se;
906 num_backends_per_asic = num_backends_per_se * num_shader_engines;
907 }
908
909 /* create enable mask and count for enabled backends */
910 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
911 if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
912 enabled_backends_mask |= (1 << i);
913 ++enabled_backends_count;
914 }
915 if (enabled_backends_count == num_backends_per_asic)
916 break;
917 }
918
919 /* force the backends mask to match the current number of backends */
920 if (enabled_backends_count != num_backends_per_asic) {
921 u32 this_backend_enabled;
922 u32 shader_engine;
923 u32 backend_per_se;
924
925 enabled_backends_mask = 0;
926 enabled_backends_count = 0;
927 *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
928 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
929 /* calc the current se */
930 shader_engine = i / rdev->config.si.max_backends_per_se;
931 /* calc the backend per se */
932 backend_per_se = i % rdev->config.si.max_backends_per_se;
933 /* default to not enabled */
934 this_backend_enabled = 0;
935 if ((shader_engine < num_shader_engines) &&
936 (backend_per_se < num_backends_per_se))
937 this_backend_enabled = 1;
938 if (this_backend_enabled) {
939 enabled_backends_mask |= (1 << i);
940 *backend_disable_mask_per_asic &= ~(1 << i);
941 ++enabled_backends_count;
942 }
943 }
944 }
945
946
947 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
948 switch (rdev->family) {
949 case CHIP_TAHITI:
950 case CHIP_PITCAIRN:
951 case CHIP_VERDE:
952 force_no_swizzle = true;
953 break;
954 default:
955 force_no_swizzle = false;
956 break;
957 }
958 if (force_no_swizzle) {
959 bool last_backend_enabled = false;
960
961 force_no_swizzle = false;
962 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
963 if (((enabled_backends_mask >> i) & 1) == 1) {
964 if (last_backend_enabled)
965 force_no_swizzle = true;
966 last_backend_enabled = true;
967 } else
968 last_backend_enabled = false;
969 }
970 }
971
972 switch (num_tile_pipes) {
973 case 1:
974 case 3:
975 case 5:
976 case 7:
977 DRM_ERROR("odd number of pipes!\n");
978 break;
979 case 2:
980 swizzle_pipe[0] = 0;
981 swizzle_pipe[1] = 1;
982 break;
983 case 4:
984 if (force_no_swizzle) {
985 swizzle_pipe[0] = 0;
986 swizzle_pipe[1] = 1;
987 swizzle_pipe[2] = 2;
988 swizzle_pipe[3] = 3;
989 } else {
990 swizzle_pipe[0] = 0;
991 swizzle_pipe[1] = 2;
992 swizzle_pipe[2] = 1;
993 swizzle_pipe[3] = 3;
994 }
995 break;
996 case 6:
997 if (force_no_swizzle) {
998 swizzle_pipe[0] = 0;
999 swizzle_pipe[1] = 1;
1000 swizzle_pipe[2] = 2;
1001 swizzle_pipe[3] = 3;
1002 swizzle_pipe[4] = 4;
1003 swizzle_pipe[5] = 5;
1004 } else {
1005 swizzle_pipe[0] = 0;
1006 swizzle_pipe[1] = 2;
1007 swizzle_pipe[2] = 4;
1008 swizzle_pipe[3] = 1;
1009 swizzle_pipe[4] = 3;
1010 swizzle_pipe[5] = 5;
1011 }
1012 break;
1013 case 8:
1014 if (force_no_swizzle) {
1015 swizzle_pipe[0] = 0;
1016 swizzle_pipe[1] = 1;
1017 swizzle_pipe[2] = 2;
1018 swizzle_pipe[3] = 3;
1019 swizzle_pipe[4] = 4;
1020 swizzle_pipe[5] = 5;
1021 swizzle_pipe[6] = 6;
1022 swizzle_pipe[7] = 7;
1023 } else {
1024 swizzle_pipe[0] = 0;
1025 swizzle_pipe[1] = 2;
1026 swizzle_pipe[2] = 4;
1027 swizzle_pipe[3] = 6;
1028 swizzle_pipe[4] = 1;
1029 swizzle_pipe[5] = 3;
1030 swizzle_pipe[6] = 5;
1031 swizzle_pipe[7] = 7;
1032 }
1033 break;
1034 }
1035
1036 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1037 while (((1 << cur_backend) & enabled_backends_mask) == 0)
1038 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
1039
1040 backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
1041
1042 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
1043 }
1044
1045 return backend_map;
1046}
1047
1048static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
1049 u32 disable_mask_per_se,
1050 u32 max_disable_mask_per_se,
1051 u32 num_shader_engines)
1052{
1053 u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
1054 u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
1055
1056 if (num_shader_engines == 1)
1057 return disable_mask_per_asic;
1058 else if (num_shader_engines == 2)
1059 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
1060 else
1061 return 0xffffffff;
1062}
1063
1064static void si_tiling_mode_table_init(struct radeon_device *rdev) 870static void si_tiling_mode_table_init(struct radeon_device *rdev)
1065{ 871{
1066 const u32 num_tile_mode_states = 32; 872 const u32 num_tile_mode_states = 32;
@@ -1562,18 +1368,151 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev)
1562 DRM_ERROR("unknown asic: 0x%x\n", rdev->family); 1368 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1563} 1369}
1564 1370
1371static void si_select_se_sh(struct radeon_device *rdev,
1372 u32 se_num, u32 sh_num)
1373{
1374 u32 data = INSTANCE_BROADCAST_WRITES;
1375
1376 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378 else if (se_num == 0xffffffff)
1379 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380 else if (sh_num == 0xffffffff)
1381 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382 else
1383 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384 WREG32(GRBM_GFX_INDEX, data);
1385}
1386
1387static u32 si_create_bitmask(u32 bit_width)
1388{
1389 u32 i, mask = 0;
1390
1391 for (i = 0; i < bit_width; i++) {
1392 mask <<= 1;
1393 mask |= 1;
1394 }
1395 return mask;
1396}
1397
1398static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1399{
1400 u32 data, mask;
1401
1402 data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1403 if (data & 1)
1404 data &= INACTIVE_CUS_MASK;
1405 else
1406 data = 0;
1407 data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1408
1409 data >>= INACTIVE_CUS_SHIFT;
1410
1411 mask = si_create_bitmask(cu_per_sh);
1412
1413 return ~data & mask;
1414}
1415
1416static void si_setup_spi(struct radeon_device *rdev,
1417 u32 se_num, u32 sh_per_se,
1418 u32 cu_per_sh)
1419{
1420 int i, j, k;
1421 u32 data, mask, active_cu;
1422
1423 for (i = 0; i < se_num; i++) {
1424 for (j = 0; j < sh_per_se; j++) {
1425 si_select_se_sh(rdev, i, j);
1426 data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427 active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428
1429 mask = 1;
1430 for (k = 0; k < 16; k++) {
1431 mask <<= k;
1432 if (active_cu & mask) {
1433 data &= ~mask;
1434 WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435 break;
1436 }
1437 }
1438 }
1439 }
1440 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441}
1442
1443static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444 u32 max_rb_num, u32 se_num,
1445 u32 sh_per_se)
1446{
1447 u32 data, mask;
1448
1449 data = RREG32(CC_RB_BACKEND_DISABLE);
1450 if (data & 1)
1451 data &= BACKEND_DISABLE_MASK;
1452 else
1453 data = 0;
1454 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1455
1456 data >>= BACKEND_DISABLE_SHIFT;
1457
1458 mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459
1460 return data & mask;
1461}
1462
1463static void si_setup_rb(struct radeon_device *rdev,
1464 u32 se_num, u32 sh_per_se,
1465 u32 max_rb_num)
1466{
1467 int i, j;
1468 u32 data, mask;
1469 u32 disabled_rbs = 0;
1470 u32 enabled_rbs = 0;
1471
1472 for (i = 0; i < se_num; i++) {
1473 for (j = 0; j < sh_per_se; j++) {
1474 si_select_se_sh(rdev, i, j);
1475 data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476 disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477 }
1478 }
1479 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480
1481 mask = 1;
1482 for (i = 0; i < max_rb_num; i++) {
1483 if (!(disabled_rbs & mask))
1484 enabled_rbs |= mask;
1485 mask <<= 1;
1486 }
1487
1488 for (i = 0; i < se_num; i++) {
1489 si_select_se_sh(rdev, i, 0xffffffff);
1490 data = 0;
1491 for (j = 0; j < sh_per_se; j++) {
1492 switch (enabled_rbs & 3) {
1493 case 1:
1494 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495 break;
1496 case 2:
1497 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498 break;
1499 case 3:
1500 default:
1501 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502 break;
1503 }
1504 enabled_rbs >>= 2;
1505 }
1506 WREG32(PA_SC_RASTER_CONFIG, data);
1507 }
1508 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509}
1510
1565static void si_gpu_init(struct radeon_device *rdev) 1511static void si_gpu_init(struct radeon_device *rdev)
1566{ 1512{
1567 u32 cc_rb_backend_disable = 0;
1568 u32 cc_gc_shader_array_config;
1569 u32 gb_addr_config = 0; 1513 u32 gb_addr_config = 0;
1570 u32 mc_shared_chmap, mc_arb_ramcfg; 1514 u32 mc_shared_chmap, mc_arb_ramcfg;
1571 u32 gb_backend_map;
1572 u32 cgts_tcc_disable;
1573 u32 sx_debug_1; 1515 u32 sx_debug_1;
1574 u32 gc_user_shader_array_config;
1575 u32 gc_user_rb_backend_disable;
1576 u32 cgts_user_tcc_disable;
1577 u32 hdp_host_path_cntl; 1516 u32 hdp_host_path_cntl;
1578 u32 tmp; 1517 u32 tmp;
1579 int i, j; 1518 int i, j;
@@ -1581,9 +1520,9 @@ static void si_gpu_init(struct radeon_device *rdev)
1581 switch (rdev->family) { 1520 switch (rdev->family) {
1582 case CHIP_TAHITI: 1521 case CHIP_TAHITI:
1583 rdev->config.si.max_shader_engines = 2; 1522 rdev->config.si.max_shader_engines = 2;
1584 rdev->config.si.max_pipes_per_simd = 4;
1585 rdev->config.si.max_tile_pipes = 12; 1523 rdev->config.si.max_tile_pipes = 12;
-1586		rdev->config.si.max_simds_per_se = 8;
+1524		rdev->config.si.max_cu_per_sh = 8;
1525 rdev->config.si.max_sh_per_se = 2;
1587 rdev->config.si.max_backends_per_se = 4; 1526 rdev->config.si.max_backends_per_se = 4;
1588 rdev->config.si.max_texture_channel_caches = 12; 1527 rdev->config.si.max_texture_channel_caches = 12;
1589 rdev->config.si.max_gprs = 256; 1528 rdev->config.si.max_gprs = 256;
@@ -1594,12 +1533,13 @@ static void si_gpu_init(struct radeon_device *rdev)
1594 rdev->config.si.sc_prim_fifo_size_backend = 0x100; 1533 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1595 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1534 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1596 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1535 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1597 break; 1537 break;
1598 case CHIP_PITCAIRN: 1538 case CHIP_PITCAIRN:
1599 rdev->config.si.max_shader_engines = 2; 1539 rdev->config.si.max_shader_engines = 2;
1600 rdev->config.si.max_pipes_per_simd = 4;
1601 rdev->config.si.max_tile_pipes = 8; 1540 rdev->config.si.max_tile_pipes = 8;
-1602		rdev->config.si.max_simds_per_se = 5;
+1541		rdev->config.si.max_cu_per_sh = 5;
1542 rdev->config.si.max_sh_per_se = 2;
1603 rdev->config.si.max_backends_per_se = 4; 1543 rdev->config.si.max_backends_per_se = 4;
1604 rdev->config.si.max_texture_channel_caches = 8; 1544 rdev->config.si.max_texture_channel_caches = 8;
1605 rdev->config.si.max_gprs = 256; 1545 rdev->config.si.max_gprs = 256;
@@ -1610,13 +1550,14 @@ static void si_gpu_init(struct radeon_device *rdev)
1610 rdev->config.si.sc_prim_fifo_size_backend = 0x100; 1550 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1611 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1551 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1612 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1552 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1613 break; 1554 break;
1614 case CHIP_VERDE: 1555 case CHIP_VERDE:
1615 default: 1556 default:
1616 rdev->config.si.max_shader_engines = 1; 1557 rdev->config.si.max_shader_engines = 1;
1617 rdev->config.si.max_pipes_per_simd = 4;
1618 rdev->config.si.max_tile_pipes = 4; 1558 rdev->config.si.max_tile_pipes = 4;
-1619		rdev->config.si.max_simds_per_se = 2;
+1559		rdev->config.si.max_cu_per_sh = 2;
1560 rdev->config.si.max_sh_per_se = 2;
1620 rdev->config.si.max_backends_per_se = 4; 1561 rdev->config.si.max_backends_per_se = 4;
1621 rdev->config.si.max_texture_channel_caches = 4; 1562 rdev->config.si.max_texture_channel_caches = 4;
1622 rdev->config.si.max_gprs = 256; 1563 rdev->config.si.max_gprs = 256;
@@ -1627,6 +1568,7 @@ static void si_gpu_init(struct radeon_device *rdev)
1627 rdev->config.si.sc_prim_fifo_size_backend = 0x40; 1568 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1628 rdev->config.si.sc_hiz_tile_fifo_size = 0x30; 1569 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1629 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; 1570 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1630 break; 1572 break;
1631 } 1573 }
1632 1574
@@ -1648,31 +1590,7 @@ static void si_gpu_init(struct radeon_device *rdev)
1648 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 1590 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1649 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 1591 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1650 1592
1651 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
1652 cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1653 cgts_tcc_disable = 0xffff0000;
1654 for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
1655 cgts_tcc_disable &= ~(1 << (16 + i));
1656 gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
1657 gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1658 cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
1659
1660 rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
1661 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; 1593 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1662 tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1663 rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
1664 tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1665 rdev->config.si.backend_disable_mask_per_asic =
1666 si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
1667 rdev->config.si.num_shader_engines);
1668 rdev->config.si.backend_map =
1669 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1670 rdev->config.si.num_backends_per_se *
1671 rdev->config.si.num_shader_engines,
1672 &rdev->config.si.backend_disable_mask_per_asic,
1673 rdev->config.si.num_shader_engines);
1674 tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
1675 rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
1676 rdev->config.si.mem_max_burst_length_bytes = 256; 1594 rdev->config.si.mem_max_burst_length_bytes = 256;
1677 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 1595 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1678 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1596 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
@@ -1683,55 +1601,8 @@ static void si_gpu_init(struct radeon_device *rdev)
1683 rdev->config.si.num_gpus = 1; 1601 rdev->config.si.num_gpus = 1;
1684 rdev->config.si.multi_gpu_tile_size = 64; 1602 rdev->config.si.multi_gpu_tile_size = 64;
1685 1603
-1686	gb_addr_config = 0;
-1687	switch (rdev->config.si.num_tile_pipes) {
+1604	/* fix up row size */
+1605	gb_addr_config &= ~ROW_SIZE_MASK;
1688 case 1:
1689 gb_addr_config |= NUM_PIPES(0);
1690 break;
1691 case 2:
1692 gb_addr_config |= NUM_PIPES(1);
1693 break;
1694 case 4:
1695 gb_addr_config |= NUM_PIPES(2);
1696 break;
1697 case 8:
1698 default:
1699 gb_addr_config |= NUM_PIPES(3);
1700 break;
1701 }
1702
1703 tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1704 gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1705 gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1706 tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1707 gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1708 switch (rdev->config.si.num_gpus) {
1709 case 1:
1710 default:
1711 gb_addr_config |= NUM_GPUS(0);
1712 break;
1713 case 2:
1714 gb_addr_config |= NUM_GPUS(1);
1715 break;
1716 case 4:
1717 gb_addr_config |= NUM_GPUS(2);
1718 break;
1719 }
1720 switch (rdev->config.si.multi_gpu_tile_size) {
1721 case 16:
1722 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1723 break;
1724 case 32:
1725 default:
1726 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1727 break;
1728 case 64:
1729 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1730 break;
1731 case 128:
1732 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1733 break;
1734 }
1735 switch (rdev->config.si.mem_row_size_in_kb) { 1606 switch (rdev->config.si.mem_row_size_in_kb) {
1736 case 1: 1607 case 1:
1737 default: 1608 default:
@@ -1745,26 +1616,6 @@ static void si_gpu_init(struct radeon_device *rdev)
1745 break; 1616 break;
1746 } 1617 }
1747 1618
1748 tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1749 rdev->config.si.num_tile_pipes = (1 << tmp);
1750 tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1751 rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1752 tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1753 rdev->config.si.num_shader_engines = tmp + 1;
1754 tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1755 rdev->config.si.num_gpus = tmp + 1;
1756 tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1757 rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1758 tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1759 rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1760
1761 gb_backend_map =
1762 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1763 rdev->config.si.num_backends_per_se *
1764 rdev->config.si.num_shader_engines,
1765 &rdev->config.si.backend_disable_mask_per_asic,
1766 rdev->config.si.num_shader_engines);
1767
1768 /* setup tiling info dword. gb_addr_config is not adequate since it does 1619 /* setup tiling info dword. gb_addr_config is not adequate since it does
1769 * not have bank info, so create a custom tiling dword. 1620 * not have bank info, so create a custom tiling dword.
1770 * bits 3:0 num_pipes 1621 * bits 3:0 num_pipes
@@ -1789,33 +1640,29 @@ static void si_gpu_init(struct radeon_device *rdev)
1789 rdev->config.si.tile_config |= (3 << 0); 1640 rdev->config.si.tile_config |= (3 << 0);
1790 break; 1641 break;
1791 } 1642 }
-1792	rdev->config.si.tile_config |=
-1793		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+1643	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
+1644		rdev->config.si.tile_config |= 1 << 4;
1645 else
1646 rdev->config.si.tile_config |= 0 << 4;
1794 rdev->config.si.tile_config |= 1647 rdev->config.si.tile_config |=
1795 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; 1648 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1796 rdev->config.si.tile_config |= 1649 rdev->config.si.tile_config |=
1797 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; 1650 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1798 1651
1799 rdev->config.si.backend_map = gb_backend_map;
1800 WREG32(GB_ADDR_CONFIG, gb_addr_config); 1652 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1801 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 1653 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1802 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 1654 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1803 1655
-1804	/* primary versions */
+1656	si_tiling_mode_table_init(rdev);
1805 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1806 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1807 WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1808
1809 WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1810 1657
-1811	/* user versions */
-1812	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
-1813	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+1658	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
+1659		    rdev->config.si.max_sh_per_se,
+1660		    rdev->config.si.max_backends_per_se);
1814 WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1815 1661
-1816	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
+1662	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1663 rdev->config.si.max_sh_per_se,
1664 rdev->config.si.max_cu_per_sh);
1817 1665
1818 si_tiling_mode_table_init(rdev);
1819 1666
1820 /* set HW defaults for 3D engine */ 1667 /* set HW defaults for 3D engine */
1821 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | 1668 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
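In the new si_get_cu_enabled()/si_setup_spi() path above, the usable compute units per shader array are computed as the complement of the fused-off CU bits, limited to a mask cu_per_sh bits wide (si_create_bitmask()). The same arithmetic, isolated into a stand-alone program with made-up register values:

#include <stdint.h>
#include <stdio.h>

/* same shape as si_create_bitmask(): the low bit_width bits set */
static uint32_t create_bitmask(uint32_t bit_width)
{
	uint32_t i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;	/* equivalent to (1u << bit_width) - 1 for bit_width < 32 */
}

int main(void)
{
	uint32_t disabled_cus = 0x6;	/* pretend CUs 1 and 2 are fused off */
	uint32_t cu_per_sh = 5;		/* pretend per-SH maximum */
	uint32_t active = ~disabled_cus & create_bitmask(cu_per_sh);

	printf("active CU bitmap: 0x%x\n", active);	/* 0x19: CUs 0, 3 and 4 */
	return 0;
}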
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 53ea2c42dbd6..db4067962868 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -24,6 +24,11 @@
24#ifndef SI_H 24#ifndef SI_H
25#define SI_H 25#define SI_H
26 26
27#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
28
29#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
30#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
31
27#define CG_MULT_THERMAL_STATUS 0x714 32#define CG_MULT_THERMAL_STATUS 0x714
28#define ASIC_MAX_TEMP(x) ((x) << 0) 33#define ASIC_MAX_TEMP(x) ((x) << 0)
29#define ASIC_MAX_TEMP_MASK 0x000001ff 34#define ASIC_MAX_TEMP_MASK 0x000001ff
@@ -408,6 +413,12 @@
408#define SOFT_RESET_IA (1 << 15) 413#define SOFT_RESET_IA (1 << 15)
409 414
410#define GRBM_GFX_INDEX 0x802C 415#define GRBM_GFX_INDEX 0x802C
416#define INSTANCE_INDEX(x) ((x) << 0)
417#define SH_INDEX(x) ((x) << 8)
418#define SE_INDEX(x) ((x) << 16)
419#define SH_BROADCAST_WRITES (1 << 29)
420#define INSTANCE_BROADCAST_WRITES (1 << 30)
421#define SE_BROADCAST_WRITES (1 << 31)
411 422
412#define GRBM_INT_CNTL 0x8060 423#define GRBM_INT_CNTL 0x8060
413# define RDERR_INT_ENABLE (1 << 0) 424# define RDERR_INT_ENABLE (1 << 0)
@@ -480,6 +491,8 @@
480#define VGT_TF_MEMORY_BASE 0x89B8 491#define VGT_TF_MEMORY_BASE 0x89B8
481 492
482#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc 493#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc
494#define INACTIVE_CUS_MASK 0xFFFF0000
495#define INACTIVE_CUS_SHIFT 16
483#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 496#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0
484 497
485#define PA_CL_ENHANCE 0x8A14 498#define PA_CL_ENHANCE 0x8A14
@@ -688,6 +701,12 @@
688#define RLC_MC_CNTL 0xC344 701#define RLC_MC_CNTL 0xC344
689#define RLC_UCODE_CNTL 0xC348 702#define RLC_UCODE_CNTL 0xC348
690 703
704#define PA_SC_RASTER_CONFIG 0x28350
705# define RASTER_CONFIG_RB_MAP_0 0
706# define RASTER_CONFIG_RB_MAP_1 1
707# define RASTER_CONFIG_RB_MAP_2 2
708# define RASTER_CONFIG_RB_MAP_3 3
709
691#define VGT_EVENT_INITIATOR 0x28a90 710#define VGT_EVENT_INITIATOR 0x28a90
692# define SAMPLE_STREAMOUTSTATS1 (1 << 0) 711# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
693# define SAMPLE_STREAMOUTSTATS2 (2 << 0) 712# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index beb2491db274..a0edd9854218 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -37,4 +37,16 @@ config I2C_MUX_PCA954x
37 This driver can also be built as a module. If so, the module 37 This driver can also be built as a module. If so, the module
38 will be called i2c-mux-pca954x. 38 will be called i2c-mux-pca954x.
39 39
40config I2C_MUX_PINCTRL
41 tristate "pinctrl-based I2C multiplexer"
42 depends on PINCTRL
43 help
44 If you say yes to this option, support will be included for an I2C
45 multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing.
46 This is useful for SoCs whose I2C module's signals can be routed to
47 different sets of pins at run-time.
48
49 This driver can also be built as a module. If so, the module will be
50 called pinctrl-i2cmux.
51
40endmenu 52endmenu
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index 5826249b29ca..76da8692afff 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -4,5 +4,6 @@
4obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o 4obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o
5obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o 5obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o
6obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o 6obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o
7obj-$(CONFIG_I2C_MUX_PINCTRL) += i2c-mux-pinctrl.o
7 8
8ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG 9ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c
new file mode 100644
index 000000000000..46a669763476
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c
@@ -0,0 +1,279 @@
1/*
2 * I2C multiplexer using pinctrl API
3 *
4 * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/i2c.h>
20#include <linux/i2c-mux.h>
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/of_i2c.h>
24#include <linux/pinctrl/consumer.h>
25#include <linux/i2c-mux-pinctrl.h>
26#include <linux/platform_device.h>
27#include <linux/slab.h>
28
29struct i2c_mux_pinctrl {
30 struct device *dev;
31 struct i2c_mux_pinctrl_platform_data *pdata;
32 struct pinctrl *pinctrl;
33 struct pinctrl_state **states;
34 struct pinctrl_state *state_idle;
35 struct i2c_adapter *parent;
36 struct i2c_adapter **busses;
37};
38
39static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data,
40 u32 chan)
41{
42 struct i2c_mux_pinctrl *mux = data;
43
44 return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
45}
46
47static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data,
48 u32 chan)
49{
50 struct i2c_mux_pinctrl *mux = data;
51
52 return pinctrl_select_state(mux->pinctrl, mux->state_idle);
53}
54
55#ifdef CONFIG_OF
56static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
57 struct platform_device *pdev)
58{
59 struct device_node *np = pdev->dev.of_node;
60 int num_names, i, ret;
61 struct device_node *adapter_np;
62 struct i2c_adapter *adapter;
63
64 if (!np)
65 return 0;
66
67 mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL);
68 if (!mux->pdata) {
69 dev_err(mux->dev,
70 "Cannot allocate i2c_mux_pinctrl_platform_data\n");
71 return -ENOMEM;
72 }
73
74 num_names = of_property_count_strings(np, "pinctrl-names");
75 if (num_names < 0) {
76 dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
77 num_names);
78 return num_names;
79 }
80
81 mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev,
82 sizeof(*mux->pdata->pinctrl_states) * num_names,
83 GFP_KERNEL);
84 if (!mux->pdata->pinctrl_states) {
85 dev_err(mux->dev, "Cannot allocate pinctrl_states\n");
86 return -ENOMEM;
87 }
88
89 for (i = 0; i < num_names; i++) {
90 ret = of_property_read_string_index(np, "pinctrl-names", i,
91 &mux->pdata->pinctrl_states[mux->pdata->bus_count]);
92 if (ret < 0) {
93 dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
94 ret);
95 return ret;
96 }
97 if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count],
98 "idle")) {
99 if (i != num_names - 1) {
100 dev_err(mux->dev, "idle state must be last\n");
101 return -EINVAL;
102 }
103 mux->pdata->pinctrl_state_idle = "idle";
104 } else {
105 mux->pdata->bus_count++;
106 }
107 }
108
109 adapter_np = of_parse_phandle(np, "i2c-parent", 0);
110 if (!adapter_np) {
111 dev_err(mux->dev, "Cannot parse i2c-parent\n");
112 return -ENODEV;
113 }
114 adapter = of_find_i2c_adapter_by_node(adapter_np);
115 if (!adapter) {
116 dev_err(mux->dev, "Cannot find parent bus\n");
117 return -ENODEV;
118 }
119 mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
120 put_device(&adapter->dev);
121
122 return 0;
123}
124#else
125static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
126 struct platform_device *pdev)
127{
128 return 0;
129}
130#endif
131
132static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev)
133{
134 struct i2c_mux_pinctrl *mux;
135 int (*deselect)(struct i2c_adapter *, void *, u32);
136 int i, ret;
137
138 mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
139 if (!mux) {
140 dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n");
141 ret = -ENOMEM;
142 goto err;
143 }
144 platform_set_drvdata(pdev, mux);
145
146 mux->dev = &pdev->dev;
147
148 mux->pdata = pdev->dev.platform_data;
149 if (!mux->pdata) {
150 ret = i2c_mux_pinctrl_parse_dt(mux, pdev);
151 if (ret < 0)
152 goto err;
153 }
154 if (!mux->pdata) {
155 dev_err(&pdev->dev, "Missing platform data\n");
156 ret = -ENODEV;
157 goto err;
158 }
159
160 mux->states = devm_kzalloc(&pdev->dev,
161 sizeof(*mux->states) * mux->pdata->bus_count,
162 GFP_KERNEL);
163 if (!mux->states) {
164 dev_err(&pdev->dev, "Cannot allocate states\n");
165 ret = -ENOMEM;
166 goto err;
167 }
168
169 mux->busses = devm_kzalloc(&pdev->dev,
170 sizeof(mux->busses) * mux->pdata->bus_count,
171 GFP_KERNEL);
172 if (!mux->states) {
173 dev_err(&pdev->dev, "Cannot allocate busses\n");
174 ret = -ENOMEM;
175 goto err;
176 }
177
178 mux->pinctrl = devm_pinctrl_get(&pdev->dev);
179 if (IS_ERR(mux->pinctrl)) {
180 ret = PTR_ERR(mux->pinctrl);
181 dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret);
182 goto err;
183 }
184 for (i = 0; i < mux->pdata->bus_count; i++) {
185 mux->states[i] = pinctrl_lookup_state(mux->pinctrl,
186 mux->pdata->pinctrl_states[i]);
187 if (IS_ERR(mux->states[i])) {
188 ret = PTR_ERR(mux->states[i]);
189 dev_err(&pdev->dev,
190 "Cannot look up pinctrl state %s: %d\n",
191 mux->pdata->pinctrl_states[i], ret);
192 goto err;
193 }
194 }
195 if (mux->pdata->pinctrl_state_idle) {
196 mux->state_idle = pinctrl_lookup_state(mux->pinctrl,
197 mux->pdata->pinctrl_state_idle);
198 if (IS_ERR(mux->state_idle)) {
199 ret = PTR_ERR(mux->state_idle);
200 dev_err(&pdev->dev,
201 "Cannot look up pinctrl state %s: %d\n",
202 mux->pdata->pinctrl_state_idle, ret);
203 goto err;
204 }
205
206 deselect = i2c_mux_pinctrl_deselect;
207 } else {
208 deselect = NULL;
209 }
210
211 mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
212 if (!mux->parent) {
213 dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
214 mux->pdata->parent_bus_num);
215 ret = -ENODEV;
216 goto err;
217 }
218
219 for (i = 0; i < mux->pdata->bus_count; i++) {
220 u32 bus = mux->pdata->base_bus_num ?
221 (mux->pdata->base_bus_num + i) : 0;
222
223 mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev,
224 mux, bus, i,
225 i2c_mux_pinctrl_select,
226 deselect);
227 if (!mux->busses[i]) {
228 ret = -ENODEV;
229 dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
230 goto err_del_adapter;
231 }
232 }
233
234 return 0;
235
236err_del_adapter:
237 for (; i > 0; i--)
238 i2c_del_mux_adapter(mux->busses[i - 1]);
239 i2c_put_adapter(mux->parent);
240err:
241 return ret;
242}
243
244static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev)
245{
246 struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev);
247 int i;
248
249 for (i = 0; i < mux->pdata->bus_count; i++)
250 i2c_del_mux_adapter(mux->busses[i]);
251
252 i2c_put_adapter(mux->parent);
253
254 return 0;
255}
256
257#ifdef CONFIG_OF
258static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = {
259 { .compatible = "i2c-mux-pinctrl", },
260 {},
261};
262MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match);
263#endif
264
265static struct platform_driver i2c_mux_pinctrl_driver = {
266 .driver = {
267 .name = "i2c-mux-pinctrl",
268 .owner = THIS_MODULE,
269 .of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match),
270 },
271 .probe = i2c_mux_pinctrl_probe,
272 .remove = __devexit_p(i2c_mux_pinctrl_remove),
273};
274module_platform_driver(i2c_mux_pinctrl_driver);
275
276MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver");
277MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>");
278MODULE_LICENSE("GPL v2");
279MODULE_ALIAS("platform:i2c-mux-pinctrl");
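Besides the device-tree path handled by i2c_mux_pinctrl_parse_dt(), the probe routine accepts ordinary platform data. A hypothetical board-file sketch follows; the field names are taken from the driver above (the structure itself is declared in <linux/i2c-mux-pinctrl.h>), while the state names, bus numbers and device are invented for illustration.

#include <linux/i2c-mux-pinctrl.h>
#include <linux/platform_device.h>

/* pinctrl state names that the board's pinmux tables are assumed to define */
static const char *board_i2c_mux_states[] = {
	"i2c-bus-a",
	"i2c-bus-b",
};

static struct i2c_mux_pinctrl_platform_data board_i2c_mux_pdata = {
	.parent_bus_num		= 0,	/* the mux hangs off i2c-0 */
	.base_bus_num		= 10,	/* children register as i2c-10 and i2c-11 */
	.bus_count		= 2,
	.pinctrl_states		= board_i2c_mux_states,
	.pinctrl_state_idle	= "idle",	/* optional: park the pins between transfers */
};

static struct platform_device board_i2c_mux_device = {
	.name	= "i2c-mux-pinctrl",
	.id	= -1,
	.dev	= {
		.platform_data = &board_i2c_mux_pdata,
	},
};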
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 7d5f56edb8ef..4267789ca995 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -910,14 +910,17 @@ static inline int cmos_poweroff(struct device *dev)
910 910
911static u32 rtc_handler(void *context) 911static u32 rtc_handler(void *context)
912{ 912{
913 struct device *dev = context;
914
915 pm_wakeup_event(dev, 0);
913 acpi_clear_event(ACPI_EVENT_RTC); 916 acpi_clear_event(ACPI_EVENT_RTC);
914 acpi_disable_event(ACPI_EVENT_RTC, 0); 917 acpi_disable_event(ACPI_EVENT_RTC, 0);
915 return ACPI_INTERRUPT_HANDLED; 918 return ACPI_INTERRUPT_HANDLED;
916} 919}
917 920
-918	static inline void rtc_wake_setup(void)
+921	static inline void rtc_wake_setup(struct device *dev)
919{ 922{
-920	acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, NULL);
+923	acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
921 /* 924 /*
922 * After the RTC handler is installed, the Fixed_RTC event should 925 * After the RTC handler is installed, the Fixed_RTC event should
923 * be disabled. Only when the RTC alarm is set will it be enabled. 926 * be disabled. Only when the RTC alarm is set will it be enabled.
@@ -950,7 +953,7 @@ cmos_wake_setup(struct device *dev)
950 if (acpi_disabled) 953 if (acpi_disabled)
951 return; 954 return;
952 955
-953	rtc_wake_setup();
+956	rtc_wake_setup(dev);
954 acpi_rtc_info.wake_on = rtc_wake_on; 957 acpi_rtc_info.wake_on = rtc_wake_on;
955 acpi_rtc_info.wake_off = rtc_wake_off; 958 acpi_rtc_info.wake_off = rtc_wake_off;
956 959
diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c
index 4e7ef0e6b79c..d46764b5aaba 100644
--- a/drivers/staging/ramster/zcache-main.c
+++ b/drivers/staging/ramster/zcache-main.c
@@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
3002 return oid; 3002 return oid;
3003} 3003}
3004 3004
-3005	static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+3005	static int zcache_frontswap_store(unsigned type, pgoff_t offset,
3006 struct page *page) 3006 struct page *page)
3007{ 3007{
3008 u64 ind64 = (u64)offset; 3008 u64 ind64 = (u64)offset;
@@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
3025 3025
3026/* returns 0 if the page was successfully gotten from frontswap, -1 if 3026/* returns 0 if the page was successfully gotten from frontswap, -1 if
3027 * was not present (should never happen!) */ 3027 * was not present (should never happen!) */
-3028	static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+3028	static int zcache_frontswap_load(unsigned type, pgoff_t offset,
3029 struct page *page) 3029 struct page *page)
3030{ 3030{
3031 u64 ind64 = (u64)offset; 3031 u64 ind64 = (u64)offset;
@@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored)
3080} 3080}
3081 3081
3082static struct frontswap_ops zcache_frontswap_ops = { 3082static struct frontswap_ops zcache_frontswap_ops = {
-3083	.put_page = zcache_frontswap_put_page,
-3084	.get_page = zcache_frontswap_get_page,
+3083	.store = zcache_frontswap_store,
+3084	.load = zcache_frontswap_load,
3085 .invalidate_page = zcache_frontswap_flush_page, 3085 .invalidate_page = zcache_frontswap_flush_page,
3086 .invalidate_area = zcache_frontswap_flush_area, 3086 .invalidate_area = zcache_frontswap_flush_area,
3087 .init = zcache_frontswap_init 3087 .init = zcache_frontswap_init
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 2734dacacbaf..784c796b9848 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1;
1835 * Swizzling increases objects per swaptype, increasing tmem concurrency 1835 * Swizzling increases objects per swaptype, increasing tmem concurrency
1836 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS 1836 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
1837 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from 1837 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
-1838	 * frontswap_get_page(), but has side-effects. Hence using 8.
+1838	 * frontswap_load(), but has side-effects. Hence using 8.
1839 */ 1839 */
1840#define SWIZ_BITS 8 1840#define SWIZ_BITS 8
1841#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) 1841#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
@@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
1849 return oid; 1849 return oid;
1850} 1850}
1851 1851
-1852	static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+1852	static int zcache_frontswap_store(unsigned type, pgoff_t offset,
1853 struct page *page) 1853 struct page *page)
1854{ 1854{
1855 u64 ind64 = (u64)offset; 1855 u64 ind64 = (u64)offset;
@@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
1870 1870
1871/* returns 0 if the page was successfully gotten from frontswap, -1 if 1871/* returns 0 if the page was successfully gotten from frontswap, -1 if
1872 * was not present (should never happen!) */ 1872 * was not present (should never happen!) */
-1873	static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+1873	static int zcache_frontswap_load(unsigned type, pgoff_t offset,
1874 struct page *page) 1874 struct page *page)
1875{ 1875{
1876 u64 ind64 = (u64)offset; 1876 u64 ind64 = (u64)offset;
@@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored)
1919} 1919}
1920 1920
1921static struct frontswap_ops zcache_frontswap_ops = { 1921static struct frontswap_ops zcache_frontswap_ops = {
-1922	.put_page = zcache_frontswap_put_page,
-1923	.get_page = zcache_frontswap_get_page,
+1922	.store = zcache_frontswap_store,
+1923	.load = zcache_frontswap_load,
1924 .invalidate_page = zcache_frontswap_flush_page, 1924 .invalidate_page = zcache_frontswap_flush_page,
1925 .invalidate_area = zcache_frontswap_flush_area, 1925 .invalidate_area = zcache_frontswap_flush_area,
1926 .init = zcache_frontswap_init 1926 .init = zcache_frontswap_init
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 37c609898f84..7e6136e2ce81 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -587,14 +587,14 @@ static void sbp_management_request_logout(
587{ 587{
588 struct sbp_tport *tport = agent->tport; 588 struct sbp_tport *tport = agent->tport;
589 struct sbp_tpg *tpg = tport->tpg; 589 struct sbp_tpg *tpg = tport->tpg;
-590	int login_id;
+590	int id;
591 struct sbp_login_descriptor *login; 591 struct sbp_login_descriptor *login;
592 592
-593	login_id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc));
+593	id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc));
594 594
-595	login = sbp_login_find_by_id(tpg, login_id);
+595	login = sbp_login_find_by_id(tpg, id);
596 if (!login) { 596 if (!login) {
-597	pr_warn("cannot find login: %d\n", login_id);
+597	pr_warn("cannot find login: %d\n", id);
598 598
599 req->status.status = cpu_to_be32( 599 req->status.status = cpu_to_be32(
600 STATUS_BLOCK_RESP(STATUS_RESP_REQUEST_COMPLETE) | 600 STATUS_BLOCK_RESP(STATUS_RESP_REQUEST_COMPLETE) |
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 686dba189f8e..9f99d0404908 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -133,16 +133,11 @@ static struct se_device *fd_create_virtdevice(
133 ret = PTR_ERR(dev_p); 133 ret = PTR_ERR(dev_p);
134 goto fail; 134 goto fail;
135 } 135 }
136
137 /* O_DIRECT too? */
138 flags = O_RDWR | O_CREAT | O_LARGEFILE;
139
140 /* 136 /*
-141	 * If fd_buffered_io=1 has not been set explicitly (the default),
-142	 * use O_SYNC to force FILEIO writes to disk.
+137	 * Use O_DSYNC by default instead of O_SYNC to forgo syncing
+138	 * of pure timestamp updates.
143 */ 139 */
-144	if (!(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO))
+140	flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
145 flags |= O_SYNC;
146 141
147 file = filp_open(dev_p, flags, 0600); 142 file = filp_open(dev_p, flags, 0600);
148 if (IS_ERR(file)) { 143 if (IS_ERR(file)) {
@@ -380,23 +375,6 @@ static void fd_emulate_sync_cache(struct se_cmd *cmd)
380 } 375 }
381} 376}
382 377
383static void fd_emulate_write_fua(struct se_cmd *cmd)
384{
385 struct se_device *dev = cmd->se_dev;
386 struct fd_dev *fd_dev = dev->dev_ptr;
387 loff_t start = cmd->t_task_lba *
388 dev->se_sub_dev->se_dev_attrib.block_size;
389 loff_t end = start + cmd->data_length;
390 int ret;
391
392 pr_debug("FILEIO: FUA WRITE LBA: %llu, bytes: %u\n",
393 cmd->t_task_lba, cmd->data_length);
394
395 ret = vfs_fsync_range(fd_dev->fd_file, start, end, 1);
396 if (ret != 0)
397 pr_err("FILEIO: vfs_fsync_range() failed: %d\n", ret);
398}
399
400static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl, 378static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
401 u32 sgl_nents, enum dma_data_direction data_direction) 379 u32 sgl_nents, enum dma_data_direction data_direction)
402{ 380{
@@ -411,19 +389,21 @@ static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
411 ret = fd_do_readv(cmd, sgl, sgl_nents); 389 ret = fd_do_readv(cmd, sgl, sgl_nents);
412 } else { 390 } else {
413 ret = fd_do_writev(cmd, sgl, sgl_nents); 391 ret = fd_do_writev(cmd, sgl, sgl_nents);
-414
+392		/*
393 * Perform implict vfs_fsync_range() for fd_do_writev() ops
394 * for SCSI WRITEs with Forced Unit Access (FUA) set.
395 * Allow this to happen independent of WCE=0 setting.
396 */
415 if (ret > 0 && 397 if (ret > 0 &&
416 dev->se_sub_dev->se_dev_attrib.emulate_write_cache > 0 &&
417 dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 && 398 dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 &&
418 (cmd->se_cmd_flags & SCF_FUA)) { 399 (cmd->se_cmd_flags & SCF_FUA)) {
-419			/*
-420			 * We might need to be a bit smarter here
-421			 * and return some sense data to let the initiator
-422			 * know the FUA WRITE cache sync failed..?
+400			struct fd_dev *fd_dev = dev->dev_ptr;
+401			loff_t start = cmd->t_task_lba *
+402				dev->se_sub_dev->se_dev_attrib.block_size;
+403			loff_t end = start + cmd->data_length;
423 */
424 fd_emulate_write_fua(cmd);
425 }
426 404
405 vfs_fsync_range(fd_dev->fd_file, start, end, 1);
406 }
427 } 407 }
428 408
429 if (ret < 0) { 409 if (ret < 0) {
@@ -442,7 +422,6 @@ enum {
442static match_table_t tokens = { 422static match_table_t tokens = {
443 {Opt_fd_dev_name, "fd_dev_name=%s"}, 423 {Opt_fd_dev_name, "fd_dev_name=%s"},
444 {Opt_fd_dev_size, "fd_dev_size=%s"}, 424 {Opt_fd_dev_size, "fd_dev_size=%s"},
445 {Opt_fd_buffered_io, "fd_buffered_io=%d"},
446 {Opt_err, NULL} 425 {Opt_err, NULL}
447}; 426};
448 427
@@ -454,7 +433,7 @@ static ssize_t fd_set_configfs_dev_params(
454 struct fd_dev *fd_dev = se_dev->se_dev_su_ptr; 433 struct fd_dev *fd_dev = se_dev->se_dev_su_ptr;
455 char *orig, *ptr, *arg_p, *opts; 434 char *orig, *ptr, *arg_p, *opts;
456 substring_t args[MAX_OPT_ARGS]; 435 substring_t args[MAX_OPT_ARGS];
-457	int ret = 0, arg, token;
+436	int ret = 0, token;
458 437
459 opts = kstrdup(page, GFP_KERNEL); 438 opts = kstrdup(page, GFP_KERNEL);
460 if (!opts) 439 if (!opts)
@@ -498,19 +477,6 @@ static ssize_t fd_set_configfs_dev_params(
498 " bytes\n", fd_dev->fd_dev_size); 477 " bytes\n", fd_dev->fd_dev_size);
499 fd_dev->fbd_flags |= FBDF_HAS_SIZE; 478 fd_dev->fbd_flags |= FBDF_HAS_SIZE;
500 break; 479 break;
501 case Opt_fd_buffered_io:
502 match_int(args, &arg);
503 if (arg != 1) {
504 pr_err("bogus fd_buffered_io=%d value\n", arg);
505 ret = -EINVAL;
506 goto out;
507 }
508
509 pr_debug("FILEIO: Using buffered I/O"
510 " operations for struct fd_dev\n");
511
512 fd_dev->fbd_flags |= FDBD_USE_BUFFERED_IO;
513 break;
514 default: 480 default:
515 break; 481 break;
516 } 482 }
@@ -542,10 +508,8 @@ static ssize_t fd_show_configfs_dev_params(
542 ssize_t bl = 0; 508 ssize_t bl = 0;
543 509
544 bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id); 510 bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
-545	bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n",
-546		fd_dev->fd_dev_name, fd_dev->fd_dev_size,
+511	bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n",
+512		fd_dev->fd_dev_name, fd_dev->fd_dev_size);
547 (fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO) ?
548 "Buffered" : "Synchronous");
549 return bl; 513 return bl;
550} 514}
551 515
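The two behavioural points of the FILEIO change above are: the backing file is now always opened O_DSYNC (data plus the metadata needed to read it back is durable when the write returns, while pure timestamp updates are not forced out), and FUA writes additionally get an explicit vfs_fsync_range() over the affected byte range. A minimal user-space illustration of the O_DSYNC half, with a made-up file name:

#define _GNU_SOURCE		/* for O_LARGEFILE */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("backing.img", O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC, 0600);
	char block[512];

	if (fd < 0)
		return 1;
	memset(block, 0xab, sizeof(block));
	/* durable when pwrite() returns, because the descriptor is O_DSYNC */
	if (pwrite(fd, block, sizeof(block), 0) != sizeof(block)) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}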
diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h
index fbd59ef7d8be..70ce7fd7111d 100644
--- a/drivers/target/target_core_file.h
+++ b/drivers/target/target_core_file.h
@@ -14,7 +14,6 @@
14 14
15#define FBDF_HAS_PATH 0x01 15#define FBDF_HAS_PATH 0x01
16#define FBDF_HAS_SIZE 0x02 16#define FBDF_HAS_SIZE 0x02
17#define FDBD_USE_BUFFERED_IO 0x04
18 17
19struct fd_dev { 18struct fd_dev {
20 u32 fbd_flags; 19 u32 fbd_flags;
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index dcb79521e6c8..89f264c67420 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
269} 269}
270 270
271/* returns 0 if the page was successfully put into frontswap, -1 if not */ 271/* returns 0 if the page was successfully put into frontswap, -1 if not */
-272	static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+272	static int tmem_frontswap_store(unsigned type, pgoff_t offset,
273 struct page *page) 273 struct page *page)
274{ 274{
275 u64 ind64 = (u64)offset; 275 u64 ind64 = (u64)offset;
@@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
295 * returns 0 if the page was successfully gotten from frontswap, -1 if 295 * returns 0 if the page was successfully gotten from frontswap, -1 if
296 * was not present (should never happen!) 296 * was not present (should never happen!)
297 */ 297 */
-298	static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+298	static int tmem_frontswap_load(unsigned type, pgoff_t offset,
299 struct page *page) 299 struct page *page)
300{ 300{
301 u64 ind64 = (u64)offset; 301 u64 ind64 = (u64)offset;
@@ -362,8 +362,8 @@ static int __init no_frontswap(char *s)
362__setup("nofrontswap", no_frontswap); 362__setup("nofrontswap", no_frontswap);
363 363
364static struct frontswap_ops __initdata tmem_frontswap_ops = { 364static struct frontswap_ops __initdata tmem_frontswap_ops = {
-365	.put_page = tmem_frontswap_put_page,
-366	.get_page = tmem_frontswap_get_page,
+365	.store = tmem_frontswap_store,
+366	.load = tmem_frontswap_load,
367 .invalidate_page = tmem_frontswap_flush_page, 367 .invalidate_page = tmem_frontswap_flush_page,
368 .invalidate_area = tmem_frontswap_flush_area, 368 .invalidate_area = tmem_frontswap_flush_area,
369 .init = tmem_frontswap_init 369 .init = tmem_frontswap_init
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 20350a93ed99..6df0cbe1cbc9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -174,6 +174,7 @@ struct smb_version_operations {
174 void (*add_credits)(struct TCP_Server_Info *, const unsigned int); 174 void (*add_credits)(struct TCP_Server_Info *, const unsigned int);
175 void (*set_credits)(struct TCP_Server_Info *, const int); 175 void (*set_credits)(struct TCP_Server_Info *, const int);
176 int * (*get_credits_field)(struct TCP_Server_Info *); 176 int * (*get_credits_field)(struct TCP_Server_Info *);
177 __u64 (*get_next_mid)(struct TCP_Server_Info *);
177 /* data offset from read response message */ 178 /* data offset from read response message */
178 unsigned int (*read_data_offset)(char *); 179 unsigned int (*read_data_offset)(char *);
179 /* data length from read response message */ 180 /* data length from read response message */
@@ -399,6 +400,12 @@ set_credits(struct TCP_Server_Info *server, const int val)
399 server->ops->set_credits(server, val); 400 server->ops->set_credits(server, val);
400} 401}
401 402
403static inline __u64
404get_next_mid(struct TCP_Server_Info *server)
405{
406 return server->ops->get_next_mid(server);
407}
408
402/* 409/*
403 * Macros to allow the TCP_Server_Info->net field and related code to drop out 410 * Macros to allow the TCP_Server_Info->net field and related code to drop out
404 * when CONFIG_NET_NS isn't set. 411 * when CONFIG_NET_NS isn't set.
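
The cifsglob.h hunk above adds a get_next_mid slot to struct smb_version_operations plus a static inline wrapper, so callers dispatch through the per-protocol ops table instead of calling GetNextMid() directly (the SMB1 allocator itself becomes cifs_get_next_mid in the smb1ops.c hunk further down). The following is only a minimal userspace sketch of that dispatch shape; server_info, server_ops and mock_get_next_mid are made-up stand-ins for TCP_Server_Info, smb_version_operations and the real allocator.

#include <stdio.h>
#include <stdint.h>

struct server_info;

/* Mock stand-in for struct smb_version_operations. */
struct server_ops {
	uint64_t (*get_next_mid)(struct server_info *server);
};

/* Mock stand-in for struct TCP_Server_Info. */
struct server_info {
	uint64_t current_mid;
	const struct server_ops *ops;
};

/* Trivial allocator playing the role of cifs_get_next_mid(). */
static uint64_t mock_get_next_mid(struct server_info *server)
{
	return ++server->current_mid;
}

static const struct server_ops smb1_ops = {
	.get_next_mid = mock_get_next_mid,
};

/* Same shape as the new get_next_mid() inline wrapper. */
static inline uint64_t get_next_mid(struct server_info *server)
{
	return server->ops->get_next_mid(server);
}

int main(void)
{
	struct server_info srv = { .current_mid = 0, .ops = &smb1_ops };

	printf("mid=%llu\n", (unsigned long long)get_next_mid(&srv));
	printf("mid=%llu\n", (unsigned long long)get_next_mid(&srv));
	return 0;
}
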
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 5ec21ecf7980..0a6cbfe2761e 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -114,7 +114,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
114 void **request_buf); 114 void **request_buf);
115extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, 115extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
116 const struct nls_table *nls_cp); 116 const struct nls_table *nls_cp);
117extern __u64 GetNextMid(struct TCP_Server_Info *server);
118extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 117extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
119extern u64 cifs_UnixTimeToNT(struct timespec); 118extern u64 cifs_UnixTimeToNT(struct timespec);
120extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 119extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b5ad716b2642..5b400730c213 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -268,7 +268,7 @@ small_smb_init_no_tc(const int smb_command, const int wct,
268 return rc; 268 return rc;
269 269
270 buffer = (struct smb_hdr *)*request_buf; 270 buffer = (struct smb_hdr *)*request_buf;
271 buffer->Mid = GetNextMid(ses->server); 271 buffer->Mid = get_next_mid(ses->server);
272 if (ses->capabilities & CAP_UNICODE) 272 if (ses->capabilities & CAP_UNICODE)
273 buffer->Flags2 |= SMBFLG2_UNICODE; 273 buffer->Flags2 |= SMBFLG2_UNICODE;
274 if (ses->capabilities & CAP_STATUS32) 274 if (ses->capabilities & CAP_STATUS32)
@@ -402,7 +402,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
402 402
403 cFYI(1, "secFlags 0x%x", secFlags); 403 cFYI(1, "secFlags 0x%x", secFlags);
404 404
405 pSMB->hdr.Mid = GetNextMid(server); 405 pSMB->hdr.Mid = get_next_mid(server);
406 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); 406 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
407 407
408 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) 408 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
@@ -782,7 +782,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses)
782 return rc; 782 return rc;
783 } 783 }
784 784
785 pSMB->hdr.Mid = GetNextMid(ses->server); 785 pSMB->hdr.Mid = get_next_mid(ses->server);
786 786
787 if (ses->server->sec_mode & 787 if (ses->server->sec_mode &
788 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 788 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
@@ -4762,7 +4762,7 @@ getDFSRetry:
4762 4762
4763 /* server pointer checked in called function, 4763 /* server pointer checked in called function,
4764 but should never be null here anyway */ 4764 but should never be null here anyway */
4765 pSMB->hdr.Mid = GetNextMid(ses->server); 4765 pSMB->hdr.Mid = get_next_mid(ses->server);
4766 pSMB->hdr.Tid = ses->ipc_tid; 4766 pSMB->hdr.Tid = ses->ipc_tid;
4767 pSMB->hdr.Uid = ses->Suid; 4767 pSMB->hdr.Uid = ses->Suid;
4768 if (ses->capabilities & CAP_STATUS32) 4768 if (ses->capabilities & CAP_STATUS32)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccafdedd0dbc..78db68a5cf44 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1058,13 +1058,15 @@ cifs_demultiplex_thread(void *p)
1058 if (mid_entry != NULL) { 1058 if (mid_entry != NULL) {
1059 if (!mid_entry->multiRsp || mid_entry->multiEnd) 1059 if (!mid_entry->multiRsp || mid_entry->multiEnd)
1060 mid_entry->callback(mid_entry); 1060 mid_entry->callback(mid_entry);
1061 } else if (!server->ops->is_oplock_break(buf, server)) { 1061 } else if (!server->ops->is_oplock_break ||
1062 !server->ops->is_oplock_break(buf, server)) {
1062 cERROR(1, "No task to wake, unknown frame received! " 1063 cERROR(1, "No task to wake, unknown frame received! "
1063 "NumMids %d", atomic_read(&midCount)); 1064 "NumMids %d", atomic_read(&midCount));
1064 cifs_dump_mem("Received Data is: ", buf, 1065 cifs_dump_mem("Received Data is: ", buf,
1065 HEADER_SIZE(server)); 1066 HEADER_SIZE(server));
1066#ifdef CONFIG_CIFS_DEBUG2 1067#ifdef CONFIG_CIFS_DEBUG2
1067 server->ops->dump_detail(buf); 1068 if (server->ops->dump_detail)
1069 server->ops->dump_detail(buf);
1068 cifs_dump_mids(server); 1070 cifs_dump_mids(server);
1069#endif /* CIFS_DEBUG2 */ 1071#endif /* CIFS_DEBUG2 */
1070 1072
@@ -3938,7 +3940,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3938 header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, 3940 header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
3939 NULL /*no tid */ , 4 /*wct */ ); 3941 NULL /*no tid */ , 4 /*wct */ );
3940 3942
3941 smb_buffer->Mid = GetNextMid(ses->server); 3943 smb_buffer->Mid = get_next_mid(ses->server);
3942 smb_buffer->Uid = ses->Suid; 3944 smb_buffer->Uid = ses->Suid;
3943 pSMB = (TCONX_REQ *) smb_buffer; 3945 pSMB = (TCONX_REQ *) smb_buffer;
3944 pSMBr = (TCONX_RSP *) smb_buffer_response; 3946 pSMBr = (TCONX_RSP *) smb_buffer_response;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 253170dfa716..513adbc211d7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -876,7 +876,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
876 struct cifsLockInfo *li, *tmp; 876 struct cifsLockInfo *li, *tmp;
877 struct cifs_tcon *tcon; 877 struct cifs_tcon *tcon;
878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 unsigned int num, max_num; 879 unsigned int num, max_num, max_buf;
880 LOCKING_ANDX_RANGE *buf, *cur; 880 LOCKING_ANDX_RANGE *buf, *cur;
881 int types[] = {LOCKING_ANDX_LARGE_FILES, 881 int types[] = {LOCKING_ANDX_LARGE_FILES,
882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; 882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
@@ -892,8 +892,19 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
892 return rc; 892 return rc;
893 } 893 }
894 894
895 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / 895 /*
896 sizeof(LOCKING_ANDX_RANGE); 896 * Accessing maxBuf is racy with cifs_reconnect - need to store value
897 * and check it for zero before using.
898 */
899 max_buf = tcon->ses->server->maxBuf;
900 if (!max_buf) {
901 mutex_unlock(&cinode->lock_mutex);
902 FreeXid(xid);
903 return -EINVAL;
904 }
905
906 max_num = (max_buf - sizeof(struct smb_hdr)) /
907 sizeof(LOCKING_ANDX_RANGE);
897 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 908 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
898 if (!buf) { 909 if (!buf) {
899 mutex_unlock(&cinode->lock_mutex); 910 mutex_unlock(&cinode->lock_mutex);
@@ -1218,7 +1229,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1218 int types[] = {LOCKING_ANDX_LARGE_FILES, 1229 int types[] = {LOCKING_ANDX_LARGE_FILES,
1219 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; 1230 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1220 unsigned int i; 1231 unsigned int i;
1221 unsigned int max_num, num; 1232 unsigned int max_num, num, max_buf;
1222 LOCKING_ANDX_RANGE *buf, *cur; 1233 LOCKING_ANDX_RANGE *buf, *cur;
1223 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1234 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1224 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 1235 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
@@ -1228,8 +1239,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1228 1239
1229 INIT_LIST_HEAD(&tmp_llist); 1240 INIT_LIST_HEAD(&tmp_llist);
1230 1241
1231 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / 1242 /*
1232 sizeof(LOCKING_ANDX_RANGE); 1243 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1244 * and check it for zero before using.
1245 */
1246 max_buf = tcon->ses->server->maxBuf;
1247 if (!max_buf)
1248 return -EINVAL;
1249
1250 max_num = (max_buf - sizeof(struct smb_hdr)) /
1251 sizeof(LOCKING_ANDX_RANGE);
1233 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1252 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1234 if (!buf) 1253 if (!buf)
1235 return -ENOMEM; 1254 return -ENOMEM;
@@ -1247,46 +1266,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1247 continue; 1266 continue;
1248 if (types[i] != li->type) 1267 if (types[i] != li->type)
1249 continue; 1268 continue;
1250 if (!cinode->can_cache_brlcks) { 1269 if (cinode->can_cache_brlcks) {
1251 cur->Pid = cpu_to_le16(li->pid);
1252 cur->LengthLow = cpu_to_le32((u32)li->length);
1253 cur->LengthHigh =
1254 cpu_to_le32((u32)(li->length>>32));
1255 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1256 cur->OffsetHigh =
1257 cpu_to_le32((u32)(li->offset>>32));
1258 /*
1259 * We need to save a lock here to let us add
1260 * it again to the file's list if the unlock
1261 * range request fails on the server.
1262 */
1263 list_move(&li->llist, &tmp_llist);
1264 if (++num == max_num) {
1265 stored_rc = cifs_lockv(xid, tcon,
1266 cfile->netfid,
1267 li->type, num,
1268 0, buf);
1269 if (stored_rc) {
1270 /*
1271 * We failed on the unlock range
1272 * request - add all locks from
1273 * the tmp list to the head of
1274 * the file's list.
1275 */
1276 cifs_move_llist(&tmp_llist,
1277 &cfile->llist);
1278 rc = stored_rc;
1279 } else
1280 /*
1281 * The unlock range request
1282 * succeed - free the tmp list.
1283 */
1284 cifs_free_llist(&tmp_llist);
1285 cur = buf;
1286 num = 0;
1287 } else
1288 cur++;
1289 } else {
1290 /* 1270 /*
1291 * We can cache brlock requests - simply remove 1271 * We can cache brlock requests - simply remove
1292 * a lock from the file's list. 1272 * a lock from the file's list.
@@ -1294,7 +1274,41 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1294 list_del(&li->llist); 1274 list_del(&li->llist);
1295 cifs_del_lock_waiters(li); 1275 cifs_del_lock_waiters(li);
1296 kfree(li); 1276 kfree(li);
1277 continue;
1297 } 1278 }
1279 cur->Pid = cpu_to_le16(li->pid);
1280 cur->LengthLow = cpu_to_le32((u32)li->length);
1281 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1282 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1283 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1284 /*
1285 * We need to save a lock here to let us add it again to
1286 * the file's list if the unlock range request fails on
1287 * the server.
1288 */
1289 list_move(&li->llist, &tmp_llist);
1290 if (++num == max_num) {
1291 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1292 li->type, num, 0, buf);
1293 if (stored_rc) {
1294 /*
1295 * We failed on the unlock range
1296 * request - add all locks from the tmp
1297 * list to the head of the file's list.
1298 */
1299 cifs_move_llist(&tmp_llist,
1300 &cfile->llist);
1301 rc = stored_rc;
1302 } else
1303 /*
1304 * The unlock range request succeed -
1305 * free the tmp list.
1306 */
1307 cifs_free_llist(&tmp_llist);
1308 cur = buf;
1309 num = 0;
1310 } else
1311 cur++;
1298 } 1312 }
1299 if (num) { 1313 if (num) {
1300 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1314 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
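
Both file.c hunks above replace a direct read of tcon->ses->server->maxBuf in the buffer-size arithmetic with a one-time snapshot into a local max_buf that is checked for zero first, because cifs_reconnect can reset maxBuf concurrently and a zero value would make the division meaningless and the kzalloc size zero. Below is a standalone sketch of the same snapshot-then-validate pattern; the sizes and the server_max_buf variable are invented stand-ins for sizeof(struct smb_hdr), sizeof(LOCKING_ANDX_RANGE) and the racy server field.

#include <stdio.h>
#include <errno.h>

#define MOCK_HDR_SIZE	32	/* stand-in for sizeof(struct smb_hdr) */
#define MOCK_RANGE_SIZE	24	/* stand-in for sizeof(LOCKING_ANDX_RANGE) */

/* A field that another thread (reconnect) may reset to 0 at any time. */
static volatile unsigned int server_max_buf = 16384;

static int compute_max_num(unsigned int *out_max_num)
{
	/*
	 * Snapshot the racy value once; every later use sees the same
	 * number, and a zero snapshot is rejected up front instead of
	 * producing a zero-sized allocation.
	 */
	unsigned int max_buf = server_max_buf;

	if (!max_buf)
		return -EINVAL;

	*out_max_num = (max_buf - MOCK_HDR_SIZE) / MOCK_RANGE_SIZE;
	return 0;
}

int main(void)
{
	unsigned int max_num;

	if (compute_max_num(&max_num) == 0)
		printf("max_num = %u locks per request\n", max_num);

	server_max_buf = 0;	/* simulate a concurrent reconnect */
	if (compute_max_num(&max_num) != 0)
		printf("reconnect in progress, bail with -EINVAL\n");
	return 0;
}
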
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index e2552d2b2e42..557506ae1e2a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -212,93 +212,6 @@ cifs_small_buf_release(void *buf_to_free)
212 return; 212 return;
213} 213}
214 214
215/*
216 * Find a free multiplex id (SMB mid). Otherwise there could be
217 * mid collisions which might cause problems, demultiplexing the
218 * wrong response to this request. Multiplex ids could collide if
219 * one of a series requests takes much longer than the others, or
220 * if a very large number of long lived requests (byte range
221 * locks or FindNotify requests) are pending. No more than
222 * 64K-1 requests can be outstanding at one time. If no
223 * mids are available, return zero. A future optimization
224 * could make the combination of mids and uid the key we use
225 * to demultiplex on (rather than mid alone).
226 * In addition to the above check, the cifs demultiplex
227 * code already used the command code as a secondary
228 * check of the frame and if signing is negotiated the
229 * response would be discarded if the mid were the same
230 * but the signature was wrong. Since the mid is not put in the
231 * pending queue until later (when it is about to be dispatched)
232 * we do have to limit the number of outstanding requests
233 * to somewhat less than 64K-1 although it is hard to imagine
234 * so many threads being in the vfs at one time.
235 */
236__u64 GetNextMid(struct TCP_Server_Info *server)
237{
238 __u64 mid = 0;
239 __u16 last_mid, cur_mid;
240 bool collision;
241
242 spin_lock(&GlobalMid_Lock);
243
244 /* mid is 16 bit only for CIFS/SMB */
245 cur_mid = (__u16)((server->CurrentMid) & 0xffff);
246 /* we do not want to loop forever */
247 last_mid = cur_mid;
248 cur_mid++;
249
250 /*
251 * This nested loop looks more expensive than it is.
252 * In practice the list of pending requests is short,
253 * fewer than 50, and the mids are likely to be unique
254 * on the first pass through the loop unless some request
255 * takes longer than the 64 thousand requests before it
256 * (and it would also have to have been a request that
257 * did not time out).
258 */
259 while (cur_mid != last_mid) {
260 struct mid_q_entry *mid_entry;
261 unsigned int num_mids;
262
263 collision = false;
264 if (cur_mid == 0)
265 cur_mid++;
266
267 num_mids = 0;
268 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
269 ++num_mids;
270 if (mid_entry->mid == cur_mid &&
271 mid_entry->mid_state == MID_REQUEST_SUBMITTED) {
272 /* This mid is in use, try a different one */
273 collision = true;
274 break;
275 }
276 }
277
278 /*
279 * if we have more than 32k mids in the list, then something
280 * is very wrong. Possibly a local user is trying to DoS the
281 * box by issuing long-running calls and SIGKILL'ing them. If
282 * we get to 2^16 mids then we're in big trouble as this
283 * function could loop forever.
284 *
285 * Go ahead and assign out the mid in this situation, but force
286 * an eventual reconnect to clean out the pending_mid_q.
287 */
288 if (num_mids > 32768)
289 server->tcpStatus = CifsNeedReconnect;
290
291 if (!collision) {
292 mid = (__u64)cur_mid;
293 server->CurrentMid = mid;
294 break;
295 }
296 cur_mid++;
297 }
298 spin_unlock(&GlobalMid_Lock);
299 return mid;
300}
301
302/* NB: MID can not be set if treeCon not passed in, in that 215/* NB: MID can not be set if treeCon not passed in, in that
 303 case it is responsibility of caller to set the mid 216 case it is responsibility of caller to set the mid
304void 217void
@@ -334,7 +247,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
334 247
335 /* Uid is not converted */ 248 /* Uid is not converted */
336 buffer->Uid = treeCon->ses->Suid; 249 buffer->Uid = treeCon->ses->Suid;
337 buffer->Mid = GetNextMid(treeCon->ses->server); 250 buffer->Mid = get_next_mid(treeCon->ses->server);
338 } 251 }
339 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) 252 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS)
340 buffer->Flags2 |= SMBFLG2_DFS; 253 buffer->Flags2 |= SMBFLG2_DFS;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d9d615fbed3f..6dec38f5522d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -125,6 +125,94 @@ cifs_get_credits_field(struct TCP_Server_Info *server)
125 return &server->credits; 125 return &server->credits;
126} 126}
127 127
128/*
129 * Find a free multiplex id (SMB mid). Otherwise there could be
130 * mid collisions which might cause problems, demultiplexing the
131 * wrong response to this request. Multiplex ids could collide if
132 * one of a series requests takes much longer than the others, or
133 * if a very large number of long lived requests (byte range
134 * locks or FindNotify requests) are pending. No more than
135 * 64K-1 requests can be outstanding at one time. If no
136 * mids are available, return zero. A future optimization
137 * could make the combination of mids and uid the key we use
138 * to demultiplex on (rather than mid alone).
139 * In addition to the above check, the cifs demultiplex
140 * code already used the command code as a secondary
141 * check of the frame and if signing is negotiated the
142 * response would be discarded if the mid were the same
143 * but the signature was wrong. Since the mid is not put in the
144 * pending queue until later (when it is about to be dispatched)
145 * we do have to limit the number of outstanding requests
146 * to somewhat less than 64K-1 although it is hard to imagine
147 * so many threads being in the vfs at one time.
148 */
149static __u64
150cifs_get_next_mid(struct TCP_Server_Info *server)
151{
152 __u64 mid = 0;
153 __u16 last_mid, cur_mid;
154 bool collision;
155
156 spin_lock(&GlobalMid_Lock);
157
158 /* mid is 16 bit only for CIFS/SMB */
159 cur_mid = (__u16)((server->CurrentMid) & 0xffff);
160 /* we do not want to loop forever */
161 last_mid = cur_mid;
162 cur_mid++;
163
164 /*
165 * This nested loop looks more expensive than it is.
166 * In practice the list of pending requests is short,
167 * fewer than 50, and the mids are likely to be unique
168 * on the first pass through the loop unless some request
169 * takes longer than the 64 thousand requests before it
170 * (and it would also have to have been a request that
171 * did not time out).
172 */
173 while (cur_mid != last_mid) {
174 struct mid_q_entry *mid_entry;
175 unsigned int num_mids;
176
177 collision = false;
178 if (cur_mid == 0)
179 cur_mid++;
180
181 num_mids = 0;
182 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
183 ++num_mids;
184 if (mid_entry->mid == cur_mid &&
185 mid_entry->mid_state == MID_REQUEST_SUBMITTED) {
186 /* This mid is in use, try a different one */
187 collision = true;
188 break;
189 }
190 }
191
192 /*
193 * if we have more than 32k mids in the list, then something
194 * is very wrong. Possibly a local user is trying to DoS the
195 * box by issuing long-running calls and SIGKILL'ing them. If
196 * we get to 2^16 mids then we're in big trouble as this
197 * function could loop forever.
198 *
199 * Go ahead and assign out the mid in this situation, but force
200 * an eventual reconnect to clean out the pending_mid_q.
201 */
202 if (num_mids > 32768)
203 server->tcpStatus = CifsNeedReconnect;
204
205 if (!collision) {
206 mid = (__u64)cur_mid;
207 server->CurrentMid = mid;
208 break;
209 }
210 cur_mid++;
211 }
212 spin_unlock(&GlobalMid_Lock);
213 return mid;
214}
215
128struct smb_version_operations smb1_operations = { 216struct smb_version_operations smb1_operations = {
129 .send_cancel = send_nt_cancel, 217 .send_cancel = send_nt_cancel,
130 .compare_fids = cifs_compare_fids, 218 .compare_fids = cifs_compare_fids,
@@ -133,6 +221,7 @@ struct smb_version_operations smb1_operations = {
133 .add_credits = cifs_add_credits, 221 .add_credits = cifs_add_credits,
134 .set_credits = cifs_set_credits, 222 .set_credits = cifs_set_credits,
135 .get_credits_field = cifs_get_credits_field, 223 .get_credits_field = cifs_get_credits_field,
224 .get_next_mid = cifs_get_next_mid,
136 .read_data_offset = cifs_read_data_offset, 225 .read_data_offset = cifs_read_data_offset,
137 .read_data_length = cifs_read_data_length, 226 .read_data_length = cifs_read_data_length,
138 .map_error = map_smb_to_linux_error, 227 .map_error = map_smb_to_linux_error,
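
cifs_get_next_mid() above is the old GetNextMid() moved verbatim behind the ops table: it walks the 16-bit mid space starting one past CurrentMid, skips 0, and for each candidate scans the pending queue for a request already using that value, giving up only after wrapping all the way back to the start. The sketch below is a condensed userspace illustration of that wraparound scan over a small array standing in for pending_mid_q; it omits the locking, the MID_REQUEST_SUBMITTED state check and the 32k overload safeguard present in the real function.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Mids of requests currently in flight (stand-in for pending_mid_q). */
static const uint16_t pending[] = { 1, 2, 3 };
#define NPENDING (sizeof(pending) / sizeof(pending[0]))

static bool mid_in_use(uint16_t mid)
{
	for (size_t i = 0; i < NPENDING; i++)
		if (pending[i] == mid)
			return true;
	return false;
}

/* Returns the next free 16-bit mid, or 0 if the whole space is in use. */
static uint16_t get_next_mid(uint16_t current_mid)
{
	uint16_t last_mid = current_mid;	/* stop once we wrap back here */
	uint16_t cur_mid = current_mid + 1;	/* 16-bit wraparound is intended */

	while (cur_mid != last_mid) {
		if (cur_mid == 0) {		/* mid 0 is never handed out */
			cur_mid++;
			continue;
		}
		if (!mid_in_use(cur_mid))
			return cur_mid;
		cur_mid++;
	}
	return 0;
}

int main(void)
{
	printf("next mid after 0 -> %u\n", get_next_mid(0));	      /* skips 1..3 */
	printf("next mid after 65535 -> %u\n", get_next_mid(65535));  /* wraps past 0 */
	return 0;
}
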
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1b36ffe6a47b..3097ee58fd7d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -779,7 +779,7 @@ send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
779 779
780 pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; 780 pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
781 pSMB->Timeout = 0; 781 pSMB->Timeout = 0;
782 pSMB->hdr.Mid = GetNextMid(ses->server); 782 pSMB->hdr.Mid = get_next_mid(ses->server);
783 783
784 return SendReceive(xid, ses, in_buf, out_buf, 784 return SendReceive(xid, ses, in_buf, out_buf,
785 &bytes_returned, 0); 785 &bytes_returned, 0);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 42593c587d48..03ff5b1eba93 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -75,19 +75,13 @@ static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
75 unsigned global_limit) 75 unsigned global_limit)
76{ 76{
77 unsigned long t; 77 unsigned long t;
78 char tmp[32];
79 unsigned limit = (1 << 16) - 1; 78 unsigned limit = (1 << 16) - 1;
80 int err; 79 int err;
81 80
82 if (*ppos || count >= sizeof(tmp) - 1) 81 if (*ppos)
83 return -EINVAL;
84
85 if (copy_from_user(tmp, buf, count))
86 return -EINVAL; 82 return -EINVAL;
87 83
88 tmp[count] = '\0'; 84 err = kstrtoul_from_user(buf, count, 0, &t);
89
90 err = strict_strtoul(tmp, 0, &t);
91 if (err) 85 if (err)
92 return err; 86 return err;
93 87
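
The control.c change above drops the local tmp[32] buffer, the copy_from_user() and strict_strtoul() calls, and lets kstrtoul_from_user() do the copy, bounds checking and strict parse in one step. kstrtoul_from_user() is a kernel-only helper, so a standalone program can only mimic its behavior; the sketch below is such an analog, parsing a bounded string with strtoul and an end-pointer check and rejecting trailing garbage (a trailing newline is tolerated). parse_ulong_strict is an invented name, not a kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>

/*
 * Userspace analog of kstrtoul_from_user(): parse an unsigned long,
 * auto-detecting the base when base == 0, and fail on empty input or
 * trailing non-whitespace characters.
 */
static int parse_ulong_strict(const char *buf, int base, unsigned long *res)
{
	char *end;

	errno = 0;
	*res = strtoul(buf, &end, base);
	if (errno || end == buf)
		return -EINVAL;
	while (isspace((unsigned char)*end))
		end++;
	if (*end != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned long t;

	printf("\"65535\\n\" -> %d (t=%lu)\n", parse_ulong_strict("65535\n", 0, &t), t);
	printf("\"0x10\"    -> %d (t=%lu)\n", parse_ulong_strict("0x10", 0, &t), t);
	printf("\"12abc\"   -> %d\n", parse_ulong_strict("12abc", 0, &t));
	return 0;
}
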
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df5ac048dc74..334e0b18a014 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -775,6 +775,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
775static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, 775static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
776 struct kstat *stat) 776 struct kstat *stat)
777{ 777{
778 unsigned int blkbits;
779
778 stat->dev = inode->i_sb->s_dev; 780 stat->dev = inode->i_sb->s_dev;
779 stat->ino = attr->ino; 781 stat->ino = attr->ino;
780 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 782 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -790,7 +792,13 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
790 stat->ctime.tv_nsec = attr->ctimensec; 792 stat->ctime.tv_nsec = attr->ctimensec;
791 stat->size = attr->size; 793 stat->size = attr->size;
792 stat->blocks = attr->blocks; 794 stat->blocks = attr->blocks;
793 stat->blksize = (1 << inode->i_blkbits); 795
796 if (attr->blksize != 0)
797 blkbits = ilog2(attr->blksize);
798 else
799 blkbits = inode->i_sb->s_blocksize_bits;
800
801 stat->blksize = 1 << blkbits;
794} 802}
795 803
796static int fuse_do_getattr(struct inode *inode, struct kstat *stat, 804static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
@@ -863,6 +871,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
863 if (stat) { 871 if (stat) {
864 generic_fillattr(inode, stat); 872 generic_fillattr(inode, stat);
865 stat->mode = fi->orig_i_mode; 873 stat->mode = fi->orig_i_mode;
874 stat->ino = fi->orig_ino;
866 } 875 }
867 } 876 }
868 877
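
fuse_fillattr() above stops deriving stat->blksize from inode->i_blkbits and instead takes ilog2() of the blksize reported by the filesystem in fuse_attr, falling back to the superblock's s_blocksize_bits when that field is zero. A small sketch of that selection follows, with a local ilog2 helper and made-up numbers (4096 reported by the server, 512-byte superblock fallback); pick_blksize and ilog2_u32 are illustrative names only.

#include <stdio.h>
#include <stdint.h>

/* Minimal ilog2() stand-in: index of the highest set bit. */
static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static uint32_t pick_blksize(uint32_t attr_blksize, unsigned int sb_blocksize_bits)
{
	unsigned int blkbits;

	if (attr_blksize != 0)
		blkbits = ilog2_u32(attr_blksize);	/* filesystem-provided hint */
	else
		blkbits = sb_blocksize_bits;		/* fall back to the superblock */

	return 1u << blkbits;
}

int main(void)
{
	/* server reported 4096 -> blkbits 12 -> 4096 */
	printf("blksize = %u\n", pick_blksize(4096, 9));
	/* server reported 0 -> fall back to 1 << 9 = 512 */
	printf("blksize = %u\n", pick_blksize(0, 9));
	return 0;
}
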
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9562109d3a87..b321a688cde7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2173,6 +2173,44 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2173 return ret; 2173 return ret;
2174} 2174}
2175 2175
2176long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2177 loff_t length)
2178{
2179 struct fuse_file *ff = file->private_data;
2180 struct fuse_conn *fc = ff->fc;
2181 struct fuse_req *req;
2182 struct fuse_fallocate_in inarg = {
2183 .fh = ff->fh,
2184 .offset = offset,
2185 .length = length,
2186 .mode = mode
2187 };
2188 int err;
2189
2190 if (fc->no_fallocate)
2191 return -EOPNOTSUPP;
2192
2193 req = fuse_get_req(fc);
2194 if (IS_ERR(req))
2195 return PTR_ERR(req);
2196
2197 req->in.h.opcode = FUSE_FALLOCATE;
2198 req->in.h.nodeid = ff->nodeid;
2199 req->in.numargs = 1;
2200 req->in.args[0].size = sizeof(inarg);
2201 req->in.args[0].value = &inarg;
2202 fuse_request_send(fc, req);
2203 err = req->out.h.error;
2204 if (err == -ENOSYS) {
2205 fc->no_fallocate = 1;
2206 err = -EOPNOTSUPP;
2207 }
2208 fuse_put_request(fc, req);
2209
2210 return err;
2211}
2212EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2213
2176static const struct file_operations fuse_file_operations = { 2214static const struct file_operations fuse_file_operations = {
2177 .llseek = fuse_file_llseek, 2215 .llseek = fuse_file_llseek,
2178 .read = do_sync_read, 2216 .read = do_sync_read,
@@ -2190,6 +2228,7 @@ static const struct file_operations fuse_file_operations = {
2190 .unlocked_ioctl = fuse_file_ioctl, 2228 .unlocked_ioctl = fuse_file_ioctl,
2191 .compat_ioctl = fuse_file_compat_ioctl, 2229 .compat_ioctl = fuse_file_compat_ioctl,
2192 .poll = fuse_file_poll, 2230 .poll = fuse_file_poll,
2231 .fallocate = fuse_file_fallocate,
2193}; 2232};
2194 2233
2195static const struct file_operations fuse_direct_io_file_operations = { 2234static const struct file_operations fuse_direct_io_file_operations = {
@@ -2206,6 +2245,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
2206 .unlocked_ioctl = fuse_file_ioctl, 2245 .unlocked_ioctl = fuse_file_ioctl,
2207 .compat_ioctl = fuse_file_compat_ioctl, 2246 .compat_ioctl = fuse_file_compat_ioctl,
2208 .poll = fuse_file_poll, 2247 .poll = fuse_file_poll,
2248 .fallocate = fuse_file_fallocate,
2209 /* no splice_read */ 2249 /* no splice_read */
2210}; 2250};
2211 2251
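
fuse_file_fallocate() above follows the FUSE feature-probe pattern also used by the other no_* connection flags: the first FUSE_FALLOCATE request that comes back -ENOSYS sets fc->no_fallocate, and every later call short-circuits to -EOPNOTSUPP without another round trip to userspace. Below is a standalone sketch of that negative-caching flow; mock_conn and mock_send_fallocate are invented stand-ins for fuse_conn and the request machinery, and the "server" here simply never implements the opcode.

#include <stdio.h>
#include <errno.h>

struct mock_conn {
	unsigned no_fallocate:1;	/* cached "filesystem lacks fallocate" */
};

/* Stand-in for sending FUSE_FALLOCATE; this server never implements it. */
static int mock_send_fallocate(struct mock_conn *fc)
{
	(void)fc;
	return -ENOSYS;
}

static int do_fallocate(struct mock_conn *fc)
{
	int err;

	if (fc->no_fallocate)
		return -EOPNOTSUPP;	/* cached result, no request sent */

	err = mock_send_fallocate(fc);
	if (err == -ENOSYS) {
		fc->no_fallocate = 1;	/* remember for the connection's lifetime */
		err = -EOPNOTSUPP;
	}
	return err;
}

int main(void)
{
	struct mock_conn fc = { .no_fallocate = 0 };

	printf("first call : %d (probe hit the server)\n", do_fallocate(&fc));
	printf("second call: %d (answered from the cached flag)\n", do_fallocate(&fc));
	return 0;
}
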
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 572cefc78012..771fb6322c07 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -82,6 +82,9 @@ struct fuse_inode {
82 preserve the original mode */ 82 preserve the original mode */
83 umode_t orig_i_mode; 83 umode_t orig_i_mode;
84 84
85 /** 64 bit inode number */
86 u64 orig_ino;
87
85 /** Version of last attribute change */ 88 /** Version of last attribute change */
86 u64 attr_version; 89 u64 attr_version;
87 90
@@ -478,6 +481,9 @@ struct fuse_conn {
478 /** Are BSD file locking primitives not implemented by fs? */ 481 /** Are BSD file locking primitives not implemented by fs? */
479 unsigned no_flock:1; 482 unsigned no_flock:1;
480 483
484 /** Is fallocate not implemented by fs? */
485 unsigned no_fallocate:1;
486
481 /** The number of requests waiting for completion */ 487 /** The number of requests waiting for completion */
482 atomic_t num_waiting; 488 atomic_t num_waiting;
483 489
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 42678a33b7bb..1cd61652018c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
91 fi->nlookup = 0; 91 fi->nlookup = 0;
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0;
94 INIT_LIST_HEAD(&fi->write_files); 95 INIT_LIST_HEAD(&fi->write_files);
95 INIT_LIST_HEAD(&fi->queued_writes); 96 INIT_LIST_HEAD(&fi->queued_writes);
96 INIT_LIST_HEAD(&fi->writepages); 97 INIT_LIST_HEAD(&fi->writepages);
@@ -139,6 +140,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
139 return 0; 140 return 0;
140} 141}
141 142
143/*
144 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
145 * so that it will fit.
146 */
147static ino_t fuse_squash_ino(u64 ino64)
148{
149 ino_t ino = (ino_t) ino64;
150 if (sizeof(ino_t) < sizeof(u64))
151 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
152 return ino;
153}
154
142void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 155void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
143 u64 attr_valid) 156 u64 attr_valid)
144{ 157{
@@ -148,7 +161,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
148 fi->attr_version = ++fc->attr_version; 161 fi->attr_version = ++fc->attr_version;
149 fi->i_time = attr_valid; 162 fi->i_time = attr_valid;
150 163
151 inode->i_ino = attr->ino; 164 inode->i_ino = fuse_squash_ino(attr->ino);
152 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
153 set_nlink(inode, attr->nlink); 166 set_nlink(inode, attr->nlink);
154 inode->i_uid = attr->uid; 167 inode->i_uid = attr->uid;
@@ -174,6 +187,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
174 fi->orig_i_mode = inode->i_mode; 187 fi->orig_i_mode = inode->i_mode;
175 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 188 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
176 inode->i_mode &= ~S_ISVTX; 189 inode->i_mode &= ~S_ISVTX;
190
191 fi->orig_ino = attr->ino;
177} 192}
178 193
179void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 194void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
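
fuse_squash_ino() above folds a 64-bit FUSE inode number into ino_t by XOR-ing the high half down whenever ino_t is only 32 bits wide, while the untruncated value is kept in fi->orig_ino so fuse_update_attributes() (see the dir.c hunk) can still report it through stat. Below is a standalone sketch of the same fold with an explicitly 32-bit ino type and a worked example; small_ino_t and squash_ino are illustrative names, not the kernel's.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

typedef uint32_t small_ino_t;	/* plays the role of a 32-bit ino_t */

static small_ino_t squash_ino(uint64_t ino64)
{
	small_ino_t ino = (small_ino_t)ino64;

	/* fold the high 32 bits into the low 32 bits instead of dropping them */
	if (sizeof(small_ino_t) < sizeof(uint64_t))
		ino ^= ino64 >> (sizeof(uint64_t) - sizeof(small_ino_t)) * 8;
	return ino;
}

int main(void)
{
	uint64_t ino64 = 0x123456780000002AULL;

	/* low half 0x0000002A XOR high half 0x12345678 = 0x12345652 */
	printf("ino64 = 0x%016" PRIx64 " -> squashed = 0x%08" PRIx32 "\n",
	       ino64, squash_ino(ino64));
	return 0;
}
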
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 616f41a7cde6..437195f204e1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1803,7 +1803,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1803 rcu_read_lock(); 1803 rcu_read_lock();
1804 file = fcheck_files(files, fd); 1804 file = fcheck_files(files, fd);
1805 if (file) { 1805 if (file) {
1806 unsigned i_mode, f_mode = file->f_mode; 1806 unsigned f_mode = file->f_mode;
1807 1807
1808 rcu_read_unlock(); 1808 rcu_read_unlock();
1809 put_files_struct(files); 1809 put_files_struct(files);
@@ -1819,12 +1819,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1819 inode->i_gid = GLOBAL_ROOT_GID; 1819 inode->i_gid = GLOBAL_ROOT_GID;
1820 } 1820 }
1821 1821
1822 i_mode = S_IFLNK; 1822 if (S_ISLNK(inode->i_mode)) {
1823 if (f_mode & FMODE_READ) 1823 unsigned i_mode = S_IFLNK;
1824 i_mode |= S_IRUSR | S_IXUSR; 1824 if (f_mode & FMODE_READ)
1825 if (f_mode & FMODE_WRITE) 1825 i_mode |= S_IRUSR | S_IXUSR;
1826 i_mode |= S_IWUSR | S_IXUSR; 1826 if (f_mode & FMODE_WRITE)
1827 inode->i_mode = i_mode; 1827 i_mode |= S_IWUSR | S_IXUSR;
1828 inode->i_mode = i_mode;
1829 }
1828 1830
1829 security_task_to_inode(task, inode); 1831 security_task_to_inode(task, inode);
1830 put_task_struct(task); 1832 put_task_struct(task);
@@ -1859,6 +1861,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1859 ei = PROC_I(inode); 1861 ei = PROC_I(inode);
1860 ei->fd = fd; 1862 ei->fd = fd;
1861 1863
1864 inode->i_mode = S_IFLNK;
1862 inode->i_op = &proc_pid_link_inode_operations; 1865 inode->i_op = &proc_pid_link_inode_operations;
1863 inode->i_size = 64; 1866 inode->i_size = 64;
1864 ei->op.proc_get_link = proc_fd_link; 1867 ei->op.proc_get_link = proc_fd_link;
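
The proc/base.c change above makes two related fixes: proc_fd_instantiate() now sets S_IFLNK explicitly when the /proc/<pid>/fd/<n> inode is created, and tid_fd_revalidate() only rewrites the permission bits, derived from the struct file's f_mode, when the inode really is a symlink. A small sketch of that f_mode-to-permission mapping follows; the flag and mode constants are simplified local definitions, not the kernel headers.

#include <stdio.h>

/* Simplified stand-ins for the kernel's FMODE_* and S_* constants. */
#define FMODE_READ	0x1u
#define FMODE_WRITE	0x2u

#define MODE_IFLNK	0120000u
#define MODE_IRUSR	0400u
#define MODE_IWUSR	0200u
#define MODE_IXUSR	0100u

static unsigned int fd_link_mode(unsigned int f_mode)
{
	unsigned int i_mode = MODE_IFLNK;

	if (f_mode & FMODE_READ)
		i_mode |= MODE_IRUSR | MODE_IXUSR;
	if (f_mode & FMODE_WRITE)
		i_mode |= MODE_IWUSR | MODE_IXUSR;
	return i_mode;
}

int main(void)
{
	printf("read-only fd  -> %o\n", fd_link_mode(FMODE_READ));
	printf("read-write fd -> %o\n", fd_link_mode(FMODE_READ | FMODE_WRITE));
	return 0;
}
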
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 58d0bdab68dd..81368ab6c611 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -181,6 +181,7 @@
181 {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 181 {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
182 {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 182 {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
183 {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 183 {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
184 {0x1002, 0x674A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
184 {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 185 {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
185 {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 186 {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
186 {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ 187 {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
@@ -198,6 +199,7 @@
198 {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 199 {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
199 {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 200 {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
200 {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 201 {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
202 {0x1002, 0x6771, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
201 {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 203 {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
202 {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 204 {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
203 {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ 205 {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
@@ -229,10 +231,11 @@
229 {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 231 {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
230 {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 232 {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
231 {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 233 {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
234 {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
232 {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 235 {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
233 {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 236 {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
234 {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 237 {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
235 {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 238 {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
236 {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 239 {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
237 {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 240 {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
238 {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ 241 {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
@@ -531,6 +534,7 @@
531 {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 534 {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
532 {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ 535 {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
533 {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ 536 {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
537 {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
534 {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 538 {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
535 {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 539 {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
536 {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 540 {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
@@ -550,6 +554,7 @@
550 {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 554 {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
551 {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 555 {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
552 {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 556 {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
557 {0x1002, 0x980A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
553 {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 558 {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
554 {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 559 {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
555 {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 560 {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
@@ -561,11 +566,19 @@
561 {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 566 {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
562 {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 567 {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
563 {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 568 {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
569 {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
570 {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
571 {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
572 {0x1002, 0x9918, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
573 {0x1002, 0x9919, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
564 {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 574 {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
565 {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 575 {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
566 {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 576 {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
567 {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 577 {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
568 {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ 578 {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
579 {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
580 {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
581 {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
569 {0, 0, 0} 582 {0, 0, 0}
570 583
571#define r128_PCI_IDS \ 584#define r128_PCI_IDS \
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 81e803e90aa4..acba894374a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch,
132 struct clock_event_device *evt); 132 struct clock_event_device *evt);
133extern void clockevents_register_device(struct clock_event_device *dev); 133extern void clockevents_register_device(struct clock_event_device *dev);
134 134
135extern void clockevents_config(struct clock_event_device *dev, u32 freq);
135extern void clockevents_config_and_register(struct clock_event_device *dev, 136extern void clockevents_config_and_register(struct clock_event_device *dev,
136 u32 freq, unsigned long min_delta, 137 u32 freq, unsigned long min_delta,
137 unsigned long max_delta); 138 unsigned long max_delta);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e988037abd2a..51a90b7f2d60 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,8 +1,6 @@
1#ifndef _LINUX_COMPACTION_H 1#ifndef _LINUX_COMPACTION_H
2#define _LINUX_COMPACTION_H 2#define _LINUX_COMPACTION_H
3 3
4#include <linux/node.h>
5
6/* Return values for compact_zone() and try_to_compact_pages() */ 4/* Return values for compact_zone() and try_to_compact_pages() */
7/* compaction didn't start as it was not possible or direct reclaim was more suitable */ 5/* compaction didn't start as it was not possible or direct reclaim was more suitable */
8#define COMPACT_SKIPPED 0 6#define COMPACT_SKIPPED 0
@@ -13,23 +11,6 @@
13/* The full zone was compacted */ 11/* The full zone was compacted */
14#define COMPACT_COMPLETE 3 12#define COMPACT_COMPLETE 3
15 13
16/*
17 * compaction supports three modes
18 *
19 * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
20 * MIGRATE_MOVABLE pageblocks as migration sources and targets.
21 * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
22 * MIGRATE_MOVABLE pageblocks as migration sources.
23 * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
24 * targets and convers them to MIGRATE_MOVABLE if possible
25 * COMPACT_SYNC uses synchronous migration and scans all pageblocks
26 */
27enum compact_mode {
28 COMPACT_ASYNC_MOVABLE,
29 COMPACT_ASYNC_UNMOVABLE,
30 COMPACT_SYNC,
31};
32
33#ifdef CONFIG_COMPACTION 14#ifdef CONFIG_COMPACTION
34extern int sysctl_compact_memory; 15extern int sysctl_compact_memory;
35extern int sysctl_compaction_handler(struct ctl_table *table, int write, 16extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
new file mode 100644
index 000000000000..0e4e2eec5c1d
--- /dev/null
+++ b/include/linux/frontswap.h
@@ -0,0 +1,127 @@
1#ifndef _LINUX_FRONTSWAP_H
2#define _LINUX_FRONTSWAP_H
3
4#include <linux/swap.h>
5#include <linux/mm.h>
6#include <linux/bitops.h>
7
8struct frontswap_ops {
9 void (*init)(unsigned);
10 int (*store)(unsigned, pgoff_t, struct page *);
11 int (*load)(unsigned, pgoff_t, struct page *);
12 void (*invalidate_page)(unsigned, pgoff_t);
13 void (*invalidate_area)(unsigned);
14};
15
16extern bool frontswap_enabled;
17extern struct frontswap_ops
18 frontswap_register_ops(struct frontswap_ops *ops);
19extern void frontswap_shrink(unsigned long);
20extern unsigned long frontswap_curr_pages(void);
21extern void frontswap_writethrough(bool);
22
23extern void __frontswap_init(unsigned type);
24extern int __frontswap_store(struct page *page);
25extern int __frontswap_load(struct page *page);
26extern void __frontswap_invalidate_page(unsigned, pgoff_t);
27extern void __frontswap_invalidate_area(unsigned);
28
29#ifdef CONFIG_FRONTSWAP
30
31static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
32{
33 bool ret = false;
34
35 if (frontswap_enabled && sis->frontswap_map)
36 ret = test_bit(offset, sis->frontswap_map);
37 return ret;
38}
39
40static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
41{
42 if (frontswap_enabled && sis->frontswap_map)
43 set_bit(offset, sis->frontswap_map);
44}
45
46static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
47{
48 if (frontswap_enabled && sis->frontswap_map)
49 clear_bit(offset, sis->frontswap_map);
50}
51
52static inline void frontswap_map_set(struct swap_info_struct *p,
53 unsigned long *map)
54{
55 p->frontswap_map = map;
56}
57
58static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
59{
60 return p->frontswap_map;
61}
62#else
63/* all inline routines become no-ops and all externs are ignored */
64
65#define frontswap_enabled (0)
66
67static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
68{
69 return false;
70}
71
72static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
73{
74}
75
76static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
77{
78}
79
80static inline void frontswap_map_set(struct swap_info_struct *p,
81 unsigned long *map)
82{
83}
84
85static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
86{
87 return NULL;
88}
89#endif
90
91static inline int frontswap_store(struct page *page)
92{
93 int ret = -1;
94
95 if (frontswap_enabled)
96 ret = __frontswap_store(page);
97 return ret;
98}
99
100static inline int frontswap_load(struct page *page)
101{
102 int ret = -1;
103
104 if (frontswap_enabled)
105 ret = __frontswap_load(page);
106 return ret;
107}
108
109static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
110{
111 if (frontswap_enabled)
112 __frontswap_invalidate_page(type, offset);
113}
114
115static inline void frontswap_invalidate_area(unsigned type)
116{
117 if (frontswap_enabled)
118 __frontswap_invalidate_area(type);
119}
120
121static inline void frontswap_init(unsigned type)
122{
123 if (frontswap_enabled)
124 __frontswap_init(type);
125}
126
127#endif /* _LINUX_FRONTSWAP_H */
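
The new frontswap.h above keeps a per-swap-device bitmap (frontswap_map) with one bit per page offset; frontswap_set/test/clear guard the bit operations behind frontswap_enabled and the map being allocated, and the whole API collapses to no-ops when CONFIG_FRONTSWAP is off. The sketch below is a userspace illustration of that per-offset bitmap bookkeeping using plain unsigned long words in place of the kernel's set_bit/test_bit/clear_bit helpers; mock_swap_info and the fs_* names are invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

struct mock_swap_info {
	unsigned long npages;
	unsigned long *frontswap_map;	/* one bit per page offset */
};

static void fs_set(struct mock_swap_info *sis, unsigned long off)
{
	sis->frontswap_map[off / BITS_PER_LONG] |= 1UL << (off % BITS_PER_LONG);
}

static void fs_clear(struct mock_swap_info *sis, unsigned long off)
{
	sis->frontswap_map[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG));
}

static bool fs_test(struct mock_swap_info *sis, unsigned long off)
{
	return sis->frontswap_map[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG));
}

int main(void)
{
	struct mock_swap_info sis = { .npages = 1024 };

	/* one bit per page, rounded up to whole words */
	sis.frontswap_map = calloc((sis.npages + BITS_PER_LONG - 1) / BITS_PER_LONG,
				   sizeof(unsigned long));
	if (!sis.frontswap_map)
		return 1;

	fs_set(&sis, 42);
	printf("offset 42 in frontswap? %d\n", fs_test(&sis, 42));
	fs_clear(&sis, 42);
	printf("offset 42 in frontswap? %d\n", fs_test(&sis, 42));
	free(sis.frontswap_map);
	return 0;
}
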
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51978ed43e97..17fd887c798f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -802,13 +802,14 @@ struct inode {
802 unsigned int __i_nlink; 802 unsigned int __i_nlink;
803 }; 803 };
804 dev_t i_rdev; 804 dev_t i_rdev;
805 loff_t i_size;
805 struct timespec i_atime; 806 struct timespec i_atime;
806 struct timespec i_mtime; 807 struct timespec i_mtime;
807 struct timespec i_ctime; 808 struct timespec i_ctime;
808 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 809 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
809 unsigned short i_bytes; 810 unsigned short i_bytes;
811 unsigned int i_blkbits;
810 blkcnt_t i_blocks; 812 blkcnt_t i_blocks;
811 loff_t i_size;
812 813
813#ifdef __NEED_I_SIZE_ORDERED 814#ifdef __NEED_I_SIZE_ORDERED
814 seqcount_t i_size_seqcount; 815 seqcount_t i_size_seqcount;
@@ -828,9 +829,8 @@ struct inode {
828 struct list_head i_dentry; 829 struct list_head i_dentry;
829 struct rcu_head i_rcu; 830 struct rcu_head i_rcu;
830 }; 831 };
831 atomic_t i_count;
832 unsigned int i_blkbits;
833 u64 i_version; 832 u64 i_version;
833 atomic_t i_count;
834 atomic_t i_dio_count; 834 atomic_t i_dio_count;
835 atomic_t i_writecount; 835 atomic_t i_writecount;
836 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 836 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8f2ab8fef929..9303348965fb 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -54,6 +54,9 @@
54 * 7.18 54 * 7.18
55 * - add FUSE_IOCTL_DIR flag 55 * - add FUSE_IOCTL_DIR flag
56 * - add FUSE_NOTIFY_DELETE 56 * - add FUSE_NOTIFY_DELETE
57 *
58 * 7.19
59 * - add FUSE_FALLOCATE
57 */ 60 */
58 61
59#ifndef _LINUX_FUSE_H 62#ifndef _LINUX_FUSE_H
@@ -85,7 +88,7 @@
85#define FUSE_KERNEL_VERSION 7 88#define FUSE_KERNEL_VERSION 7
86 89
87/** Minor version number of this interface */ 90/** Minor version number of this interface */
88#define FUSE_KERNEL_MINOR_VERSION 18 91#define FUSE_KERNEL_MINOR_VERSION 19
89 92
90/** The node ID of the root inode */ 93/** The node ID of the root inode */
91#define FUSE_ROOT_ID 1 94#define FUSE_ROOT_ID 1
@@ -278,6 +281,7 @@ enum fuse_opcode {
278 FUSE_POLL = 40, 281 FUSE_POLL = 40,
279 FUSE_NOTIFY_REPLY = 41, 282 FUSE_NOTIFY_REPLY = 41,
280 FUSE_BATCH_FORGET = 42, 283 FUSE_BATCH_FORGET = 42,
284 FUSE_FALLOCATE = 43,
281 285
282 /* CUSE specific operations */ 286 /* CUSE specific operations */
283 CUSE_INIT = 4096, 287 CUSE_INIT = 4096,
@@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out {
571 __u64 kh; 575 __u64 kh;
572}; 576};
573 577
578struct fuse_fallocate_in {
579 __u64 fh;
580 __u64 offset;
581 __u64 length;
582 __u32 mode;
583 __u32 padding;
584};
585
574struct fuse_in_header { 586struct fuse_in_header {
575 __u32 len; 587 __u32 len;
576 __u32 opcode; 588 __u32 opcode;
diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h
new file mode 100644
index 000000000000..a65c86429e84
--- /dev/null
+++ b/include/linux/i2c-mux-pinctrl.h
@@ -0,0 +1,41 @@
1/*
2 * i2c-mux-pinctrl platform data
3 *
4 * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _LINUX_I2C_MUX_PINCTRL_H
20#define _LINUX_I2C_MUX_PINCTRL_H
21
22/**
23 * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl
24 * @parent_bus_num: Parent I2C bus number
25 * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic.
26 * @bus_count: Number of child busses. Also the number of elements in
27 * @pinctrl_states
28 * @pinctrl_states: The names of the pinctrl state to select for each child bus
29 * @pinctrl_state_idle: The pinctrl state to select when no child bus is being
30 * accessed. If NULL, the most recently used pinctrl state will be left
31 * selected.
32 */
33struct i2c_mux_pinctrl_platform_data {
34 int parent_bus_num;
35 int base_bus_num;
36 int bus_count;
37 const char **pinctrl_states;
38 const char *pinctrl_state_idle;
39};
40
41#endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e4baff5f7ff4..9e65eff6af3b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,6 +149,7 @@ extern struct cred init_cred;
149 .normal_prio = MAX_PRIO-20, \ 149 .normal_prio = MAX_PRIO-20, \
150 .policy = SCHED_NORMAL, \ 150 .policy = SCHED_NORMAL, \
151 .cpus_allowed = CPU_MASK_ALL, \ 151 .cpus_allowed = CPU_MASK_ALL, \
152 .nr_cpus_allowed= NR_CPUS, \
152 .mm = NULL, \ 153 .mm = NULL, \
153 .active_mm = &init_mm, \ 154 .active_mm = &init_mm, \
154 .se = { \ 155 .se = { \
@@ -157,7 +158,6 @@ extern struct cred init_cred;
157 .rt = { \ 158 .rt = { \
158 .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ 159 .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
159 .time_slice = RR_TIMESLICE, \ 160 .time_slice = RR_TIMESLICE, \
160 .nr_cpus_allowed = NR_CPUS, \
161 }, \ 161 }, \
162 .tasks = LIST_HEAD_INIT(tsk.tasks), \ 162 .tasks = LIST_HEAD_INIT(tsk.tasks), \
163 INIT_PUSHABLE_TASKS(tsk) \ 163 INIT_PUSHABLE_TASKS(tsk) \
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0d04cd69ab9b..ffc444c38b0a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
368 iter->index++; 368 iter->index++;
369 if (likely(*slot)) 369 if (likely(*slot))
370 return slot; 370 return slot;
371 if (flags & RADIX_TREE_ITER_CONTIG) 371 if (flags & RADIX_TREE_ITER_CONTIG) {
372 /* forbid switching to the next chunk */
373 iter->next_index = 0;
372 break; 374 break;
375 }
373 } 376 }
374 } 377 }
375 return NULL; 378 return NULL;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f34437e835a7..6029d8c54476 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
145 145
146 146
147extern void calc_global_load(unsigned long ticks); 147extern void calc_global_load(unsigned long ticks);
148extern void update_cpu_load_nohz(void);
148 149
149extern unsigned long get_parent_ip(unsigned long addr); 150extern unsigned long get_parent_ip(unsigned long addr);
150 151
@@ -1187,7 +1188,6 @@ struct sched_rt_entity {
1187 struct list_head run_list; 1188 struct list_head run_list;
1188 unsigned long timeout; 1189 unsigned long timeout;
1189 unsigned int time_slice; 1190 unsigned int time_slice;
1190 int nr_cpus_allowed;
1191 1191
1192 struct sched_rt_entity *back; 1192 struct sched_rt_entity *back;
1193#ifdef CONFIG_RT_GROUP_SCHED 1193#ifdef CONFIG_RT_GROUP_SCHED
@@ -1252,6 +1252,7 @@ struct task_struct {
1252#endif 1252#endif
1253 1253
1254 unsigned int policy; 1254 unsigned int policy;
1255 int nr_cpus_allowed;
1255 cpumask_t cpus_allowed; 1256 cpumask_t cpus_allowed;
1256 1257
1257#ifdef CONFIG_PREEMPT_RCU 1258#ifdef CONFIG_PREEMPT_RCU
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b6661933e252..c84ec68eaec9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -197,6 +197,10 @@ struct swap_info_struct {
197 struct block_device *bdev; /* swap device or bdev of swap file */ 197 struct block_device *bdev; /* swap device or bdev of swap file */
198 struct file *swap_file; /* seldom referenced */ 198 struct file *swap_file; /* seldom referenced */
199 unsigned int old_block_size; /* seldom referenced */ 199 unsigned int old_block_size; /* seldom referenced */
200#ifdef CONFIG_FRONTSWAP
201 unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
202 atomic_t frontswap_pages; /* frontswap pages in-use counter */
203#endif
200}; 204};
201 205
202struct swap_list_t { 206struct swap_list_t {
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
new file mode 100644
index 000000000000..e282624e8c10
--- /dev/null
+++ b/include/linux/swapfile.h
@@ -0,0 +1,13 @@
1#ifndef _LINUX_SWAPFILE_H
2#define _LINUX_SWAPFILE_H
3
4/*
5 * these were static in swapfile.c but frontswap.c needs them and we don't
6 * want to expose them to the dozens of source files that include swap.h
7 */
8extern spinlock_t swap_lock;
9extern struct swap_list_t swap_list;
10extern struct swap_info_struct *swap_info[];
11extern int try_to_unuse(unsigned int, bool, unsigned long);
12
13#endif /* _LINUX_SWAPFILE_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f3527d6184a..72fcd3069a90 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
896 mutex_unlock(&cgroup_mutex); 896 mutex_unlock(&cgroup_mutex);
897 897
898 /* 898 /*
899 * Drop the active superblock reference that we took when we 899 * We want to drop the active superblock reference from the
900 * created the cgroup 900 * cgroup creation after all the dentry refs are gone -
901 * kill_sb gets mighty unhappy otherwise. Mark
902 * dentry->d_fsdata with cgroup_diput() to tell
903 * cgroup_d_release() to call deactivate_super().
901 */ 904 */
902 deactivate_super(cgrp->root->sb); 905 dentry->d_fsdata = cgroup_diput;
903 906
904 /* 907 /*
905 * if we're getting rid of the cgroup, refcount should ensure 908 * if we're getting rid of the cgroup, refcount should ensure
@@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d)
925 return 1; 928 return 1;
926} 929}
927 930
931static void cgroup_d_release(struct dentry *dentry)
932{
933 /* did cgroup_diput() tell me to deactivate super? */
934 if (dentry->d_fsdata == cgroup_diput)
935 deactivate_super(dentry->d_sb);
936}
937
928static void remove_dir(struct dentry *d) 938static void remove_dir(struct dentry *d)
929{ 939{
930 struct dentry *parent = dget(d->d_parent); 940 struct dentry *parent = dget(d->d_parent);
@@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
1532 static const struct dentry_operations cgroup_dops = { 1542 static const struct dentry_operations cgroup_dops = {
1533 .d_iput = cgroup_diput, 1543 .d_iput = cgroup_diput,
1534 .d_delete = cgroup_delete, 1544 .d_delete = cgroup_delete,
1545 .d_release = cgroup_d_release,
1535 }; 1546 };
1536 1547
1537 struct inode *inode = 1548 struct inode *inode =
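
The cgroup change above can no longer call deactivate_super() from ->d_iput, so cgroup_diput() leaves its own address in dentry->d_fsdata and the new ->d_release handler finishes the job once the last dentry reference is gone. A stripped-down sketch of that handshake pattern, with all names (my_object, my_diput, my_release) invented for illustration:

struct my_object {
	void *private;		/* plays the role of dentry->d_fsdata */
	bool super_active;	/* stands in for the superblock reference */
};

static void my_diput(struct my_object *obj)
{
	/* too early to drop the reference here; leave a marker instead */
	obj->private = my_diput;
}

static void my_release(struct my_object *obj)
{
	/* did my_diput() ask us to finish the teardown? */
	if (obj->private == my_diput)
		obj->super_active = false;	/* i.e. deactivate_super() */
}
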
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index fc275e4f629b..eebd6d5cfb44 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -275,8 +275,10 @@ void handle_nested_irq(unsigned int irq)
275 kstat_incr_irqs_this_cpu(irq, desc); 275 kstat_incr_irqs_this_cpu(irq, desc);
276 276
277 action = desc->action; 277 action = desc->action;
278 if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) 278 if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
279 desc->istate |= IRQS_PENDING;
279 goto out_unlock; 280 goto out_unlock;
281 }
280 282
281 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); 283 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
282 raw_spin_unlock_irq(&desc->lock); 284 raw_spin_unlock_irq(&desc->lock);
@@ -324,8 +326,10 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
324 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); 326 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
325 kstat_incr_irqs_this_cpu(irq, desc); 327 kstat_incr_irqs_this_cpu(irq, desc);
326 328
327 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) 329 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
330 desc->istate |= IRQS_PENDING;
328 goto out_unlock; 331 goto out_unlock;
332 }
329 333
330 handle_irq_event(desc); 334 handle_irq_event(desc);
331 335
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 8e5c56b3b7d9..001fa5bab490 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -101,6 +101,9 @@ extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
101 101
102extern void irq_set_thread_affinity(struct irq_desc *desc); 102extern void irq_set_thread_affinity(struct irq_desc *desc);
103 103
104extern int irq_do_set_affinity(struct irq_data *data,
105 const struct cpumask *dest, bool force);
106
104/* Inline functions for support of irq chips on slow busses */ 107/* Inline functions for support of irq chips on slow busses */
105static inline void chip_bus_lock(struct irq_desc *desc) 108static inline void chip_bus_lock(struct irq_desc *desc)
106{ 109{
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ea0c6c2ae6f7..8c548232ba39 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -142,6 +142,25 @@ static inline void
142irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } 142irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
143#endif 143#endif
144 144
145int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
146 bool force)
147{
148 struct irq_desc *desc = irq_data_to_desc(data);
149 struct irq_chip *chip = irq_data_get_irq_chip(data);
150 int ret;
151
152 ret = chip->irq_set_affinity(data, mask, false);
153 switch (ret) {
154 case IRQ_SET_MASK_OK:
155 cpumask_copy(data->affinity, mask);
156 case IRQ_SET_MASK_OK_NOCOPY:
157 irq_set_thread_affinity(desc);
158 ret = 0;
159 }
160
161 return ret;
162}
163
145int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) 164int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
146{ 165{
147 struct irq_chip *chip = irq_data_get_irq_chip(data); 166 struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -152,14 +171,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
152 return -EINVAL; 171 return -EINVAL;
153 172
154 if (irq_can_move_pcntxt(data)) { 173 if (irq_can_move_pcntxt(data)) {
155 ret = chip->irq_set_affinity(data, mask, false); 174 ret = irq_do_set_affinity(data, mask, false);
156 switch (ret) {
157 case IRQ_SET_MASK_OK:
158 cpumask_copy(data->affinity, mask);
159 case IRQ_SET_MASK_OK_NOCOPY:
160 irq_set_thread_affinity(desc);
161 ret = 0;
162 }
163 } else { 175 } else {
164 irqd_set_move_pending(data); 176 irqd_set_move_pending(data);
165 irq_copy_pending(desc, mask); 177 irq_copy_pending(desc, mask);
@@ -283,9 +295,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
283static int 295static int
284setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) 296setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
285{ 297{
286 struct irq_chip *chip = irq_desc_get_chip(desc);
287 struct cpumask *set = irq_default_affinity; 298 struct cpumask *set = irq_default_affinity;
288 int ret, node = desc->irq_data.node; 299 int node = desc->irq_data.node;
289 300
290 /* Excludes PER_CPU and NO_BALANCE interrupts */ 301 /* Excludes PER_CPU and NO_BALANCE interrupts */
291 if (!irq_can_set_affinity(irq)) 302 if (!irq_can_set_affinity(irq))
@@ -311,13 +322,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
311 if (cpumask_intersects(mask, nodemask)) 322 if (cpumask_intersects(mask, nodemask))
312 cpumask_and(mask, mask, nodemask); 323 cpumask_and(mask, mask, nodemask);
313 } 324 }
314 ret = chip->irq_set_affinity(&desc->irq_data, mask, false); 325 irq_do_set_affinity(&desc->irq_data, mask, false);
315 switch (ret) {
316 case IRQ_SET_MASK_OK:
317 cpumask_copy(desc->irq_data.affinity, mask);
318 case IRQ_SET_MASK_OK_NOCOPY:
319 irq_set_thread_affinity(desc);
320 }
321 return 0; 326 return 0;
322} 327}
323#else 328#else
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index c3c89751b327..ca3f4aaff707 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -42,17 +42,8 @@ void irq_move_masked_irq(struct irq_data *idata)
42 * For correct operation this depends on the caller 42 * For correct operation this depends on the caller
43 * masking the irqs. 43 * masking the irqs.
44 */ 44 */
45 if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) 45 if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)
46 < nr_cpu_ids)) { 46 irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false);
47 int ret = chip->irq_set_affinity(&desc->irq_data,
48 desc->pending_mask, false);
49 switch (ret) {
50 case IRQ_SET_MASK_OK:
51 cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
52 case IRQ_SET_MASK_OK_NOCOPY:
53 irq_set_thread_affinity(desc);
54 }
55 }
56 47
57 cpumask_clear(desc->pending_mask); 48 cpumask_clear(desc->pending_mask);
58} 49}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..c46958e26121 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
142#define SCHED_FEAT(name, enabled) \ 142#define SCHED_FEAT(name, enabled) \
143 #name , 143 #name ,
144 144
145static __read_mostly char *sched_feat_names[] = { 145static const char * const sched_feat_names[] = {
146#include "features.h" 146#include "features.h"
147 NULL
148}; 147};
149 148
150#undef SCHED_FEAT 149#undef SCHED_FEAT
@@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
2517 sched_avg_update(this_rq); 2516 sched_avg_update(this_rq);
2518} 2517}
2519 2518
2519#ifdef CONFIG_NO_HZ
2520/*
2521 * There is no sane way to deal with nohz on smp when using jiffies because the
2522 * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
2523 * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
2524 *
2525 * Therefore we cannot use the delta approach from the regular tick since that
2526 * would seriously skew the load calculation. However we'll make do for those
2527 * updates happening while idle (nohz_idle_balance) or coming out of idle
2528 * (tick_nohz_idle_exit).
2529 *
2530 * This means we might still be one tick off for nohz periods.
2531 */
2532
2520/* 2533/*
2521 * Called from nohz_idle_balance() to update the load ratings before doing the 2534 * Called from nohz_idle_balance() to update the load ratings before doing the
2522 * idle balance. 2535 * idle balance.
2523 */ 2536 */
2524void update_idle_cpu_load(struct rq *this_rq) 2537void update_idle_cpu_load(struct rq *this_rq)
2525{ 2538{
2526 unsigned long curr_jiffies = jiffies; 2539 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2527 unsigned long load = this_rq->load.weight; 2540 unsigned long load = this_rq->load.weight;
2528 unsigned long pending_updates; 2541 unsigned long pending_updates;
2529 2542
2530 /* 2543 /*
2531 * Bloody broken means of dealing with nohz, but better than nothing.. 2544 * bail if there's load or we're actually up-to-date.
2532 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
2533 * update and see 0 difference the one time and 2 the next, even though
2534 * we ticked at roughtly the same rate.
2535 *
2536 * Hence we only use this from nohz_idle_balance() and skip this
2537 * nonsense when called from the scheduler_tick() since that's
2538 * guaranteed a stable rate.
2539 */ 2545 */
2540 if (load || curr_jiffies == this_rq->last_load_update_tick) 2546 if (load || curr_jiffies == this_rq->last_load_update_tick)
2541 return; 2547 return;
@@ -2547,12 +2553,38 @@ void update_idle_cpu_load(struct rq *this_rq)
2547} 2553}
2548 2554
2549/* 2555/*
2556 * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
2557 */
2558void update_cpu_load_nohz(void)
2559{
2560 struct rq *this_rq = this_rq();
2561 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2562 unsigned long pending_updates;
2563
2564 if (curr_jiffies == this_rq->last_load_update_tick)
2565 return;
2566
2567 raw_spin_lock(&this_rq->lock);
2568 pending_updates = curr_jiffies - this_rq->last_load_update_tick;
2569 if (pending_updates) {
2570 this_rq->last_load_update_tick = curr_jiffies;
2571 /*
2572 * We were idle, this means load 0, the current load might be
2573 * !0 due to remote wakeups and the sort.
2574 */
2575 __update_cpu_load(this_rq, 0, pending_updates);
2576 }
2577 raw_spin_unlock(&this_rq->lock);
2578}
2579#endif /* CONFIG_NO_HZ */
2580
2581/*
2550 * Called from scheduler_tick() 2582 * Called from scheduler_tick()
2551 */ 2583 */
2552static void update_cpu_load_active(struct rq *this_rq) 2584static void update_cpu_load_active(struct rq *this_rq)
2553{ 2585{
2554 /* 2586 /*
2555 * See the mess in update_idle_cpu_load(). 2587 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
2556 */ 2588 */
2557 this_rq->last_load_update_tick = jiffies; 2589 this_rq->last_load_update_tick = jiffies;
2558 __update_cpu_load(this_rq, this_rq->load.weight, 1); 2590 __update_cpu_load(this_rq, this_rq->load.weight, 1);
@@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
4982 p->sched_class->set_cpus_allowed(p, new_mask); 5014 p->sched_class->set_cpus_allowed(p, new_mask);
4983 5015
4984 cpumask_copy(&p->cpus_allowed, new_mask); 5016 cpumask_copy(&p->cpus_allowed, new_mask);
4985 p->rt.nr_cpus_allowed = cpumask_weight(new_mask); 5017 p->nr_cpus_allowed = cpumask_weight(new_mask);
4986} 5018}
4987 5019
4988/* 5020/*
@@ -5997,11 +6029,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5997 6029
5998 cpumask_or(covered, covered, sg_span); 6030 cpumask_or(covered, covered, sg_span);
5999 6031
6000 sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); 6032 sg->sgp = *per_cpu_ptr(sdd->sgp, i);
6001 atomic_inc(&sg->sgp->ref); 6033 atomic_inc(&sg->sgp->ref);
6002 6034
6003 if (cpumask_test_cpu(cpu, sg_span)) 6035 if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
6036 cpumask_first(sg_span) == cpu) {
6037 WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
6004 groups = sg; 6038 groups = sg;
6039 }
6005 6040
6006 if (!first) 6041 if (!first)
6007 first = sg; 6042 first = sg;
@@ -6403,7 +6438,7 @@ static void sched_init_numa(void)
6403 return; 6438 return;
6404 6439
6405 for (j = 0; j < nr_node_ids; j++) { 6440 for (j = 0; j < nr_node_ids; j++) {
6406 struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); 6441 struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
6407 if (!mask) 6442 if (!mask)
6408 return; 6443 return;
6409 6444
@@ -6691,7 +6726,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
6691 if (!doms_cur) 6726 if (!doms_cur)
6692 doms_cur = &fallback_doms; 6727 doms_cur = &fallback_doms;
6693 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); 6728 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
6694 dattr_cur = NULL;
6695 err = build_sched_domains(doms_cur[0], NULL); 6729 err = build_sched_domains(doms_cur[0], NULL);
6696 register_sched_domain_sysctl(); 6730 register_sched_domain_sysctl();
6697 6731
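
The new CONFIG_NO_HZ block above exists because per-tick load deltas computed from a jiffies value owned by another CPU can be observed as {0,2} instead of {1,1}. A small worked illustration of that drift and of the single-snapshot idiom the code uses; the timeline numbers are made up:

#include <linux/jiffies.h>

/*
 * CPU0 advances jiffies; CPU1 samples it around its own, slightly
 * skewed tick:
 *
 *   jiffies (CPU0):   1000   1001   1002
 *   CPU1 samples:     1000   1000   1002
 *   CPU1 deltas:             0      2      instead of 1, 1
 *
 * update_idle_cpu_load() and update_cpu_load_nohz() therefore take one
 * ACCESS_ONCE() snapshot per update and fold all missed ticks at once
 * via pending_updates, rather than trusting per-tick deltas.
 */
static unsigned long pending_load_updates(unsigned long last_load_update_tick)
{
	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);

	return curr_jiffies - last_load_update_tick;
}
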
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 940e6d17cf96..b2a2d236f27b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
2703 int want_sd = 1; 2703 int want_sd = 1;
2704 int sync = wake_flags & WF_SYNC; 2704 int sync = wake_flags & WF_SYNC;
2705 2705
2706 if (p->rt.nr_cpus_allowed == 1) 2706 if (p->nr_cpus_allowed == 1)
2707 return prev_cpu; 2707 return prev_cpu;
2708 2708
2709 if (sd_flag & SD_BALANCE_WAKE) { 2709 if (sd_flag & SD_BALANCE_WAKE) {
@@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
3503unsigned long scale_rt_power(int cpu) 3503unsigned long scale_rt_power(int cpu)
3504{ 3504{
3505 struct rq *rq = cpu_rq(cpu); 3505 struct rq *rq = cpu_rq(cpu);
3506 u64 total, available; 3506 u64 total, available, age_stamp, avg;
3507 3507
3508 total = sched_avg_period() + (rq->clock - rq->age_stamp); 3508 /*
3509 * Since we're reading these variables without serialization make sure
3510 * we read them once before doing sanity checks on them.
3511 */
3512 age_stamp = ACCESS_ONCE(rq->age_stamp);
3513 avg = ACCESS_ONCE(rq->rt_avg);
3514
3515 total = sched_avg_period() + (rq->clock - age_stamp);
3509 3516
3510 if (unlikely(total < rq->rt_avg)) { 3517 if (unlikely(total < avg)) {
3511 /* Ensures that power won't end up being negative */ 3518 /* Ensures that power won't end up being negative */
3512 available = 0; 3519 available = 0;
3513 } else { 3520 } else {
3514 available = total - rq->rt_avg; 3521 available = total - avg;
3515 } 3522 }
3516 3523
3517 if (unlikely((s64)total < SCHED_POWER_SCALE)) 3524 if (unlikely((s64)total < SCHED_POWER_SCALE))
@@ -3574,11 +3581,26 @@ void update_group_power(struct sched_domain *sd, int cpu)
3574 3581
3575 power = 0; 3582 power = 0;
3576 3583
3577 group = child->groups; 3584 if (child->flags & SD_OVERLAP) {
3578 do { 3585 /*
3579 power += group->sgp->power; 3586 * SD_OVERLAP domains cannot assume that child groups
3580 group = group->next; 3587 * span the current group.
3581 } while (group != child->groups); 3588 */
3589
3590 for_each_cpu(cpu, sched_group_cpus(sdg))
3591 power += power_of(cpu);
3592 } else {
3593 /*
3594 * !SD_OVERLAP domains can assume that child groups
3595 * span the current group.
3596 */
3597
3598 group = child->groups;
3599 do {
3600 power += group->sgp->power;
3601 group = group->next;
3602 } while (group != child->groups);
3603 }
3582 3604
3583 sdg->sgp->power = power; 3605 sdg->sgp->power = power;
3584} 3606}
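
scale_rt_power() now snapshots rq->age_stamp and rq->rt_avg with ACCESS_ONCE() before the underflow check, since both are read without the runqueue lock. Rough arithmetic of the function with made-up numbers; the final scaling step (available scaled against total in units of SCHED_POWER_SCALE) falls outside the hunk shown above, so treat it as inferred:

#include <linux/math64.h>

/*
 * Example: sched_avg_period() ~= 1e9 ns, 1e8 ns since age_stamp and
 * 2e8 ns of recent RT/IRQ time.
 *
 *   total     = 1e9 + 1e8             = 1.1e9
 *   available = 1.1e9 - 2e8           = 9e8
 *   result    ~= 9e8 / (1.1e9 / 1024) ~= 837 of SCHED_POWER_SCALE (1024)
 *
 * i.e. roughly 82% of the CPU's nominal capacity is left for CFS.
 */
static u64 demo_scale_rt_power(u64 avg_period, u64 since_age_stamp, u64 rt_avg)
{
	u64 total = avg_period + since_age_stamp;
	u64 available = total > rt_avg ? total - rt_avg : 0;

	if (total < 1024)			/* SCHED_POWER_SCALE */
		total = 1024;
	return div_u64(available, total >> 10);	/* SCHED_POWER_SHIFT */
}
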
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c5565c3c515f..2a4e8dffbd6b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq)
274 274
275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
276{ 276{
277 struct task_struct *p;
278
277 if (!rt_entity_is_task(rt_se)) 279 if (!rt_entity_is_task(rt_se))
278 return; 280 return;
279 281
282 p = rt_task_of(rt_se);
280 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 283 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
281 284
282 rt_rq->rt_nr_total++; 285 rt_rq->rt_nr_total++;
283 if (rt_se->nr_cpus_allowed > 1) 286 if (p->nr_cpus_allowed > 1)
284 rt_rq->rt_nr_migratory++; 287 rt_rq->rt_nr_migratory++;
285 288
286 update_rt_migration(rt_rq); 289 update_rt_migration(rt_rq);
@@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
288 291
289static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 292static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
290{ 293{
294 struct task_struct *p;
295
291 if (!rt_entity_is_task(rt_se)) 296 if (!rt_entity_is_task(rt_se))
292 return; 297 return;
293 298
299 p = rt_task_of(rt_se);
294 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 300 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
295 301
296 rt_rq->rt_nr_total--; 302 rt_rq->rt_nr_total--;
297 if (rt_se->nr_cpus_allowed > 1) 303 if (p->nr_cpus_allowed > 1)
298 rt_rq->rt_nr_migratory--; 304 rt_rq->rt_nr_migratory--;
299 305
300 update_rt_migration(rt_rq); 306 update_rt_migration(rt_rq);
@@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1161 1167
1162 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); 1168 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
1163 1169
1164 if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) 1170 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1165 enqueue_pushable_task(rq, p); 1171 enqueue_pushable_task(rq, p);
1166 1172
1167 inc_nr_running(rq); 1173 inc_nr_running(rq);
@@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1225 1231
1226 cpu = task_cpu(p); 1232 cpu = task_cpu(p);
1227 1233
1228 if (p->rt.nr_cpus_allowed == 1) 1234 if (p->nr_cpus_allowed == 1)
1229 goto out; 1235 goto out;
1230 1236
1231 /* For anything but wake ups, just return the task_cpu */ 1237 /* For anything but wake ups, just return the task_cpu */
@@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1260 * will have to sort it out. 1266 * will have to sort it out.
1261 */ 1267 */
1262 if (curr && unlikely(rt_task(curr)) && 1268 if (curr && unlikely(rt_task(curr)) &&
1263 (curr->rt.nr_cpus_allowed < 2 || 1269 (curr->nr_cpus_allowed < 2 ||
1264 curr->prio <= p->prio) && 1270 curr->prio <= p->prio) &&
1265 (p->rt.nr_cpus_allowed > 1)) { 1271 (p->nr_cpus_allowed > 1)) {
1266 int target = find_lowest_rq(p); 1272 int target = find_lowest_rq(p);
1267 1273
1268 if (target != -1) 1274 if (target != -1)
@@ -1276,10 +1282,10 @@ out:
1276 1282
1277static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1283static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1278{ 1284{
1279 if (rq->curr->rt.nr_cpus_allowed == 1) 1285 if (rq->curr->nr_cpus_allowed == 1)
1280 return; 1286 return;
1281 1287
1282 if (p->rt.nr_cpus_allowed != 1 1288 if (p->nr_cpus_allowed != 1
1283 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1289 && cpupri_find(&rq->rd->cpupri, p, NULL))
1284 return; 1290 return;
1285 1291
@@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1395 * The previous task needs to be made eligible for pushing 1401 * The previous task needs to be made eligible for pushing
1396 * if it is still active 1402 * if it is still active
1397 */ 1403 */
1398 if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) 1404 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1399 enqueue_pushable_task(rq, p); 1405 enqueue_pushable_task(rq, p);
1400} 1406}
1401 1407
@@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1408{ 1414{
1409 if (!task_running(rq, p) && 1415 if (!task_running(rq, p) &&
1410 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && 1416 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
1411 (p->rt.nr_cpus_allowed > 1)) 1417 (p->nr_cpus_allowed > 1))
1412 return 1; 1418 return 1;
1413 return 0; 1419 return 0;
1414} 1420}
@@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task)
1464 if (unlikely(!lowest_mask)) 1470 if (unlikely(!lowest_mask))
1465 return -1; 1471 return -1;
1466 1472
1467 if (task->rt.nr_cpus_allowed == 1) 1473 if (task->nr_cpus_allowed == 1)
1468 return -1; /* No other targets possible */ 1474 return -1; /* No other targets possible */
1469 1475
1470 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1476 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
1586 1592
1587 BUG_ON(rq->cpu != task_cpu(p)); 1593 BUG_ON(rq->cpu != task_cpu(p));
1588 BUG_ON(task_current(rq, p)); 1594 BUG_ON(task_current(rq, p));
1589 BUG_ON(p->rt.nr_cpus_allowed <= 1); 1595 BUG_ON(p->nr_cpus_allowed <= 1);
1590 1596
1591 BUG_ON(!p->on_rq); 1597 BUG_ON(!p->on_rq);
1592 BUG_ON(!rt_task(p)); 1598 BUG_ON(!rt_task(p));
@@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
1793 if (!task_running(rq, p) && 1799 if (!task_running(rq, p) &&
1794 !test_tsk_need_resched(rq->curr) && 1800 !test_tsk_need_resched(rq->curr) &&
1795 has_pushable_tasks(rq) && 1801 has_pushable_tasks(rq) &&
1796 p->rt.nr_cpus_allowed > 1 && 1802 p->nr_cpus_allowed > 1 &&
1797 rt_task(rq->curr) && 1803 rt_task(rq->curr) &&
1798 (rq->curr->rt.nr_cpus_allowed < 2 || 1804 (rq->curr->nr_cpus_allowed < 2 ||
1799 rq->curr->prio <= p->prio)) 1805 rq->curr->prio <= p->prio))
1800 push_rt_tasks(rq); 1806 push_rt_tasks(rq);
1801} 1807}
@@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1817 * Only update if the process changes its state from whether it 1823 * Only update if the process changes its state from whether it
1818 * can migrate or not. 1824 * can migrate or not.
1819 */ 1825 */
1820 if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) 1826 if ((p->nr_cpus_allowed > 1) == (weight > 1))
1821 return; 1827 return;
1822 1828
1823 rq = task_rq(p); 1829 rq = task_rq(p);
@@ -1979,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1979 1985
1980static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 1986static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1981{ 1987{
1988 struct sched_rt_entity *rt_se = &p->rt;
1989
1982 update_curr_rt(rq); 1990 update_curr_rt(rq);
1983 1991
1984 watchdog(rq, p); 1992 watchdog(rq, p);
@@ -1996,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1996 p->rt.time_slice = RR_TIMESLICE; 2004 p->rt.time_slice = RR_TIMESLICE;
1997 2005
1998 /* 2006 /*
 1999 * Requeue to the end of queue if we are not the only element 2007 * Requeue to the end of queue if we (or any of our ancestors) are not
 2000 * on the queue: 2008 * the only element on the queue

2001 */ 2009 */
2002 if (p->rt.run_list.prev != p->rt.run_list.next) { 2010 for_each_sched_rt_entity(rt_se) {
2003 requeue_task_rt(rq, p, 0); 2011 if (rt_se->run_list.prev != rt_se->run_list.next) {
2004 set_tsk_need_resched(p); 2012 requeue_task_rt(rq, p, 0);
2013 set_tsk_need_resched(p);
2014 return;
2015 }
2005 } 2016 }
2006} 2017}
2007 2018
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index e1a797e028a3..98f60c5caa1b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -31,6 +31,12 @@ void __init idle_thread_set_boot_cpu(void)
31 per_cpu(idle_threads, smp_processor_id()) = current; 31 per_cpu(idle_threads, smp_processor_id()) = current;
32} 32}
33 33
34/**
35 * idle_init - Initialize the idle thread for a cpu
36 * @cpu: The cpu for which the idle thread should be initialized
37 *
38 * Creates the thread if it does not exist.
39 */
34static inline void idle_init(unsigned int cpu) 40static inline void idle_init(unsigned int cpu)
35{ 41{
36 struct task_struct *tsk = per_cpu(idle_threads, cpu); 42 struct task_struct *tsk = per_cpu(idle_threads, cpu);
@@ -45,17 +51,16 @@ static inline void idle_init(unsigned int cpu)
45} 51}
46 52
47/** 53/**
48 * idle_thread_init - Initialize the idle thread for a cpu 54 * idle_threads_init - Initialize idle threads for all cpus
49 * @cpu: The cpu for which the idle thread should be initialized
50 *
51 * Creates the thread if it does not exist.
52 */ 55 */
53void __init idle_threads_init(void) 56void __init idle_threads_init(void)
54{ 57{
55 unsigned int cpu; 58 unsigned int cpu, boot_cpu;
59
60 boot_cpu = smp_processor_id();
56 61
57 for_each_possible_cpu(cpu) { 62 for_each_possible_cpu(cpu) {
58 if (cpu != smp_processor_id()) 63 if (cpu != boot_cpu)
59 idle_init(cpu); 64 idle_init(cpu);
60 } 65 }
61} 66}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9cd928f7a7c6..7e1ce012a851 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev)
297} 297}
298EXPORT_SYMBOL_GPL(clockevents_register_device); 298EXPORT_SYMBOL_GPL(clockevents_register_device);
299 299
300static void clockevents_config(struct clock_event_device *dev, 300void clockevents_config(struct clock_event_device *dev, u32 freq)
301 u32 freq)
302{ 301{
303 u64 sec; 302 u64 sec;
304 303
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..da70c6db496c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void)
576 /* Update jiffies first */ 576 /* Update jiffies first */
577 select_nohz_load_balancer(0); 577 select_nohz_load_balancer(0);
578 tick_do_update_jiffies64(now); 578 tick_do_update_jiffies64(now);
579 update_cpu_load_nohz();
579 580
580#ifndef CONFIG_VIRT_CPU_ACCOUNTING 581#ifndef CONFIG_VIRT_CPU_ACCOUNTING
581 /* 582 /*
@@ -814,6 +815,16 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
814 return HRTIMER_RESTART; 815 return HRTIMER_RESTART;
815} 816}
816 817
818static int sched_skew_tick;
819
820static int __init skew_tick(char *str)
821{
822 get_option(&str, &sched_skew_tick);
823
824 return 0;
825}
826early_param("skew_tick", skew_tick);
827
817/** 828/**
818 * tick_setup_sched_timer - setup the tick emulation timer 829 * tick_setup_sched_timer - setup the tick emulation timer
819 */ 830 */
@@ -831,6 +842,14 @@ void tick_setup_sched_timer(void)
831 /* Get the next period (per cpu) */ 842 /* Get the next period (per cpu) */
832 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 843 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
833 844
845 /* Offset the tick to avert xtime_lock contention. */
846 if (sched_skew_tick) {
847 u64 offset = ktime_to_ns(tick_period) >> 1;
848 do_div(offset, num_possible_cpus());
849 offset *= smp_processor_id();
850 hrtimer_add_expires_ns(&ts->sched_timer, offset);
851 }
852
834 for (;;) { 853 for (;;) {
835 hrtimer_forward(&ts->sched_timer, now, tick_period); 854 hrtimer_forward(&ts->sched_timer, now, tick_period);
836 hrtimer_start_expires(&ts->sched_timer, 855 hrtimer_start_expires(&ts->sched_timer,
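
The skew_tick hunk above staggers each CPU's tick within half a tick period so that all CPUs do not contend on xtime_lock at the same instant; it is enabled by booting with "skew_tick=1". A worked sketch of the offset each CPU ends up with, assuming HZ=1000 and 4 possible CPUs (assumptions, not from the patch):

#include <linux/math64.h>

/*
 * tick_period = 1,000,000 ns at HZ=1000, so with 4 possible CPUs:
 *   CPU0 -> 0 ns, CPU1 -> 125,000 ns, CPU2 -> 250,000 ns, CPU3 -> 375,000 ns
 */
static u64 demo_tick_skew_ns(u64 tick_period_ns, unsigned int cpu,
			     unsigned int nr_possible_cpus)
{
	/* half a tick, spread evenly over all possible CPUs */
	return div_u64(tick_period_ns >> 1, nr_possible_cpus) * cpu;
}
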
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d7c878cc006c..e7964296fd50 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
686 * during iterating; it can be zero only at the beginning. 686 * during iterating; it can be zero only at the beginning.
687 * And we cannot overflow iter->next_index in a single step, 687 * And we cannot overflow iter->next_index in a single step,
688 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 688 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
689 *
 690 * This condition is also used by radix_tree_next_slot() to stop
 691 * contiguous iterating, and forbid switching to the next chunk.
689 */ 692 */
690 index = iter->next_index; 693 index = iter->next_index;
691 if (!index && iter->index) 694 if (!index && iter->index)
diff --git a/mm/Kconfig b/mm/Kconfig
index b2176374b98e..82fed4eb2b6f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -389,3 +389,20 @@ config CLEANCACHE
389 in a negligible performance hit. 389 in a negligible performance hit.
390 390
391 If unsure, say Y to enable cleancache 391 If unsure, say Y to enable cleancache
392
393config FRONTSWAP
394 bool "Enable frontswap to cache swap pages if tmem is present"
395 depends on SWAP
396 default n
397 help
398 Frontswap is so named because it can be thought of as the opposite
399 of a "backing" store for a swap device. The data is stored into
400 "transcendent memory", memory that is not directly accessible or
401 addressable by the kernel and is of unknown and possibly
402 time-varying size. When space in transcendent memory is available,
403 a significant swap I/O reduction may be achieved. When none is
404 available, all frontswap calls are reduced to a single pointer-
405 compare-against-NULL resulting in a negligible performance hit
406 and swap data is stored as normal on the matching swap device.
407
408 If unsure, say Y to enable frontswap.
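
The "negligible performance hit" in the help text comes from the inline wrappers in include/linux/frontswap.h (present in the diffstat but not reproduced here), which test a global flag before calling into the frontend; the frontend below names that flag frontswap_enabled. A sketch of what such a guard looks like; the exact upstream wrapper may differ:

#include <linux/types.h>

struct page;

extern bool frontswap_enabled;
extern int __frontswap_store(struct page *page);

static inline int frontswap_store(struct page *page)
{
	int ret = -1;

	if (frontswap_enabled)	/* cheap test when no backend is registered */
		ret = __frontswap_store(page);
	return ret;
}
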
diff --git a/mm/Makefile b/mm/Makefile
index a156285ce88d..2e2fbbefb99f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
29 29
30obj-$(CONFIG_BOUNCE) += bounce.o 30obj-$(CONFIG_BOUNCE) += bounce.o
31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o 31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
32obj-$(CONFIG_FRONTSWAP) += frontswap.o
32obj-$(CONFIG_HAS_DMA) += dmapool.o 33obj-$(CONFIG_HAS_DMA) += dmapool.o
33obj-$(CONFIG_HUGETLBFS) += hugetlb.o 34obj-$(CONFIG_HUGETLBFS) += hugetlb.o
34obj-$(CONFIG_NUMA) += mempolicy.o 35obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 4ac338af5120..7ea259d82a99 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
236 */ 236 */
237 while (unlikely(too_many_isolated(zone))) { 237 while (unlikely(too_many_isolated(zone))) {
238 /* async migration should just abort */ 238 /* async migration should just abort */
239 if (cc->mode != COMPACT_SYNC) 239 if (!cc->sync)
240 return 0; 240 return 0;
241 241
242 congestion_wait(BLK_RW_ASYNC, HZ/10); 242 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
304 * satisfies the allocation 304 * satisfies the allocation
305 */ 305 */
306 pageblock_nr = low_pfn >> pageblock_order; 306 pageblock_nr = low_pfn >> pageblock_order;
307 if (cc->mode != COMPACT_SYNC && 307 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
308 last_pageblock_nr != pageblock_nr &&
309 !migrate_async_suitable(get_pageblock_migratetype(page))) { 308 !migrate_async_suitable(get_pageblock_migratetype(page))) {
310 low_pfn += pageblock_nr_pages; 309 low_pfn += pageblock_nr_pages;
311 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 310 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
326 continue; 325 continue;
327 } 326 }
328 327
329 if (cc->mode != COMPACT_SYNC) 328 if (!cc->sync)
330 mode |= ISOLATE_ASYNC_MIGRATE; 329 mode |= ISOLATE_ASYNC_MIGRATE;
331 330
332 lruvec = mem_cgroup_page_lruvec(page, zone); 331 lruvec = mem_cgroup_page_lruvec(page, zone);
@@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
361 360
362#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 361#endif /* CONFIG_COMPACTION || CONFIG_CMA */
363#ifdef CONFIG_COMPACTION 362#ifdef CONFIG_COMPACTION
364/*
365 * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
366 * converted to MIGRATE_MOVABLE type, false otherwise.
367 */
368static bool rescue_unmovable_pageblock(struct page *page)
369{
370 unsigned long pfn, start_pfn, end_pfn;
371 struct page *start_page, *end_page;
372
373 pfn = page_to_pfn(page);
374 start_pfn = pfn & ~(pageblock_nr_pages - 1);
375 end_pfn = start_pfn + pageblock_nr_pages;
376
377 start_page = pfn_to_page(start_pfn);
378 end_page = pfn_to_page(end_pfn);
379
380 /* Do not deal with pageblocks that overlap zones */
381 if (page_zone(start_page) != page_zone(end_page))
382 return false;
383
384 for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
385 page++) {
386 if (!pfn_valid_within(pfn))
387 continue;
388
389 if (PageBuddy(page)) {
390 int order = page_order(page);
391
392 pfn += (1 << order) - 1;
393 page += (1 << order) - 1;
394
395 continue;
396 } else if (page_count(page) == 0 || PageLRU(page))
397 continue;
398
399 return false;
400 }
401
402 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
403 move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
404 return true;
405}
406 363
407enum smt_result { 364/* Returns true if the page is within a block suitable for migration to */
408 GOOD_AS_MIGRATION_TARGET, 365static bool suitable_migration_target(struct page *page)
409 FAIL_UNMOVABLE_TARGET,
410 FAIL_BAD_TARGET,
411};
412
413/*
414 * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
415 * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
416 * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
417 */
418static enum smt_result suitable_migration_target(struct page *page,
419 struct compact_control *cc)
420{ 366{
421 367
422 int migratetype = get_pageblock_migratetype(page); 368 int migratetype = get_pageblock_migratetype(page);
423 369
424 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 370 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
425 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 371 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
426 return FAIL_BAD_TARGET; 372 return false;
427 373
428 /* If the page is a large free page, then allow migration */ 374 /* If the page is a large free page, then allow migration */
429 if (PageBuddy(page) && page_order(page) >= pageblock_order) 375 if (PageBuddy(page) && page_order(page) >= pageblock_order)
430 return GOOD_AS_MIGRATION_TARGET; 376 return true;
431 377
432 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 378 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
433 if (cc->mode != COMPACT_ASYNC_UNMOVABLE && 379 if (migrate_async_suitable(migratetype))
434 migrate_async_suitable(migratetype)) 380 return true;
435 return GOOD_AS_MIGRATION_TARGET;
436
437 if (cc->mode == COMPACT_ASYNC_MOVABLE &&
438 migratetype == MIGRATE_UNMOVABLE)
439 return FAIL_UNMOVABLE_TARGET;
440
441 if (cc->mode != COMPACT_ASYNC_MOVABLE &&
442 migratetype == MIGRATE_UNMOVABLE &&
443 rescue_unmovable_pageblock(page))
444 return GOOD_AS_MIGRATION_TARGET;
445 381
446 /* Otherwise skip the block */ 382 /* Otherwise skip the block */
447 return FAIL_BAD_TARGET; 383 return false;
448} 384}
449 385
450/* 386/*
@@ -478,13 +414,6 @@ static void isolate_freepages(struct zone *zone,
478 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; 414 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
479 415
480 /* 416 /*
481 * isolate_freepages() may be called more than once during
482 * compact_zone_order() run and we want only the most recent
483 * count.
484 */
485 cc->nr_pageblocks_skipped = 0;
486
487 /*
488 * Isolate free pages until enough are available to migrate the 417 * Isolate free pages until enough are available to migrate the
489 * pages on cc->migratepages. We stop searching if the migrate 418 * pages on cc->migratepages. We stop searching if the migrate
490 * and free page scanners meet or enough free pages are isolated. 419 * and free page scanners meet or enough free pages are isolated.
@@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone,
492 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; 421 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
493 pfn -= pageblock_nr_pages) { 422 pfn -= pageblock_nr_pages) {
494 unsigned long isolated; 423 unsigned long isolated;
495 enum smt_result ret;
496 424
497 if (!pfn_valid(pfn)) 425 if (!pfn_valid(pfn))
498 continue; 426 continue;
@@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone,
509 continue; 437 continue;
510 438
511 /* Check the block is suitable for migration */ 439 /* Check the block is suitable for migration */
512 ret = suitable_migration_target(page, cc); 440 if (!suitable_migration_target(page))
513 if (ret != GOOD_AS_MIGRATION_TARGET) {
514 if (ret == FAIL_UNMOVABLE_TARGET)
515 cc->nr_pageblocks_skipped++;
516 continue; 441 continue;
517 } 442
518 /* 443 /*
519 * Found a block suitable for isolating free pages from. Now 444 * Found a block suitable for isolating free pages from. Now
520 * we disabled interrupts, double check things are ok and 445 * we disabled interrupts, double check things are ok and
@@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone,
523 */ 448 */
524 isolated = 0; 449 isolated = 0;
525 spin_lock_irqsave(&zone->lock, flags); 450 spin_lock_irqsave(&zone->lock, flags);
526 ret = suitable_migration_target(page, cc); 451 if (suitable_migration_target(page)) {
527 if (ret == GOOD_AS_MIGRATION_TARGET) {
528 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 452 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
529 isolated = isolate_freepages_block(pfn, end_pfn, 453 isolated = isolate_freepages_block(pfn, end_pfn,
530 freelist, false); 454 freelist, false);
531 nr_freepages += isolated; 455 nr_freepages += isolated;
532 } else if (ret == FAIL_UNMOVABLE_TARGET) 456 }
533 cc->nr_pageblocks_skipped++;
534 spin_unlock_irqrestore(&zone->lock, flags); 457 spin_unlock_irqrestore(&zone->lock, flags);
535 458
536 /* 459 /*
@@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
762 685
763 nr_migrate = cc->nr_migratepages; 686 nr_migrate = cc->nr_migratepages;
764 err = migrate_pages(&cc->migratepages, compaction_alloc, 687 err = migrate_pages(&cc->migratepages, compaction_alloc,
765 (unsigned long)&cc->freepages, false, 688 (unsigned long)cc, false,
766 (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT 689 cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
767 : MIGRATE_ASYNC);
768 update_nr_listpages(cc); 690 update_nr_listpages(cc);
769 nr_remaining = cc->nr_migratepages; 691 nr_remaining = cc->nr_migratepages;
770 692
@@ -793,8 +715,7 @@ out:
793 715
794static unsigned long compact_zone_order(struct zone *zone, 716static unsigned long compact_zone_order(struct zone *zone,
795 int order, gfp_t gfp_mask, 717 int order, gfp_t gfp_mask,
796 enum compact_mode mode, 718 bool sync)
797 unsigned long *nr_pageblocks_skipped)
798{ 719{
799 struct compact_control cc = { 720 struct compact_control cc = {
800 .nr_freepages = 0, 721 .nr_freepages = 0,
@@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone,
802 .order = order, 723 .order = order,
803 .migratetype = allocflags_to_migratetype(gfp_mask), 724 .migratetype = allocflags_to_migratetype(gfp_mask),
804 .zone = zone, 725 .zone = zone,
805 .mode = mode, 726 .sync = sync,
806 }; 727 };
807 unsigned long rc;
808
809 INIT_LIST_HEAD(&cc.freepages); 728 INIT_LIST_HEAD(&cc.freepages);
810 INIT_LIST_HEAD(&cc.migratepages); 729 INIT_LIST_HEAD(&cc.migratepages);
811 730
812 rc = compact_zone(zone, &cc); 731 return compact_zone(zone, &cc);
813 *nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
814
815 return rc;
816} 732}
817 733
818int sysctl_extfrag_threshold = 500; 734int sysctl_extfrag_threshold = 500;
@@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
837 struct zoneref *z; 753 struct zoneref *z;
838 struct zone *zone; 754 struct zone *zone;
839 int rc = COMPACT_SKIPPED; 755 int rc = COMPACT_SKIPPED;
840 unsigned long nr_pageblocks_skipped;
841 enum compact_mode mode;
842 756
843 /* 757 /*
844 * Check whether it is worth even starting compaction. The order check is 758 * Check whether it is worth even starting compaction. The order check is
@@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
855 nodemask) { 769 nodemask) {
856 int status; 770 int status;
857 771
858 mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; 772 status = compact_zone_order(zone, order, gfp_mask, sync);
859retry:
860 status = compact_zone_order(zone, order, gfp_mask, mode,
861 &nr_pageblocks_skipped);
862 rc = max(status, rc); 773 rc = max(status, rc);
863 774
864 /* If a normal allocation would succeed, stop compacting */ 775 /* If a normal allocation would succeed, stop compacting */
865 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) 776 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
866 break; 777 break;
867
868 if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
869 if (nr_pageblocks_skipped) {
870 mode = COMPACT_ASYNC_UNMOVABLE;
871 goto retry;
872 }
873 }
874 } 778 }
875 779
876 return rc; 780 return rc;
@@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
904 if (ok && cc->order > zone->compact_order_failed) 808 if (ok && cc->order > zone->compact_order_failed)
905 zone->compact_order_failed = cc->order + 1; 809 zone->compact_order_failed = cc->order + 1;
906 /* Currently async compaction is never deferred. */ 810 /* Currently async compaction is never deferred. */
907 else if (!ok && cc->mode == COMPACT_SYNC) 811 else if (!ok && cc->sync)
908 defer_compaction(zone, cc->order); 812 defer_compaction(zone, cc->order);
909 } 813 }
910 814
@@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
919{ 823{
920 struct compact_control cc = { 824 struct compact_control cc = {
921 .order = order, 825 .order = order,
922 .mode = COMPACT_ASYNC_MOVABLE, 826 .sync = false,
923 }; 827 };
924 828
925 return __compact_pgdat(pgdat, &cc); 829 return __compact_pgdat(pgdat, &cc);
@@ -929,7 +833,7 @@ static int compact_node(int nid)
929{ 833{
930 struct compact_control cc = { 834 struct compact_control cc = {
931 .order = -1, 835 .order = -1,
932 .mode = COMPACT_SYNC, 836 .sync = true,
933 }; 837 };
934 838
935 return __compact_pgdat(NODE_DATA(nid), &cc); 839 return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/frontswap.c b/mm/frontswap.c
new file mode 100644
index 000000000000..e25025574a02
--- /dev/null
+++ b/mm/frontswap.c
@@ -0,0 +1,314 @@
1/*
2 * Frontswap frontend
3 *
4 * This code provides the generic "frontend" layer to call a matching
5 * "backend" driver implementation of frontswap. See
6 * Documentation/vm/frontswap.txt for more information.
7 *
8 * Copyright (C) 2009-2012 Oracle Corp. All rights reserved.
9 * Author: Dan Magenheimer
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2.
12 */
13
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/swap.h>
17#include <linux/swapops.h>
18#include <linux/proc_fs.h>
19#include <linux/security.h>
20#include <linux/capability.h>
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/frontswap.h>
25#include <linux/swapfile.h>
26
27/*
28 * frontswap_ops is set by frontswap_register_ops to contain the pointers
29 * to the frontswap "backend" implementation functions.
30 */
31static struct frontswap_ops frontswap_ops __read_mostly;
32
33/*
34 * This global enablement flag reduces overhead on systems where frontswap_ops
35 * has not been registered, so is preferred to the slower alternative: a
36 * function call that checks a non-global.
37 */
38bool frontswap_enabled __read_mostly;
39EXPORT_SYMBOL(frontswap_enabled);
40
41/*
42 * If enabled, frontswap_store will return failure even on success. As
43 * a result, the swap subsystem will always write the page to swap, in
44 * effect converting frontswap into a writethrough cache. In this mode,
45 * there is no direct reduction in swap writes, but a frontswap backend
46 * can unilaterally "reclaim" any pages in use with no data loss, thus
 47 * providing increased control over maximum memory usage due to frontswap.
48 */
49static bool frontswap_writethrough_enabled __read_mostly;
50
51#ifdef CONFIG_DEBUG_FS
52/*
53 * Counters available via /sys/kernel/debug/frontswap (if debugfs is
54 * properly configured). These are for information only so are not protected
55 * against increment races.
56 */
57static u64 frontswap_loads;
58static u64 frontswap_succ_stores;
59static u64 frontswap_failed_stores;
60static u64 frontswap_invalidates;
61
62static inline void inc_frontswap_loads(void) {
63 frontswap_loads++;
64}
65static inline void inc_frontswap_succ_stores(void) {
66 frontswap_succ_stores++;
67}
68static inline void inc_frontswap_failed_stores(void) {
69 frontswap_failed_stores++;
70}
71static inline void inc_frontswap_invalidates(void) {
72 frontswap_invalidates++;
73}
74#else
75static inline void inc_frontswap_loads(void) { }
76static inline void inc_frontswap_succ_stores(void) { }
77static inline void inc_frontswap_failed_stores(void) { }
78static inline void inc_frontswap_invalidates(void) { }
79#endif
80/*
81 * Register operations for frontswap, returning previous thus allowing
82 * detection of multiple backends and possible nesting.
83 */
84struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
85{
86 struct frontswap_ops old = frontswap_ops;
87
88 frontswap_ops = *ops;
89 frontswap_enabled = true;
90 return old;
91}
92EXPORT_SYMBOL(frontswap_register_ops);
93
94/*
95 * Enable/disable frontswap writethrough (see above).
96 */
97void frontswap_writethrough(bool enable)
98{
99 frontswap_writethrough_enabled = enable;
100}
101EXPORT_SYMBOL(frontswap_writethrough);
102
103/*
104 * Called when a swap device is swapon'd.
105 */
106void __frontswap_init(unsigned type)
107{
108 struct swap_info_struct *sis = swap_info[type];
109
110 BUG_ON(sis == NULL);
111 if (sis->frontswap_map == NULL)
112 return;
113 if (frontswap_enabled)
114 (*frontswap_ops.init)(type);
115}
116EXPORT_SYMBOL(__frontswap_init);
117
118/*
119 * "Store" data from a page to frontswap and associate it with the page's
120 * swaptype and offset. Page must be locked and in the swap cache.
121 * If frontswap already contains a page with matching swaptype and
 122 * offset, the frontswap implementation may either overwrite the data and
123 * return success or invalidate the page from frontswap and return failure.
124 */
125int __frontswap_store(struct page *page)
126{
127 int ret = -1, dup = 0;
128 swp_entry_t entry = { .val = page_private(page), };
129 int type = swp_type(entry);
130 struct swap_info_struct *sis = swap_info[type];
131 pgoff_t offset = swp_offset(entry);
132
133 BUG_ON(!PageLocked(page));
134 BUG_ON(sis == NULL);
135 if (frontswap_test(sis, offset))
136 dup = 1;
137 ret = (*frontswap_ops.store)(type, offset, page);
138 if (ret == 0) {
139 frontswap_set(sis, offset);
140 inc_frontswap_succ_stores();
141 if (!dup)
142 atomic_inc(&sis->frontswap_pages);
143 } else if (dup) {
144 /*
145 failed dup always results in automatic invalidate of
146 the (older) page from frontswap
147 */
148 frontswap_clear(sis, offset);
149 atomic_dec(&sis->frontswap_pages);
150 inc_frontswap_failed_stores();
151 } else
152 inc_frontswap_failed_stores();
153 if (frontswap_writethrough_enabled)
154 /* report failure so swap also writes to swap device */
155 ret = -1;
156 return ret;
157}
158EXPORT_SYMBOL(__frontswap_store);
159
160/*
161 * "Get" data from frontswap associated with swaptype and offset that were
162 * specified when the data was put to frontswap and use it to fill the
163 * specified page with data. Page must be locked and in the swap cache.
164 */
165int __frontswap_load(struct page *page)
166{
167 int ret = -1;
168 swp_entry_t entry = { .val = page_private(page), };
169 int type = swp_type(entry);
170 struct swap_info_struct *sis = swap_info[type];
171 pgoff_t offset = swp_offset(entry);
172
173 BUG_ON(!PageLocked(page));
174 BUG_ON(sis == NULL);
175 if (frontswap_test(sis, offset))
176 ret = (*frontswap_ops.load)(type, offset, page);
177 if (ret == 0)
178 inc_frontswap_loads();
179 return ret;
180}
181EXPORT_SYMBOL(__frontswap_load);
182
183/*
184 * Invalidate any data from frontswap associated with the specified swaptype
185 * and offset so that a subsequent "get" will fail.
186 */
187void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
188{
189 struct swap_info_struct *sis = swap_info[type];
190
191 BUG_ON(sis == NULL);
192 if (frontswap_test(sis, offset)) {
193 (*frontswap_ops.invalidate_page)(type, offset);
194 atomic_dec(&sis->frontswap_pages);
195 frontswap_clear(sis, offset);
196 inc_frontswap_invalidates();
197 }
198}
199EXPORT_SYMBOL(__frontswap_invalidate_page);
200
201/*
202 * Invalidate all data from frontswap associated with all offsets for the
203 * specified swaptype.
204 */
205void __frontswap_invalidate_area(unsigned type)
206{
207 struct swap_info_struct *sis = swap_info[type];
208
209 BUG_ON(sis == NULL);
210 if (sis->frontswap_map == NULL)
211 return;
212 (*frontswap_ops.invalidate_area)(type);
213 atomic_set(&sis->frontswap_pages, 0);
214 memset(sis->frontswap_map, 0, sis->max / sizeof(long));
215}
216EXPORT_SYMBOL(__frontswap_invalidate_area);
217
218/*
219 * Frontswap, like a true swap device, may unnecessarily retain pages
220 * under certain circumstances; "shrink" frontswap is essentially a
 221 * "partial swapoff" and works by calling try_to_unuse to unuse
 222 * enough frontswap pages to attempt -- subject to memory
 223 * constraints -- to reduce the number of pages in frontswap to the
224 * number given in the parameter target_pages.
225 */
226void frontswap_shrink(unsigned long target_pages)
227{
228 struct swap_info_struct *si = NULL;
229 int si_frontswap_pages;
230 unsigned long total_pages = 0, total_pages_to_unuse;
231 unsigned long pages = 0, pages_to_unuse = 0;
232 int type;
233 bool locked = false;
234
235 /*
236 * we don't want to hold swap_lock while doing a very
237 * lengthy try_to_unuse, but swap_list may change
238 * so restart scan from swap_list.head each time
239 */
240 spin_lock(&swap_lock);
241 locked = true;
242 total_pages = 0;
243 for (type = swap_list.head; type >= 0; type = si->next) {
244 si = swap_info[type];
245 total_pages += atomic_read(&si->frontswap_pages);
246 }
247 if (total_pages <= target_pages)
248 goto out;
249 total_pages_to_unuse = total_pages - target_pages;
250 for (type = swap_list.head; type >= 0; type = si->next) {
251 si = swap_info[type];
252 si_frontswap_pages = atomic_read(&si->frontswap_pages);
253 if (total_pages_to_unuse < si_frontswap_pages)
254 pages = pages_to_unuse = total_pages_to_unuse;
255 else {
256 pages = si_frontswap_pages;
257 pages_to_unuse = 0; /* unuse all */
258 }
259 /* ensure there is enough RAM to fetch pages from frontswap */
260 if (security_vm_enough_memory_mm(current->mm, pages))
261 continue;
262 vm_unacct_memory(pages);
263 break;
264 }
265 if (type < 0)
266 goto out;
267 locked = false;
268 spin_unlock(&swap_lock);
269 try_to_unuse(type, true, pages_to_unuse);
270out:
271 if (locked)
272 spin_unlock(&swap_lock);
273 return;
274}
275EXPORT_SYMBOL(frontswap_shrink);
276
277/*
278 * Count and return the number of frontswap pages across all
279 * swap devices. This is exported so that backend drivers can
280 * determine current usage without reading debugfs.
281 */
282unsigned long frontswap_curr_pages(void)
283{
284 int type;
285 unsigned long totalpages = 0;
286 struct swap_info_struct *si = NULL;
287
288 spin_lock(&swap_lock);
289 for (type = swap_list.head; type >= 0; type = si->next) {
290 si = swap_info[type];
291 totalpages += atomic_read(&si->frontswap_pages);
292 }
293 spin_unlock(&swap_lock);
294 return totalpages;
295}
296EXPORT_SYMBOL(frontswap_curr_pages);
297
298static int __init init_frontswap(void)
299{
300#ifdef CONFIG_DEBUG_FS
301 struct dentry *root = debugfs_create_dir("frontswap", NULL);
302 if (root == NULL)
303 return -ENXIO;
304 debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
305 debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
306 debugfs_create_u64("failed_stores", S_IRUGO, root,
307 &frontswap_failed_stores);
308 debugfs_create_u64("invalidates", S_IRUGO,
309 root, &frontswap_invalidates);
310#endif
311 return 0;
312}
313
314module_init(init_frontswap);
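
For context, a minimal sketch of how a backend registers against the frontend above. The frontswap_ops field names come from the calls in this file; the exact callback prototypes live in include/linux/frontswap.h (in the diffstat, not shown), so treat the signatures below as inferred. A real backend (zcache, ramster, the Xen tmem driver touched elsewhere in this series) would actually store and retrieve page contents; this stub declines every store, so swap always falls back to the block device. A backend that also wants every accepted page written to disk would additionally call frontswap_writethrough(true) at init time.

#include <linux/frontswap.h>
#include <linux/module.h>

static void demo_init(unsigned type)
{
	/* per-swap-device setup would go here */
}

static int demo_store(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* refuse: the page goes to the swap device instead */
}

static int demo_load(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* nothing stored, nothing to load */
}

static void demo_invalidate_page(unsigned type, pgoff_t offset)
{
}

static void demo_invalidate_area(unsigned type)
{
}

static struct frontswap_ops demo_ops = {
	.init			= demo_init,
	.store			= demo_store,
	.load			= demo_load,
	.invalidate_page	= demo_invalidate_page,
	.invalidate_area	= demo_invalidate_area,
};

static int __init demo_frontswap_init(void)
{
	/* a previously registered backend could be chained via 'old' */
	struct frontswap_ops old = frontswap_register_ops(&demo_ops);

	(void)old;
	return 0;
}
module_init(demo_frontswap_init);
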
diff --git a/mm/internal.h b/mm/internal.h
index 5cbb78190041..2ba87fbfb75b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page);
94/* 94/*
95 * in mm/page_alloc.c 95 * in mm/page_alloc.c
96 */ 96 */
97extern void set_pageblock_migratetype(struct page *page, int migratetype);
98extern int move_freepages_block(struct zone *zone, struct page *page,
99 int migratetype);
100extern void __free_pages_bootmem(struct page *page, unsigned int order); 97extern void __free_pages_bootmem(struct page *page, unsigned int order);
101extern void prep_compound_page(struct page *page, unsigned long order); 98extern void prep_compound_page(struct page *page, unsigned long order);
102#ifdef CONFIG_MEMORY_FAILURE 99#ifdef CONFIG_MEMORY_FAILURE
@@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page);
104#endif 101#endif
105 102
106#if defined CONFIG_COMPACTION || defined CONFIG_CMA 103#if defined CONFIG_COMPACTION || defined CONFIG_CMA
107#include <linux/compaction.h>
108 104
109/* 105/*
110 * in mm/compaction.c 106 * in mm/compaction.c
@@ -123,14 +119,11 @@ struct compact_control {
123 unsigned long nr_migratepages; /* Number of pages to migrate */ 119 unsigned long nr_migratepages; /* Number of pages to migrate */
124 unsigned long free_pfn; /* isolate_freepages search base */ 120 unsigned long free_pfn; /* isolate_freepages search base */
125 unsigned long migrate_pfn; /* isolate_migratepages search base */ 121 unsigned long migrate_pfn; /* isolate_migratepages search base */
126 enum compact_mode mode; /* Compaction mode */ 122 bool sync; /* Synchronous migration */
127 123
128 int order; /* order a direct compactor needs */ 124 int order; /* order a direct compactor needs */
129 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 125 int migratetype; /* MOVABLE, RECLAIMABLE etc */
130 struct zone *zone; 126 struct zone *zone;
131
132 /* Number of UNMOVABLE destination pageblocks skipped during scan */
133 unsigned long nr_pageblocks_skipped;
134}; 127};
135 128
136unsigned long 129unsigned long
diff --git a/mm/migrate.c b/mm/migrate.c
index ab81d482ae6f..be26d5cbe56b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -436,7 +436,10 @@ void migrate_page_copy(struct page *newpage, struct page *page)
436 * is actually a signal that all of the page has become dirty. 436 * is actually a signal that all of the page has become dirty.
437 * Whereas only part of our page may be dirty. 437 * Whereas only part of our page may be dirty.
438 */ 438 */
439 __set_page_dirty_nobuffers(newpage); 439 if (PageSwapBacked(page))
440 SetPageDirty(newpage);
441 else
442 __set_page_dirty_nobuffers(newpage);
440 } 443 }
441 444
442 mlock_migrate_page(newpage, page); 445 mlock_migrate_page(newpage, page);
diff --git a/mm/nommu.c b/mm/nommu.c
index c4acfbc09972..d4b0c10872de 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1486,7 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1486 1486
1487 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); 1487 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1488 1488
1489 ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1489 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1490 1490
1491 if (file) 1491 if (file)
1492 fput(file); 1492 fput(file);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6092f331b32e..44030096da63 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-void set_pageblock_migratetype(struct page *page, int migratetype)
+static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
 	if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
 	return pages_moved;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page,
+static int move_freepages_block(struct zone *zone, struct page *page,
 				int migratetype)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *start_page, *end_page;
@@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 		.nr_migratepages = 0,
 		.order = -1,
 		.zone = page_zone(pfn_to_page(start)),
-		.mode = COMPACT_SYNC,
+		.sync = true,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 
diff --git a/mm/page_io.c b/mm/page_io.c
index dc76b4d0611e..34f02923744c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -18,6 +18,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <linux/frontswap.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		unlock_page(page);
 		goto out;
 	}
+	if (frontswap_store(page) == 0) {
+		set_page_writeback(page);
+		unlock_page(page);
+		end_page_writeback(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
 	if (bio == NULL) {
 		set_page_dirty(page);
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page)
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageUptodate(page));
+	if (frontswap_load(page) == 0) {
+		SetPageUptodate(page);
+		unlock_page(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
 	if (bio == NULL) {
 		unlock_page(page);
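
The two hunks above give frontswap first claim on every swap I/O: a 0 return from frontswap_store() or frontswap_load() means a registered backend accepted or supplied the page, so the bio path is skipped and the writeback/uptodate state is completed immediately. For orientation only, here is a minimal sketch of how a backend would hook in; it assumes the frontswap_ops callback layout introduced by include/linux/frontswap.h elsewhere in this merge (not shown in this excerpt), so the names and signatures are illustrative rather than copied from the merged header. In-tree users such as zcache and the Xen tmem driver register their own ops along these lines.

/* Illustrative no-op backend; assumes the frontswap_ops layout from
 * include/linux/frontswap.h in this series, not taken verbatim from it. */
#include <linux/module.h>
#include <linux/frontswap.h>

static void noop_init(unsigned type)
{
	/* a swap area of this type was just enabled via frontswap_init() */
}

static int noop_store(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* decline: swap_writepage() falls through to the bio path */
}

static int noop_load(unsigned type, pgoff_t offset, struct page *page)
{
	return -1;	/* miss: swap_readpage() reads the slot from the swap device */
}

static void noop_invalidate_page(unsigned type, pgoff_t offset) { }
static void noop_invalidate_area(unsigned type) { }

static struct frontswap_ops noop_ops = {
	.init = noop_init,
	.store = noop_store,
	.load = noop_load,
	.invalidate_page = noop_invalidate_page,
	.invalidate_area = noop_invalidate_area,
};

static int __init noop_frontswap_register(void)
{
	/* the previously registered ops are returned, allowing chaining */
	frontswap_register_ops(&noop_ops);
	return 0;
}
module_init(noop_frontswap_register);
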
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 457b10baef59..de5bc51c4a66 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
 #include <linux/oom.h>
+#include <linux/frontswap.h>
+#include <linux/swapfile.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 static void free_swap_count_continuations(struct swap_info_struct *);
 static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
-static DEFINE_SPINLOCK(swap_lock);
+DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long nr_swap_pages;
 long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-static struct swap_list_t swap_list = {-1, -1};
+struct swap_list_t swap_list = {-1, -1};
 
-static struct swap_info_struct *swap_info[MAX_SWAPFILES];
+struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 		swap_list.next = p->type;
 		nr_swap_pages++;
 		p->inuse_pages--;
+		frontswap_invalidate_page(p->type, offset);
 		if ((p->flags & SWP_BLKDEV) &&
 				disk->fops->swap_slot_free_notify)
 			disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm,
 }
 
 /*
- * Scan swap_map from current position to next entry still in use.
+ * Scan swap_map (or frontswap_map if frontswap parameter is true)
+ * from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
  */
 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
-					unsigned int prev)
+					unsigned int prev, bool frontswap)
 {
 	unsigned int max = si->max;
 	unsigned int i = prev;
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 			prev = 0;
 			i = 1;
 		}
+		if (frontswap) {
+			if (frontswap_test(si, i))
+				break;
+			else
+				continue;
+		}
 		count = si->swap_map[i];
 		if (count && swap_count(count) != SWAP_MAP_BAD)
 			break;
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
  * We completely avoid races by reading each swap page in advance,
  * and then search for the process using it. All the necessary
  * page table adjustments can then be made atomically.
+ *
+ * if the boolean frontswap is true, only unuse pages_to_unuse pages;
+ * pages_to_unuse==0 means all pages; ignored if frontswap is false
  */
-static int try_to_unuse(unsigned int type)
+int try_to_unuse(unsigned int type, bool frontswap,
+			unsigned long pages_to_unuse)
 {
 	struct swap_info_struct *si = swap_info[type];
 	struct mm_struct *start_mm;
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
 	 * one pass through swap_map is enough, but not necessarily:
	 * there are races when an instance of an entry might be missed.
	 */
-	while ((i = find_next_to_unuse(si, i)) != 0) {
+	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
 		if (signal_pending(current)) {
 			retval = -EINTR;
 			break;
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type)
 		 * interactive performance.
		 */
 		cond_resched();
+		if (frontswap && pages_to_unuse > 0) {
+			if (!--pages_to_unuse)
+				break;
+		}
 	}
 
 	mmput(start_mm);
@@ -1486,7 +1504,8 @@ bad_bmap:
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
-				unsigned char *swap_map)
+				unsigned char *swap_map,
+				unsigned long *frontswap_map)
 {
 	int i, prev;
 
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 	else
 		p->prio = --least_priority;
 	p->swap_map = swap_map;
+	frontswap_map_set(p, frontswap_map);
 	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += p->pages;
 	total_swap_pages += p->pages;
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 		swap_list.head = swap_list.next = p->type;
 	else
 		swap_info[prev]->next = p->type;
+	frontswap_init(p->type);
 	spin_unlock(&swap_lock);
 }
 
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_unlock(&swap_lock);
 
 	oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
-	err = try_to_unuse(type);
+	err = try_to_unuse(type, false, 0); /* force all pages to be unused */
 	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
 
 	if (err) {
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		 * sys_swapoff for this swap_info_struct at this point.
		 */
		/* re-insert swap space back into swap_list */
-		enable_swap_info(p, p->prio, p->swap_map);
+		enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
 		goto out_dput;
 	}
 
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
 	p->flags = 0;
+	frontswap_invalidate_area(type);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	vfree(frontswap_map_get(p));
 	/* Destroy swap account informatin */
 	swap_cgroup_swapoff(type);
 
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	sector_t span;
 	unsigned long maxpages;
 	unsigned char *swap_map = NULL;
+	unsigned long *frontswap_map = NULL;
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		error = nr_extents;
 		goto bad_swap;
 	}
+	/* frontswap enabled? set up bit-per-page map for frontswap */
+	if (frontswap_enabled)
+		frontswap_map = vzalloc(maxpages / sizeof(long));
 
 	if (p->bdev) {
 		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (swap_flags & SWAP_FLAG_PREFER)
 		prio =
 		  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
-	enable_swap_info(p, prio, swap_map);
+	enable_swap_info(p, prio, swap_map, frontswap_map);
 
 	printk(KERN_INFO "Adding %uk swap on %s. "
-		"Priority:%d extents:%d across:%lluk %s%s\n",
+		"Priority:%d extents:%d across:%lluk %s%s%s\n",
 		p->pages<<(PAGE_SHIFT-10), name, p->prio,
 		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
 		(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
-		(p->flags & SWP_DISCARDABLE) ? "D" : "");
+		(p->flags & SWP_DISCARDABLE) ? "D" : "",
+		(frontswap_map) ? "FS" : "");
 
 	mutex_unlock(&swapon_mutex);
 	atomic_inc(&proc_poll_event);
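
The swapon() hunk above sizes frontswap_map as a bitmap with at least one bit per page of the new swap area, while the swap_entry_free(), swapoff() and find_next_to_unuse() hunks consult and tear it down through frontswap_test(), frontswap_map_get() and frontswap_invalidate_*(). Those helpers are defined in include/linux/frontswap.h from this same merge and are not part of this excerpt; the sketch below is one plausible reading of the bit-per-slot bookkeeping, assuming the map pointer is stashed in struct swap_info_struct, and is not a reproduction of the merged header.

/* Plausible shape of the bitmap helpers used in the swapfile.c hunks;
 * assumes swap_info_struct carries a frontswap_map pointer.  One bit per
 * swap slot records whether that slot's data currently sits in frontswap. */
static inline void frontswap_map_set(struct swap_info_struct *p,
				     unsigned long *map)
{
	p->frontswap_map = map;
}

static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
{
	return p->frontswap_map;
}

static inline int frontswap_test(struct swap_info_struct *p, pgoff_t offset)
{
	/* find_next_to_unuse(..., frontswap=true) only stops on set bits */
	return p->frontswap_map && test_bit(offset, p->frontswap_map);
}
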
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a6a0365475ed..5afb43114020 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -332,6 +332,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 	 */
 	hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
 		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->type == KVM_IRQ_ROUTING_MSI ||
 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
 			return r;
 