182 files changed, 7633 insertions, 1177 deletions
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt b/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt new file mode 100644 index 000000000000..641bc13983e1 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt | |||
@@ -0,0 +1,309 @@ | |||
1 | =============================================================================== | ||
2 | Freescale Interlaken Look-Aside Controller Device Bindings | ||
3 | Copyright 2012 Freescale Semiconductor Inc. | ||
4 | |||
5 | CONTENTS | ||
6 | - Interlaken Look-Aside Controller (LAC) Node | ||
7 | - Example LAC Node | ||
8 | - Interlaken Look-Aside Controller (LAC) Software Portal Node | ||
9 | - Interlaken Look-Aside Controller (LAC) Software Portal Child Nodes | ||
10 | - Example LAC SWP Node with Child Nodes | ||
11 | |||
12 | ============================================================================== | ||
13 | Interlaken Look-Aside Controller (LAC) Node | ||
14 | |||
15 | DESCRIPTION | ||
16 | |||
17 | Interlaken is a narrow, high-speed, channelized chip-to-chip interface. To | ||
18 | facilitate interoperability between a data path device and a look-aside | ||
19 | co-processor, the Interlaken Look-Aside protocol is defined for short | ||
20 | transaction-related transfers. Although based on the Interlaken protocol, | ||
21 | Interlaken Look-Aside is not directly compatible with Interlaken and can be | ||
22 | considered a different operation mode. | ||
23 | |||
24 | The Interlaken LA controller connects the internal platform to the Interlaken | ||
25 | serial interface. It accepts LA commands through software portals, which are | ||
26 | memory-mapped 4KB regions in system memory. The LA commands are then translated | ||
27 | into Interlaken control words and data words, which are sent on the TX side to | ||
28 | the TCAM through SerDes lanes. | ||
29 | |||
30 | There are two 4KB spaces defined within the LAC global register memory map. | ||
31 | There is a full register set at 0x0000-0x0FFF (also known as the "hypervisor" | ||
32 | version), and a subset at 0x1000-0x1FFF. The former is a superset of the | ||
33 | latter, and includes certain registers that should not be accessible to | ||
34 | partitioned software. Separate nodes are used for each region, with a phandle | ||
35 | linking the hypervisor node to the normal operating node. | ||
36 | |||
37 | PROPERTIES | ||
38 | |||
39 | - compatible | ||
40 | Usage: required | ||
41 | Value type: <string> | ||
42 | Definition: Must include "fsl,interlaken-lac". This represents only | ||
43 | those LAC CCSR registers not protected in partitioned | ||
44 | software. The version of the device is determined by the LAC | ||
45 | IP Block Revision Register (IPBRR0) at offset 0x0BF8. | ||
46 | |||
47 | Table of correspondences between IPBRR0 values and example | ||
48 | chips: | ||
49 | Value Device | ||
50 | ----------- ------- | ||
51 | 0x02000100 T4240 | ||
52 | |||
53 | The hypervisor node has a different compatible string. It must include | ||
54 | "fsl,interlaken-lac-hv". This node represents the protected | ||
55 | LAC register space and is required except inside a partition | ||
56 | where access to the hypervisor node is to be denied. | ||
57 | |||
58 | - fsl,non-hv-node | ||
59 | Usage: required in "fsl,interlaken-lac-hv" | ||
60 | Value type: <phandle> | ||
61 | Definition: Points to the non-protected LAC CCSR mapped register space | ||
62 | node. | ||
63 | |||
64 | - reg | ||
65 | Usage: required | ||
66 | Value type: <prop-encoded-array> | ||
67 | Definition: A standard property. The first resource represents the | ||
68 | Interlaken LAC configuration registers. | ||
69 | |||
70 | - interrupts | ||
71 | Usage: required in non-hv node only | ||
72 | Value type: <prop-encoded-array> | ||
73 | Definition: Interrupt mapping for Interlaken LAC error IRQ. | ||
74 | |||
75 | EXAMPLE | ||
76 | lac: lac@229000 { | ||
77 | compatible = "fsl,interlaken-lac"; | ||
78 | reg = <0x229000 0x1000>; | ||
79 | interrupts = <16 2 1 18>; | ||
80 | }; | ||
81 | |||
82 | lac-hv@228000 { | ||
83 | compatible = "fsl,interlaken-lac-hv"; | ||
84 | reg = <0x228000 0x1000>; | ||
85 | fsl,non-hv-node = <&lac>; | ||
86 | }; | ||
87 | |||
88 | =============================================================================== | ||
89 | Interlaken Look-Aside Controller (LAC) Software Portal Container Node | ||
90 | |||
91 | DESCRIPTION | ||
92 | The Interlaken Look-Aside Controller (LAC) utilizes Software Portals to accept | ||
93 | Interlaken Look-Aside (ILA) commands. The Interlaken LAC software portal | ||
94 | memory map occupies 128KB of memory space. The software portal memory space is | ||
95 | intended to be cache-enabled. WIMG for each software portal space is required | ||
96 | to be 0010 if stashing is enabled; otherwise, WIMG can be 0000 or 0010. | ||
97 | |||
98 | PROPERTIES | ||
99 | |||
100 | - #address-cells | ||
101 | Usage: required | ||
102 | Value type: <u32> | ||
103 | Definition: A standard property. Must have a value of 1. | ||
104 | |||
105 | - #size-cells | ||
106 | Usage: required | ||
107 | Value type: <u32> | ||
108 | Definition: A standard property. Must have a value of 1. | ||
109 | |||
110 | - compatible | ||
111 | Usage: required | ||
112 | Value type: <string> | ||
113 | Definition: Must include "fsl,interlaken-lac-portals" | ||
114 | |||
115 | - ranges | ||
116 | Usage: required | ||
117 | Value type: <prop-encoded-array> | ||
118 | Definition: A standard property. Specifies the address and length | ||
119 | of the LAC portal memory space. | ||
120 | |||
121 | =============================================================================== | ||
122 | Interlaken Look-Aside Controller (LAC) Software Portals Child Nodes | ||
123 | |||
124 | DESCRIPTION | ||
125 | There are up to 24 software portals, each requiring 4KB of contiguous | ||
126 | memory within the software portal memory-mapped space. | ||
128 | |||
129 | PROPERTIES | ||
130 | |||
131 | - compatible | ||
132 | Usage: required | ||
133 | Value type: <string> | ||
134 | Definition: Must include "fsl,interlaken-lac-portal-vX.Y" where X is | ||
135 | the Major version (IP_MJ) found in the LAC IP Block Revision | ||
136 | Register (IPBRR0), at offset 0x0BF8, and Y is the Minor version | ||
137 | (IP_MN). | ||
138 | |||
139 | Table of correspondences between version values and example chips: | ||
140 | Value Device | ||
141 | ------ ------- | ||
142 | 1.0 T4240 | ||
143 | |||
144 | - reg | ||
145 | Usage: required | ||
146 | Value type: <prop-encoded-array> | ||
147 | Definition: A standard property. The first resource represents the | ||
148 | Interlaken LAC software portal registers. | ||
149 | |||
150 | - fsl,liodn | ||
151 | Value type: <u32> | ||
152 | Definition: The logical I/O device number (LIODN) for this device. The | ||
153 | LIODN is a number expressed by this device and used to perform | ||
154 | look-ups in the IOMMU (PAMU) address table when performing | ||
155 | DMAs. This property is automatically added by U-Boot. | ||
156 | |||
157 | =============================================================================== | ||
158 | EXAMPLE | ||
159 | |||
160 | lac-portals { | ||
161 | #address-cells = <0x1>; | ||
162 | #size-cells = <0x1>; | ||
163 | compatible = "fsl,interlaken-lac-portals"; | ||
164 | ranges = <0x0 0xf 0xf4400000 0x20000>; | ||
165 | |||
166 | lportal0: lac-portal@0 { | ||
167 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
168 | fsl,liodn = <0x204>; | ||
169 | reg = <0x0 0x1000>; | ||
170 | }; | ||
171 | |||
172 | lportal1: lac-portal@1000 { | ||
173 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
174 | fsl,liodn = <0x205>; | ||
175 | reg = <0x1000 0x1000>; | ||
176 | }; | ||
177 | |||
178 | lportal2: lac-portal@2000 { | ||
179 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
180 | fsl,liodn = <0x206>; | ||
181 | reg = <0x2000 0x1000>; | ||
182 | }; | ||
183 | |||
184 | lportal3: lac-portal@3000 { | ||
185 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
186 | fsl,liodn = <0x207>; | ||
187 | reg = <0x3000 0x1000>; | ||
188 | }; | ||
189 | |||
190 | lportal4: lac-portal@4000 { | ||
191 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
192 | fsl,liodn = <0x208>; | ||
193 | reg = <0x4000 0x1000>; | ||
194 | }; | ||
195 | |||
196 | lportal5: lac-portal@5000 { | ||
197 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
198 | fsl,liodn = <0x209>; | ||
199 | reg = <0x5000 0x1000>; | ||
200 | }; | ||
201 | |||
202 | lportal6: lac-portal@6000 { | ||
203 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
204 | fsl,liodn = <0x20A>; | ||
205 | reg = <0x6000 0x1000>; | ||
206 | }; | ||
207 | |||
208 | lportal7: lac-portal@7000 { | ||
209 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
210 | fsl,liodn = <0x20B>; | ||
211 | reg = <0x7000 0x1000>; | ||
212 | }; | ||
213 | |||
214 | lportal8: lac-portal@8000 { | ||
215 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
216 | fsl,liodn = <0x20C>; | ||
217 | reg = <0x8000 0x1000>; | ||
218 | }; | ||
219 | |||
220 | lportal9: lac-portal@9000 { | ||
221 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
222 | fsl,liodn = <0x20D>; | ||
223 | reg = <0x9000 0x1000>; | ||
224 | }; | ||
225 | |||
226 | lportal10: lac-portal@A000 { | ||
227 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
228 | fsl,liodn = <0x20E>; | ||
229 | reg = <0xA000 0x1000>; | ||
230 | }; | ||
231 | |||
232 | lportal11: lac-portal@B000 { | ||
233 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
234 | fsl,liodn = <0x20F>; | ||
235 | reg = <0xB000 0x1000>; | ||
236 | }; | ||
237 | |||
238 | lportal12: lac-portal@C000 { | ||
239 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
240 | fsl,liodn = <0x210>; | ||
241 | reg = <0xC000 0x1000>; | ||
242 | }; | ||
243 | |||
244 | lportal13: lac-portal@D000 { | ||
245 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
246 | fsl,liodn = <0x211>; | ||
247 | reg = <0xD000 0x1000>; | ||
248 | }; | ||
249 | |||
250 | lportal14: lac-portal@E000 { | ||
251 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
252 | fsl,liodn = <0x212>; | ||
253 | reg = <0xE000 0x1000>; | ||
254 | }; | ||
255 | |||
256 | lportal15: lac-portal@F000 { | ||
257 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
258 | fsl,liodn = <0x213>; | ||
259 | reg = <0xF000 0x1000>; | ||
260 | }; | ||
261 | |||
262 | lportal16: lac-portal@10000 { | ||
263 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
264 | fsl,liodn = <0x214>; | ||
265 | reg = <0x10000 0x1000>; | ||
266 | }; | ||
267 | |||
268 | lportal17: lac-portal@11000 { | ||
269 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
270 | fsl,liodn = <0x215>; | ||
271 | reg = <0x11000 0x1000>; | ||
272 | }; | ||
273 | |||
274 | lportal18: lac-portal@12000 { | ||
275 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
276 | fsl,liodn = <0x216>; | ||
277 | reg = <0x12000 0x1000>; | ||
278 | }; | ||
279 | |||
280 | lportal19: lac-portal@13000 { | ||
281 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
282 | fsl,liodn = <0x217>; | ||
283 | reg = <0x13000 0x1000>; | ||
284 | }; | ||
285 | |||
286 | lportal20: lac-portal@14000 { | ||
287 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
288 | fsl,liodn = <0x218>; | ||
289 | reg = <0x14000 0x1000>; | ||
290 | }; | ||
291 | |||
292 | lportal21: lac-portal@15000 { | ||
293 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
294 | fsl,liodn = <0x219>; | ||
295 | reg = <0x15000 0x1000>; | ||
296 | }; | ||
297 | |||
298 | lportal22: lac-portal@16000 { | ||
299 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
300 | fsl,liodn = <0x21A>; | ||
301 | reg = <0x16000 0x1000>; | ||
302 | }; | ||
303 | |||
304 | lportal23: lac-portal@17000 { | ||
305 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
306 | fsl,liodn = <0x21B>; | ||
307 | reg = <0x17000 0x1000>; | ||
308 | }; | ||
309 | }; | ||
diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index dd9e92802ec0..05026ce1875e 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX | |||
@@ -14,6 +14,8 @@ hvcs.txt | |||
14 | - IBM "Hypervisor Virtual Console Server" Installation Guide | 14 | - IBM "Hypervisor Virtual Console Server" Installation Guide |
15 | mpc52xx.txt | 15 | mpc52xx.txt |
16 | - Linux 2.6.x on MPC52xx family | 16 | - Linux 2.6.x on MPC52xx family |
17 | pmu-ebb.txt | ||
18 | - Description of the API for using the PMU with Event Based Branches. | ||
17 | qe_firmware.txt | 19 | qe_firmware.txt |
18 | - describes the layout of firmware binaries for the Freescale QUICC | 20 | - describes the layout of firmware binaries for the Freescale QUICC |
19 | Engine and the code that parses and uploads the microcode therein. | 21 | Engine and the code that parses and uploads the microcode therein. |
diff --git a/Documentation/powerpc/pmu-ebb.txt b/Documentation/powerpc/pmu-ebb.txt new file mode 100644 index 000000000000..73cd163dbfb8 --- /dev/null +++ b/Documentation/powerpc/pmu-ebb.txt | |||
@@ -0,0 +1,137 @@ | |||
1 | PMU Event Based Branches | ||
2 | ======================== | ||
3 | |||
4 | Event Based Branches (EBBs) are a feature which allows the hardware to | ||
5 | branch directly to a specified user space address when certain events occur. | ||
6 | |||
7 | The full specification is available in Power ISA v2.07: | ||
8 | |||
9 | https://www.power.org/documentation/power-isa-version-2-07/ | ||
10 | |||
11 | One type of event for which EBBs can be configured is PMU exceptions. This | ||
12 | document describes the API for configuring the Power PMU to generate EBBs, | ||
13 | using the Linux perf_events API. | ||
14 | |||
15 | |||
16 | Terminology | ||
17 | ----------- | ||
18 | |||
19 | Throughout this document we will refer to an "EBB event" or "EBB events". This | ||
20 | just refers to a struct perf_event which has set the "EBB" flag in its | ||
21 | attr.config. All events which can be configured on the hardware PMU are | ||
22 | possible "EBB events". | ||
23 | |||
24 | |||
25 | Background | ||
26 | ---------- | ||
27 | |||
28 | When a PMU EBB occurs it is delivered to the currently running process. As such | ||
29 | EBBs can only sensibly be used by programs for self-monitoring. | ||
30 | |||
31 | It is a feature of the perf_events API that events can be created on other | ||
32 | processes, subject to standard permission checks. This is also true of EBB | ||
33 | events; however, unless the target process enables EBBs (via mtspr(BESCR)) no | ||
34 | EBBs will ever be delivered. | ||
35 | |||
36 | This makes it possible for a process to enable EBBs for itself, but not | ||
37 | actually configure any events. At a later time another process can come along | ||
38 | and attach an EBB event to the process, which will then cause EBBs to be | ||
39 | delivered to the first process. It's not clear if this is actually useful. | ||
40 | |||
41 | |||
42 | When the PMU is configured for EBBs, all PMU interrupts are delivered to the | ||
43 | user process. This means that once an EBB event is scheduled on the PMU, no | ||
44 | non-EBB events can be configured, and so EBB events cannot be run | ||
45 | concurrently with regular 'perf' commands, or any other perf events. | ||
46 | |||
47 | It is however safe to run 'perf' commands on a process which is using EBBs. The | ||
48 | kernel will in general schedule the EBB event, and perf will be notified that | ||
49 | its events could not run. | ||
50 | |||
51 | The exclusion between EBB events and regular events is implemented using the | ||
52 | existing "pinned" and "exclusive" attributes of perf_events. This means EBB | ||
53 | events will be given priority over other events, unless they are also pinned. | ||
54 | If an EBB event and a regular event are both pinned, then whichever is enabled | ||
55 | first will be scheduled and the other will be put in error state. See the | ||
56 | section below titled "Enabling an EBB event" for more information. | ||
57 | |||
58 | |||
59 | Creating an EBB event | ||
60 | --------------------- | ||
61 | |||
62 | To request that an event is counted using EBB, the event code should have bit | ||
63 | 63 set. | ||
64 | |||
65 | EBB events must be created with a particular, and restrictive, set of | ||
66 | attributes - this is so that they interoperate correctly with the rest of the | ||
67 | perf_events subsystem. | ||
68 | |||
69 | An EBB event must be created with the "pinned" and "exclusive" attributes set. | ||
70 | Note that if you are creating a group of EBB events, only the leader can have | ||
71 | these attributes set. | ||
72 | |||
73 | An EBB event must NOT set any of the "inherit", "sample_period", "freq" or | ||
74 | "enable_on_exec" attributes. | ||
75 | |||
76 | An EBB event must be attached to a task. This is specified to perf_event_open() | ||
77 | by passing a pid value, typically 0 indicating the current task. | ||
78 | |||
79 | All events in a group must agree on whether they want EBB. That is, all events | ||
80 | must request EBB, or none may request EBB. | ||
81 | |||
82 | EBB events must specify the PMC they are to be counted on. This ensures | ||
83 | userspace is able to reliably determine which PMC the event is scheduled on. | ||
84 | |||
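As a concrete illustration, below is a minimal sketch (not part of the patch
itself) of opening an EBB event with perf_event_open(2). The raw event code
value is a hypothetical placeholder, and the PMC selection is assumed to be
encoded in the event code as required above:

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_ebb_event(unsigned long long event_code)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_RAW;
		attr.size = sizeof(attr);
		/* Bit 63 of the event code requests EBB counting */
		attr.config = event_code | (1ULL << 63);
		/* EBB events must be pinned and exclusive */
		attr.pinned = 1;
		attr.exclusive = 1;
		/* inherit, sample_period, freq and enable_on_exec stay clear */

		/* pid == 0: attach to the current task; cpu == -1: any CPU */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}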
85 | |||
86 | Enabling an EBB event | ||
87 | --------------------- | ||
88 | |||
89 | Once an EBB event has been successfully opened, it must be enabled with the | ||
90 | perf_events API. This can be achieved either via the ioctl() interface, or the | ||
91 | prctl() interface. | ||
92 | |||
93 | However, due to the design of the perf_events API, enabling an event does not | ||
94 | guarantee that it has been scheduled on the PMU. To ensure that the EBB event | ||
95 | has been scheduled on the PMU, you must perform a read() on the event. If the | ||
96 | read() returns EOF (i.e. it returns 0), then the event has not been scheduled | ||
97 | and EBBs are not enabled. | ||
98 | |||
99 | This behaviour occurs because the EBB event is pinned and exclusive. When the | ||
100 | EBB event is enabled it will force all other non-pinned events off the PMU. In | ||
101 | this case the enable will be successful. However if there is already an event | ||
102 | pinned on the PMU then the enable will not be successful. | ||
103 | |||
104 | |||
105 | Reading an EBB event | ||
106 | -------------------- | ||
107 | |||
108 | It is possible to read() from an EBB event. However, the results are | ||
109 | meaningless. Because interrupts are being delivered to the user process the | ||
110 | kernel is not able to count the event, and so will return a junk value. | ||
111 | |||
112 | |||
113 | Closing an EBB event | ||
114 | -------------------- | ||
115 | |||
116 | When an EBB event is finished with, you can close it using close() as for any | ||
117 | regular event. If this is the last EBB event the PMU will be deconfigured and | ||
118 | no further PMU EBBs will be delivered. | ||
119 | |||
120 | |||
121 | EBB Handler | ||
122 | ----------- | ||
123 | |||
124 | The EBB handler is just regular userspace code; however, it must be written in | ||
125 | the style of an interrupt handler. When the handler is entered all registers | ||
126 | are (potentially) live and so must be saved somehow before the handler can invoke | ||
127 | other code. | ||
128 | |||
129 | It's up to the program how to handle this. For C programs a relatively simple | ||
130 | option is to create an interrupt frame on the stack and save registers there. | ||
131 | |||
132 | Fork | ||
133 | ---- | ||
134 | |||
135 | EBB events are not inherited across fork. If the child process wishes to use | ||
136 | EBBs it should open a new event for itself. Similarly the EBB state in | ||
137 | BESCR/EBBHR/EBBRR is cleared across fork(). | ||
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 8eda3635a17d..c55533c0adb3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt | |||
@@ -283,6 +283,69 @@ a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap | |||
283 | interfaces implement the device region access defined by the device's | 283 | interfaces implement the device region access defined by the device's |
284 | own VFIO_DEVICE_GET_REGION_INFO ioctl. | 284 | own VFIO_DEVICE_GET_REGION_INFO ioctl. |
285 | 285 | ||
286 | |||
287 | PPC64 sPAPR implementation note | ||
288 | ------------------------------------------------------------------------------- | ||
289 | |||
290 | This implementation has some specifics: | ||
291 | |||
292 | 1) Only one IOMMU group per container is supported, as an IOMMU group | ||
293 | represents the minimal entity for which isolation can be guaranteed, and | ||
294 | groups are allocated statically, one per Partitionable Endpoint (PE) | ||
295 | (a PE is often a PCI domain, but not always). | ||
296 | |||
297 | 2) The hardware supports so-called DMA windows - the PCI address range | ||
298 | within which DMA transfers are allowed; any attempt to access address space | ||
299 | outside the window leads to isolation of the whole PE. | ||
300 | |||
301 | 3) PPC64 guests are paravirtualized but not fully emulated. There is an API | ||
302 | to map/unmap pages for DMA; it normally maps 1..32 pages per call and | ||
303 | currently there is no way to reduce the number of calls. To make things | ||
304 | faster, the map/unmap handling has been implemented in real mode, which | ||
305 | provides excellent performance but has limitations, such as the inability | ||
306 | to do locked-page accounting in real time. | ||
307 | |||
308 | So 3 additional ioctls have been added: | ||
309 | |||
310 | VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start | ||
311 | of the DMA window on the PCI bus. | ||
312 | |||
313 | VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting | ||
314 | is done at this point. This lets the user first learn what | ||
315 | the DMA window is and adjust the rlimit before doing any real work. | ||
316 | |||
317 | VFIO_IOMMU_DISABLE - disables the container. | ||
318 | |||
319 | |||
320 | The code flow from the example above should be slightly changed: | ||
321 | |||
322 | ..... | ||
323 | /* Add the group to the container */ | ||
324 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | ||
325 | |||
326 | /* Enable the IOMMU model we want */ | ||
327 | ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); | ||
328 | |||
329 | /* Get additional sPAPR IOMMU info */ | ||
330 | struct vfio_iommu_spapr_tce_info spapr_iommu_info; | ||
331 | ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); | ||
332 | |||
333 | if (ioctl(container, VFIO_IOMMU_ENABLE)) | ||
334 | /* Cannot enable container, perhaps the rlimit is too low */ | ||
335 | |||
336 | /* Allocate some space and setup a DMA mapping */ | ||
337 | dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, | ||
338 | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); | ||
339 | |||
340 | dma_map.size = 1024 * 1024; | ||
341 | dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ | ||
342 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | ||
343 | |||
344 | /* Check here whether .iova/.size are within the DMA window from spapr_iommu_info */ | ||
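	/* One possible (illustrative) realization of that check, using
	 * the window fields filled in by VFIO_IOMMU_SPAPR_TCE_GET_INFO */
	if (dma_map.iova < spapr_iommu_info.dma32_window_start ||
	    dma_map.iova + dma_map.size >
	    spapr_iommu_info.dma32_window_start +
	    spapr_iommu_info.dma32_window_size)
		/* .iova/.size do not fit into the DMA window, bail out */
		exit(1);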
345 | |||
346 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | ||
347 | ..... | ||
348 | |||
286 | ------------------------------------------------------------------------------- | 349 | ------------------------------------------------------------------------------- |
287 | 350 | ||
288 | [1] VFIO was originally an acronym for "Virtual Function I/O" in its | 351 | [1] VFIO was originally an acronym for "Virtual Function I/O" in its |
diff --git a/MAINTAINERS b/MAINTAINERS index 3f7710151a75..65730838dca4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -3123,6 +3123,13 @@ M: Maxim Levitsky <maximlevitsky@gmail.com> | |||
3123 | S: Maintained | 3123 | S: Maintained |
3124 | F: drivers/media/rc/ene_ir.* | 3124 | F: drivers/media/rc/ene_ir.* |
3125 | 3125 | ||
3126 | ENHANCED ERROR HANDLING (EEH) | ||
3127 | M: Gavin Shan <shangw@linux.vnet.ibm.com> | ||
3128 | L: linuxppc-dev@lists.ozlabs.org | ||
3129 | S: Supported | ||
3130 | F: Documentation/powerpc/eeh-pci-error-recovery.txt | ||
3131 | F: arch/powerpc/kernel/eeh*.c | ||
3132 | |||
3126 | EPSON S1D13XXX FRAMEBUFFER DRIVER | 3133 | EPSON S1D13XXX FRAMEBUFFER DRIVER |
3127 | M: Kristoffer Ericson <kristoffer.ericson@gmail.com> | 3134 | M: Kristoffer Ericson <kristoffer.ericson@gmail.com> |
3128 | S: Maintained | 3135 | S: Maintained |
@@ -6192,7 +6199,6 @@ M: Linas Vepstas <linasvepstas@gmail.com> | |||
6192 | L: linux-pci@vger.kernel.org | 6199 | L: linux-pci@vger.kernel.org |
6193 | S: Supported | 6200 | S: Supported |
6194 | F: Documentation/PCI/pci-error-recovery.txt | 6201 | F: Documentation/PCI/pci-error-recovery.txt |
6195 | F: Documentation/powerpc/eeh-pci-error-recovery.txt | ||
6196 | 6202 | ||
6197 | PCI SUBSYSTEM | 6203 | PCI SUBSYSTEM |
6198 | M: Bjorn Helgaas <bhelgaas@google.com> | 6204 | M: Bjorn Helgaas <bhelgaas@google.com> |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 508e3fe934d2..1022e7b675c2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -298,7 +298,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE | |||
298 | 298 | ||
299 | config MATH_EMULATION | 299 | config MATH_EMULATION |
300 | bool "Math emulation" | 300 | bool "Math emulation" |
301 | depends on 4xx || 8xx || E200 || PPC_MPC832x || E500 | 301 | depends on 4xx || 8xx || PPC_MPC832x || BOOKE |
302 | ---help--- | 302 | ---help--- |
303 | Some PowerPC chips designed for embedded applications do not have | 303 | Some PowerPC chips designed for embedded applications do not have |
304 | a floating-point unit and therefore do not implement the | 304 | a floating-point unit and therefore do not implement the |
@@ -307,6 +307,10 @@ config MATH_EMULATION | |||
307 | unit, which will allow programs that use floating-point | 307 | unit, which will allow programs that use floating-point |
308 | instructions to run. | 308 | instructions to run. |
309 | 309 | ||
310 | This is also useful to emulate missing (optional) instructions | ||
311 | such as fsqrt on cores that do have an FPU but do not implement | ||
312 | them (such as Freescale BookE). | ||
313 | |||
310 | config PPC_TRANSACTIONAL_MEM | 314 | config PPC_TRANSACTIONAL_MEM |
311 | bool "Transactional Memory support for POWERPC" | 315 | bool "Transactional Memory support for POWERPC" |
312 | depends on PPC_BOOK3S_64 | 316 | depends on PPC_BOOK3S_64 |
@@ -315,17 +319,6 @@ config PPC_TRANSACTIONAL_MEM | |||
315 | ---help--- | 319 | ---help--- |
316 | Support user-mode Transactional Memory on POWERPC. | 320 | Support user-mode Transactional Memory on POWERPC. |
317 | 321 | ||
318 | config 8XX_MINIMAL_FPEMU | ||
319 | bool "Minimal math emulation for 8xx" | ||
320 | depends on 8xx && !MATH_EMULATION | ||
321 | help | ||
322 | Older arch/ppc kernels still emulated a few floating point | ||
323 | instructions such as load and store, even when full math | ||
324 | emulation is disabled. Say "Y" here if you want to preserve | ||
325 | this behavior. | ||
326 | |||
327 | It is recommended that you build a soft-float userspace instead. | ||
328 | |||
329 | config IOMMU_HELPER | 322 | config IOMMU_HELPER |
330 | def_bool PPC64 | 323 | def_bool PPC64 |
331 | 324 | ||
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 863d877e0b5f..d86875f3e17e 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug | |||
@@ -147,6 +147,13 @@ choice | |||
147 | enable debugging for the wrong type of machine your kernel | 147 | enable debugging for the wrong type of machine your kernel |
148 | _will not boot_. | 148 | _will not boot_. |
149 | 149 | ||
150 | config PPC_EARLY_DEBUG_BOOTX | ||
151 | bool "BootX or OpenFirmware" | ||
152 | depends on BOOTX_TEXT | ||
153 | help | ||
154 | Select this to enable early debugging for a machine using BootX | ||
155 | or OpenFirmware. | ||
156 | |||
150 | config PPC_EARLY_DEBUG_LPAR | 157 | config PPC_EARLY_DEBUG_LPAR |
151 | bool "LPAR HV Console" | 158 | bool "LPAR HV Console" |
152 | depends on PPC_PSERIES | 159 | depends on PPC_PSERIES |
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b801dd06e573..d2c8a872308e 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts | |||
@@ -103,6 +103,11 @@ | |||
103 | interrupts = <34 2>; | 103 | interrupts = <34 2>; |
104 | }; | 104 | }; |
105 | 105 | ||
106 | FPGA0: fpga@50000000 { | ||
107 | compatible = "ibm,currituck-fpga"; | ||
108 | reg = <0x50000000 0x4>; | ||
109 | }; | ||
110 | |||
106 | IIC0: i2c@00000000 { | 111 | IIC0: i2c@00000000 { |
107 | compatible = "ibm,iic-currituck", "ibm,iic"; | 112 | compatible = "ibm,iic-currituck", "ibm,iic"; |
108 | reg = <0x0 0x00000014>; | 113 | reg = <0x0 0x00000014>; |
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi new file mode 100644 index 000000000000..9cffccf4e07e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi | |||
@@ -0,0 +1,156 @@ | |||
1 | /* T4240 Interlaken LAC Portal device tree stub with 24 portals. | ||
2 | * | ||
3 | * Copyright 2012 Freescale Semiconductor Inc. | ||
4 | * | ||
5 | * Redistribution and use in source and binary forms, with or without | ||
6 | * modification, are permitted provided that the following conditions are met: | ||
7 | * * Redistributions of source code must retain the above copyright | ||
8 | * notice, this list of conditions and the following disclaimer. | ||
9 | * * Redistributions in binary form must reproduce the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer in the | ||
11 | * documentation and/or other materials provided with the distribution. | ||
12 | * * Neither the name of Freescale Semiconductor nor the | ||
13 | * names of its contributors may be used to endorse or promote products | ||
14 | * derived from this software without specific prior written permission. | ||
15 | * | ||
16 | * | ||
17 | * ALTERNATIVELY, this software may be distributed under the terms of the | ||
18 | * GNU General Public License ("GPL") as published by the Free Software | ||
19 | * Foundation, either version 2 of that License or (at your option) any | ||
20 | * later version. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY | ||
23 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
24 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY | ||
26 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
27 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
29 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
31 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
32 | */ | ||
33 | |||
34 | #address-cells = <0x1>; | ||
35 | #size-cells = <0x1>; | ||
36 | compatible = "fsl,interlaken-lac-portals"; | ||
37 | |||
38 | lportal0: lac-portal@0 { | ||
39 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
40 | reg = <0x0 0x1000>; | ||
41 | }; | ||
42 | |||
43 | lportal1: lac-portal@1000 { | ||
44 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
45 | reg = <0x1000 0x1000>; | ||
46 | }; | ||
47 | |||
48 | lportal2: lac-portal@2000 { | ||
49 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
50 | reg = <0x2000 0x1000>; | ||
51 | }; | ||
52 | |||
53 | lportal3: lac-portal@3000 { | ||
54 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
55 | reg = <0x3000 0x1000>; | ||
56 | }; | ||
57 | |||
58 | lportal4: lac-portal@4000 { | ||
59 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
60 | reg = <0x4000 0x1000>; | ||
61 | }; | ||
62 | |||
63 | lportal5: lac-portal@5000 { | ||
64 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
65 | reg = <0x5000 0x1000>; | ||
66 | }; | ||
67 | |||
68 | lportal6: lac-portal@6000 { | ||
69 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
70 | reg = <0x6000 0x1000>; | ||
71 | }; | ||
72 | |||
73 | lportal7: lac-portal@7000 { | ||
74 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
75 | reg = <0x7000 0x1000>; | ||
76 | }; | ||
77 | |||
78 | lportal8: lac-portal@8000 { | ||
79 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
80 | reg = <0x8000 0x1000>; | ||
81 | }; | ||
82 | |||
83 | lportal9: lac-portal@9000 { | ||
84 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
85 | reg = <0x9000 0x1000>; | ||
86 | }; | ||
87 | |||
88 | lportal10: lac-portal@A000 { | ||
89 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
90 | reg = <0xA000 0x1000>; | ||
91 | }; | ||
92 | |||
93 | lportal11: lac-portal@B000 { | ||
94 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
95 | reg = <0xB000 0x1000>; | ||
96 | }; | ||
97 | |||
98 | lportal12: lac-portal@C000 { | ||
99 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
100 | reg = <0xC000 0x1000>; | ||
101 | }; | ||
102 | |||
103 | lportal13: lac-portal@D000 { | ||
104 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
105 | reg = <0xD000 0x1000>; | ||
106 | }; | ||
107 | |||
108 | lportal14: lac-portal@E000 { | ||
109 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
110 | reg = <0xE000 0x1000>; | ||
111 | }; | ||
112 | |||
113 | lportal15: lac-portal@F000 { | ||
114 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
115 | reg = <0xF000 0x1000>; | ||
116 | }; | ||
117 | |||
118 | lportal16: lac-portal@10000 { | ||
119 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
120 | reg = <0x10000 0x1000>; | ||
121 | }; | ||
122 | |||
123 | lportal17: lac-portal@11000 { | ||
124 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
125 | reg = <0x11000 0x1000>; | ||
126 | }; | ||
127 | |||
128 | lportal18: lac-portal@12000 { | ||
129 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
130 | reg = <0x12000 0x1000>; | ||
131 | }; | ||
132 | |||
133 | lportal19: lac-portal@13000 { | ||
134 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
135 | reg = <0x13000 0x1000>; | ||
136 | }; | ||
137 | |||
138 | lportal20: lac-portal@14000 { | ||
139 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
140 | reg = <0x14000 0x1000>; | ||
141 | }; | ||
142 | |||
143 | lportal21: lac-portal@15000 { | ||
144 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
145 | reg = <0x15000 0x1000>; | ||
146 | }; | ||
147 | |||
148 | lportal22: lac-portal@16000 { | ||
149 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
150 | reg = <0x16000 0x1000>; | ||
151 | }; | ||
152 | |||
153 | lportal23: lac-portal@17000 { | ||
154 | compatible = "fsl,interlaken-lac-portal-v1.0"; | ||
155 | reg = <0x17000 0x1000>; | ||
156 | }; | ||
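
Since this stub contains only bare properties and portal child nodes, it is
meant to be pulled into a container node that supplies the address
translation. A hypothetical SoC-level usage sketch (the ranges value is
illustrative, matching the binding example above):

	lac-portals {
		ranges = <0x0 0xf 0xf4400000 0x20000>;
		/include/ "interlaken-lac-portals.dtsi"
	};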
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi new file mode 100644 index 000000000000..e8208720ac0e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * T4 Interlaken Look-aside Controller (LAC) device tree stub | ||
3 | * | ||
4 | * Copyright 2012 Freescale Semiconductor Inc. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions are met: | ||
8 | * * Redistributions of source code must retain the above copyright | ||
9 | * notice, this list of conditions and the following disclaimer. | ||
10 | * * Redistributions in binary form must reproduce the above copyright | ||
11 | * notice, this list of conditions and the following disclaimer in the | ||
12 | * documentation and/or other materials provided with the distribution. | ||
13 | * * Neither the name of Freescale Semiconductor nor the | ||
14 | * names of its contributors may be used to endorse or promote products | ||
15 | * derived from this software without specific prior written permission. | ||
16 | * | ||
17 | * | ||
18 | * ALTERNATIVELY, this software may be distributed under the terms of the | ||
19 | * GNU General Public License ("GPL") as published by the Free Software | ||
20 | * Foundation, either version 2 of that License or (at your option) any | ||
21 | * later version. | ||
22 | * | ||
23 | * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY | ||
24 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
25 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
26 | * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY | ||
27 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
28 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
29 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
30 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | lac: lac@229000 { | ||
36 | compatible = "fsl,interlaken-lac"; | ||
37 | reg = <0x229000 0x1000>; | ||
38 | interrupts = <16 2 1 18>; | ||
39 | }; | ||
40 | |||
41 | lac-hv@228000 { | ||
42 | compatible = "fsl,interlaken-lac-hv"; | ||
43 | reg = <0x228000 0x1000>; | ||
44 | fsl,non-hv-node = <&lac>; | ||
45 | }; | ||
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 2a84fd7f631c..671a8f960afa 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig | |||
@@ -423,6 +423,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y | |||
423 | CONFIG_DEBUG_STACKOVERFLOW=y | 423 | CONFIG_DEBUG_STACKOVERFLOW=y |
424 | CONFIG_DEBUG_STACK_USAGE=y | 424 | CONFIG_DEBUG_STACK_USAGE=y |
425 | CONFIG_BOOTX_TEXT=y | 425 | CONFIG_BOOTX_TEXT=y |
426 | CONFIG_PPC_EARLY_DEBUG=y | ||
427 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
426 | CONFIG_KEYS=y | 428 | CONFIG_KEYS=y |
427 | CONFIG_KEYS_DEBUG_PROC_KEYS=y | 429 | CONFIG_KEYS_DEBUG_PROC_KEYS=y |
428 | CONFIG_SECURITY=y | 430 | CONFIG_SECURITY=y |
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 07b7f2af2dca..1ea22fc24ea8 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig | |||
@@ -284,6 +284,8 @@ CONFIG_DEBUG_MUTEXES=y | |||
284 | CONFIG_LATENCYTOP=y | 284 | CONFIG_LATENCYTOP=y |
285 | CONFIG_SYSCTL_SYSCALL_CHECK=y | 285 | CONFIG_SYSCTL_SYSCALL_CHECK=y |
286 | CONFIG_BOOTX_TEXT=y | 286 | CONFIG_BOOTX_TEXT=y |
287 | CONFIG_PPC_EARLY_DEBUG=y | ||
288 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
287 | CONFIG_CRYPTO_NULL=m | 289 | CONFIG_CRYPTO_NULL=m |
288 | CONFIG_CRYPTO_TEST=m | 290 | CONFIG_CRYPTO_TEST=m |
289 | CONFIG_CRYPTO_ECB=m | 291 | CONFIG_CRYPTO_ECB=m |
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 02ac96b679b8..2a5afac29861 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig | |||
@@ -138,6 +138,8 @@ CONFIG_DEBUG_STACK_USAGE=y | |||
138 | CONFIG_XMON=y | 138 | CONFIG_XMON=y |
139 | CONFIG_XMON_DEFAULT=y | 139 | CONFIG_XMON_DEFAULT=y |
140 | CONFIG_BOOTX_TEXT=y | 140 | CONFIG_BOOTX_TEXT=y |
141 | CONFIG_PPC_EARLY_DEBUG=y | ||
142 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
141 | CONFIG_CRYPTO_ECB=m | 143 | CONFIG_CRYPTO_ECB=m |
142 | CONFIG_CRYPTO_PCBC=m | 144 | CONFIG_CRYPTO_PCBC=m |
143 | # CONFIG_CRYPTO_ANSI_CPRNG is not set | 145 | # CONFIG_CRYPTO_ANSI_CPRNG is not set |
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index 0d0d981442fd..ee853a1b1b2c 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig | |||
@@ -1,7 +1,6 @@ | |||
1 | CONFIG_EXPERIMENTAL=y | ||
2 | # CONFIG_SWAP is not set | 1 | # CONFIG_SWAP is not set |
3 | CONFIG_SYSVIPC=y | 2 | CONFIG_SYSVIPC=y |
4 | CONFIG_SPARSE_IRQ=y | 3 | CONFIG_NO_HZ=y |
5 | CONFIG_LOG_BUF_SHIFT=16 | 4 | CONFIG_LOG_BUF_SHIFT=16 |
6 | CONFIG_BLK_DEV_INITRD=y | 5 | CONFIG_BLK_DEV_INITRD=y |
7 | # CONFIG_COMPAT_BRK is not set | 6 | # CONFIG_COMPAT_BRK is not set |
@@ -9,6 +8,7 @@ CONFIG_SLAB=y | |||
9 | CONFIG_MODULES=y | 8 | CONFIG_MODULES=y |
10 | CONFIG_MODULE_UNLOAD=y | 9 | CONFIG_MODULE_UNLOAD=y |
11 | # CONFIG_BLK_DEV_BSG is not set | 10 | # CONFIG_BLK_DEV_BSG is not set |
11 | CONFIG_PARTITION_ADVANCED=y | ||
12 | # CONFIG_IOSCHED_CFQ is not set | 12 | # CONFIG_IOSCHED_CFQ is not set |
13 | # CONFIG_PPC_CHRP is not set | 13 | # CONFIG_PPC_CHRP is not set |
14 | CONFIG_PPC_MPC512x=y | 14 | CONFIG_PPC_MPC512x=y |
@@ -16,9 +16,7 @@ CONFIG_MPC5121_ADS=y | |||
16 | CONFIG_MPC512x_GENERIC=y | 16 | CONFIG_MPC512x_GENERIC=y |
17 | CONFIG_PDM360NG=y | 17 | CONFIG_PDM360NG=y |
18 | # CONFIG_PPC_PMAC is not set | 18 | # CONFIG_PPC_PMAC is not set |
19 | CONFIG_NO_HZ=y | ||
20 | CONFIG_HZ_1000=y | 19 | CONFIG_HZ_1000=y |
21 | # CONFIG_MIGRATION is not set | ||
22 | # CONFIG_SECCOMP is not set | 20 | # CONFIG_SECCOMP is not set |
23 | # CONFIG_PCI is not set | 21 | # CONFIG_PCI is not set |
24 | CONFIG_NET=y | 22 | CONFIG_NET=y |
@@ -33,8 +31,6 @@ CONFIG_IP_PNP=y | |||
33 | # CONFIG_INET_DIAG is not set | 31 | # CONFIG_INET_DIAG is not set |
34 | # CONFIG_IPV6 is not set | 32 | # CONFIG_IPV6 is not set |
35 | CONFIG_CAN=y | 33 | CONFIG_CAN=y |
36 | CONFIG_CAN_RAW=y | ||
37 | CONFIG_CAN_BCM=y | ||
38 | CONFIG_CAN_VCAN=y | 34 | CONFIG_CAN_VCAN=y |
39 | CONFIG_CAN_MSCAN=y | 35 | CONFIG_CAN_MSCAN=y |
40 | CONFIG_CAN_DEBUG_DEVICES=y | 36 | CONFIG_CAN_DEBUG_DEVICES=y |
@@ -46,7 +42,6 @@ CONFIG_DEVTMPFS_MOUNT=y | |||
46 | # CONFIG_FIRMWARE_IN_KERNEL is not set | 42 | # CONFIG_FIRMWARE_IN_KERNEL is not set |
47 | CONFIG_MTD=y | 43 | CONFIG_MTD=y |
48 | CONFIG_MTD_CMDLINE_PARTS=y | 44 | CONFIG_MTD_CMDLINE_PARTS=y |
49 | CONFIG_MTD_CHAR=y | ||
50 | CONFIG_MTD_BLOCK=y | 45 | CONFIG_MTD_BLOCK=y |
51 | CONFIG_MTD_CFI=y | 46 | CONFIG_MTD_CFI=y |
52 | CONFIG_MTD_CFI_AMDSTD=y | 47 | CONFIG_MTD_CFI_AMDSTD=y |
@@ -60,7 +55,6 @@ CONFIG_BLK_DEV_RAM=y | |||
60 | CONFIG_BLK_DEV_RAM_COUNT=1 | 55 | CONFIG_BLK_DEV_RAM_COUNT=1 |
61 | CONFIG_BLK_DEV_RAM_SIZE=8192 | 56 | CONFIG_BLK_DEV_RAM_SIZE=8192 |
62 | CONFIG_BLK_DEV_XIP=y | 57 | CONFIG_BLK_DEV_XIP=y |
63 | CONFIG_MISC_DEVICES=y | ||
64 | CONFIG_EEPROM_AT24=y | 58 | CONFIG_EEPROM_AT24=y |
65 | CONFIG_EEPROM_AT25=y | 59 | CONFIG_EEPROM_AT25=y |
66 | CONFIG_SCSI=y | 60 | CONFIG_SCSI=y |
@@ -68,6 +62,7 @@ CONFIG_SCSI=y | |||
68 | CONFIG_BLK_DEV_SD=y | 62 | CONFIG_BLK_DEV_SD=y |
69 | CONFIG_CHR_DEV_SG=y | 63 | CONFIG_CHR_DEV_SG=y |
70 | CONFIG_NETDEVICES=y | 64 | CONFIG_NETDEVICES=y |
65 | CONFIG_FS_ENET=y | ||
71 | CONFIG_MARVELL_PHY=y | 66 | CONFIG_MARVELL_PHY=y |
72 | CONFIG_DAVICOM_PHY=y | 67 | CONFIG_DAVICOM_PHY=y |
73 | CONFIG_QSEMI_PHY=y | 68 | CONFIG_QSEMI_PHY=y |
@@ -83,10 +78,6 @@ CONFIG_STE10XP=y | |||
83 | CONFIG_LSI_ET1011C_PHY=y | 78 | CONFIG_LSI_ET1011C_PHY=y |
84 | CONFIG_FIXED_PHY=y | 79 | CONFIG_FIXED_PHY=y |
85 | CONFIG_MDIO_BITBANG=y | 80 | CONFIG_MDIO_BITBANG=y |
86 | CONFIG_NET_ETHERNET=y | ||
87 | CONFIG_FS_ENET=y | ||
88 | # CONFIG_NETDEV_1000 is not set | ||
89 | # CONFIG_NETDEV_10000 is not set | ||
90 | # CONFIG_WLAN is not set | 81 | # CONFIG_WLAN is not set |
91 | # CONFIG_INPUT_MOUSEDEV_PSAUX is not set | 82 | # CONFIG_INPUT_MOUSEDEV_PSAUX is not set |
92 | CONFIG_INPUT_EVDEV=y | 83 | CONFIG_INPUT_EVDEV=y |
@@ -106,14 +97,18 @@ CONFIG_GPIO_SYSFS=y | |||
106 | CONFIG_GPIO_MPC8XXX=y | 97 | CONFIG_GPIO_MPC8XXX=y |
107 | # CONFIG_HWMON is not set | 98 | # CONFIG_HWMON is not set |
108 | CONFIG_MEDIA_SUPPORT=y | 99 | CONFIG_MEDIA_SUPPORT=y |
109 | CONFIG_VIDEO_DEV=y | ||
110 | CONFIG_VIDEO_ADV_DEBUG=y | 100 | CONFIG_VIDEO_ADV_DEBUG=y |
111 | # CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set | ||
112 | CONFIG_VIDEO_SAA711X=y | ||
113 | CONFIG_FB=y | 101 | CONFIG_FB=y |
114 | CONFIG_FB_FSL_DIU=y | 102 | CONFIG_FB_FSL_DIU=y |
115 | # CONFIG_VGA_CONSOLE is not set | 103 | # CONFIG_VGA_CONSOLE is not set |
116 | CONFIG_FRAMEBUFFER_CONSOLE=y | 104 | CONFIG_FRAMEBUFFER_CONSOLE=y |
105 | CONFIG_USB=y | ||
106 | CONFIG_USB_EHCI_HCD=y | ||
107 | CONFIG_USB_EHCI_FSL=y | ||
108 | # CONFIG_USB_EHCI_HCD_PPC_OF is not set | ||
109 | CONFIG_USB_STORAGE=y | ||
110 | CONFIG_USB_GADGET=y | ||
111 | CONFIG_USB_FSL_USB2=y | ||
117 | CONFIG_RTC_CLASS=y | 112 | CONFIG_RTC_CLASS=y |
118 | CONFIG_RTC_DRV_M41T80=y | 113 | CONFIG_RTC_DRV_M41T80=y |
119 | CONFIG_RTC_DRV_MPC5121=y | 114 | CONFIG_RTC_DRV_MPC5121=y |
@@ -129,9 +124,7 @@ CONFIG_TMPFS=y | |||
129 | CONFIG_JFFS2_FS=y | 124 | CONFIG_JFFS2_FS=y |
130 | CONFIG_UBIFS_FS=y | 125 | CONFIG_UBIFS_FS=y |
131 | CONFIG_NFS_FS=y | 126 | CONFIG_NFS_FS=y |
132 | CONFIG_NFS_V3=y | ||
133 | CONFIG_ROOT_NFS=y | 127 | CONFIG_ROOT_NFS=y |
134 | CONFIG_PARTITION_ADVANCED=y | ||
135 | CONFIG_NLS_CODEPAGE_437=y | 128 | CONFIG_NLS_CODEPAGE_437=y |
136 | CONFIG_NLS_ISO8859_1=y | 129 | CONFIG_NLS_ISO8859_1=y |
137 | # CONFIG_ENABLE_WARN_DEPRECATED is not set | 130 | # CONFIG_ENABLE_WARN_DEPRECATED is not set |
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 165e6b32baef..152fa05b15e4 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig | |||
@@ -131,6 +131,7 @@ CONFIG_DUMMY=y | |||
131 | CONFIG_FS_ENET=y | 131 | CONFIG_FS_ENET=y |
132 | CONFIG_UCC_GETH=y | 132 | CONFIG_UCC_GETH=y |
133 | CONFIG_GIANFAR=y | 133 | CONFIG_GIANFAR=y |
134 | CONFIG_E1000E=y | ||
134 | CONFIG_MARVELL_PHY=y | 135 | CONFIG_MARVELL_PHY=y |
135 | CONFIG_DAVICOM_PHY=y | 136 | CONFIG_DAVICOM_PHY=y |
136 | CONFIG_CICADA_PHY=y | 137 | CONFIG_CICADA_PHY=y |
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 29767a8dfea5..a73626b09051 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig | |||
@@ -350,6 +350,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y | |||
350 | CONFIG_XMON=y | 350 | CONFIG_XMON=y |
351 | CONFIG_XMON_DEFAULT=y | 351 | CONFIG_XMON_DEFAULT=y |
352 | CONFIG_BOOTX_TEXT=y | 352 | CONFIG_BOOTX_TEXT=y |
353 | CONFIG_PPC_EARLY_DEBUG=y | ||
354 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
353 | CONFIG_CRYPTO_NULL=m | 355 | CONFIG_CRYPTO_NULL=m |
354 | CONFIG_CRYPTO_PCBC=m | 356 | CONFIG_CRYPTO_PCBC=m |
355 | CONFIG_CRYPTO_MD4=m | 357 | CONFIG_CRYPTO_MD4=m |
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index aef3f71de5ad..c86fcb92358e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig | |||
@@ -398,6 +398,8 @@ CONFIG_FTR_FIXUP_SELFTEST=y | |||
398 | CONFIG_MSI_BITMAP_SELFTEST=y | 398 | CONFIG_MSI_BITMAP_SELFTEST=y |
399 | CONFIG_XMON=y | 399 | CONFIG_XMON=y |
400 | CONFIG_BOOTX_TEXT=y | 400 | CONFIG_BOOTX_TEXT=y |
401 | CONFIG_PPC_EARLY_DEBUG=y | ||
402 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
401 | CONFIG_CRYPTO_NULL=m | 403 | CONFIG_CRYPTO_NULL=m |
402 | CONFIG_CRYPTO_TEST=m | 404 | CONFIG_CRYPTO_TEST=m |
403 | CONFIG_CRYPTO_PCBC=m | 405 | CONFIG_CRYPTO_PCBC=m |
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index be1cb6ea3a36..20ebfaf7234b 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig | |||
@@ -1264,6 +1264,8 @@ CONFIG_DEBUG_STACKOVERFLOW=y | |||
1264 | CONFIG_DEBUG_STACK_USAGE=y | 1264 | CONFIG_DEBUG_STACK_USAGE=y |
1265 | CONFIG_XMON=y | 1265 | CONFIG_XMON=y |
1266 | CONFIG_BOOTX_TEXT=y | 1266 | CONFIG_BOOTX_TEXT=y |
1267 | CONFIG_PPC_EARLY_DEBUG=y | ||
1268 | CONFIG_PPC_EARLY_DEBUG_BOOTX=y | ||
1267 | CONFIG_KEYS=y | 1269 | CONFIG_KEYS=y |
1268 | CONFIG_KEYS_DEBUG_PROC_KEYS=y | 1270 | CONFIG_KEYS_DEBUG_PROC_KEYS=y |
1269 | CONFIG_SECURITY=y | 1271 | CONFIG_SECURITY=y |
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index c4dfbaf8b192..bea8587c3af5 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig | |||
@@ -296,6 +296,7 @@ CONFIG_SQUASHFS=m | |||
296 | CONFIG_SQUASHFS_XATTR=y | 296 | CONFIG_SQUASHFS_XATTR=y |
297 | CONFIG_SQUASHFS_LZO=y | 297 | CONFIG_SQUASHFS_LZO=y |
298 | CONFIG_SQUASHFS_XZ=y | 298 | CONFIG_SQUASHFS_XZ=y |
299 | CONFIG_PSTORE=y | ||
299 | CONFIG_NFS_FS=y | 300 | CONFIG_NFS_FS=y |
300 | CONFIG_NFS_V3_ACL=y | 301 | CONFIG_NFS_V3_ACL=y |
301 | CONFIG_NFS_V4=y | 302 | CONFIG_NFS_V4=y |
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a80e32b46c11..09a8743143f3 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/list.h> | 25 | #include <linux/list.h> |
26 | #include <linux/string.h> | 26 | #include <linux/string.h> |
27 | #include <linux/time.h> | ||
27 | 28 | ||
28 | struct pci_dev; | 29 | struct pci_dev; |
29 | struct pci_bus; | 30 | struct pci_bus; |
@@ -52,6 +53,7 @@ struct device_node; | |||
52 | 53 | ||
53 | #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ | 54 | #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ |
54 | #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ | 55 | #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ |
56 | #define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ | ||
55 | 57 | ||
56 | struct eeh_pe { | 58 | struct eeh_pe { |
57 | int type; /* PE type: PHB/Bus/Device */ | 59 | int type; /* PE type: PHB/Bus/Device */ |
@@ -59,8 +61,10 @@ struct eeh_pe { | |||
59 | int config_addr; /* Traditional PCI address */ | 61 | int config_addr; /* Traditional PCI address */ |
60 | int addr; /* PE configuration address */ | 62 | int addr; /* PE configuration address */ |
61 | struct pci_controller *phb; /* Associated PHB */ | 63 | struct pci_controller *phb; /* Associated PHB */ |
64 | struct pci_bus *bus; /* Top PCI bus for bus PE */ | ||
62 | int check_count; /* Times of ignored error */ | 65 | int check_count; /* Times of ignored error */ |
63 | int freeze_count; /* Times of froze up */ | 66 | int freeze_count; /* Times of froze up */ |
67 | struct timeval tstamp; /* Time on first-time freeze */ | ||
64 | int false_positives; /* Times of reported #ff's */ | 68 | int false_positives; /* Times of reported #ff's */ |
65 | struct eeh_pe *parent; /* Parent PE */ | 69 | struct eeh_pe *parent; /* Parent PE */ |
66 | struct list_head child_list; /* Link PE to the child list */ | 70 | struct list_head child_list; /* Link PE to the child list */ |
@@ -95,12 +99,12 @@ struct eeh_dev { | |||
95 | 99 | ||
96 | static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) | 100 | static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) |
97 | { | 101 | { |
98 | return edev->dn; | 102 | return edev ? edev->dn : NULL; |
99 | } | 103 | } |
100 | 104 | ||
101 | static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) | 105 | static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) |
102 | { | 106 | { |
103 | return edev->pdev; | 107 | return edev ? edev->pdev : NULL; |
104 | } | 108 | } |
105 | 109 | ||
106 | /* | 110 | /* |
@@ -130,8 +134,9 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) | |||
130 | struct eeh_ops { | 134 | struct eeh_ops { |
131 | char *name; | 135 | char *name; |
132 | int (*init)(void); | 136 | int (*init)(void); |
137 | int (*post_init)(void); | ||
133 | void* (*of_probe)(struct device_node *dn, void *flag); | 138 | void* (*of_probe)(struct device_node *dn, void *flag); |
134 | void* (*dev_probe)(struct pci_dev *dev, void *flag); | 139 | int (*dev_probe)(struct pci_dev *dev, void *flag); |
135 | int (*set_option)(struct eeh_pe *pe, int option); | 140 | int (*set_option)(struct eeh_pe *pe, int option); |
136 | int (*get_pe_addr)(struct eeh_pe *pe); | 141 | int (*get_pe_addr)(struct eeh_pe *pe); |
137 | int (*get_state)(struct eeh_pe *pe, int *state); | 142 | int (*get_state)(struct eeh_pe *pe, int *state); |
@@ -141,11 +146,12 @@ struct eeh_ops { | |||
141 | int (*configure_bridge)(struct eeh_pe *pe); | 146 | int (*configure_bridge)(struct eeh_pe *pe); |
142 | int (*read_config)(struct device_node *dn, int where, int size, u32 *val); | 147 | int (*read_config)(struct device_node *dn, int where, int size, u32 *val); |
143 | int (*write_config)(struct device_node *dn, int where, int size, u32 val); | 148 | int (*write_config)(struct device_node *dn, int where, int size, u32 val); |
149 | int (*next_error)(struct eeh_pe **pe); | ||
144 | }; | 150 | }; |
145 | 151 | ||
146 | extern struct eeh_ops *eeh_ops; | 152 | extern struct eeh_ops *eeh_ops; |
147 | extern int eeh_subsystem_enabled; | 153 | extern int eeh_subsystem_enabled; |
148 | extern struct mutex eeh_mutex; | 154 | extern raw_spinlock_t confirm_error_lock; |
149 | extern int eeh_probe_mode; | 155 | extern int eeh_probe_mode; |
150 | 156 | ||
151 | #define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ | 157 | #define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ |
@@ -166,14 +172,14 @@ static inline int eeh_probe_mode_dev(void) | |||
166 | return (eeh_probe_mode == EEH_PROBE_MODE_DEV); | 172 | return (eeh_probe_mode == EEH_PROBE_MODE_DEV); |
167 | } | 173 | } |
168 | 174 | ||
169 | static inline void eeh_lock(void) | 175 | static inline void eeh_serialize_lock(unsigned long *flags) |
170 | { | 176 | { |
171 | mutex_lock(&eeh_mutex); | 177 | raw_spin_lock_irqsave(&confirm_error_lock, *flags); |
172 | } | 178 | } |
173 | 179 | ||
174 | static inline void eeh_unlock(void) | 180 | static inline void eeh_serialize_unlock(unsigned long flags) |
175 | { | 181 | { |
176 | mutex_unlock(&eeh_mutex); | 182 | raw_spin_unlock_irqrestore(&confirm_error_lock, flags); |
177 | } | 183 | } |
178 | 184 | ||
179 | /* | 185 | /* |
@@ -184,8 +190,11 @@ static inline void eeh_unlock(void) | |||
184 | 190 | ||
185 | typedef void *(*eeh_traverse_func)(void *data, void *flag); | 191 | typedef void *(*eeh_traverse_func)(void *data, void *flag); |
186 | int eeh_phb_pe_create(struct pci_controller *phb); | 192 | int eeh_phb_pe_create(struct pci_controller *phb); |
193 | struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); | ||
194 | struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); | ||
187 | int eeh_add_to_parent_pe(struct eeh_dev *edev); | 195 | int eeh_add_to_parent_pe(struct eeh_dev *edev); |
188 | int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); | 196 | int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); |
197 | void eeh_pe_update_time_stamp(struct eeh_pe *pe); | ||
189 | void *eeh_pe_dev_traverse(struct eeh_pe *root, | 198 | void *eeh_pe_dev_traverse(struct eeh_pe *root, |
190 | eeh_traverse_func fn, void *flag); | 199 | eeh_traverse_func fn, void *flag); |
191 | void eeh_pe_restore_bars(struct eeh_pe *pe); | 200 | void eeh_pe_restore_bars(struct eeh_pe *pe); |
@@ -193,12 +202,13 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); | |||
193 | 202 | ||
194 | void *eeh_dev_init(struct device_node *dn, void *data); | 203 | void *eeh_dev_init(struct device_node *dn, void *data); |
195 | void eeh_dev_phb_init_dynamic(struct pci_controller *phb); | 204 | void eeh_dev_phb_init_dynamic(struct pci_controller *phb); |
205 | int eeh_init(void); | ||
196 | int __init eeh_ops_register(struct eeh_ops *ops); | 206 | int __init eeh_ops_register(struct eeh_ops *ops); |
197 | int __exit eeh_ops_unregister(const char *name); | 207 | int __exit eeh_ops_unregister(const char *name); |
198 | unsigned long eeh_check_failure(const volatile void __iomem *token, | 208 | unsigned long eeh_check_failure(const volatile void __iomem *token, |
199 | unsigned long val); | 209 | unsigned long val); |
200 | int eeh_dev_check_failure(struct eeh_dev *edev); | 210 | int eeh_dev_check_failure(struct eeh_dev *edev); |
201 | void __init eeh_addr_cache_build(void); | 211 | void eeh_addr_cache_build(void); |
202 | void eeh_add_device_tree_early(struct device_node *); | 212 | void eeh_add_device_tree_early(struct device_node *); |
203 | void eeh_add_device_tree_late(struct pci_bus *); | 213 | void eeh_add_device_tree_late(struct pci_bus *); |
204 | void eeh_add_sysfs_files(struct pci_bus *); | 214 | void eeh_add_sysfs_files(struct pci_bus *); |
@@ -221,6 +231,11 @@ void eeh_remove_bus_device(struct pci_dev *, int); | |||
221 | 231 | ||
222 | #else /* !CONFIG_EEH */ | 232 | #else /* !CONFIG_EEH */ |
223 | 233 | ||
234 | static inline int eeh_init(void) | ||
235 | { | ||
236 | return 0; | ||
237 | } | ||
238 | |||
224 | static inline void *eeh_dev_init(struct device_node *dn, void *data) | 239 | static inline void *eeh_dev_init(struct device_node *dn, void *data) |
225 | { | 240 | { |
226 | return NULL; | 241 | return NULL; |
@@ -245,9 +260,6 @@ static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } | |||
245 | 260 | ||
246 | static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } | 261 | static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } |
247 | 262 | ||
248 | static inline void eeh_lock(void) { } | ||
249 | static inline void eeh_unlock(void) { } | ||
250 | |||
251 | #define EEH_POSSIBLE_ERROR(val, type) (0) | 263 | #define EEH_POSSIBLE_ERROR(val, type) (0) |
252 | #define EEH_IO_ERROR_VALUE(size) (-1UL) | 264 | #define EEH_IO_ERROR_VALUE(size) (-1UL) |
253 | #endif /* CONFIG_EEH */ | 265 | #endif /* CONFIG_EEH */ |
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index de67d830151b..89d5670b2eeb 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h | |||
@@ -31,7 +31,9 @@ struct eeh_event { | |||
31 | struct eeh_pe *pe; /* EEH PE */ | 31 | struct eeh_pe *pe; /* EEH PE */ |
32 | }; | 32 | }; |
33 | 33 | ||
34 | int eeh_event_init(void); | ||
34 | int eeh_send_failure_event(struct eeh_pe *pe); | 35 | int eeh_send_failure_event(struct eeh_pe *pe); |
36 | void eeh_remove_event(struct eeh_pe *pe); | ||
35 | void eeh_handle_event(struct eeh_pe *pe); | 37 | void eeh_handle_event(struct eeh_pe *pe); |
36 | 38 | ||
37 | #endif /* __KERNEL__ */ | 39 | #endif /* __KERNEL__ */ |
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 46793b58a761..07ca627e52c0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h | |||
@@ -358,12 +358,12 @@ label##_relon_pSeries: \ | |||
358 | /* No guest interrupts come through here */ \ | 358 | /* No guest interrupts come through here */ \ |
359 | SET_SCRATCH0(r13); /* save r13 */ \ | 359 | SET_SCRATCH0(r13); /* save r13 */ \ |
360 | EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ | 360 | EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ |
361 | EXC_STD, KVMTEST_PR, vec) | 361 | EXC_STD, NOTEST, vec) |
362 | 362 | ||
363 | #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ | 363 | #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ |
364 | .globl label##_relon_pSeries; \ | 364 | .globl label##_relon_pSeries; \ |
365 | label##_relon_pSeries: \ | 365 | label##_relon_pSeries: \ |
366 | EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ | 366 | EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ |
367 | EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) | 367 | EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) |
368 | 368 | ||
369 | #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ | 369 | #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ |
@@ -374,12 +374,12 @@ label##_relon_hv: \ | |||
374 | /* No guest interrupts come through here */ \ | 374 | /* No guest interrupts come through here */ \ |
375 | SET_SCRATCH0(r13); /* save r13 */ \ | 375 | SET_SCRATCH0(r13); /* save r13 */ \ |
376 | EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ | 376 | EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ |
377 | EXC_HV, KVMTEST, vec) | 377 | EXC_HV, NOTEST, vec) |
378 | 378 | ||
379 | #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ | 379 | #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ |
380 | .globl label##_relon_hv; \ | 380 | .globl label##_relon_hv; \ |
381 | label##_relon_hv: \ | 381 | label##_relon_hv: \ |
382 | EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ | 382 | EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ |
383 | EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) | 383 | EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) |
384 | 384 | ||
385 | /* This associates vector numbers with bits in paca->irq_happened */ | 385 | /* This associates vector numbers with bits in paca->irq_happened */ |
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index f2498c8e595d..d750336b171d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h | |||
@@ -191,8 +191,14 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, | |||
191 | unsigned long vmaddr) | 191 | unsigned long vmaddr) |
192 | { | 192 | { |
193 | } | 193 | } |
194 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
195 | 194 | ||
195 | #define hugepd_shift(x) 0 | ||
196 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, | ||
197 | unsigned pdshift) | ||
198 | { | ||
199 | return NULL; | ||
200 | } | ||
201 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
196 | 202 | ||
197 | /* | 203 | /* |
198 | * FSL Book3E platforms require special gpage handling - the gpages | 204 | * FSL Book3E platforms require special gpage handling - the gpages |
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index cbfe678e3dbe..c34656a8925e 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h | |||
@@ -76,6 +76,9 @@ struct iommu_table { | |||
76 | struct iommu_pool large_pool; | 76 | struct iommu_pool large_pool; |
77 | struct iommu_pool pools[IOMMU_NR_POOLS]; | 77 | struct iommu_pool pools[IOMMU_NR_POOLS]; |
78 | unsigned long *it_map; /* A simple allocation bitmap for now */ | 78 | unsigned long *it_map; /* A simple allocation bitmap for now */ |
79 | #ifdef CONFIG_IOMMU_API | ||
80 | struct iommu_group *it_group; | ||
81 | #endif | ||
79 | }; | 82 | }; |
80 | 83 | ||
81 | struct scatterlist; | 84 | struct scatterlist; |
@@ -98,6 +101,8 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); | |||
98 | */ | 101 | */ |
99 | extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, | 102 | extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, |
100 | int nid); | 103 | int nid); |
104 | extern void iommu_register_group(struct iommu_table *tbl, | ||
105 | int pci_domain_number, unsigned long pe_num); | ||
101 | 106 | ||
102 | extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, | 107 | extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, |
103 | struct scatterlist *sglist, int nelems, | 108 | struct scatterlist *sglist, int nelems, |
@@ -125,13 +130,6 @@ extern void iommu_init_early_pSeries(void); | |||
125 | extern void iommu_init_early_dart(void); | 130 | extern void iommu_init_early_dart(void); |
126 | extern void iommu_init_early_pasemi(void); | 131 | extern void iommu_init_early_pasemi(void); |
127 | 132 | ||
128 | #ifdef CONFIG_PCI | ||
129 | extern void pci_iommu_init(void); | ||
130 | extern void pci_direct_iommu_init(void); | ||
131 | #else | ||
132 | static inline void pci_iommu_init(void) { } | ||
133 | #endif | ||
134 | |||
135 | extern void alloc_dart_table(void); | 133 | extern void alloc_dart_table(void); |
136 | #if defined(CONFIG_PPC64) && defined(CONFIG_PM) | 134 | #if defined(CONFIG_PPC64) && defined(CONFIG_PM) |
137 | static inline void iommu_save(void) | 135 | static inline void iommu_save(void) |
@@ -147,5 +145,26 @@ static inline void iommu_restore(void) | |||
147 | } | 145 | } |
148 | #endif | 146 | #endif |
149 | 147 | ||
148 | /* The API to support IOMMU operations for VFIO */ | ||
149 | extern int iommu_tce_clear_param_check(struct iommu_table *tbl, | ||
150 | unsigned long ioba, unsigned long tce_value, | ||
151 | unsigned long npages); | ||
152 | extern int iommu_tce_put_param_check(struct iommu_table *tbl, | ||
153 | unsigned long ioba, unsigned long tce); | ||
154 | extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, | ||
155 | unsigned long hwaddr, enum dma_data_direction direction); | ||
156 | extern unsigned long iommu_clear_tce(struct iommu_table *tbl, | ||
157 | unsigned long entry); | ||
158 | extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, | ||
159 | unsigned long entry, unsigned long pages); | ||
160 | extern int iommu_put_tce_user_mode(struct iommu_table *tbl, | ||
161 | unsigned long entry, unsigned long tce); | ||
162 | |||
163 | extern void iommu_flush_tce(struct iommu_table *tbl); | ||
164 | extern int iommu_take_ownership(struct iommu_table *tbl); | ||
165 | extern void iommu_release_ownership(struct iommu_table *tbl); | ||
166 | |||
167 | extern enum dma_data_direction iommu_tce_direction(unsigned long tce); | ||
168 | |||
150 | #endif /* __KERNEL__ */ | 169 | #endif /* __KERNEL__ */ |
151 | #endif /* _ASM_IOMMU_H */ | 170 | #endif /* _ASM_IOMMU_H */ |
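The new exports are the hooks VFIO needs to move a TCE table between kernel and user control. A hedged sketch of the expected call order, assuming the usual 0-on-success return convention and an ioba given in bus-address bytes:

        unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;

        if (iommu_take_ownership(tbl))          /* tbl: assumed found via the group */
                return -EBUSY;                  /* kernel DMA still active */

        if (!iommu_tce_put_param_check(tbl, ioba, tce))
                iommu_put_tce_user_mode(tbl, entry, tce);

        /* teardown: unmap, drop page references, hand the table back */
        iommu_clear_tces_and_put_pages(tbl, entry, 1);
        iommu_release_ownership(tbl);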
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 9c1ff330c805..a1ecb14e4442 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -159,36 +159,46 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) | |||
159 | } | 159 | } |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * Lock and read a linux PTE. If it's present and writable, atomically | 162 | * If it's present and writable, atomically set dirty and referenced bits and |
163 | * set dirty and referenced bits and return the PTE, otherwise return 0. | 163 | * return the PTE, otherwise return 0. If we find a transparent hugepage |
164 | * that is marked splitting, we return 0. | ||
164 | */ | 165 | */ |
165 | static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) | 166 | static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, |
167 | unsigned int hugepage) | ||
166 | { | 168 | { |
167 | pte_t pte, tmp; | 169 | pte_t old_pte, new_pte = __pte(0); |
168 | 170 | ||
169 | /* wait until _PAGE_BUSY is clear then set it atomically */ | 171 | while (1) { |
170 | __asm__ __volatile__ ( | 172 | old_pte = pte_val(*ptep); |
171 | "1: ldarx %0,0,%3\n" | 173 | /* |
172 | " andi. %1,%0,%4\n" | 174 | * wait until _PAGE_BUSY is clear then set it atomically |
173 | " bne- 1b\n" | 175 | */ |
174 | " ori %1,%0,%4\n" | 176 | if (unlikely(old_pte & _PAGE_BUSY)) { |
175 | " stdcx. %1,0,%3\n" | 177 | cpu_relax(); |
176 | " bne- 1b" | 178 | continue; |
177 | : "=&r" (pte), "=&r" (tmp), "=m" (*p) | 179 | } |
178 | : "r" (p), "i" (_PAGE_BUSY) | 180 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
179 | : "cc"); | 181 | /* If hugepage and is trans splitting return None */ |
180 | 182 | if (unlikely(hugepage && | |
181 | if (pte_present(pte)) { | 183 | pmd_trans_splitting(pte_pmd(old_pte)))) |
182 | pte = pte_mkyoung(pte); | 184 | return __pte(0); |
183 | if (writing && pte_write(pte)) | 185 | #endif |
184 | pte = pte_mkdirty(pte); | 186 | /* If pte is not present return None */ |
185 | } | 187 | if (unlikely(!(old_pte & _PAGE_PRESENT))) |
188 | return __pte(0); | ||
186 | 189 | ||
187 | *p = pte; /* clears _PAGE_BUSY */ | 190 | new_pte = pte_mkyoung(old_pte); |
191 | if (writing && pte_write(old_pte)) | ||
192 | new_pte = pte_mkdirty(new_pte); | ||
188 | 193 | ||
189 | return pte; | 194 | if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte, |
195 | new_pte)) | ||
196 | break; | ||
197 | } | ||
198 | return new_pte; | ||
190 | } | 199 | } |
191 | 200 | ||
201 | |||
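The ldarx/stdcx. busy-wait becomes a C cmpxchg loop so the THP splitting test can sit inside the same retry discipline. A hedged caller sketch (the failure policy shown is an assumption, not taken from this patch):

        pte = kvmppc_read_update_linux_pte(ptep, writing, hugepage);
        if (!pte_val(pte))
                return H_TOO_HARD;      /* absent or splitting: punt to the host */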
192 | /* Return HPTE cache control bits corresponding to Linux pte bits */ | 202 | /* Return HPTE cache control bits corresponding to Linux pte bits */ |
193 | static inline unsigned long hpte_cache_bits(unsigned long pte_val) | 203 | static inline unsigned long hpte_cache_bits(unsigned long pte_val) |
194 | { | 204 | { |
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index b1e7f2af1016..9b12f88d4adb 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h | |||
@@ -66,7 +66,8 @@ struct lppaca { | |||
66 | 66 | ||
67 | u8 reserved6[48]; | 67 | u8 reserved6[48]; |
68 | u8 cede_latency_hint; | 68 | u8 cede_latency_hint; |
69 | u8 reserved7[7]; | 69 | u8 ebb_regs_in_use; |
70 | u8 reserved7[6]; | ||
70 | u8 dtl_enable_mask; /* Dispatch Trace Log mask */ | 71 | u8 dtl_enable_mask; /* Dispatch Trace Log mask */ |
71 | u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */ | 72 | u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */ |
72 | u8 fpregs_in_use; | 73 | u8 fpregs_in_use; |
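ebb_regs_in_use is carved out of reserved7, which shrinks from 7 to 6 bytes so every later field keeps its hypervisor-architected offset. A compile-time check along these lines would catch an accidental shift (a sketch, not part of the patch):

        /* cede_latency_hint(1) + ebb_regs_in_use(1) + reserved7[6] == 8 bytes */
        BUILD_BUG_ON(offsetof(struct lppaca, dtl_enable_mask) !=
                     offsetof(struct lppaca, cede_latency_hint) + 8);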
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 92386fc4e82a..8b480901165a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h | |||
@@ -36,13 +36,13 @@ struct machdep_calls { | |||
36 | #ifdef CONFIG_PPC64 | 36 | #ifdef CONFIG_PPC64 |
37 | void (*hpte_invalidate)(unsigned long slot, | 37 | void (*hpte_invalidate)(unsigned long slot, |
38 | unsigned long vpn, | 38 | unsigned long vpn, |
39 | int psize, int ssize, | 39 | int bpsize, int apsize, |
40 | int local); | 40 | int ssize, int local); |
41 | long (*hpte_updatepp)(unsigned long slot, | 41 | long (*hpte_updatepp)(unsigned long slot, |
42 | unsigned long newpp, | 42 | unsigned long newpp, |
43 | unsigned long vpn, | 43 | unsigned long vpn, |
44 | int psize, int ssize, | 44 | int bpsize, int apsize, |
45 | int local); | 45 | int ssize, int local); |
46 | void (*hpte_updateboltedpp)(unsigned long newpp, | 46 | void (*hpte_updateboltedpp)(unsigned long newpp, |
47 | unsigned long ea, | 47 | unsigned long ea, |
48 | int psize, int ssize); | 48 | int psize, int ssize); |
@@ -57,6 +57,9 @@ struct machdep_calls { | |||
57 | void (*hpte_removebolted)(unsigned long ea, | 57 | void (*hpte_removebolted)(unsigned long ea, |
58 | int psize, int ssize); | 58 | int psize, int ssize); |
59 | void (*flush_hash_range)(unsigned long number, int local); | 59 | void (*flush_hash_range)(unsigned long number, int local); |
60 | void (*hugepage_invalidate)(struct mm_struct *mm, | ||
61 | unsigned char *hpte_slot_array, | ||
62 | unsigned long addr, int psize); | ||
60 | 63 | ||
61 | /* special for kexec, to be called in real mode, linear mapping is | 64 | /* special for kexec, to be called in real mode, linear mapping is |
62 | * destroyed as well */ | 65 | * destroyed as well */ |
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2accc9611248..c4cf01197273 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h | |||
@@ -340,6 +340,20 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
340 | int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, | 340 | int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, |
341 | pte_t *ptep, unsigned long trap, int local, int ssize, | 341 | pte_t *ptep, unsigned long trap, int local, int ssize, |
342 | unsigned int shift, unsigned int mmu_psize); | 342 | unsigned int shift, unsigned int mmu_psize); |
343 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
344 | extern int __hash_page_thp(unsigned long ea, unsigned long access, | ||
345 | unsigned long vsid, pmd_t *pmdp, unsigned long trap, | ||
346 | int local, int ssize, unsigned int psize); | ||
347 | #else | ||
348 | static inline int __hash_page_thp(unsigned long ea, unsigned long access, | ||
349 | unsigned long vsid, pmd_t *pmdp, | ||
350 | unsigned long trap, int local, | ||
351 | int ssize, unsigned int psize) | ||
352 | { | ||
353 | BUG(); | ||
354 | return -1; | ||
355 | } | ||
356 | #endif | ||
343 | extern void hash_failure_debug(unsigned long ea, unsigned long access, | 357 | extern void hash_failure_debug(unsigned long ea, unsigned long access, |
344 | unsigned long vsid, unsigned long trap, | 358 | unsigned long vsid, unsigned long trap, |
345 | int ssize, int psize, int lpsize, | 359 | int ssize, int psize, int lpsize, |
diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h index 885c040d6194..8ae133eaf9fa 100644 --- a/arch/powerpc/include/asm/mpc5121.h +++ b/arch/powerpc/include/asm/mpc5121.h | |||
@@ -68,6 +68,5 @@ struct mpc512x_lpc { | |||
68 | }; | 68 | }; |
69 | 69 | ||
70 | int mpc512x_cs_config(unsigned int cs, u32 val); | 70 | int mpc512x_cs_config(unsigned int cs, u32 val); |
71 | int __init mpc5121_clk_init(void); | ||
72 | 71 | ||
73 | #endif /* __ASM_POWERPC_MPC5121_H__ */ | 72 | #endif /* __ASM_POWERPC_MPC5121_H__ */ |
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index c0f9ef90f0b8..4a1ac9fbf186 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h | |||
@@ -339,6 +339,8 @@ struct mpic | |||
339 | #endif | 339 | #endif |
340 | }; | 340 | }; |
341 | 341 | ||
342 | extern struct bus_type mpic_subsys; | ||
343 | |||
342 | /* | 344 | /* |
343 | * MPIC flags (passed to mpic_alloc) | 345 | * MPIC flags (passed to mpic_alloc) |
344 | * | 346 | * |
@@ -393,6 +395,9 @@ struct mpic | |||
393 | #define MPIC_REGSET_STANDARD MPIC_REGSET(0) /* Original MPIC */ | 395 | #define MPIC_REGSET_STANDARD MPIC_REGSET(0) /* Original MPIC */ |
394 | #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ | 396 | #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ |
395 | 397 | ||
398 | /* Get the version of the primary MPIC */ | ||
399 | extern u32 fsl_mpic_primary_get_version(void); | ||
400 | |||
396 | /* Allocate the controller structure and setup the linux irq descs | 401 | /* Allocate the controller structure and setup the linux irq descs |
397 | * for the range of interrupts passed in. No HW initialization is | 402 | * for the range of interrupts passed in. No HW initialization is |
398 | * actually performed. | 403 | * actually performed. |
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h new file mode 100644 index 000000000000..0e23cd4ac8aa --- /dev/null +++ b/arch/powerpc/include/asm/mpic_timer.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * arch/powerpc/include/asm/mpic_timer.h | ||
3 | * | ||
4 | * Header file for MPIC Global Timer | ||
5 | * | ||
6 | * Copyright 2013 Freescale Semiconductor, Inc. | ||
7 | * | ||
8 | * Author: Wang Dongsheng <Dongsheng.Wang@freescale.com> | ||
9 | * Li Yang <leoli@freescale.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the | ||
13 | * Free Software Foundation; either version 2 of the License, or (at your | ||
14 | * option) any later version. | ||
15 | */ | ||
16 | |||
17 | #ifndef __MPIC_TIMER__ | ||
18 | #define __MPIC_TIMER__ | ||
19 | |||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/time.h> | ||
22 | |||
23 | struct mpic_timer { | ||
24 | void *dev; | ||
25 | struct cascade_priv *cascade_handle; | ||
26 | unsigned int num; | ||
27 | unsigned int irq; | ||
28 | }; | ||
29 | |||
30 | #ifdef CONFIG_MPIC_TIMER | ||
31 | struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, | ||
32 | const struct timeval *time); | ||
33 | void mpic_start_timer(struct mpic_timer *handle); | ||
34 | void mpic_stop_timer(struct mpic_timer *handle); | ||
35 | void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time); | ||
36 | void mpic_free_timer(struct mpic_timer *handle); | ||
37 | #else | ||
38 | static inline struct mpic_timer *mpic_request_timer(irq_handler_t fn, | ||
39 | void *dev, const struct timeval *time) { return NULL; } | ||
40 | static inline void mpic_start_timer(struct mpic_timer *handle) { } | ||
41 | static inline void mpic_stop_timer(struct mpic_timer *handle) { } | ||
42 | static inline void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { } | ||
43 | static inline void mpic_free_timer(struct mpic_timer *handle) { } | ||
44 | #endif | ||
45 | |||
46 | #endif | ||
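A hedged usage sketch for the new interface; the handler body and one-second timeout are placeholders, and the stubs above let the same code build when CONFIG_MPIC_TIMER is off:

        static irqreturn_t demo_timeout(int irq, void *dev)
        {
                return IRQ_HANDLED;     /* placeholder handler */
        }

        struct timeval tv = { .tv_sec = 1 };
        struct mpic_timer *t = mpic_request_timer(demo_timeout, NULL, &tv);

        if (t)
                mpic_start_timer(t);    /* demo_timeout fires in ~1s */
        /* ... */
        if (t)
                mpic_free_timer(t);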
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cbb9305ab15a..029fe85722aa 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h | |||
@@ -117,7 +117,13 @@ extern int opal_enter_rtas(struct rtas_args *args, | |||
117 | #define OPAL_SET_SLOT_LED_STATUS 55 | 117 | #define OPAL_SET_SLOT_LED_STATUS 55 |
118 | #define OPAL_GET_EPOW_STATUS 56 | 118 | #define OPAL_GET_EPOW_STATUS 56 |
119 | #define OPAL_SET_SYSTEM_ATTENTION_LED 57 | 119 | #define OPAL_SET_SYSTEM_ATTENTION_LED 57 |
120 | #define OPAL_RESERVED1 58 | ||
121 | #define OPAL_RESERVED2 59 | ||
122 | #define OPAL_PCI_NEXT_ERROR 60 | ||
123 | #define OPAL_PCI_EEH_FREEZE_STATUS2 61 | ||
124 | #define OPAL_PCI_POLL 62 | ||
120 | #define OPAL_PCI_MSI_EOI 63 | 125 | #define OPAL_PCI_MSI_EOI 63 |
126 | #define OPAL_PCI_GET_PHB_DIAG_DATA2 64 | ||
121 | 127 | ||
122 | #ifndef __ASSEMBLY__ | 128 | #ifndef __ASSEMBLY__ |
123 | 129 | ||
@@ -125,6 +131,7 @@ extern int opal_enter_rtas(struct rtas_args *args, | |||
125 | enum OpalVendorApiTokens { | 131 | enum OpalVendorApiTokens { |
126 | OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 | 132 | OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 |
127 | }; | 133 | }; |
134 | |||
128 | enum OpalFreezeState { | 135 | enum OpalFreezeState { |
129 | OPAL_EEH_STOPPED_NOT_FROZEN = 0, | 136 | OPAL_EEH_STOPPED_NOT_FROZEN = 0, |
130 | OPAL_EEH_STOPPED_MMIO_FREEZE = 1, | 137 | OPAL_EEH_STOPPED_MMIO_FREEZE = 1, |
@@ -134,55 +141,69 @@ enum OpalFreezeState { | |||
134 | OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, | 141 | OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, |
135 | OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 | 142 | OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 |
136 | }; | 143 | }; |
144 | |||
137 | enum OpalEehFreezeActionToken { | 145 | enum OpalEehFreezeActionToken { |
138 | OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, | 146 | OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, |
139 | OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, | 147 | OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, |
140 | OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 | 148 | OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 |
141 | }; | 149 | }; |
150 | |||
142 | enum OpalPciStatusToken { | 151 | enum OpalPciStatusToken { |
143 | OPAL_EEH_PHB_NO_ERROR = 0, | 152 | OPAL_EEH_NO_ERROR = 0, |
144 | OPAL_EEH_PHB_FATAL = 1, | 153 | OPAL_EEH_IOC_ERROR = 1, |
145 | OPAL_EEH_PHB_RECOVERABLE = 2, | 154 | OPAL_EEH_PHB_ERROR = 2, |
146 | OPAL_EEH_PHB_BUS_ERROR = 3, | 155 | OPAL_EEH_PE_ERROR = 3, |
147 | OPAL_EEH_PCI_NO_DEVSEL = 4, | 156 | OPAL_EEH_PE_MMIO_ERROR = 4, |
148 | OPAL_EEH_PCI_TA = 5, | 157 | OPAL_EEH_PE_DMA_ERROR = 5 |
149 | OPAL_EEH_PCIEX_UR = 6, | ||
150 | OPAL_EEH_PCIEX_CA = 7, | ||
151 | OPAL_EEH_PCI_MMIO_ERROR = 8, | ||
152 | OPAL_EEH_PCI_DMA_ERROR = 9 | ||
153 | }; | 158 | }; |
159 | |||
160 | enum OpalPciErrorSeverity { | ||
161 | OPAL_EEH_SEV_NO_ERROR = 0, | ||
162 | OPAL_EEH_SEV_IOC_DEAD = 1, | ||
163 | OPAL_EEH_SEV_PHB_DEAD = 2, | ||
164 | OPAL_EEH_SEV_PHB_FENCED = 3, | ||
165 | OPAL_EEH_SEV_PE_ER = 4, | ||
166 | OPAL_EEH_SEV_INF = 5 | ||
167 | }; | ||
168 | |||
154 | enum OpalShpcAction { | 169 | enum OpalShpcAction { |
155 | OPAL_SHPC_GET_LINK_STATE = 0, | 170 | OPAL_SHPC_GET_LINK_STATE = 0, |
156 | OPAL_SHPC_GET_SLOT_STATE = 1 | 171 | OPAL_SHPC_GET_SLOT_STATE = 1 |
157 | }; | 172 | }; |
173 | |||
158 | enum OpalShpcLinkState { | 174 | enum OpalShpcLinkState { |
159 | OPAL_SHPC_LINK_DOWN = 0, | 175 | OPAL_SHPC_LINK_DOWN = 0, |
160 | OPAL_SHPC_LINK_UP = 1 | 176 | OPAL_SHPC_LINK_UP = 1 |
161 | }; | 177 | }; |
178 | |||
162 | enum OpalMmioWindowType { | 179 | enum OpalMmioWindowType { |
163 | OPAL_M32_WINDOW_TYPE = 1, | 180 | OPAL_M32_WINDOW_TYPE = 1, |
164 | OPAL_M64_WINDOW_TYPE = 2, | 181 | OPAL_M64_WINDOW_TYPE = 2, |
165 | OPAL_IO_WINDOW_TYPE = 3 | 182 | OPAL_IO_WINDOW_TYPE = 3 |
166 | }; | 183 | }; |
184 | |||
167 | enum OpalShpcSlotState { | 185 | enum OpalShpcSlotState { |
168 | OPAL_SHPC_DEV_NOT_PRESENT = 0, | 186 | OPAL_SHPC_DEV_NOT_PRESENT = 0, |
169 | OPAL_SHPC_DEV_PRESENT = 1 | 187 | OPAL_SHPC_DEV_PRESENT = 1 |
170 | }; | 188 | }; |
189 | |||
171 | enum OpalExceptionHandler { | 190 | enum OpalExceptionHandler { |
172 | OPAL_MACHINE_CHECK_HANDLER = 1, | 191 | OPAL_MACHINE_CHECK_HANDLER = 1, |
173 | OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, | 192 | OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, |
174 | OPAL_SOFTPATCH_HANDLER = 3 | 193 | OPAL_SOFTPATCH_HANDLER = 3 |
175 | }; | 194 | }; |
195 | |||
176 | enum OpalPendingState { | 196 | enum OpalPendingState { |
177 | OPAL_EVENT_OPAL_INTERNAL = 0x1, | 197 | OPAL_EVENT_OPAL_INTERNAL = 0x1, |
178 | OPAL_EVENT_NVRAM = 0x2, | 198 | OPAL_EVENT_NVRAM = 0x2, |
179 | OPAL_EVENT_RTC = 0x4, | 199 | OPAL_EVENT_RTC = 0x4, |
180 | OPAL_EVENT_CONSOLE_OUTPUT = 0x8, | 200 | OPAL_EVENT_CONSOLE_OUTPUT = 0x8, |
181 | OPAL_EVENT_CONSOLE_INPUT = 0x10, | 201 | OPAL_EVENT_CONSOLE_INPUT = 0x10, |
182 | OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, | 202 | OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, |
183 | OPAL_EVENT_ERROR_LOG = 0x40, | 203 | OPAL_EVENT_ERROR_LOG = 0x40, |
184 | OPAL_EVENT_EPOW = 0x80, | 204 | OPAL_EVENT_EPOW = 0x80, |
185 | OPAL_EVENT_LED_STATUS = 0x100 | 205 | OPAL_EVENT_LED_STATUS = 0x100, |
206 | OPAL_EVENT_PCI_ERROR = 0x200 | ||
186 | }; | 207 | }; |
187 | 208 | ||
188 | /* Machine check related definitions */ | 209 | /* Machine check related definitions */ |
@@ -364,15 +385,80 @@ struct opal_machine_check_event { | |||
364 | } u; | 385 | } u; |
365 | }; | 386 | }; |
366 | 387 | ||
388 | enum { | ||
389 | OPAL_P7IOC_DIAG_TYPE_NONE = 0, | ||
390 | OPAL_P7IOC_DIAG_TYPE_RGC = 1, | ||
391 | OPAL_P7IOC_DIAG_TYPE_BI = 2, | ||
392 | OPAL_P7IOC_DIAG_TYPE_CI = 3, | ||
393 | OPAL_P7IOC_DIAG_TYPE_MISC = 4, | ||
394 | OPAL_P7IOC_DIAG_TYPE_I2C = 5, | ||
395 | OPAL_P7IOC_DIAG_TYPE_LAST = 6 | ||
396 | }; | ||
397 | |||
398 | struct OpalIoP7IOCErrorData { | ||
399 | uint16_t type; | ||
400 | |||
401 | /* GEM */ | ||
402 | uint64_t gemXfir; | ||
403 | uint64_t gemRfir; | ||
404 | uint64_t gemRirqfir; | ||
405 | uint64_t gemMask; | ||
406 | uint64_t gemRwof; | ||
407 | |||
408 | /* LEM */ | ||
409 | uint64_t lemFir; | ||
410 | uint64_t lemErrMask; | ||
411 | uint64_t lemAction0; | ||
412 | uint64_t lemAction1; | ||
413 | uint64_t lemWof; | ||
414 | |||
415 | union { | ||
416 | struct OpalIoP7IOCRgcErrorData { | ||
417 | uint64_t rgcStatus; /* 3E1C10 */ | ||
418 | uint64_t rgcLdcp; /* 3E1C18 */ | ||
419 | } rgc; | ||
420 | struct OpalIoP7IOCBiErrorData { | ||
421 | uint64_t biLdcp0; /* 3C0100, 3C0118 */ | ||
422 | uint64_t biLdcp1; /* 3C0108, 3C0120 */ | ||
423 | uint64_t biLdcp2; /* 3C0110, 3C0128 */ | ||
424 | uint64_t biFenceStatus; /* 3C0130, 3C0130 */ | ||
425 | |||
426 | uint8_t biDownbound; /* BI Downbound or Upbound */ | ||
427 | } bi; | ||
428 | struct OpalIoP7IOCCiErrorData { | ||
429 | uint64_t ciPortStatus; /* 3Dn008 */ | ||
430 | uint64_t ciPortLdcp; /* 3Dn010 */ | ||
431 | |||
432 | uint8_t ciPort; /* Index of CI port: 0/1 */ | ||
433 | } ci; | ||
434 | }; | ||
435 | }; | ||
436 | |||
367 | /** | 437 | /** |
368 | * This structure defines the overlay which will be used to store PHB error | 438 | * This structure defines the overlay which will be used to store PHB error |
369 | * data upon request. | 439 | * data upon request. |
370 | */ | 440 | */ |
371 | enum { | 441 | enum { |
442 | OPAL_PHB_ERROR_DATA_VERSION_1 = 1, | ||
443 | }; | ||
444 | |||
445 | enum { | ||
446 | OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1, | ||
447 | }; | ||
448 | |||
449 | enum { | ||
372 | OPAL_P7IOC_NUM_PEST_REGS = 128, | 450 | OPAL_P7IOC_NUM_PEST_REGS = 128, |
373 | }; | 451 | }; |
374 | 452 | ||
453 | struct OpalIoPhbErrorCommon { | ||
454 | uint32_t version; | ||
455 | uint32_t ioType; | ||
456 | uint32_t len; | ||
457 | }; | ||
458 | |||
375 | struct OpalIoP7IOCPhbErrorData { | 459 | struct OpalIoP7IOCPhbErrorData { |
460 | struct OpalIoPhbErrorCommon common; | ||
461 | |||
376 | uint32_t brdgCtl; | 462 | uint32_t brdgCtl; |
377 | 463 | ||
378 | // P7IOC utl regs | 464 | // P7IOC utl regs |
@@ -530,14 +616,21 @@ int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number, | |||
530 | uint64_t pci_mem_size); | 616 | uint64_t pci_mem_size); |
531 | int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); | 617 | int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); |
532 | 618 | ||
533 | int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, uint64_t diag_buffer_len); | 619 | int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, |
534 | int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, uint64_t diag_buffer_len); | 620 | uint64_t diag_buffer_len); |
621 | int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, | ||
622 | uint64_t diag_buffer_len); | ||
623 | int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer, | ||
624 | uint64_t diag_buffer_len); | ||
535 | int64_t opal_pci_fence_phb(uint64_t phb_id); | 625 | int64_t opal_pci_fence_phb(uint64_t phb_id); |
536 | int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope); | 626 | int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope); |
537 | int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action); | 627 | int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action); |
538 | int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); | 628 | int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); |
539 | int64_t opal_get_epow_status(uint64_t *status); | 629 | int64_t opal_get_epow_status(uint64_t *status); |
540 | int64_t opal_set_system_attention_led(uint8_t led_action); | 630 | int64_t opal_set_system_attention_led(uint8_t led_action); |
631 | int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, | ||
632 | uint16_t *pci_error_type, uint16_t *severity); | ||
633 | int64_t opal_pci_poll(uint64_t phb_id); | ||
541 | 634 | ||
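opal_pci_next_error() is a queue-drain primitive for the new PCI error path. A hedged sketch of a poller built on it (the OPAL_SUCCESS return convention is assumed):

        uint64_t frozen_pe;
        uint16_t err_type, severity;

        while (opal_pci_next_error(phb_id, &frozen_pe,
                                   &err_type, &severity) == OPAL_SUCCESS) {
                if (err_type == OPAL_EEH_NO_ERROR)
                        break;                  /* queue drained */
                if (severity == OPAL_EEH_SEV_PHB_FENCED)
                        break;                  /* escalate to a full PHB reset */
        }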
542 | /* Internal functions */ | 635 | /* Internal functions */ |
543 | extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); | 636 | extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); |
@@ -551,6 +644,11 @@ extern void hvc_opal_init_early(void); | |||
551 | extern int early_init_dt_scan_opal(unsigned long node, const char *uname, | 644 | extern int early_init_dt_scan_opal(unsigned long node, const char *uname, |
552 | int depth, void *data); | 645 | int depth, void *data); |
553 | 646 | ||
647 | extern int opal_notifier_register(struct notifier_block *nb); | ||
648 | extern void opal_notifier_enable(void); | ||
649 | extern void opal_notifier_disable(void); | ||
650 | extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); | ||
651 | |||
554 | extern int opal_get_chars(uint32_t vtermno, char *buf, int count); | 652 | extern int opal_get_chars(uint32_t vtermno, char *buf, int count); |
555 | extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); | 653 | extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); |
556 | 654 | ||
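The notifier hooks follow the standard notifier_block pattern; a hedged registration sketch for the new OPAL_EVENT_PCI_ERROR bit (callback contract assumed from that pattern):

        static int demo_opal_event(struct notifier_block *nb,
                                   unsigned long events, void *data)
        {
                if (events & OPAL_EVENT_PCI_ERROR)
                        ;       /* e.g. wake the EEH poll thread */
                return 0;
        }

        static struct notifier_block demo_nb = {
                .notifier_call = demo_opal_event,
        };

        /* at platform init */
        opal_notifier_register(&demo_nb);
        opal_notifier_enable();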
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f265049dd7d6..2dd7bfc459be 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h | |||
@@ -60,6 +60,7 @@ struct power_pmu { | |||
60 | #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ | 60 | #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ |
61 | #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ | 61 | #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ |
62 | #define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ | 62 | #define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ |
63 | #define PPMU_EBB 0x00000100 /* supports event based branch */ | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * Values for flags to get_alternatives() | 66 | * Values for flags to get_alternatives() |
@@ -68,6 +69,11 @@ struct power_pmu { | |||
68 | #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ | 69 | #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ |
69 | #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ | 70 | #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ |
70 | 71 | ||
72 | /* | ||
73 | * We use the event config bit 63 as a flag to request EBB. | ||
74 | */ | ||
75 | #define EVENT_CONFIG_EBB_SHIFT 63 | ||
76 | |||
71 | extern int register_power_pmu(struct power_pmu *); | 77 | extern int register_power_pmu(struct power_pmu *); |
72 | 78 | ||
73 | struct pt_regs; | 79 | struct pt_regs; |
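PPMU_EBB advertises event-based branches, and bit 63 of the perf event config requests them. From userspace the request looks roughly like this sketch (the raw event code and pinning policy are assumptions):

        struct perf_event_attr attr = {
                .type           = PERF_TYPE_RAW,
                .config         = raw_event_code | (1ULL << 63), /* EVENT_CONFIG_EBB_SHIFT */
                .pinned         = 1,
                .exclude_kernel = 1,
        };
        int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);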
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index b66ae722a8e9..f65e27b09bd3 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h | |||
@@ -221,17 +221,17 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, | |||
221 | 221 | ||
222 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | 222 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
223 | { | 223 | { |
224 | return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE), | 224 | return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), |
225 | GFP_KERNEL|__GFP_REPEAT); | 225 | GFP_KERNEL|__GFP_REPEAT); |
226 | } | 226 | } |
227 | 227 | ||
228 | static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | 228 | static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) |
229 | { | 229 | { |
230 | kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd); | 230 | kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); |
231 | } | 231 | } |
232 | 232 | ||
233 | #define __pmd_free_tlb(tlb, pmd, addr) \ | 233 | #define __pmd_free_tlb(tlb, pmd, addr) \ |
234 | pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE) | 234 | pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) |
235 | #ifndef CONFIG_PPC_64K_PAGES | 235 | #ifndef CONFIG_PPC_64K_PAGES |
236 | #define __pud_free_tlb(tlb, pud, addr) \ | 236 | #define __pud_free_tlb(tlb, pud, addr) \ |
237 | pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) | 237 | pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) |
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index 45142d640720..a56b82fb0609 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h | |||
@@ -33,7 +33,8 @@ | |||
33 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | 33 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) |
34 | 34 | ||
35 | /* Bits to mask out from a PMD to get to the PTE page */ | 35 | /* Bits to mask out from a PMD to get to the PTE page */ |
36 | #define PMD_MASKED_BITS 0x1ff | 36 | /* PMDs point to PTE table fragments which are 4K aligned. */ |
37 | #define PMD_MASKED_BITS 0xfff | ||
37 | /* Bits to mask out from a PGD/PUD to get to the PMD page */ | 38 | /* Bits to mask out from a PGD/PUD to get to the PMD page */ |
38 | #define PUD_MASKED_BITS 0x1ff | 39 | #define PUD_MASKED_BITS 0x1ff |
39 | 40 | ||
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index e3d55f6f24fe..46db09414a10 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #else | 10 | #else |
11 | #include <asm/pgtable-ppc64-4k.h> | 11 | #include <asm/pgtable-ppc64-4k.h> |
12 | #endif | 12 | #endif |
13 | #include <asm/barrier.h> | ||
13 | 14 | ||
14 | #define FIRST_USER_ADDRESS 0 | 15 | #define FIRST_USER_ADDRESS 0 |
15 | 16 | ||
@@ -20,7 +21,11 @@ | |||
20 | PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) | 21 | PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) |
21 | #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) | 22 | #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) |
22 | 23 | ||
23 | 24 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
25 | #define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) | ||
26 | #else | ||
27 | #define PMD_CACHE_INDEX PMD_INDEX_SIZE | ||
28 | #endif | ||
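With THP enabled the PMD cache index grows by one, so each PMD allocation is double-sized; the spare half is where the deposited page table pointer lives, which get_hpte_slot_array() further down reads back. Conceptually the deposit side does (a sketch, not the real helper):

        /* stash the pgtable right after the PMD entries */
        *(pgtable_t *)(pmdp + PTRS_PER_PMD) = pgtable;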
24 | /* | 29 | /* |
25 | * Define the address range of the kernel non-linear virtual area | 30 | * Define the address range of the kernel non-linear virtual area |
26 | */ | 31 | */ |
@@ -150,7 +155,7 @@ | |||
150 | #define pmd_present(pmd) (pmd_val(pmd) != 0) | 155 | #define pmd_present(pmd) (pmd_val(pmd) != 0) |
151 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) | 156 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) |
152 | #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) | 157 | #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) |
153 | #define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) | 158 | extern struct page *pmd_page(pmd_t pmd); |
154 | 159 | ||
155 | #define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) | 160 | #define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) |
156 | #define pud_none(pud) (!pud_val(pud)) | 161 | #define pud_none(pud) (!pud_val(pud)) |
@@ -339,43 +344,217 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) | |||
339 | 344 | ||
340 | void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); | 345 | void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); |
341 | void pgtable_cache_init(void); | 346 | void pgtable_cache_init(void); |
347 | #endif /* __ASSEMBLY__ */ | ||
348 | |||
349 | /* | ||
350 | * THP pages can't be special. So use the _PAGE_SPECIAL | ||
351 | */ | ||
352 | #define _PAGE_SPLITTING _PAGE_SPECIAL | ||
353 | |||
354 | /* | ||
355 | * We need to differentiate between explicit huge page and THP huge | ||
356 | * page, since THP huge page also need to track real subpage details | ||
357 | */ | ||
358 | #define _PAGE_THP_HUGE _PAGE_4K_PFN | ||
342 | 359 | ||
343 | /* | 360 | /* |
344 | * find_linux_pte returns the address of a linux pte for a given | 361 | * set of bits not changed in pmd_modify. |
345 | * effective address and directory. If not found, it returns zero. | ||
346 | */ | 362 | */ |
347 | static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) | 363 | #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \ |
364 | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \ | ||
365 | _PAGE_THP_HUGE) | ||
366 | |||
367 | #ifndef __ASSEMBLY__ | ||
368 | /* | ||
369 | * The linux hugepage PMD now includes the pmd entries followed by the address | ||
370 | * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. | ||
371 | * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per | ||
372 | * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and | ||
373 | * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. | ||
374 | * | ||
375 | * The last three bits are intentionally left as zero. These memory locations | ||
376 | * are also used as normal page PTE pointers. So if we have any pointers | ||
377 | * left around while we collapse a hugepage, we need to make sure the | ||
378 | * _PAGE_PRESENT and _PAGE_FILE bits of those are zero when we look at them. | ||
379 | */ | ||
380 | static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) | ||
348 | { | 381 | { |
349 | pgd_t *pg; | 382 | return (hpte_slot_array[index] >> 3) & 0x1; |
350 | pud_t *pu; | ||
351 | pmd_t *pm; | ||
352 | pte_t *pt = NULL; | ||
353 | |||
354 | pg = pgdir + pgd_index(ea); | ||
355 | if (!pgd_none(*pg)) { | ||
356 | pu = pud_offset(pg, ea); | ||
357 | if (!pud_none(*pu)) { | ||
358 | pm = pmd_offset(pu, ea); | ||
359 | if (pmd_present(*pm)) | ||
360 | pt = pte_offset_kernel(pm, ea); | ||
361 | } | ||
362 | } | ||
363 | return pt; | ||
364 | } | 383 | } |
365 | 384 | ||
366 | #ifdef CONFIG_HUGETLB_PAGE | 385 | static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, |
367 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | 386 | int index) |
368 | unsigned *shift); | ||
369 | #else | ||
370 | static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | ||
371 | unsigned *shift) | ||
372 | { | 387 | { |
373 | if (shift) | 388 | return hpte_slot_array[index] >> 4; |
374 | *shift = 0; | ||
375 | return find_linux_pte(pgdir, ea); | ||
376 | } | 389 | } |
377 | #endif /* !CONFIG_HUGETLB_PAGE */ | ||
378 | 390 | ||
379 | #endif /* __ASSEMBLY__ */ | 391 | static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, |
392 | unsigned int index, unsigned int hidx) | ||
393 | { | ||
394 | hpte_slot_array[index] = hidx << 4 | 0x1 << 3; | ||
395 | } | ||
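A worked example of the [1 bit secondary | 3 bit hidx | 1 bit valid | 000] encoding: marking subpage 5 valid with hash index 3 stores (3 << 4) | (1 << 3) == 0x38, which the two accessors invert:

        unsigned char arr[256] = { 0 };

        mark_hpte_slot_valid(arr, 5, 3);        /* arr[5] == 0x38 */
        BUG_ON(!hpte_valid(arr, 5));
        BUG_ON(hpte_hash_index(arr, 5) != 3);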
380 | 396 | ||
397 | static inline char *get_hpte_slot_array(pmd_t *pmdp) | ||
398 | { | ||
399 | /* | ||
400 | * The hpte hindex is stored in the pgtable whose address is in the | ||
401 | * second half of the PMD | ||
402 | * | ||
403 | * Order this load with the test for pmd_trans_huge in the caller | ||
404 | */ | ||
405 | smp_rmb(); | ||
406 | return *(char **)(pmdp + PTRS_PER_PMD); | ||
407 | |||
408 | |||
409 | } | ||
410 | |||
411 | extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | ||
412 | pmd_t *pmdp); | ||
413 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
414 | extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); | ||
415 | extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); | ||
416 | extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); | ||
417 | extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
418 | pmd_t *pmdp, pmd_t pmd); | ||
419 | extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | ||
420 | pmd_t *pmd); | ||
421 | |||
422 | static inline int pmd_trans_huge(pmd_t pmd) | ||
423 | { | ||
424 | /* | ||
425 | * leaf pte for huge page, bottom two bits != 00 | ||
426 | */ | ||
427 | return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); | ||
428 | } | ||
429 | |||
430 | static inline int pmd_large(pmd_t pmd) | ||
431 | { | ||
432 | /* | ||
433 | * leaf pte for huge page, bottom two bits != 00 | ||
434 | */ | ||
435 | if (pmd_trans_huge(pmd)) | ||
436 | return pmd_val(pmd) & _PAGE_PRESENT; | ||
437 | return 0; | ||
438 | } | ||
439 | |||
440 | static inline int pmd_trans_splitting(pmd_t pmd) | ||
441 | { | ||
442 | if (pmd_trans_huge(pmd)) | ||
443 | return pmd_val(pmd) & _PAGE_SPLITTING; | ||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | extern int has_transparent_hugepage(void); | ||
448 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
449 | |||
450 | static inline pte_t pmd_pte(pmd_t pmd) | ||
451 | { | ||
452 | return __pte(pmd_val(pmd)); | ||
453 | } | ||
454 | |||
455 | static inline pmd_t pte_pmd(pte_t pte) | ||
456 | { | ||
457 | return __pmd(pte_val(pte)); | ||
458 | } | ||
459 | |||
460 | static inline pte_t *pmdp_ptep(pmd_t *pmd) | ||
461 | { | ||
462 | return (pte_t *)pmd; | ||
463 | } | ||
464 | |||
465 | #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) | ||
466 | #define pmd_young(pmd) pte_young(pmd_pte(pmd)) | ||
467 | #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) | ||
468 | #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) | ||
469 | #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) | ||
470 | #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) | ||
471 | #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) | ||
472 | |||
473 | #define __HAVE_ARCH_PMD_WRITE | ||
474 | #define pmd_write(pmd) pte_write(pmd_pte(pmd)) | ||
475 | |||
476 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
477 | { | ||
478 | /* Do nothing, mk_pmd() does this part. */ | ||
479 | return pmd; | ||
480 | } | ||
481 | |||
482 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | ||
483 | { | ||
484 | pmd_val(pmd) &= ~_PAGE_PRESENT; | ||
485 | return pmd; | ||
486 | } | ||
487 | |||
488 | static inline pmd_t pmd_mksplitting(pmd_t pmd) | ||
489 | { | ||
490 | pmd_val(pmd) |= _PAGE_SPLITTING; | ||
491 | return pmd; | ||
492 | } | ||
493 | |||
494 | #define __HAVE_ARCH_PMD_SAME | ||
495 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | ||
496 | { | ||
497 | return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); | ||
498 | } | ||
499 | |||
500 | #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS | ||
501 | extern int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
502 | unsigned long address, pmd_t *pmdp, | ||
503 | pmd_t entry, int dirty); | ||
504 | |||
505 | extern unsigned long pmd_hugepage_update(struct mm_struct *mm, | ||
506 | unsigned long addr, | ||
507 | pmd_t *pmdp, unsigned long clr); | ||
508 | |||
509 | static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, | ||
510 | unsigned long addr, pmd_t *pmdp) | ||
511 | { | ||
512 | unsigned long old; | ||
513 | |||
514 | if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) | ||
515 | return 0; | ||
516 | old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED); | ||
517 | return ((old & _PAGE_ACCESSED) != 0); | ||
518 | } | ||
519 | |||
520 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG | ||
521 | extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
522 | unsigned long address, pmd_t *pmdp); | ||
523 | #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH | ||
524 | extern int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
525 | unsigned long address, pmd_t *pmdp); | ||
526 | |||
527 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
528 | extern pmd_t pmdp_get_and_clear(struct mm_struct *mm, | ||
529 | unsigned long addr, pmd_t *pmdp); | ||
530 | |||
531 | #define __HAVE_ARCH_PMDP_CLEAR_FLUSH | ||
532 | extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, | ||
533 | pmd_t *pmdp); | ||
534 | |||
535 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | ||
536 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, | ||
537 | pmd_t *pmdp) | ||
538 | { | ||
539 | |||
540 | if ((pmd_val(*pmdp) & _PAGE_RW) == 0) | ||
541 | return; | ||
542 | |||
543 | pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW); | ||
544 | } | ||
545 | |||
546 | #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH | ||
547 | extern void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
548 | unsigned long address, pmd_t *pmdp); | ||
549 | |||
550 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | ||
551 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, | ||
552 | pgtable_t pgtable); | ||
553 | #define __HAVE_ARCH_PGTABLE_WITHDRAW | ||
554 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); | ||
555 | |||
556 | #define __HAVE_ARCH_PMDP_INVALIDATE | ||
557 | extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, | ||
558 | pmd_t *pmdp); | ||
559 | #endif /* __ASSEMBLY__ */ | ||
381 | #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ | 560 | #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ |
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b6293d26bd39..7d6eacf249cf 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h | |||
@@ -217,6 +217,12 @@ extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, | |||
217 | 217 | ||
218 | extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | 218 | extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, |
219 | unsigned long end, int write, struct page **pages, int *nr); | 219 | unsigned long end, int write, struct page **pages, int *nr); |
220 | #ifndef CONFIG_TRANSPARENT_HUGEPAGE | ||
221 | #define pmd_large(pmd) 0 | ||
222 | #define has_transparent_hugepage() 0 | ||
223 | #endif | ||
224 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | ||
225 | unsigned *shift); | ||
220 | #endif /* __ASSEMBLY__ */ | 226 | #endif /* __ASSEMBLY__ */ |
221 | 227 | ||
222 | #endif /* __KERNEL__ */ | 228 | #endif /* __KERNEL__ */ |
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 5f1e15b68704..3421637cfd7b 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h | |||
@@ -38,5 +38,30 @@ typedef u32 ppc_opcode_t; | |||
38 | #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) | 38 | #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) |
39 | #endif /* CONFIG_PPC64 */ | 39 | #endif /* CONFIG_PPC64 */ |
40 | 40 | ||
41 | #ifdef CONFIG_PPC_ADV_DEBUG_REGS | ||
42 | #define MSR_SINGLESTEP (MSR_DE) | ||
43 | #else | ||
44 | #define MSR_SINGLESTEP (MSR_SE) | ||
45 | #endif | ||
46 | |||
47 | /* Enable single stepping for the current task */ | ||
48 | static inline void enable_single_step(struct pt_regs *regs) | ||
49 | { | ||
50 | regs->msr |= MSR_SINGLESTEP; | ||
51 | #ifdef CONFIG_PPC_ADV_DEBUG_REGS | ||
52 | /* | ||
53 | * We turn off Critical Input Exception (CE) to ensure that the single | ||
54 | * step will be for the instruction we have the probe on; if we don't, | ||
55 | * it is possible we'd get the single step reported for CE. | ||
56 | */ | ||
57 | regs->msr &= ~MSR_CE; | ||
58 | mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); | ||
59 | #ifdef CONFIG_PPC_47x | ||
60 | isync(); | ||
61 | #endif | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | |||
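A hedged sketch of the intended call site in a probe handler; the instruction-copy field name is the usual kprobes one and is assumed here:

        /* single-step the out-of-line copy of the probed instruction */
        regs->nip = (unsigned long)p->ainsn.insn;
        enable_single_step(regs);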
41 | #endif /* __KERNEL__ */ | 66 | #endif /* __KERNEL__ */ |
42 | #endif /* _ASM_POWERPC_PROBES_H */ | 67 | #endif /* _ASM_POWERPC_PROBES_H */ |
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 14a658363698..47a35b08b963 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h | |||
@@ -168,10 +168,10 @@ struct thread_struct { | |||
168 | * The following help to manage the use of Debug Control Registers | 168 | * The following help to manage the use of Debug Control Registers |
169 | * on the BookE platforms. | 169 | * on the BookE platforms. |
170 | */ | 170 | */ |
171 | unsigned long dbcr0; | 171 | uint32_t dbcr0; |
172 | unsigned long dbcr1; | 172 | uint32_t dbcr1; |
173 | #ifdef CONFIG_BOOKE | 173 | #ifdef CONFIG_BOOKE |
174 | unsigned long dbcr2; | 174 | uint32_t dbcr2; |
175 | #endif | 175 | #endif |
176 | /* | 176 | /* |
177 | * The stored value of the DBSR register will be the value at the | 177 | * The stored value of the DBSR register will be the value at the |
@@ -179,7 +179,7 @@ struct thread_struct { | |||
179 | * user (will never be written to) and has value while helping to | 179 | * user (will never be written to) and has value while helping to |
180 | * describe the reason for the last debug trap. Torez | 180 | * describe the reason for the last debug trap. Torez |
181 | */ | 181 | */ |
182 | unsigned long dbsr; | 182 | uint32_t dbsr; |
183 | /* | 183 | /* |
184 | * The following will contain addresses used by debug applications | 184 | * The following will contain addresses used by debug applications |
185 | * to help trace and trap on particular address locations. | 185 | * to help trace and trap on particular address locations. |
@@ -200,7 +200,7 @@ struct thread_struct { | |||
200 | #endif | 200 | #endif |
201 | #endif | 201 | #endif |
202 | /* FP and VSX 0-31 register set */ | 202 | /* FP and VSX 0-31 register set */ |
203 | double fpr[32][TS_FPRWIDTH]; | 203 | double fpr[32][TS_FPRWIDTH] __attribute__((aligned(16))); |
204 | struct { | 204 | struct { |
205 | 205 | ||
206 | unsigned int pad; | 206 | unsigned int pad; |
@@ -287,9 +287,9 @@ struct thread_struct { | |||
287 | unsigned long siar; | 287 | unsigned long siar; |
288 | unsigned long sdar; | 288 | unsigned long sdar; |
289 | unsigned long sier; | 289 | unsigned long sier; |
290 | unsigned long mmcr0; | ||
291 | unsigned long mmcr2; | 290 | unsigned long mmcr2; |
292 | unsigned long mmcra; | 291 | unsigned mmcr0; |
292 | unsigned used_ebb; | ||
293 | #endif | 293 | #endif |
294 | }; | 294 | }; |
295 | 295 | ||
@@ -404,9 +404,7 @@ static inline void prefetchw(const void *x) | |||
404 | 404 | ||
405 | #define spin_lock_prefetch(x) prefetchw(x) | 405 | #define spin_lock_prefetch(x) prefetchw(x) |
406 | 406 | ||
407 | #ifdef CONFIG_PPC64 | ||
408 | #define HAVE_ARCH_PICK_MMAP_LAYOUT | 407 | #define HAVE_ARCH_PICK_MMAP_LAYOUT |
409 | #endif | ||
410 | 408 | ||
411 | #ifdef CONFIG_PPC64 | 409 | #ifdef CONFIG_PPC64 |
412 | static inline unsigned long get_clean_sp(unsigned long sp, int is_32) | 410 | static inline unsigned long get_clean_sp(unsigned long sp, int is_32) |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4a9e408644fe..5d7d9c2a5473 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -621,11 +621,15 @@ | |||
621 | #define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ | 621 | #define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ |
622 | #define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ | 622 | #define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ |
623 | #define MMCR0_TBEE 0x00400000UL /* time base exception enable */ | 623 | #define MMCR0_TBEE 0x00400000UL /* time base exception enable */ |
624 | #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ | ||
625 | #define MMCR0_PMCC 0x000c0000UL /* PMC control */ | ||
626 | #define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ | ||
624 | #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ | 627 | #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ |
625 | #define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ | 628 | #define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ |
626 | #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ | 629 | #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ |
627 | #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ | 630 | #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ |
628 | #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ | 631 | #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ |
632 | #define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ | ||
629 | #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ | 633 | #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ |
630 | #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ | 634 | #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ |
631 | #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ | 635 | #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ |
@@ -673,6 +677,11 @@ | |||
673 | #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ | 677 | #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ |
674 | #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ | 678 | #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ |
675 | 679 | ||
680 | /* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */ | ||
681 | #define MMCR0_USER_MASK (MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO) | ||
682 | #define MMCR2_USER_MASK 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */ | ||
683 | #define SIER_USER_MASK 0x7fffffUL | ||
684 | |||
676 | #define SPRN_PA6T_MMCR0 795 | 685 | #define SPRN_PA6T_MMCR0 795 |
677 | #define PA6T_MMCR0_EN0 0x0000000000000001UL | 686 | #define PA6T_MMCR0_EN0 0x0000000000000001UL |
678 | #define PA6T_MMCR0_EN1 0x0000000000000002UL | 687 | #define PA6T_MMCR0_EN1 0x0000000000000002UL |
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 34fd70488d83..c7a8bfc9f6f5 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h | |||
@@ -350,8 +350,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg) | |||
350 | (devfn << 8) | (reg & 0xff); | 350 | (devfn << 8) | (reg & 0xff); |
351 | } | 351 | } |
352 | 352 | ||
353 | extern void __cpuinit rtas_give_timebase(void); | 353 | extern void rtas_give_timebase(void); |
354 | extern void __cpuinit rtas_take_timebase(void); | 354 | extern void rtas_take_timebase(void); |
355 | 355 | ||
356 | #ifdef CONFIG_PPC_RTAS | 356 | #ifdef CONFIG_PPC_RTAS |
357 | static inline int page_is_rtas_user_buf(unsigned long pfn) | 357 | static inline int page_is_rtas_user_buf(unsigned long pfn) |
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 200d763a0a67..49a13e0ef234 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h | |||
@@ -67,4 +67,18 @@ static inline void flush_spe_to_thread(struct task_struct *t) | |||
67 | } | 67 | } |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | static inline void clear_task_ebb(struct task_struct *t) | ||
71 | { | ||
72 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
73 | /* EBB perf events are not inherited, so clear all EBB state. */ | ||
74 | t->thread.bescr = 0; | ||
75 | t->thread.mmcr2 = 0; | ||
76 | t->thread.mmcr0 = 0; | ||
77 | t->thread.siar = 0; | ||
78 | t->thread.sdar = 0; | ||
79 | t->thread.sier = 0; | ||
80 | t->thread.used_ebb = 0; | ||
81 | #endif | ||
82 | } | ||
83 | |||
70 | #endif /* _ASM_POWERPC_SWITCH_TO_H */ | 84 | #endif /* _ASM_POWERPC_SWITCH_TO_H */ |
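clear_task_ebb() has no caller in this hunk; the comment implies it runs when a new task is created, so a child never inherits the parent's EBB perf state. A minimal sketch of a plausible call site follows; the function is hypothetical, named only for illustration:

/*
 * Hypothetical call site (not from this patch): run for the child
 * task on the fork path so EBB perf state is never inherited.
 */
static void init_child_thread(struct task_struct *child)
{
	/* Zeroes bescr, mmcr0/mmcr2, siar, sdar, sier and used_ebb */
	clear_task_ebb(child);
}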
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 61a59271665b..2def01ed0cb2 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h | |||
@@ -165,7 +165,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, | |||
165 | /* Private function for use by PCI IO mapping code */ | 165 | /* Private function for use by PCI IO mapping code */ |
166 | extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | 166 | extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, |
167 | unsigned long end); | 167 | unsigned long end); |
168 | 168 | extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, | |
169 | unsigned long addr); | ||
169 | #else | 170 | #else |
170 | #error Unsupported MMU type | 171 | #error Unsupported MMU type |
171 | #endif | 172 | #endif |
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 50f261bc3e95..0d9cecddf8a4 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h | |||
@@ -22,7 +22,7 @@ extern unsigned long vdso64_rt_sigtramp; | |||
22 | extern unsigned long vdso32_sigtramp; | 22 | extern unsigned long vdso32_sigtramp; |
23 | extern unsigned long vdso32_rt_sigtramp; | 23 | extern unsigned long vdso32_rt_sigtramp; |
24 | 24 | ||
25 | int __cpuinit vdso_getcpu_init(void); | 25 | int vdso_getcpu_init(void); |
26 | 26 | ||
27 | #else /* __ASSEMBLY__ */ | 27 | #else /* __ASSEMBLY__ */ |
28 | 28 | ||
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f960a7944553..a8619bfe879e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -58,6 +58,8 @@ obj-$(CONFIG_RTAS_PROC) += rtas-proc.o | |||
58 | obj-$(CONFIG_LPARCFG) += lparcfg.o | 58 | obj-$(CONFIG_LPARCFG) += lparcfg.o |
59 | obj-$(CONFIG_IBMVIO) += vio.o | 59 | obj-$(CONFIG_IBMVIO) += vio.o |
60 | obj-$(CONFIG_IBMEBUS) += ibmebus.o | 60 | obj-$(CONFIG_IBMEBUS) += ibmebus.o |
61 | obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ | ||
62 | eeh_driver.o eeh_event.o eeh_sysfs.o | ||
61 | obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o | 63 | obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o |
62 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 64 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
63 | obj-$(CONFIG_FA_DUMP) += fadump.o | 65 | obj-$(CONFIG_FA_DUMP) += fadump.o |
@@ -100,7 +102,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o | |||
100 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 102 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
101 | obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o | 103 | obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o |
102 | 104 | ||
103 | pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o | 105 | pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o |
104 | obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ | 106 | obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ |
105 | pci-common.o pci_of_scan.o | 107 | pci-common.o pci_of_scan.o |
106 | obj-$(CONFIG_PCI_MSI) += msi.o | 108 | obj-$(CONFIG_PCI_MSI) += msi.o |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6f16ffafa6f0..c7e8afc2ead0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -105,9 +105,6 @@ int main(void) | |||
105 | DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); | 105 | DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); |
106 | #else /* CONFIG_PPC64 */ | 106 | #else /* CONFIG_PPC64 */ |
107 | DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); | 107 | DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); |
108 | #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) | ||
109 | DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); | ||
110 | #endif | ||
111 | #ifdef CONFIG_SPE | 108 | #ifdef CONFIG_SPE |
112 | DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); | 109 | DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); |
113 | DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); | 110 | DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); |
@@ -115,6 +112,9 @@ int main(void) | |||
115 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); | 112 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); |
116 | #endif /* CONFIG_SPE */ | 113 | #endif /* CONFIG_SPE */ |
117 | #endif /* CONFIG_PPC64 */ | 114 | #endif /* CONFIG_PPC64 */ |
115 | #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) | ||
116 | DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); | ||
117 | #endif | ||
118 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 118 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
119 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); | 119 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); |
120 | #endif | 120 | #endif |
@@ -132,7 +132,6 @@ int main(void) | |||
132 | DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); | 132 | DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); |
133 | DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); | 133 | DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); |
134 | DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); | 134 | DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); |
135 | DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra)); | ||
136 | #endif | 135 | #endif |
137 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 136 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
138 | DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); | 137 | DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); |
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 92c6b008dd2b..9262cf2bec4b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c | |||
@@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache) | |||
131 | return cache_type_info[cache->type].name; | 131 | return cache_type_info[cache->type].name; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode) | 134 | static void cache_init(struct cache *cache, int type, int level, |
135 | struct device_node *ofnode) | ||
135 | { | 136 | { |
136 | cache->type = type; | 137 | cache->type = type; |
137 | cache->level = level; | 138 | cache->level = level; |
@@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc | |||
140 | list_add(&cache->list, &cache_list); | 141 | list_add(&cache->list, &cache_list); |
141 | } | 142 | } |
142 | 143 | ||
143 | static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode) | 144 | static struct cache *new_cache(int type, int level, struct device_node *ofnode) |
144 | { | 145 | { |
145 | struct cache *cache; | 146 | struct cache *cache; |
146 | 147 | ||
@@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np) | |||
324 | return of_get_property(np, "cache-unified", NULL); | 325 | return of_get_property(np, "cache-unified", NULL); |
325 | } | 326 | } |
326 | 327 | ||
327 | static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level) | 328 | static struct cache *cache_do_one_devnode_unified(struct device_node *node, |
329 | int level) | ||
328 | { | 330 | { |
329 | struct cache *cache; | 331 | struct cache *cache; |
330 | 332 | ||
@@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node * | |||
335 | return cache; | 337 | return cache; |
336 | } | 338 | } |
337 | 339 | ||
338 | static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level) | 340 | static struct cache *cache_do_one_devnode_split(struct device_node *node, |
341 | int level) | ||
339 | { | 342 | { |
340 | struct cache *dcache, *icache; | 343 | struct cache *dcache, *icache; |
341 | 344 | ||
@@ -357,7 +360,7 @@ err: | |||
357 | return NULL; | 360 | return NULL; |
358 | } | 361 | } |
359 | 362 | ||
360 | static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level) | 363 | static struct cache *cache_do_one_devnode(struct device_node *node, int level) |
361 | { | 364 | { |
362 | struct cache *cache; | 365 | struct cache *cache; |
363 | 366 | ||
@@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in | |||
369 | return cache; | 372 | return cache; |
370 | } | 373 | } |
371 | 374 | ||
372 | static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level) | 375 | static struct cache *cache_lookup_or_instantiate(struct device_node *node, |
376 | int level) | ||
373 | { | 377 | { |
374 | struct cache *cache; | 378 | struct cache *cache; |
375 | 379 | ||
@@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n | |||
385 | return cache; | 389 | return cache; |
386 | } | 390 | } |
387 | 391 | ||
388 | static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger) | 392 | static void link_cache_lists(struct cache *smaller, struct cache *bigger) |
389 | { | 393 | { |
390 | while (smaller->next_local) { | 394 | while (smaller->next_local) { |
391 | if (smaller->next_local == bigger) | 395 | if (smaller->next_local == bigger) |
@@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg | |||
396 | smaller->next_local = bigger; | 400 | smaller->next_local = bigger; |
397 | } | 401 | } |
398 | 402 | ||
399 | static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache) | 403 | static void do_subsidiary_caches_debugcheck(struct cache *cache) |
400 | { | 404 | { |
401 | WARN_ON_ONCE(cache->level != 1); | 405 | WARN_ON_ONCE(cache->level != 1); |
402 | WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); | 406 | WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); |
403 | } | 407 | } |
404 | 408 | ||
405 | static void __cpuinit do_subsidiary_caches(struct cache *cache) | 409 | static void do_subsidiary_caches(struct cache *cache) |
406 | { | 410 | { |
407 | struct device_node *subcache_node; | 411 | struct device_node *subcache_node; |
408 | int level = cache->level; | 412 | int level = cache->level; |
@@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache) | |||
423 | } | 427 | } |
424 | } | 428 | } |
425 | 429 | ||
426 | static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id) | 430 | static struct cache *cache_chain_instantiate(unsigned int cpu_id) |
427 | { | 431 | { |
428 | struct device_node *cpu_node; | 432 | struct device_node *cpu_node; |
429 | struct cache *cpu_cache = NULL; | 433 | struct cache *cpu_cache = NULL; |
@@ -448,7 +452,7 @@ out: | |||
448 | return cpu_cache; | 452 | return cpu_cache; |
449 | } | 453 | } |
450 | 454 | ||
451 | static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) | 455 | static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id) |
452 | { | 456 | { |
453 | struct cache_dir *cache_dir; | 457 | struct cache_dir *cache_dir; |
454 | struct device *dev; | 458 | struct device *dev; |
@@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = { | |||
653 | .default_attrs = cache_index_default_attrs, | 657 | .default_attrs = cache_index_default_attrs, |
654 | }; | 658 | }; |
655 | 659 | ||
656 | static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) | 660 | static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) |
657 | { | 661 | { |
658 | const char *cache_name; | 662 | const char *cache_name; |
659 | const char *cache_type; | 663 | const char *cache_type; |
@@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d | |||
696 | kfree(buf); | 700 | kfree(buf); |
697 | } | 701 | } |
698 | 702 | ||
699 | static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir) | 703 | static void cacheinfo_create_index_dir(struct cache *cache, int index, |
704 | struct cache_dir *cache_dir) | ||
700 | { | 705 | { |
701 | struct cache_index_dir *index_dir; | 706 | struct cache_index_dir *index_dir; |
702 | int rc; | 707 | int rc; |
@@ -722,7 +727,8 @@ err: | |||
722 | kfree(index_dir); | 727 | kfree(index_dir); |
723 | } | 728 | } |
724 | 729 | ||
725 | static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list) | 730 | static void cacheinfo_sysfs_populate(unsigned int cpu_id, |
731 | struct cache *cache_list) | ||
726 | { | 732 | { |
727 | struct cache_dir *cache_dir; | 733 | struct cache_dir *cache_dir; |
728 | struct cache *cache; | 734 | struct cache *cache; |
@@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache | |||
740 | } | 746 | } |
741 | } | 747 | } |
742 | 748 | ||
743 | void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id) | 749 | void cacheinfo_cpu_online(unsigned int cpu_id) |
744 | { | 750 | { |
745 | struct cache *cache; | 751 | struct cache *cache; |
746 | 752 | ||
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/kernel/eeh.c index 6b73d6c44f51..39954fe941b8 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/kernel/eeh.c | |||
@@ -103,11 +103,8 @@ EXPORT_SYMBOL(eeh_subsystem_enabled); | |||
103 | */ | 103 | */ |
104 | int eeh_probe_mode; | 104 | int eeh_probe_mode; |
105 | 105 | ||
106 | /* Global EEH mutex */ | ||
107 | DEFINE_MUTEX(eeh_mutex); | ||
108 | |||
109 | /* Lock to avoid races due to multiple reports of an error */ | 106 | /* Lock to avoid races due to multiple reports of an error */ |
110 | static DEFINE_RAW_SPINLOCK(confirm_error_lock); | 107 | DEFINE_RAW_SPINLOCK(confirm_error_lock); |
111 | 108 | ||
112 | /* Buffer for reporting pci register dumps. It's here in BSS, and | 109 | /* Buffer for reporting pci register dumps. It's here in BSS, and |
113 | * not dynamically allocated, so that it ends up in RMO where RTAS | 110 | * not dynamically allocated, so that it ends up in RMO where RTAS |
@@ -235,16 +232,30 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) | |||
235 | { | 232 | { |
236 | size_t loglen = 0; | 233 | size_t loglen = 0; |
237 | struct eeh_dev *edev; | 234 | struct eeh_dev *edev; |
235 | bool valid_cfg_log = true; | ||
238 | 236 | ||
239 | eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); | 237 | /* |
240 | eeh_ops->configure_bridge(pe); | 238 | * When the PHB is fenced or dead, it's pointless to collect |
241 | eeh_pe_restore_bars(pe); | 239 | * the data from PCI config space because it should return |
242 | 240 | * 0xFF's. For ER, we still retrieve the data from the PCI | |
243 | pci_regs_buf[0] = 0; | 241 | * config space. |
244 | eeh_pe_for_each_dev(pe, edev) { | 242 | */ |
245 | loglen += eeh_gather_pci_data(edev, pci_regs_buf, | 243 | if (eeh_probe_mode_dev() && |
246 | EEH_PCI_REGS_LOG_LEN); | 244 | (pe->type & EEH_PE_PHB) && |
247 | } | 245 | (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) |
246 | valid_cfg_log = false; | ||
247 | |||
248 | if (valid_cfg_log) { | ||
249 | eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); | ||
250 | eeh_ops->configure_bridge(pe); | ||
251 | eeh_pe_restore_bars(pe); | ||
252 | |||
253 | pci_regs_buf[0] = 0; | ||
254 | eeh_pe_for_each_dev(pe, edev) { | ||
255 | loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, | ||
256 | EEH_PCI_REGS_LOG_LEN - loglen); | ||
257 | } | ||
258 | } | ||
248 | 259 | ||
249 | eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); | 260 | eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); |
250 | } | 261 | } |
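Note the changed accumulation in the hunk above: each device now appends at pci_regs_buf + loglen with the space that remains, where the old loop passed the buffer start and the full length on every iteration, so later devices overwrote earlier dumps. Distilled to a standalone sketch with hypothetical names:

/*
 * Append-into-fixed-buffer pattern (names invented): scnprintf()
 * returns the number of bytes actually written, so len can never
 * run past the end of the buffer.
 */
static void fill_log(const char **item, int nr_items)
{
	char buf[256];
	size_t len = 0;
	int i;

	buf[0] = '\0';
	for (i = 0; i < nr_items; i++)
		len += scnprintf(buf + len, sizeof(buf) - len,
				 "%s\n", item[i]);
}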
@@ -260,15 +271,74 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) | |||
260 | { | 271 | { |
261 | pte_t *ptep; | 272 | pte_t *ptep; |
262 | unsigned long pa; | 273 | unsigned long pa; |
274 | int hugepage_shift; | ||
263 | 275 | ||
264 | ptep = find_linux_pte(init_mm.pgd, token); | 276 | /* |
277 | * We won't find hugepages here; this is iomem | ||
278 | */ | ||
279 | ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); | ||
265 | if (!ptep) | 280 | if (!ptep) |
266 | return token; | 281 | return token; |
282 | WARN_ON(hugepage_shift); | ||
267 | pa = pte_pfn(*ptep) << PAGE_SHIFT; | 283 | pa = pte_pfn(*ptep) << PAGE_SHIFT; |
268 | 284 | ||
269 | return pa | (token & (PAGE_SIZE-1)); | 285 | return pa | (token & (PAGE_SIZE-1)); |
270 | } | 286 | } |
271 | 287 | ||
288 | /* | ||
289 | * On the PowerNV platform, we might already have a fenced PHB. | ||
290 | * In that case, it's meaningless to recover the frozen PE. Instead, | ||
291 | * we have to handle the fenced PHB first. | ||
292 | */ | ||
293 | static int eeh_phb_check_failure(struct eeh_pe *pe) | ||
294 | { | ||
295 | struct eeh_pe *phb_pe; | ||
296 | unsigned long flags; | ||
297 | int ret; | ||
298 | |||
299 | if (!eeh_probe_mode_dev()) | ||
300 | return -EPERM; | ||
301 | |||
302 | /* Find the PHB PE */ | ||
303 | phb_pe = eeh_phb_pe_get(pe->phb); | ||
304 | if (!phb_pe) { | ||
305 | pr_warning("%s Can't find PE for PHB#%d\n", | ||
306 | __func__, pe->phb->global_number); | ||
307 | return -EEXIST; | ||
308 | } | ||
309 | |||
310 | /* If the PHB has been in problematic state */ | ||
311 | eeh_serialize_lock(&flags); | ||
312 | if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { | ||
313 | ret = 0; | ||
314 | goto out; | ||
315 | } | ||
316 | |||
317 | /* Check PHB state */ | ||
318 | ret = eeh_ops->get_state(phb_pe, NULL); | ||
319 | if ((ret < 0) || | ||
320 | (ret == EEH_STATE_NOT_SUPPORT) || | ||
321 | (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == | ||
322 | (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { | ||
323 | ret = 0; | ||
324 | goto out; | ||
325 | } | ||
326 | |||
327 | /* Isolate the PHB and send event */ | ||
328 | eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); | ||
329 | eeh_serialize_unlock(flags); | ||
330 | eeh_send_failure_event(phb_pe); | ||
331 | |||
332 | pr_err("EEH: PHB#%x failure detected\n", | ||
333 | phb_pe->phb->global_number); | ||
334 | dump_stack(); | ||
335 | |||
336 | return 1; | ||
337 | out: | ||
338 | eeh_serialize_unlock(flags); | ||
339 | return ret; | ||
340 | } | ||
341 | |||
272 | /** | 342 | /** |
273 | * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze | 343 | * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze |
274 | * @edev: eeh device | 344 | * @edev: eeh device |
@@ -319,13 +389,21 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
319 | return 0; | 389 | return 0; |
320 | } | 390 | } |
321 | 391 | ||
392 | /* | ||
393 | * On the PowerNV platform, we might already have a fenced PHB | ||
394 | * there, and we need to take care of that first. | ||
395 | */ | ||
396 | ret = eeh_phb_check_failure(pe); | ||
397 | if (ret > 0) | ||
398 | return ret; | ||
399 | |||
322 | /* If we already have a pending isolation event for this | 400 | /* If we already have a pending isolation event for this |
323 | * slot, we know it's bad already, we don't need to check. | 401 | * slot, we know it's bad already, we don't need to check. |
324 | * Do this checking under a lock; as multiple PCI devices | 402 | * Do this checking under a lock; as multiple PCI devices |
325 | * in one slot might report errors simultaneously, and we | 403 | * in one slot might report errors simultaneously, and we |
326 | * only want one error recovery routine running. | 404 | * only want one error recovery routine running. |
327 | */ | 405 | */ |
328 | raw_spin_lock_irqsave(&confirm_error_lock, flags); | 406 | eeh_serialize_lock(&flags); |
329 | rc = 1; | 407 | rc = 1; |
330 | if (pe->state & EEH_PE_ISOLATED) { | 408 | if (pe->state & EEH_PE_ISOLATED) { |
331 | pe->check_count++; | 409 | pe->check_count++; |
@@ -368,13 +446,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
368 | } | 446 | } |
369 | 447 | ||
370 | eeh_stats.slot_resets++; | 448 | eeh_stats.slot_resets++; |
371 | 449 | ||
372 | /* Avoid repeated reports of this failure, including problems | 450 | /* Avoid repeated reports of this failure, including problems |
373 | * with other functions on this device, and functions under | 451 | * with other functions on this device, and functions under |
374 | * bridges. | 452 | * bridges. |
375 | */ | 453 | */ |
376 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); | 454 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); |
377 | raw_spin_unlock_irqrestore(&confirm_error_lock, flags); | 455 | eeh_serialize_unlock(flags); |
378 | 456 | ||
379 | eeh_send_failure_event(pe); | 457 | eeh_send_failure_event(pe); |
380 | 458 | ||
@@ -382,11 +460,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
382 | * a stack trace will help the device-driver authors figure | 460 | * a stack trace will help the device-driver authors figure |
383 | * out what happened. So print that out. | 461 | * out what happened. So print that out. |
384 | */ | 462 | */ |
385 | WARN(1, "EEH: failure detected\n"); | 463 | pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", |
464 | pe->addr, pe->phb->global_number); | ||
465 | dump_stack(); | ||
466 | |||
386 | return 1; | 467 | return 1; |
387 | 468 | ||
388 | dn_unlock: | 469 | dn_unlock: |
389 | raw_spin_unlock_irqrestore(&confirm_error_lock, flags); | 470 | eeh_serialize_unlock(flags); |
390 | return rc; | 471 | return rc; |
391 | } | 472 | } |
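The hunk above swaps the open-coded raw_spin_lock_irqsave() calls for eeh_serialize_lock()/eeh_serialize_unlock(). Their definitions are not part of this excerpt; given that confirm_error_lock loses its static above and the call sites pass &flags and flags respectively, they are presumably thin wrappers along these lines (a guess at the shape, not the patch's text):

/* Presumed shape of the helpers (defined in a header not shown here) */
#define eeh_serialize_lock(flags) \
	raw_spin_lock_irqsave(&confirm_error_lock, *(flags))
#define eeh_serialize_unlock(flags) \
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags)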
392 | 473 | ||
@@ -525,7 +606,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) | |||
525 | * or a fundamental reset (3). | 606 | * or a fundamental reset (3). |
526 | * A fundamental reset required by any device under | 607 | * A fundamental reset required by any device under |
527 | * Partitionable Endpoint trumps hot-reset. | 608 | * Partitionable Endpoint trumps hot-reset. |
528 | */ | 609 | */ |
529 | eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); | 610 | eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); |
530 | 611 | ||
531 | if (freset) | 612 | if (freset) |
@@ -538,8 +619,8 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) | |||
538 | */ | 619 | */ |
539 | #define PCI_BUS_RST_HOLD_TIME_MSEC 250 | 620 | #define PCI_BUS_RST_HOLD_TIME_MSEC 250 |
540 | msleep(PCI_BUS_RST_HOLD_TIME_MSEC); | 621 | msleep(PCI_BUS_RST_HOLD_TIME_MSEC); |
541 | 622 | ||
542 | /* We might get hit with another EEH freeze as soon as the | 623 | /* We might get hit with another EEH freeze as soon as the |
543 | * pci slot reset line is dropped. Make sure we don't miss | 624 | * pci slot reset line is dropped. Make sure we don't miss |
544 | * these, and clear the flag now. | 625 | * these, and clear the flag now. |
545 | */ | 626 | */ |
@@ -565,6 +646,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) | |||
565 | */ | 646 | */ |
566 | int eeh_reset_pe(struct eeh_pe *pe) | 647 | int eeh_reset_pe(struct eeh_pe *pe) |
567 | { | 648 | { |
649 | int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); | ||
568 | int i, rc; | 650 | int i, rc; |
569 | 651 | ||
570 | /* Take three shots at resetting the bus */ | 652 | /* Take three shots at resetting the bus */ |
@@ -572,7 +654,7 @@ int eeh_reset_pe(struct eeh_pe *pe) | |||
572 | eeh_reset_pe_once(pe); | 654 | eeh_reset_pe_once(pe); |
573 | 655 | ||
574 | rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); | 656 | rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); |
575 | if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) | 657 | if ((rc & flags) == flags) |
576 | return 0; | 658 | return 0; |
577 | 659 | ||
578 | if (rc < 0) { | 660 | if (rc < 0) { |
@@ -604,7 +686,7 @@ void eeh_save_bars(struct eeh_dev *edev) | |||
604 | if (!edev) | 686 | if (!edev) |
605 | return; | 687 | return; |
606 | dn = eeh_dev_to_of_node(edev); | 688 | dn = eeh_dev_to_of_node(edev); |
607 | 689 | ||
608 | for (i = 0; i < 16; i++) | 690 | for (i = 0; i < 16; i++) |
609 | eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); | 691 | eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); |
610 | } | 692 | } |
@@ -674,11 +756,21 @@ int __exit eeh_ops_unregister(const char *name) | |||
674 | * Even if force-off is set, the EEH hardware is still enabled, so that | 756 | * Even if force-off is set, the EEH hardware is still enabled, so that |
675 | * newer systems can boot. | 757 | * newer systems can boot. |
676 | */ | 758 | */ |
677 | static int __init eeh_init(void) | 759 | int eeh_init(void) |
678 | { | 760 | { |
679 | struct pci_controller *hose, *tmp; | 761 | struct pci_controller *hose, *tmp; |
680 | struct device_node *phb; | 762 | struct device_node *phb; |
681 | int ret; | 763 | static int cnt = 0; |
764 | int ret = 0; | ||
765 | |||
766 | /* | ||
767 | * We have to delay the initialization on PowerNV until after | ||
768 | * the PCI hierarchy tree has been built, because the PEs | ||
769 | * are figured out from PCI devices instead of device | ||
770 | * tree nodes. | ||
771 | */ | ||
772 | if (machine_is(powernv) && cnt++ <= 0) | ||
773 | return ret; | ||
682 | 774 | ||
683 | /* call platform initialization function */ | 775 | /* call platform initialization function */ |
684 | if (!eeh_ops) { | 776 | if (!eeh_ops) { |
@@ -691,7 +783,10 @@ static int __init eeh_init(void) | |||
691 | return ret; | 783 | return ret; |
692 | } | 784 | } |
693 | 785 | ||
694 | raw_spin_lock_init(&confirm_error_lock); | 786 | /* Initialize EEH event */ |
787 | ret = eeh_event_init(); | ||
788 | if (ret) | ||
789 | return ret; | ||
695 | 790 | ||
696 | /* Enable EEH for all adapters */ | 791 | /* Enable EEH for all adapters */ |
697 | if (eeh_probe_mode_devtree()) { | 792 | if (eeh_probe_mode_devtree()) { |
@@ -700,6 +795,25 @@ static int __init eeh_init(void) | |||
700 | phb = hose->dn; | 795 | phb = hose->dn; |
701 | traverse_pci_devices(phb, eeh_ops->of_probe, NULL); | 796 | traverse_pci_devices(phb, eeh_ops->of_probe, NULL); |
702 | } | 797 | } |
798 | } else if (eeh_probe_mode_dev()) { | ||
799 | list_for_each_entry_safe(hose, tmp, | ||
800 | &hose_list, list_node) | ||
801 | pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); | ||
802 | } else { | ||
803 | pr_warning("%s: Invalid probe mode %d\n", | ||
804 | __func__, eeh_probe_mode); | ||
805 | return -EINVAL; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Call platform post-initialization. It's a good chance to | ||
810 | * inform the platform that EEH is ready to supply service once | ||
811 | * the I/O cache has been built up. | ||
812 | */ | ||
813 | if (eeh_ops->post_init) { | ||
814 | ret = eeh_ops->post_init(); | ||
815 | if (ret) | ||
816 | return ret; | ||
703 | } | 817 | } |
704 | 818 | ||
705 | if (eeh_subsystem_enabled) | 819 | if (eeh_subsystem_enabled) |
@@ -728,6 +842,14 @@ static void eeh_add_device_early(struct device_node *dn) | |||
728 | { | 842 | { |
729 | struct pci_controller *phb; | 843 | struct pci_controller *phb; |
730 | 844 | ||
845 | /* | ||
846 | * If we're doing the EEH probe based on PCI devices, we | ||
847 | * delay the probe until the late stage because | ||
848 | * the PCI device isn't available at this moment. | ||
849 | */ | ||
850 | if (!eeh_probe_mode_devtree()) | ||
851 | return; | ||
852 | |||
731 | if (!of_node_to_eeh_dev(dn)) | 853 | if (!of_node_to_eeh_dev(dn)) |
732 | return; | 854 | return; |
733 | phb = of_node_to_eeh_dev(dn)->phb; | 855 | phb = of_node_to_eeh_dev(dn)->phb; |
@@ -736,7 +858,6 @@ static void eeh_add_device_early(struct device_node *dn) | |||
736 | if (NULL == phb || 0 == phb->buid) | 858 | if (NULL == phb || 0 == phb->buid) |
737 | return; | 859 | return; |
738 | 860 | ||
739 | /* FIXME: hotplug support on POWERNV */ | ||
740 | eeh_ops->of_probe(dn, NULL); | 861 | eeh_ops->of_probe(dn, NULL); |
741 | } | 862 | } |
742 | 863 | ||
@@ -787,6 +908,13 @@ static void eeh_add_device_late(struct pci_dev *dev) | |||
787 | edev->pdev = dev; | 908 | edev->pdev = dev; |
788 | dev->dev.archdata.edev = edev; | 909 | dev->dev.archdata.edev = edev; |
789 | 910 | ||
911 | /* | ||
912 | * We have to do the EEH probe here because the PCI device | ||
913 | * had not been created yet at the early stage. | ||
914 | */ | ||
915 | if (eeh_probe_mode_dev()) | ||
916 | eeh_ops->dev_probe(dev, NULL); | ||
917 | |||
790 | eeh_addr_cache_insert_dev(dev); | 918 | eeh_addr_cache_insert_dev(dev); |
791 | } | 919 | } |
792 | 920 | ||
@@ -803,12 +931,12 @@ void eeh_add_device_tree_late(struct pci_bus *bus) | |||
803 | struct pci_dev *dev; | 931 | struct pci_dev *dev; |
804 | 932 | ||
805 | list_for_each_entry(dev, &bus->devices, bus_list) { | 933 | list_for_each_entry(dev, &bus->devices, bus_list) { |
806 | eeh_add_device_late(dev); | 934 | eeh_add_device_late(dev); |
807 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { | 935 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { |
808 | struct pci_bus *subbus = dev->subordinate; | 936 | struct pci_bus *subbus = dev->subordinate; |
809 | if (subbus) | 937 | if (subbus) |
810 | eeh_add_device_tree_late(subbus); | 938 | eeh_add_device_tree_late(subbus); |
811 | } | 939 | } |
812 | } | 940 | } |
813 | } | 941 | } |
814 | EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); | 942 | EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); |
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 5ce3ba7ad137..f9ac1232a746 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c | |||
@@ -194,7 +194,7 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | /* Skip any devices for which EEH is not enabled. */ | 196 | /* Skip any devices for which EEH is not enabled. */ |
197 | if (!edev->pe) { | 197 | if (!eeh_probe_mode_dev() && !edev->pe) { |
198 | #ifdef DEBUG | 198 | #ifdef DEBUG |
199 | pr_info("PCI: skip building address cache for=%s - %s\n", | 199 | pr_info("PCI: skip building address cache for=%s - %s\n", |
200 | pci_name(dev), dn->full_name); | 200 | pci_name(dev), dn->full_name); |
@@ -285,7 +285,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev) | |||
285 | * Must be run late in boot process, after the pci controllers | 285 | * Must be run late in boot process, after the pci controllers |
286 | * have been scanned for devices (after all device resources are known). | 286 | * have been scanned for devices (after all device resources are known). |
287 | */ | 287 | */ |
288 | void __init eeh_addr_cache_build(void) | 288 | void eeh_addr_cache_build(void) |
289 | { | 289 | { |
290 | struct device_node *dn; | 290 | struct device_node *dn; |
291 | struct eeh_dev *edev; | 291 | struct eeh_dev *edev; |
@@ -316,4 +316,3 @@ void __init eeh_addr_cache_build(void) | |||
316 | eeh_addr_cache_print(&pci_io_addr_cache_root); | 316 | eeh_addr_cache_print(&pci_io_addr_cache_root); |
317 | #endif | 317 | #endif |
318 | } | 318 | } |
319 | |||
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 1efa28f5fc54..1efa28f5fc54 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c | |||
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a3fefb61097c..2b1ce17cae50 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c | |||
@@ -154,9 +154,9 @@ static void eeh_enable_irq(struct pci_dev *dev) | |||
154 | * eeh_report_error - Report pci error to each device driver | 154 | * eeh_report_error - Report pci error to each device driver |
155 | * @data: eeh device | 155 | * @data: eeh device |
156 | * @userdata: return value | 156 | * @userdata: return value |
157 | * | 157 | * |
158 | * Report an EEH error to each device driver, collect up and | 158 | * Report an EEH error to each device driver, collect up and |
159 | * merge the device driver responses. Cumulative response | 159 | * merge the device driver responses. Cumulative response |
160 | * passed back in "userdata". | 160 | * passed back in "userdata". |
161 | */ | 161 | */ |
162 | static void *eeh_report_error(void *data, void *userdata) | 162 | static void *eeh_report_error(void *data, void *userdata) |
@@ -349,10 +349,12 @@ static void *eeh_report_failure(void *data, void *userdata) | |||
349 | */ | 349 | */ |
350 | static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) | 350 | static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) |
351 | { | 351 | { |
352 | struct timeval tstamp; | ||
352 | int cnt, rc; | 353 | int cnt, rc; |
353 | 354 | ||
354 | /* pcibios will clear the counter; save the value */ | 355 | /* pcibios will clear the counter; save the value */ |
355 | cnt = pe->freeze_count; | 356 | cnt = pe->freeze_count; |
357 | tstamp = pe->tstamp; | ||
356 | 358 | ||
357 | /* | 359 | /* |
358 | * We don't remove the corresponding PE instances because | 360 | * We don't remove the corresponding PE instances because |
@@ -376,15 +378,17 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) | |||
376 | eeh_pe_restore_bars(pe); | 378 | eeh_pe_restore_bars(pe); |
377 | 379 | ||
378 | /* Give the system 5 seconds to finish running the user-space | 380 | /* Give the system 5 seconds to finish running the user-space |
379 | * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, | 381 | * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, |
380 | * this is a hack, but if we don't do this, and try to bring | 382 | * this is a hack, but if we don't do this, and try to bring |
381 | * the device up before the scripts have taken it down, | 383 | * the device up before the scripts have taken it down, |
382 | * potentially weird things happen. | 384 | * potentially weird things happen. |
383 | */ | 385 | */ |
384 | if (bus) { | 386 | if (bus) { |
385 | ssleep(5); | 387 | ssleep(5); |
386 | pcibios_add_pci_devices(bus); | 388 | pcibios_add_pci_devices(bus); |
387 | } | 389 | } |
390 | |||
391 | pe->tstamp = tstamp; | ||
388 | pe->freeze_count = cnt; | 392 | pe->freeze_count = cnt; |
389 | 393 | ||
390 | return 0; | 394 | return 0; |
@@ -395,24 +399,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) | |||
395 | */ | 399 | */ |
396 | #define MAX_WAIT_FOR_RECOVERY 150 | 400 | #define MAX_WAIT_FOR_RECOVERY 150 |
397 | 401 | ||
398 | /** | 402 | static void eeh_handle_normal_event(struct eeh_pe *pe) |
399 | * eeh_handle_event - Reset a PCI device after hard lockup. | ||
400 | * @pe: EEH PE | ||
401 | * | ||
402 | * While PHB detects address or data parity errors on particular PCI | ||
403 | * slot, the associated PE will be frozen. Besides, DMA's occurring | ||
404 | * to wild addresses (which usually happen due to bugs in device | ||
405 | * drivers or in PCI adapter firmware) can cause EEH error. #SERR, | ||
406 | * #PERR or other misc PCI-related errors also can trigger EEH errors. | ||
407 | * | ||
408 | * Recovery process consists of unplugging the device driver (which | ||
409 | * generated hotplug events to userspace), then issuing a PCI #RST to | ||
410 | * the device, then reconfiguring the PCI config space for all bridges | ||
411 | * & devices under this slot, and then finally restarting the device | ||
412 | * drivers (which cause a second set of hotplug events to go out to | ||
413 | * userspace). | ||
414 | */ | ||
415 | void eeh_handle_event(struct eeh_pe *pe) | ||
416 | { | 403 | { |
417 | struct pci_bus *frozen_bus; | 404 | struct pci_bus *frozen_bus; |
418 | int rc = 0; | 405 | int rc = 0; |
@@ -425,6 +412,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
425 | return; | 412 | return; |
426 | } | 413 | } |
427 | 414 | ||
415 | eeh_pe_update_time_stamp(pe); | ||
428 | pe->freeze_count++; | 416 | pe->freeze_count++; |
429 | if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) | 417 | if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) |
430 | goto excess_failures; | 418 | goto excess_failures; |
@@ -437,6 +425,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
437 | * status ... if any child can't handle the reset, then the entire | 425 | * status ... if any child can't handle the reset, then the entire |
438 | * slot is dlpar removed and added. | 426 | * slot is dlpar removed and added. |
439 | */ | 427 | */ |
428 | pr_info("EEH: Notify device drivers to shutdown\n"); | ||
440 | eeh_pe_dev_traverse(pe, eeh_report_error, &result); | 429 | eeh_pe_dev_traverse(pe, eeh_report_error, &result); |
441 | 430 | ||
442 | /* Get the current PCI slot state. This can take a long time, | 431 | /* Get the current PCI slot state. This can take a long time, |
@@ -444,7 +433,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
444 | */ | 433 | */ |
445 | rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); | 434 | rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); |
446 | if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { | 435 | if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { |
447 | printk(KERN_WARNING "EEH: Permanent failure\n"); | 436 | pr_warning("EEH: Permanent failure\n"); |
448 | goto hard_fail; | 437 | goto hard_fail; |
449 | } | 438 | } |
450 | 439 | ||
@@ -452,6 +441,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
452 | * don't post the error log until after all dev drivers | 441 | * don't post the error log until after all dev drivers |
453 | * have been informed. | 442 | * have been informed. |
454 | */ | 443 | */ |
444 | pr_info("EEH: Collect temporary log\n"); | ||
455 | eeh_slot_error_detail(pe, EEH_LOG_TEMP); | 445 | eeh_slot_error_detail(pe, EEH_LOG_TEMP); |
456 | 446 | ||
457 | /* If all device drivers were EEH-unaware, then shut | 447 | /* If all device drivers were EEH-unaware, then shut |
@@ -459,15 +449,18 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
459 | * go down willingly, without panicking the system. | 449 | * go down willingly, without panicking the system. |
460 | */ | 450 | */ |
461 | if (result == PCI_ERS_RESULT_NONE) { | 451 | if (result == PCI_ERS_RESULT_NONE) { |
452 | pr_info("EEH: Reset with hotplug activity\n"); | ||
462 | rc = eeh_reset_device(pe, frozen_bus); | 453 | rc = eeh_reset_device(pe, frozen_bus); |
463 | if (rc) { | 454 | if (rc) { |
464 | printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); | 455 | pr_warning("%s: Unable to reset, err=%d\n", |
456 | __func__, rc); | ||
465 | goto hard_fail; | 457 | goto hard_fail; |
466 | } | 458 | } |
467 | } | 459 | } |
468 | 460 | ||
469 | /* If all devices reported they can proceed, then re-enable MMIO */ | 461 | /* If all devices reported they can proceed, then re-enable MMIO */ |
470 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { | 462 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { |
463 | pr_info("EEH: Enable I/O for affected devices\n"); | ||
471 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); | 464 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); |
472 | 465 | ||
473 | if (rc < 0) | 466 | if (rc < 0) |
@@ -475,6 +468,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
475 | if (rc) { | 468 | if (rc) { |
476 | result = PCI_ERS_RESULT_NEED_RESET; | 469 | result = PCI_ERS_RESULT_NEED_RESET; |
477 | } else { | 470 | } else { |
471 | pr_info("EEH: Notify device drivers to resume I/O\n"); | ||
478 | result = PCI_ERS_RESULT_NONE; | 472 | result = PCI_ERS_RESULT_NONE; |
479 | eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); | 473 | eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); |
480 | } | 474 | } |
@@ -482,6 +476,7 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
482 | 476 | ||
483 | /* If all devices reported they can proceed, then re-enable DMA */ | 477 | /* If all devices reported they can proceed, then re-enable DMA */ |
484 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { | 478 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { |
479 | pr_info("EEH: Enabled DMA for affected devices\n"); | ||
485 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); | 480 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); |
486 | 481 | ||
487 | if (rc < 0) | 482 | if (rc < 0) |
@@ -494,17 +489,22 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
494 | 489 | ||
495 | /* If any device has a hard failure, then shut off everything. */ | 490 | /* If any device has a hard failure, then shut off everything. */ |
496 | if (result == PCI_ERS_RESULT_DISCONNECT) { | 491 | if (result == PCI_ERS_RESULT_DISCONNECT) { |
497 | printk(KERN_WARNING "EEH: Device driver gave up\n"); | 492 | pr_warning("EEH: Device driver gave up\n"); |
498 | goto hard_fail; | 493 | goto hard_fail; |
499 | } | 494 | } |
500 | 495 | ||
501 | /* If any device called out for a reset, then reset the slot */ | 496 | /* If any device called out for a reset, then reset the slot */ |
502 | if (result == PCI_ERS_RESULT_NEED_RESET) { | 497 | if (result == PCI_ERS_RESULT_NEED_RESET) { |
498 | pr_info("EEH: Reset without hotplug activity\n"); | ||
503 | rc = eeh_reset_device(pe, NULL); | 499 | rc = eeh_reset_device(pe, NULL); |
504 | if (rc) { | 500 | if (rc) { |
505 | printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); | 501 | pr_warning("%s: Cannot reset, err=%d\n", |
502 | __func__, rc); | ||
506 | goto hard_fail; | 503 | goto hard_fail; |
507 | } | 504 | } |
505 | |||
506 | pr_info("EEH: Notify device drivers " | ||
507 | "the completion of reset\n"); | ||
508 | result = PCI_ERS_RESULT_NONE; | 508 | result = PCI_ERS_RESULT_NONE; |
509 | eeh_pe_dev_traverse(pe, eeh_report_reset, &result); | 509 | eeh_pe_dev_traverse(pe, eeh_report_reset, &result); |
510 | } | 510 | } |
@@ -512,15 +512,16 @@ void eeh_handle_event(struct eeh_pe *pe) | |||
512 | /* All devices should claim they have recovered by now. */ | 512 | /* All devices should claim they have recovered by now. */ |
513 | if ((result != PCI_ERS_RESULT_RECOVERED) && | 513 | if ((result != PCI_ERS_RESULT_RECOVERED) && |
514 | (result != PCI_ERS_RESULT_NONE)) { | 514 | (result != PCI_ERS_RESULT_NONE)) { |
515 | printk(KERN_WARNING "EEH: Not recovered\n"); | 515 | pr_warning("EEH: Not recovered\n"); |
516 | goto hard_fail; | 516 | goto hard_fail; |
517 | } | 517 | } |
518 | 518 | ||
519 | /* Tell all device drivers that they can resume operations */ | 519 | /* Tell all device drivers that they can resume operations */ |
520 | pr_info("EEH: Notify device driver to resume\n"); | ||
520 | eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); | 521 | eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); |
521 | 522 | ||
522 | return; | 523 | return; |
523 | 524 | ||
524 | excess_failures: | 525 | excess_failures: |
525 | /* | 526 | /* |
526 | * About 90% of all real-life EEH failures in the field | 527 | * About 90% of all real-life EEH failures in the field |
@@ -550,3 +551,111 @@ perm_error: | |||
550 | pcibios_remove_pci_devices(frozen_bus); | 551 | pcibios_remove_pci_devices(frozen_bus); |
551 | } | 552 | } |
552 | 553 | ||
554 | static void eeh_handle_special_event(void) | ||
555 | { | ||
556 | struct eeh_pe *pe, *phb_pe; | ||
557 | struct pci_bus *bus; | ||
558 | struct pci_controller *hose, *tmp; | ||
559 | unsigned long flags; | ||
560 | int rc = 0; | ||
561 | |||
562 | /* | ||
563 | * The return value from next_error() has been classified as follows. | ||
564 | * It might be good to enumerate them. However, next_error() is only | ||
565 | * supported by the PowerNV platform for now, so it's fine to use | ||
566 | * integers directly: | ||
567 | * | ||
568 | * 4 - Dead IOC 3 - Dead PHB | ||
569 | * 2 - Fenced PHB 1 - Frozen PE | ||
570 | * 0 - No error found | ||
571 | * | ||
572 | */ | ||
573 | rc = eeh_ops->next_error(&pe); | ||
574 | if (rc <= 0) | ||
575 | return; | ||
576 | |||
577 | switch (rc) { | ||
578 | case 4: | ||
579 | /* Mark all PHBs in dead state */ | ||
580 | eeh_serialize_lock(&flags); | ||
581 | list_for_each_entry_safe(hose, tmp, | ||
582 | &hose_list, list_node) { | ||
583 | phb_pe = eeh_phb_pe_get(hose); | ||
584 | if (!phb_pe) continue; | ||
585 | |||
586 | eeh_pe_state_mark(phb_pe, | ||
587 | EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); | ||
588 | } | ||
589 | eeh_serialize_unlock(flags); | ||
590 | |||
591 | /* Purge all events */ | ||
592 | eeh_remove_event(NULL); | ||
593 | break; | ||
594 | case 3: | ||
595 | case 2: | ||
596 | case 1: | ||
597 | /* Mark the PE in fenced state */ | ||
598 | eeh_serialize_lock(&flags); | ||
599 | if (rc == 3) | ||
600 | eeh_pe_state_mark(pe, | ||
601 | EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); | ||
602 | else | ||
603 | eeh_pe_state_mark(pe, | ||
604 | EEH_PE_ISOLATED | EEH_PE_RECOVERING); | ||
605 | eeh_serialize_unlock(flags); | ||
606 | |||
607 | /* Purge all events of the PHB */ | ||
608 | eeh_remove_event(pe); | ||
609 | break; | ||
610 | default: | ||
611 | pr_err("%s: Invalid value %d from next_error()\n", | ||
612 | __func__, rc); | ||
613 | return; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * A fenced PHB or a frozen PE is handled as a normal | ||
618 | * event. For a dead PHB or IOC, we have to remove the | ||
619 | * affected PHBs. | ||
620 | */ | ||
621 | if (rc == 2 || rc == 1) | ||
622 | eeh_handle_normal_event(pe); | ||
623 | else { | ||
624 | list_for_each_entry_safe(hose, tmp, | ||
625 | &hose_list, list_node) { | ||
626 | phb_pe = eeh_phb_pe_get(hose); | ||
627 | if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) | ||
628 | continue; | ||
629 | |||
630 | bus = eeh_pe_bus_get(phb_pe); | ||
631 | /* Notify all devices that they're about to go down. */ | ||
632 | eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); | ||
633 | pcibios_remove_pci_devices(bus); | ||
634 | } | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /** | ||
639 | * eeh_handle_event - Reset a PCI device after hard lockup. | ||
640 | * @pe: EEH PE | ||
641 | * | ||
642 | * While PHB detects address or data parity errors on particular PCI | ||
643 | * slot, the associated PE will be frozen. Besides, DMA's occurring | ||
644 | * to wild addresses (which usually happen due to bugs in device | ||
645 | * drivers or in PCI adapter firmware) can cause EEH error. #SERR, | ||
646 | * #PERR or other misc PCI-related errors also can trigger EEH errors. | ||
647 | * | ||
648 | * Recovery process consists of unplugging the device driver (which | ||
649 | * generated hotplug events to userspace), then issuing a PCI #RST to | ||
650 | * the device, then reconfiguring the PCI config space for all bridges | ||
651 | * & devices under this slot, and then finally restarting the device | ||
652 | * drivers (which cause a second set of hotplug events to go out to | ||
653 | * userspace). | ||
654 | */ | ||
655 | void eeh_handle_event(struct eeh_pe *pe) | ||
656 | { | ||
657 | if (pe) | ||
658 | eeh_handle_normal_event(pe); | ||
659 | else | ||
660 | eeh_handle_special_event(); | ||
661 | } | ||
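The comment in eeh_handle_special_event() concedes that the next_error() return codes would read better as an enumeration. A hypothetical sketch of what that could look like; the names are invented, not part of this patch:

/* Hypothetical enumeration of the next_error() return codes */
enum eeh_next_error {
	EEH_NEXT_ERR_NONE	= 0,	/* No error found */
	EEH_NEXT_ERR_FROZEN_PE	= 1,
	EEH_NEXT_ERR_FENCED_PHB	= 2,
	EEH_NEXT_ERR_DEAD_PHB	= 3,
	EEH_NEXT_ERR_DEAD_IOC	= 4,
};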
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 185bedd926df..d27c5afc90ae 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c | |||
@@ -18,11 +18,10 @@ | |||
18 | 18 | ||
19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/mutex.h> | ||
22 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/semaphore.h> | ||
23 | #include <linux/pci.h> | 23 | #include <linux/pci.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/kthread.h> | 25 | #include <linux/kthread.h> |
27 | #include <asm/eeh_event.h> | 26 | #include <asm/eeh_event.h> |
28 | #include <asm/ppc-pci.h> | 27 | #include <asm/ppc-pci.h> |
@@ -35,14 +34,9 @@ | |||
35 | * work-queue, where a worker thread can drive recovery. | 34 | * work-queue, where a worker thread can drive recovery. |
36 | */ | 35 | */ |
37 | 36 | ||
38 | /* EEH event workqueue setup. */ | ||
39 | static DEFINE_SPINLOCK(eeh_eventlist_lock); | 37 | static DEFINE_SPINLOCK(eeh_eventlist_lock); |
38 | static struct semaphore eeh_eventlist_sem; | ||
40 | LIST_HEAD(eeh_eventlist); | 39 | LIST_HEAD(eeh_eventlist); |
41 | static void eeh_thread_launcher(struct work_struct *); | ||
42 | DECLARE_WORK(eeh_event_wq, eeh_thread_launcher); | ||
43 | |||
44 | /* Serialize reset sequences for a given pci device */ | ||
45 | DEFINE_MUTEX(eeh_event_mutex); | ||
46 | 40 | ||
47 | /** | 41 | /** |
48 | * eeh_event_handler - Dispatch EEH events. | 42 | * eeh_event_handler - Dispatch EEH events. |
@@ -60,55 +54,63 @@ static int eeh_event_handler(void * dummy) | |||
60 | struct eeh_event *event; | 54 | struct eeh_event *event; |
61 | struct eeh_pe *pe; | 55 | struct eeh_pe *pe; |
62 | 56 | ||
63 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | 57 | while (!kthread_should_stop()) { |
64 | event = NULL; | 58 | if (down_interruptible(&eeh_eventlist_sem)) |
65 | 59 | break; | |
66 | /* Unqueue the event, get ready to process. */ | 60 | |
67 | if (!list_empty(&eeh_eventlist)) { | 61 | /* Fetch EEH event from the queue */ |
68 | event = list_entry(eeh_eventlist.next, struct eeh_event, list); | 62 | spin_lock_irqsave(&eeh_eventlist_lock, flags); |
69 | list_del(&event->list); | 63 | event = NULL; |
70 | } | 64 | if (!list_empty(&eeh_eventlist)) { |
71 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | 65 | event = list_entry(eeh_eventlist.next, |
72 | 66 | struct eeh_event, list); | |
73 | if (event == NULL) | 67 | list_del(&event->list); |
74 | return 0; | 68 | } |
75 | 69 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | |
76 | /* Serialize processing of EEH events */ | 70 | if (!event) |
77 | mutex_lock(&eeh_event_mutex); | 71 | continue; |
78 | pe = event->pe; | 72 | |
79 | eeh_pe_state_mark(pe, EEH_PE_RECOVERING); | 73 | /* We might have event without binding PE */ |
80 | pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", | 74 | pe = event->pe; |
81 | pe->phb->global_number, pe->addr); | 75 | if (pe) { |
82 | 76 | eeh_pe_state_mark(pe, EEH_PE_RECOVERING); | |
83 | set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ | 77 | pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", |
84 | eeh_handle_event(pe); | 78 | pe->phb->global_number, pe->addr); |
85 | eeh_pe_state_clear(pe, EEH_PE_RECOVERING); | 79 | eeh_handle_event(pe); |
86 | 80 | eeh_pe_state_clear(pe, EEH_PE_RECOVERING); | |
87 | kfree(event); | 81 | } else { |
88 | mutex_unlock(&eeh_event_mutex); | 82 | eeh_handle_event(NULL); |
89 | 83 | } | |
90 | /* If there are no new errors after an hour, clear the counter. */ | 84 | |
91 | if (pe && pe->freeze_count > 0) { | 85 | kfree(event); |
92 | msleep_interruptible(3600*1000); | ||
93 | if (pe->freeze_count > 0) | ||
94 | pe->freeze_count--; | ||
95 | |||
96 | } | 86 | } |
97 | 87 | ||
98 | return 0; | 88 | return 0; |
99 | } | 89 | } |
100 | 90 | ||
101 | /** | 91 | /** |
102 | * eeh_thread_launcher - Start kernel thread to handle EEH events | 92 | * eeh_event_init - Start kernel thread to handle EEH events |
103 | * @dummy - unused | ||
104 | * | 93 | * |
105 | * This routine is called to start the kernel thread for processing | 94 | * This routine is called to start the kernel thread for processing |
106 | * EEH event. | 95 | * EEH event. |
107 | */ | 96 | */ |
108 | static void eeh_thread_launcher(struct work_struct *dummy) | 97 | int eeh_event_init(void) |
109 | { | 98 | { |
110 | if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) | 99 | struct task_struct *t; |
111 | printk(KERN_ERR "Failed to start EEH daemon\n"); | 100 | int ret = 0; |
101 | |||
102 | /* Initialize semaphore */ | ||
103 | sema_init(&eeh_eventlist_sem, 0); | ||
104 | |||
105 | t = kthread_run(eeh_event_handler, NULL, "eehd"); | ||
106 | if (IS_ERR(t)) { | ||
107 | ret = PTR_ERR(t); | ||
108 | pr_err("%s: Failed to start EEH daemon (%d)\n", | ||
109 | __func__, ret); | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | return 0; | ||
112 | } | 114 | } |
113 | 115 | ||
114 | /** | 116 | /** |
@@ -136,7 +138,45 @@ int eeh_send_failure_event(struct eeh_pe *pe) | |||
136 | list_add(&event->list, &eeh_eventlist); | 138 | list_add(&event->list, &eeh_eventlist); |
137 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | 139 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); |
138 | 140 | ||
139 | schedule_work(&eeh_event_wq); | 141 | /* For the EEH daemon to kick in */ |
142 | up(&eeh_eventlist_sem); | ||
140 | 143 | ||
141 | return 0; | 144 | return 0; |
142 | } | 145 | } |
146 | |||
147 | /** | ||
148 | * eeh_remove_event - Remove EEH event from the queue | ||
149 | * @pe: Event binding to the PE | ||
150 | * | ||
151 | * On the PowerNV platform, subsequent events might be part | ||
152 | * of an earlier one. In that case, those subsequent events | ||
153 | * are pure duplicates and unnecessary, thus | ||
154 | * they should be removed. | ||
155 | */ | ||
156 | void eeh_remove_event(struct eeh_pe *pe) | ||
157 | { | ||
158 | unsigned long flags; | ||
159 | struct eeh_event *event, *tmp; | ||
160 | |||
161 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | ||
162 | list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { | ||
163 | /* | ||
164 | * If no valid PE is passed in, that means we | ||
165 | * already have an event corresponding to a dead IOC, | ||
166 | * and all events should be purged. | ||
167 | */ | ||
168 | if (!pe) { | ||
169 | list_del(&event->list); | ||
170 | kfree(event); | ||
171 | } else if (pe->type & EEH_PE_PHB) { | ||
172 | if (event->pe && event->pe->phb == pe->phb) { | ||
173 | list_del(&event->list); | ||
174 | kfree(event); | ||
175 | } | ||
176 | } else if (event->pe == pe) { | ||
177 | list_del(&event->list); | ||
178 | kfree(event); | ||
179 | } | ||
180 | } | ||
181 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | ||
182 | } | ||
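The rewrite above turns the one-shot workqueue into a long-lived kthread blocked on a counting semaphore: eeh_send_failure_event() is the producer (list_add under the spinlock, then up()), and eeh_event_handler() is the consumer (down_interruptible(), then list_del under the same lock). Reduced to a minimal standalone sketch, with invented names:

/* Minimal producer/consumer sketch of the pattern above */
struct ev {
	struct list_head list;
};

static LIST_HEAD(ev_list);
static DEFINE_SPINLOCK(ev_lock);
static struct semaphore ev_sem;		/* sema_init(&ev_sem, 0) at setup */

static void ev_produce(struct ev *e)
{
	unsigned long flags;

	spin_lock_irqsave(&ev_lock, flags);
	list_add_tail(&e->list, &ev_list);
	spin_unlock_irqrestore(&ev_lock, flags);
	up(&ev_sem);			/* one up() per queued event */
}

static struct ev *ev_consume(void)	/* runs in the kthread */
{
	struct ev *e = NULL;
	unsigned long flags;

	if (down_interruptible(&ev_sem))
		return NULL;		/* interrupted: let caller decide */

	spin_lock_irqsave(&ev_lock, flags);
	if (!list_empty(&ev_list)) {
		e = list_first_entry(&ev_list, struct ev, list);
		list_del(&e->list);
	}
	spin_unlock_irqrestore(&ev_lock, flags);
	return e;
}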
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 9d4a9e8562b2..016588a6f5ed 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/delay.h> | ||
25 | #include <linux/export.h> | 26 | #include <linux/export.h> |
26 | #include <linux/gfp.h> | 27 | #include <linux/gfp.h> |
27 | #include <linux/init.h> | 28 | #include <linux/init.h> |
@@ -78,9 +79,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) | |||
78 | } | 79 | } |
79 | 80 | ||
80 | /* Put it into the list */ | 81 | /* Put it into the list */ |
81 | eeh_lock(); | ||
82 | list_add_tail(&pe->child, &eeh_phb_pe); | 82 | list_add_tail(&pe->child, &eeh_phb_pe); |
83 | eeh_unlock(); | ||
84 | 83 | ||
85 | pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); | 84 | pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); |
86 | 85 | ||
@@ -95,7 +94,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) | |||
95 | * hierarchy tree is composed of PHB PEs. The function is used | 94 | * hierarchy tree is composed of PHB PEs. The function is used |
96 | * to retrieve the corresponding PHB PE according to the given PHB. | 95 | * to retrieve the corresponding PHB PE according to the given PHB. |
97 | */ | 96 | */ |
98 | static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) | 97 | struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) |
99 | { | 98 | { |
100 | struct eeh_pe *pe; | 99 | struct eeh_pe *pe; |
101 | 100 | ||
@@ -185,21 +184,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, | |||
185 | return NULL; | 184 | return NULL; |
186 | } | 185 | } |
187 | 186 | ||
188 | eeh_lock(); | ||
189 | |||
190 | /* Traverse root PE */ | 187 | /* Traverse root PE */ |
191 | for (pe = root; pe; pe = eeh_pe_next(pe, root)) { | 188 | for (pe = root; pe; pe = eeh_pe_next(pe, root)) { |
192 | eeh_pe_for_each_dev(pe, edev) { | 189 | eeh_pe_for_each_dev(pe, edev) { |
193 | ret = fn(edev, flag); | 190 | ret = fn(edev, flag); |
194 | if (ret) { | 191 | if (ret) |
195 | eeh_unlock(); | ||
196 | return ret; | 192 | return ret; |
197 | } | ||
198 | } | 193 | } |
199 | } | 194 | } |
200 | 195 | ||
201 | eeh_unlock(); | ||
202 | |||
203 | return NULL; | 196 | return NULL; |
204 | } | 197 | } |
205 | 198 | ||
@@ -228,7 +221,7 @@ static void *__eeh_pe_get(void *data, void *flag) | |||
228 | return pe; | 221 | return pe; |
229 | 222 | ||
230 | /* Try BDF address */ | 223 | /* Try BDF address */ |
231 | if (edev->pe_config_addr && | 224 | if (edev->config_addr && |
232 | (edev->config_addr == pe->config_addr)) | 225 | (edev->config_addr == pe->config_addr)) |
233 | return pe; | 226 | return pe; |
234 | 227 | ||
@@ -246,7 +239,7 @@ static void *__eeh_pe_get(void *data, void *flag) | |||
246 | * which is composed of PCI bus/device/function number, or unified | 239 | * which is composed of PCI bus/device/function number, or unified |
247 | * PE address. | 240 | * PE address. |
248 | */ | 241 | */ |
249 | static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) | 242 | struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) |
250 | { | 243 | { |
251 | struct eeh_pe *root = eeh_phb_pe_get(edev->phb); | 244 | struct eeh_pe *root = eeh_phb_pe_get(edev->phb); |
252 | struct eeh_pe *pe; | 245 | struct eeh_pe *pe; |
@@ -305,8 +298,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
305 | { | 298 | { |
306 | struct eeh_pe *pe, *parent; | 299 | struct eeh_pe *pe, *parent; |
307 | 300 | ||
308 | eeh_lock(); | ||
309 | |||
310 | /* | 301 | /* |
311 | * Search the PE has been existing or not according | 302 | * Search the PE has been existing or not according |
312 | * to the PE address. If that has been existing, the | 303 | * to the PE address. If that has been existing, the |
@@ -316,7 +307,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
316 | pe = eeh_pe_get(edev); | 307 | pe = eeh_pe_get(edev); |
317 | if (pe && !(pe->type & EEH_PE_INVALID)) { | 308 | if (pe && !(pe->type & EEH_PE_INVALID)) { |
318 | if (!edev->pe_config_addr) { | 309 | if (!edev->pe_config_addr) { |
319 | eeh_unlock(); | ||
320 | pr_err("%s: PE with addr 0x%x already exists\n", | 310 | pr_err("%s: PE with addr 0x%x already exists\n", |
321 | __func__, edev->config_addr); | 311 | __func__, edev->config_addr); |
322 | return -EEXIST; | 312 | return -EEXIST; |
@@ -328,7 +318,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
328 | 318 | ||
329 | /* Put the edev to PE */ | 319 | /* Put the edev to PE */ |
330 | list_add_tail(&edev->list, &pe->edevs); | 320 | list_add_tail(&edev->list, &pe->edevs); |
331 | eeh_unlock(); | ||
332 | pr_debug("EEH: Add %s to Bus PE#%x\n", | 321 | pr_debug("EEH: Add %s to Bus PE#%x\n", |
333 | edev->dn->full_name, pe->addr); | 322 | edev->dn->full_name, pe->addr); |
334 | 323 | ||
@@ -347,7 +336,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
347 | parent->type &= ~EEH_PE_INVALID; | 336 | parent->type &= ~EEH_PE_INVALID; |
348 | parent = parent->parent; | 337 | parent = parent->parent; |
349 | } | 338 | } |
350 | eeh_unlock(); | ||
351 | pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", | 339 | pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", |
352 | edev->dn->full_name, pe->addr, pe->parent->addr); | 340 | edev->dn->full_name, pe->addr, pe->parent->addr); |
353 | 341 | ||
@@ -357,7 +345,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
357 | /* Create a new EEH PE */ | 345 | /* Create a new EEH PE */ |
358 | pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); | 346 | pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); |
359 | if (!pe) { | 347 | if (!pe) { |
360 | eeh_unlock(); | ||
361 | pr_err("%s: out of memory!\n", __func__); | 348 | pr_err("%s: out of memory!\n", __func__); |
362 | return -ENOMEM; | 349 | return -ENOMEM; |
363 | } | 350 | } |
@@ -365,6 +352,17 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
365 | pe->config_addr = edev->config_addr; | 352 | pe->config_addr = edev->config_addr; |
366 | 353 | ||
367 | /* | 354 | /* |
355 | * While doing PE reset, we probably hot-reset the | ||
356 | * upstream bridge. However, the PCI devices including | ||
357 | * the associated EEH devices might be removed when EEH | ||
358 | * core is doing recovery, so it is not safe to retrieve | ||
359 | * the bridge through a downstream EEH device. We have to | ||
360 | * trace the parent PCI bus, then the upstream bridge. | ||
361 | */ | ||
362 | if (eeh_probe_mode_dev()) | ||
363 | pe->bus = eeh_dev_to_pci_dev(edev)->bus; | ||
364 | |||
365 | /* | ||
368 | * Put the new EEH PE into hierarchy tree. If the parent | 366 | * Put the new EEH PE into hierarchy tree. If the parent |
369 | * can't be found, the newly created PE will be attached | 367 | * can't be found, the newly created PE will be attached |
370 | * to PHB directly. Otherwise, we have to associate the | 368 | * to PHB directly. Otherwise, we have to associate the |
@@ -374,7 +372,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
374 | if (!parent) { | 372 | if (!parent) { |
375 | parent = eeh_phb_pe_get(edev->phb); | 373 | parent = eeh_phb_pe_get(edev->phb); |
376 | if (!parent) { | 374 | if (!parent) { |
377 | eeh_unlock(); | ||
378 | pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", | 375 | pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", |
379 | __func__, edev->phb->global_number); | 376 | __func__, edev->phb->global_number); |
380 | edev->pe = NULL; | 377 | edev->pe = NULL; |
@@ -391,7 +388,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) | |||
391 | list_add_tail(&pe->child, &parent->child_list); | 388 | list_add_tail(&pe->child, &parent->child_list); |
392 | list_add_tail(&edev->list, &pe->edevs); | 389 | list_add_tail(&edev->list, &pe->edevs); |
393 | edev->pe = pe; | 390 | edev->pe = pe; |
394 | eeh_unlock(); | ||
395 | pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", | 391 | pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", |
396 | edev->dn->full_name, pe->addr, pe->parent->addr); | 392 | edev->dn->full_name, pe->addr, pe->parent->addr); |
397 | 393 | ||
@@ -419,8 +415,6 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) | |||
419 | return -EEXIST; | 415 | return -EEXIST; |
420 | } | 416 | } |
421 | 417 | ||
422 | eeh_lock(); | ||
423 | |||
424 | /* Remove the EEH device */ | 418 | /* Remove the EEH device */ |
425 | pe = edev->pe; | 419 | pe = edev->pe; |
426 | edev->pe = NULL; | 420 | edev->pe = NULL; |
@@ -465,12 +459,37 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) | |||
465 | pe = parent; | 459 | pe = parent; |
466 | } | 460 | } |
467 | 461 | ||
468 | eeh_unlock(); | ||
469 | |||
470 | return 0; | 462 | return 0; |
471 | } | 463 | } |
472 | 464 | ||
473 | /** | 465 | /** |
466 | * eeh_pe_update_time_stamp - Update PE's frozen time stamp | ||
467 | * @pe: EEH PE | ||
468 | * | ||
469 | * We keep a time stamp for each PE to trace when it got frozen | ||
470 | * within the last hour. The function should be called to update | ||
471 | * the time stamp on the first error of the specific PE. On the | ||
472 | * other hand, errors older than one hour needn't be counted. | ||
473 | */ | ||
474 | void eeh_pe_update_time_stamp(struct eeh_pe *pe) | ||
475 | { | ||
476 | struct timeval tstamp; | ||
477 | |||
478 | if (!pe) return; | ||
479 | |||
480 | if (pe->freeze_count <= 0) { | ||
481 | pe->freeze_count = 0; | ||
482 | do_gettimeofday(&pe->tstamp); | ||
483 | } else { | ||
484 | do_gettimeofday(&tstamp); | ||
485 | if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) { | ||
486 | pe->tstamp = tstamp; | ||
487 | pe->freeze_count = 0; | ||
488 | } | ||
489 | } | ||
490 | } | ||
491 | |||
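eeh_pe_update_time_stamp() only resets the hourly window; the caller still owns the counter. A rough usage sketch, where the per-hour limit of 5 is an assumption rather than something this hunk defines:

    /* Hypothetical caller in the recovery path. */
    eeh_pe_update_time_stamp(pe);
    pe->freeze_count++;
    if (pe->freeze_count > 5)   /* assumed max freezes per hour */
            pr_err("EEH: PE frozen too often in the last hour\n");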
492 | /** | ||
474 | * __eeh_pe_state_mark - Mark the state for the PE | 493 | * __eeh_pe_state_mark - Mark the state for the PE |
475 | * @data: EEH PE | 494 | * @data: EEH PE |
476 | * @flag: state | 495 | * @flag: state |
@@ -512,9 +531,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) | |||
512 | */ | 531 | */ |
513 | void eeh_pe_state_mark(struct eeh_pe *pe, int state) | 532 | void eeh_pe_state_mark(struct eeh_pe *pe, int state) |
514 | { | 533 | { |
515 | eeh_lock(); | ||
516 | eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); | 534 | eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); |
517 | eeh_unlock(); | ||
518 | } | 535 | } |
519 | 536 | ||
520 | /** | 537 | /** |
@@ -548,35 +565,135 @@ static void *__eeh_pe_state_clear(void *data, void *flag) | |||
548 | */ | 565 | */ |
549 | void eeh_pe_state_clear(struct eeh_pe *pe, int state) | 566 | void eeh_pe_state_clear(struct eeh_pe *pe, int state) |
550 | { | 567 | { |
551 | eeh_lock(); | ||
552 | eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); | 568 | eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); |
553 | eeh_unlock(); | ||
554 | } | 569 | } |
555 | 570 | ||
556 | /** | 571 | /* |
557 | * eeh_restore_one_device_bars - Restore the Base Address Registers for one device | 572 | * Some PCI bridges (e.g. PLX bridges) have primary/secondary |
558 | * @data: EEH device | 573 | * buses assigned explicitly by firmware, and we probably have |
559 | * @flag: Unused | 574 | * lost that after reset. So we have to delay the check until |
575 | * the PCI-CFG registers have been restored for the parent | ||
576 | * bridge. | ||
560 | * | 577 | * |
561 | * Loads the PCI configuration space base address registers, | 578 | * Don't use normal PCI-CFG accessors, which have probably been |
562 | * the expansion ROM base address, the latency timer, and etc. | 579 | * blocked on the normal path at this stage. Instead, use |
563 | * from the saved values in the device node. | 580 | * eeh operations, which are always permitted. |
564 | */ | 581 | */ |
565 | static void *eeh_restore_one_device_bars(void *data, void *flag) | 582 | static void eeh_bridge_check_link(struct pci_dev *pdev, |
583 | struct device_node *dn) | ||
584 | { | ||
585 | int cap; | ||
586 | uint32_t val; | ||
587 | int timeout = 0; | ||
588 | |||
589 | /* | ||
590 | * We only check root port and downstream ports of | ||
591 | * PCIe switches | ||
592 | */ | ||
593 | if (!pci_is_pcie(pdev) || | ||
594 | (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && | ||
595 | pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) | ||
596 | return; | ||
597 | |||
598 | pr_debug("%s: Check PCIe link for %s ...\n", | ||
599 | __func__, pci_name(pdev)); | ||
600 | |||
601 | /* Check slot status */ | ||
602 | cap = pdev->pcie_cap; | ||
603 | eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); | ||
604 | if (!(val & PCI_EXP_SLTSTA_PDS)) { | ||
605 | pr_debug(" No card in the slot (0x%04x) !\n", val); | ||
606 | return; | ||
607 | } | ||
608 | |||
609 | /* Check power status if we have the capability */ | ||
610 | eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val); | ||
611 | if (val & PCI_EXP_SLTCAP_PCP) { | ||
612 | eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val); | ||
613 | if (val & PCI_EXP_SLTCTL_PCC) { | ||
614 | pr_debug(" In power-off state, power it on ...\n"); | ||
615 | val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); | ||
616 | val |= (0x0100 & PCI_EXP_SLTCTL_PIC); | ||
617 | eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val); | ||
618 | msleep(2 * 1000); | ||
619 | } | ||
620 | } | ||
621 | |||
622 | /* Enable link */ | ||
623 | eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val); | ||
624 | val &= ~PCI_EXP_LNKCTL_LD; | ||
625 | eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val); | ||
626 | |||
627 | /* Check link */ | ||
628 | eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val); | ||
629 | if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { | ||
630 | pr_debug(" No link reporting capability (0x%08x) \n", val); | ||
631 | msleep(1000); | ||
632 | return; | ||
633 | } | ||
634 | |||
635 | /* Wait for the link to come up, with a 5s timeout */ | ||
636 | timeout = 0; | ||
637 | while (timeout < 5000) { | ||
638 | msleep(20); | ||
639 | timeout += 20; | ||
640 | |||
641 | eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val); | ||
642 | if (val & PCI_EXP_LNKSTA_DLLLA) | ||
643 | break; | ||
644 | } | ||
645 | |||
646 | if (val & PCI_EXP_LNKSTA_DLLLA) | ||
647 | pr_debug(" Link up (%s)\n", | ||
648 | (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB"); | ||
649 | else | ||
650 | pr_debug(" Link not ready (0x%04x)\n", val); | ||
651 | } | ||
652 | |||
653 | #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) | ||
654 | #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) | ||
655 | |||
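BYTE_SWAP() is easy to misread: config_space[] was saved with 32-bit config reads, so on a big-endian host a byte at config offset OFF lands at the mirrored position inside its dword. A standalone check (not part of the patch) confirms the arithmetic:

    /* 8*(OFF/4) + 3 - OFF == 4*(OFF/4) + (3 - OFF%4): same dword,
     * byte order reversed within it. */
    #include <assert.h>
    #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))

    int main(void)
    {
            assert(BYTE_SWAP(0x0C) == 15);  /* PCI_CACHE_LINE_SIZE */
            assert(BYTE_SWAP(0x0D) == 14);  /* PCI_LATENCY_TIMER   */
            assert(BYTE_SWAP(0x0F) == 12);
            return 0;
    }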
656 | static void eeh_restore_bridge_bars(struct pci_dev *pdev, | ||
657 | struct eeh_dev *edev, | ||
658 | struct device_node *dn) | ||
659 | { | ||
660 | int i; | ||
661 | |||
662 | /* | ||
663 | * Device BARs: 0x10 - 0x18 | ||
664 | * Bus numbers and windows: 0x18 - 0x30 | ||
665 | */ | ||
666 | for (i = 4; i < 13; i++) | ||
667 | eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); | ||
668 | /* Rom: 0x38 */ | ||
669 | eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]); | ||
670 | |||
671 | /* Cache line & Latency timer: 0xC 0xD */ | ||
672 | eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, | ||
673 | SAVED_BYTE(PCI_CACHE_LINE_SIZE)); | ||
674 | eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, | ||
675 | SAVED_BYTE(PCI_LATENCY_TIMER)); | ||
676 | /* Max latency, min grant, interrupt pin and line: 0x3C */ | ||
677 | eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); | ||
678 | |||
679 | /* PCI Command: 0x4 */ | ||
680 | eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); | ||
681 | |||
682 | /* Check the PCIe link is ready */ | ||
683 | eeh_bridge_check_link(pdev, dn); | ||
684 | } | ||
685 | |||
686 | static void eeh_restore_device_bars(struct eeh_dev *edev, | ||
687 | struct device_node *dn) | ||
566 | { | 688 | { |
567 | int i; | 689 | int i; |
568 | u32 cmd; | 690 | u32 cmd; |
569 | struct eeh_dev *edev = (struct eeh_dev *)data; | ||
570 | struct device_node *dn = eeh_dev_to_of_node(edev); | ||
571 | 691 | ||
572 | for (i = 4; i < 10; i++) | 692 | for (i = 4; i < 10; i++) |
573 | eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); | 693 | eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); |
574 | /* 12 == Expansion ROM Address */ | 694 | /* 12 == Expansion ROM Address */ |
575 | eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); | 695 | eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); |
576 | 696 | ||
577 | #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) | ||
578 | #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) | ||
579 | |||
580 | eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, | 697 | eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, |
581 | SAVED_BYTE(PCI_CACHE_LINE_SIZE)); | 698 | SAVED_BYTE(PCI_CACHE_LINE_SIZE)); |
582 | eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, | 699 | eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, |
@@ -599,6 +716,34 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) | |||
599 | else | 716 | else |
600 | cmd &= ~PCI_COMMAND_SERR; | 717 | cmd &= ~PCI_COMMAND_SERR; |
601 | eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); | 718 | eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); |
719 | } | ||
720 | |||
721 | /** | ||
722 | * eeh_restore_one_device_bars - Restore the Base Address Registers for one device | ||
723 | * @data: EEH device | ||
724 | * @flag: Unused | ||
725 | * | ||
726 | * Loads the PCI configuration space base address registers, | ||
727 | * the expansion ROM base address, the latency timer, and etc. | ||
728 | * from the saved values in the device node. | ||
729 | */ | ||
730 | static void *eeh_restore_one_device_bars(void *data, void *flag) | ||
731 | { | ||
732 | struct pci_dev *pdev = NULL; | ||
733 | struct eeh_dev *edev = (struct eeh_dev *)data; | ||
734 | struct device_node *dn = eeh_dev_to_of_node(edev); | ||
735 | |||
736 | /* Trace the PCI bridge */ | ||
737 | if (eeh_probe_mode_dev()) { | ||
738 | pdev = eeh_dev_to_pci_dev(edev); | ||
739 | if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE) | ||
740 | pdev = NULL; | ||
741 | } | ||
742 | |||
743 | if (pdev) | ||
744 | eeh_restore_bridge_bars(pdev, edev, dn); | ||
745 | else | ||
746 | eeh_restore_device_bars(edev, dn); | ||
602 | 747 | ||
603 | return NULL; | 748 | return NULL; |
604 | } | 749 | } |
@@ -635,19 +780,21 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) | |||
635 | struct eeh_dev *edev; | 780 | struct eeh_dev *edev; |
636 | struct pci_dev *pdev; | 781 | struct pci_dev *pdev; |
637 | 782 | ||
638 | eeh_lock(); | ||
639 | |||
640 | if (pe->type & EEH_PE_PHB) { | 783 | if (pe->type & EEH_PE_PHB) { |
641 | bus = pe->phb->bus; | 784 | bus = pe->phb->bus; |
642 | } else if (pe->type & EEH_PE_BUS || | 785 | } else if (pe->type & EEH_PE_BUS || |
643 | pe->type & EEH_PE_DEVICE) { | 786 | pe->type & EEH_PE_DEVICE) { |
787 | if (pe->bus) { | ||
788 | bus = pe->bus; | ||
789 | goto out; | ||
790 | } | ||
791 | |||
644 | edev = list_first_entry(&pe->edevs, struct eeh_dev, list); | 792 | edev = list_first_entry(&pe->edevs, struct eeh_dev, list); |
645 | pdev = eeh_dev_to_pci_dev(edev); | 793 | pdev = eeh_dev_to_pci_dev(edev); |
646 | if (pdev) | 794 | if (pdev) |
647 | bus = pdev->bus; | 795 | bus = pdev->bus; |
648 | } | 796 | } |
649 | 797 | ||
650 | eeh_unlock(); | 798 | out: |
651 | |||
652 | return bus; | 799 | return bus; |
653 | } | 800 | } |
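The new pe->bus fast path matters because the edev list can already be empty while recovery removes devices; the bus cached at PE creation (see the eeh_add_to_parent_pe() hunk above) keeps the lookup working. In outline:

    /* Lookup order after this hunk (comment-only summary):
     * 1. PHB PE          -> pe->phb->bus
     * 2. bus/device PE   -> cached pe->bus, when set
     * 3. fallback        -> bus of the first remaining edev */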
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index d37708360f2e..e7ae3484918c 100644 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c | |||
@@ -72,4 +72,3 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) | |||
72 | device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); | 72 | device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); |
73 | device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); | 73 | device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); |
74 | } | 74 | } |
75 | |||
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 8741c854e03d..ab15b8d057ad 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -629,21 +629,43 @@ _GLOBAL(ret_from_except_lite) | |||
629 | 629 | ||
630 | CURRENT_THREAD_INFO(r9, r1) | 630 | CURRENT_THREAD_INFO(r9, r1) |
631 | ld r3,_MSR(r1) | 631 | ld r3,_MSR(r1) |
632 | #ifdef CONFIG_PPC_BOOK3E | ||
633 | ld r10,PACACURRENT(r13) | ||
634 | #endif /* CONFIG_PPC_BOOK3E */ | ||
632 | ld r4,TI_FLAGS(r9) | 635 | ld r4,TI_FLAGS(r9) |
633 | andi. r3,r3,MSR_PR | 636 | andi. r3,r3,MSR_PR |
634 | beq resume_kernel | 637 | beq resume_kernel |
638 | #ifdef CONFIG_PPC_BOOK3E | ||
639 | lwz r3,(THREAD+THREAD_DBCR0)(r10) | ||
640 | #endif /* CONFIG_PPC_BOOK3E */ | ||
635 | 641 | ||
636 | /* Check current_thread_info()->flags */ | 642 | /* Check current_thread_info()->flags */ |
637 | andi. r0,r4,_TIF_USER_WORK_MASK | 643 | andi. r0,r4,_TIF_USER_WORK_MASK |
644 | #ifdef CONFIG_PPC_BOOK3E | ||
645 | bne 1f | ||
646 | /* | ||
647 | * Check to see if the dbcr0 register is set up to debug. | ||
648 | * Use the internal debug mode bit to do this. | ||
649 | */ | ||
650 | andis. r0,r3,DBCR0_IDM@h | ||
638 | beq restore | 651 | beq restore |
639 | 652 | mfmsr r0 | |
640 | andi. r0,r4,_TIF_NEED_RESCHED | 653 | rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ |
641 | beq 1f | 654 | mtmsr r0 |
655 | mtspr SPRN_DBCR0,r3 | ||
656 | li r10, -1 | ||
657 | mtspr SPRN_DBSR,r10 | ||
658 | b restore | ||
659 | #else | ||
660 | beq restore | ||
661 | #endif | ||
662 | 1: andi. r0,r4,_TIF_NEED_RESCHED | ||
663 | beq 2f | ||
642 | bl .restore_interrupts | 664 | bl .restore_interrupts |
643 | SCHEDULE_USER | 665 | SCHEDULE_USER |
644 | b .ret_from_except_lite | 666 | b .ret_from_except_lite |
645 | 667 | ||
646 | 1: bl .save_nvgprs | 668 | 2: bl .save_nvgprs |
647 | bl .restore_interrupts | 669 | bl .restore_interrupts |
648 | addi r3,r1,STACK_FRAME_OVERHEAD | 670 | addi r3,r1,STACK_FRAME_OVERHEAD |
649 | bl .do_notify_resume | 671 | bl .do_notify_resume |
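For readers who don't follow Book3E assembly, the new return-to-user path roughly corresponds to the pseudo-C below (names mirror the registers used above; this is an illustration, not kernel code):

    /* If no user work is pending but DBCR0_IDM says the internal
     * debug mode is armed: drop MSR_DE, reload DBCR0 from the
     * thread, and clear all DBSR status bits before returning. */
    if (!(ti_flags & _TIF_USER_WORK_MASK) && (thread_dbcr0 & DBCR0_IDM)) {
            mtmsr(mfmsr() & ~MSR_DE);
            mtspr(SPRN_DBCR0, thread_dbcr0);
            mtspr(SPRN_DBSR, -1);
    }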
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 40e4a17c8ba0..4e00d223b2e3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S | |||
@@ -341,10 +341,17 @@ vsx_unavailable_pSeries_1: | |||
341 | EXCEPTION_PROLOG_0(PACA_EXGEN) | 341 | EXCEPTION_PROLOG_0(PACA_EXGEN) |
342 | b vsx_unavailable_pSeries | 342 | b vsx_unavailable_pSeries |
343 | 343 | ||
344 | facility_unavailable_trampoline: | ||
344 | . = 0xf60 | 345 | . = 0xf60 |
345 | SET_SCRATCH0(r13) | 346 | SET_SCRATCH0(r13) |
346 | EXCEPTION_PROLOG_0(PACA_EXGEN) | 347 | EXCEPTION_PROLOG_0(PACA_EXGEN) |
347 | b tm_unavailable_pSeries | 348 | b facility_unavailable_pSeries |
349 | |||
350 | hv_facility_unavailable_trampoline: | ||
351 | . = 0xf80 | ||
352 | SET_SCRATCH0(r13) | ||
353 | EXCEPTION_PROLOG_0(PACA_EXGEN) | ||
354 | b facility_unavailable_hv | ||
348 | 355 | ||
349 | #ifdef CONFIG_CBE_RAS | 356 | #ifdef CONFIG_CBE_RAS |
350 | STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) | 357 | STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) |
@@ -522,8 +529,10 @@ denorm_done: | |||
522 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) | 529 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) |
523 | STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) | 530 | STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) |
524 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) | 531 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) |
525 | STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) | 532 | STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) |
526 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) | 533 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) |
534 | STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) | ||
535 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) | ||
527 | 536 | ||
528 | /* | 537 | /* |
529 | * An interrupt came in while soft-disabled. We set paca->irq_happened, then: | 538 | * An interrupt came in while soft-disabled. We set paca->irq_happened, then: |
@@ -793,14 +802,10 @@ system_call_relon_pSeries: | |||
793 | STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) | 802 | STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) |
794 | 803 | ||
795 | . = 0x4e00 | 804 | . = 0x4e00 |
796 | SET_SCRATCH0(r13) | 805 | b . /* Can't happen, see v2.07 Book III-S section 6.5 */ |
797 | EXCEPTION_PROLOG_0(PACA_EXGEN) | ||
798 | b h_data_storage_relon_hv | ||
799 | 806 | ||
800 | . = 0x4e20 | 807 | . = 0x4e20 |
801 | SET_SCRATCH0(r13) | 808 | b . /* Can't happen, see v2.07 Book III-S section 6.5 */ |
802 | EXCEPTION_PROLOG_0(PACA_EXGEN) | ||
803 | b h_instr_storage_relon_hv | ||
804 | 809 | ||
805 | . = 0x4e40 | 810 | . = 0x4e40 |
806 | SET_SCRATCH0(r13) | 811 | SET_SCRATCH0(r13) |
@@ -808,9 +813,7 @@ system_call_relon_pSeries: | |||
808 | b emulation_assist_relon_hv | 813 | b emulation_assist_relon_hv |
809 | 814 | ||
810 | . = 0x4e60 | 815 | . = 0x4e60 |
811 | SET_SCRATCH0(r13) | 816 | b . /* Can't happen, see v2.07 Book III-S section 6.5 */ |
812 | EXCEPTION_PROLOG_0(PACA_EXGEN) | ||
813 | b hmi_exception_relon_hv | ||
814 | 817 | ||
815 | . = 0x4e80 | 818 | . = 0x4e80 |
816 | SET_SCRATCH0(r13) | 819 | SET_SCRATCH0(r13) |
@@ -835,11 +838,17 @@ vsx_unavailable_relon_pSeries_1: | |||
835 | EXCEPTION_PROLOG_0(PACA_EXGEN) | 838 | EXCEPTION_PROLOG_0(PACA_EXGEN) |
836 | b vsx_unavailable_relon_pSeries | 839 | b vsx_unavailable_relon_pSeries |
837 | 840 | ||
838 | tm_unavailable_relon_pSeries_1: | 841 | facility_unavailable_relon_trampoline: |
839 | . = 0x4f60 | 842 | . = 0x4f60 |
840 | SET_SCRATCH0(r13) | 843 | SET_SCRATCH0(r13) |
841 | EXCEPTION_PROLOG_0(PACA_EXGEN) | 844 | EXCEPTION_PROLOG_0(PACA_EXGEN) |
842 | b tm_unavailable_relon_pSeries | 845 | b facility_unavailable_relon_pSeries |
846 | |||
847 | hv_facility_unavailable_relon_trampoline: | ||
848 | . = 0x4f80 | ||
849 | SET_SCRATCH0(r13) | ||
850 | EXCEPTION_PROLOG_0(PACA_EXGEN) | ||
851 | b facility_unavailable_relon_hv | ||
843 | 852 | ||
844 | STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) | 853 | STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) |
845 | #ifdef CONFIG_PPC_DENORMALISATION | 854 | #ifdef CONFIG_PPC_DENORMALISATION |
@@ -1165,36 +1174,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) | |||
1165 | bl .vsx_unavailable_exception | 1174 | bl .vsx_unavailable_exception |
1166 | b .ret_from_except | 1175 | b .ret_from_except |
1167 | 1176 | ||
1168 | .align 7 | 1177 | STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) |
1169 | .globl tm_unavailable_common | ||
1170 | tm_unavailable_common: | ||
1171 | EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) | ||
1172 | bl .save_nvgprs | ||
1173 | DISABLE_INTS | ||
1174 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
1175 | bl .tm_unavailable_exception | ||
1176 | b .ret_from_except | ||
1177 | 1178 | ||
1178 | .align 7 | 1179 | .align 7 |
1179 | .globl __end_handlers | 1180 | .globl __end_handlers |
1180 | __end_handlers: | 1181 | __end_handlers: |
1181 | 1182 | ||
1182 | /* Equivalents to the above handlers for relocation-on interrupt vectors */ | 1183 | /* Equivalents to the above handlers for relocation-on interrupt vectors */ |
1183 | STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) | ||
1184 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) | ||
1185 | STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) | ||
1186 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) | ||
1187 | STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) | 1184 | STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) |
1188 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) | ||
1189 | STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) | ||
1190 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) | ||
1191 | MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) | 1185 | MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) |
1192 | KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) | ||
1193 | 1186 | ||
1194 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) | 1187 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) |
1195 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) | 1188 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) |
1196 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) | 1189 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) |
1197 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) | 1190 | STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) |
1191 | STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) | ||
1198 | 1192 | ||
1199 | #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) | 1193 | #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) |
1200 | /* | 1194 | /* |
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a949bdfc9623..f0b47d1a6b0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c | |||
@@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
176 | length_max = 512 ; /* 64 doublewords */ | 176 | length_max = 512 ; /* 64 doublewords */ |
177 | /* DAWR region can't cross 512 boundary */ | 177 | /* DAWR region can't cross 512 boundary */ |
178 | if ((bp->attr.bp_addr >> 10) != | 178 | if ((bp->attr.bp_addr >> 10) != |
179 | ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) | 179 | ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) |
180 | return -EINVAL; | 180 | return -EINVAL; |
181 | } | 181 | } |
182 | if (info->len > | 182 | if (info->len > |
@@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
250 | * we still need to single-step the instruction, but we don't | 250 | * we still need to single-step the instruction, but we don't |
251 | * generate an event. | 251 | * generate an event. |
252 | */ | 252 | */ |
253 | info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; | ||
253 | if (!((bp->attr.bp_addr <= dar) && | 254 | if (!((bp->attr.bp_addr <= dar) && |
254 | (dar - bp->attr.bp_addr < bp->attr.bp_len))) | 255 | (dar - bp->attr.bp_addr < bp->attr.bp_len))) |
255 | info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; | 256 | info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; |
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 939ea7ef0dc8..d7216c9abda1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c | |||
@@ -85,7 +85,7 @@ int powersave_nap; | |||
85 | /* | 85 | /* |
86 | * Register the sysctl to set/clear powersave_nap. | 86 | * Register the sysctl to set/clear powersave_nap. |
87 | */ | 87 | */ |
88 | static ctl_table powersave_nap_ctl_table[]={ | 88 | static struct ctl_table powersave_nap_ctl_table[] = { |
89 | { | 89 | { |
90 | .procname = "powersave-nap", | 90 | .procname = "powersave-nap", |
91 | .data = &powersave_nap, | 91 | .data = &powersave_nap, |
@@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={ | |||
95 | }, | 95 | }, |
96 | {} | 96 | {} |
97 | }; | 97 | }; |
98 | static ctl_table powersave_nap_sysctl_root[] = { | 98 | static struct ctl_table powersave_nap_sysctl_root[] = { |
99 | { | 99 | { |
100 | .procname = "kernel", | 100 | .procname = "kernel", |
101 | .mode = 0555, | 101 | .mode = 0555, |
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 50e90b7e7139..fa0b54b2a362 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c | |||
@@ -55,6 +55,7 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) | |||
55 | 55 | ||
56 | struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) | 56 | struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) |
57 | { | 57 | { |
58 | unsigned hugepage_shift; | ||
58 | struct iowa_bus *bus; | 59 | struct iowa_bus *bus; |
59 | int token; | 60 | int token; |
60 | 61 | ||
@@ -70,11 +71,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) | |||
70 | if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) | 71 | if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) |
71 | return NULL; | 72 | return NULL; |
72 | 73 | ||
73 | ptep = find_linux_pte(init_mm.pgd, vaddr); | 74 | ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr, |
75 | &hugepage_shift); | ||
74 | if (ptep == NULL) | 76 | if (ptep == NULL) |
75 | paddr = 0; | 77 | paddr = 0; |
76 | else | 78 | else { |
79 | /* | ||
80 | * we don't have hugepages backing iomem | ||
81 | */ | ||
82 | WARN_ON(hugepage_shift); | ||
77 | paddr = pte_pfn(*ptep) << PAGE_SHIFT; | 83 | paddr = pte_pfn(*ptep) << PAGE_SHIFT; |
84 | } | ||
78 | bus = iowa_pci_find(vaddr, paddr); | 85 | bus = iowa_pci_find(vaddr, paddr); |
79 | 86 | ||
80 | if (bus == NULL) | 87 | if (bus == NULL) |
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0d0dbddfba1..b20ff173a671 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c | |||
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | #include <linux/fault-inject.h> | 37 | #include <linux/fault-inject.h> |
38 | #include <linux/pci.h> | 38 | #include <linux/pci.h> |
39 | #include <linux/iommu.h> | ||
40 | #include <linux/sched.h> | ||
39 | #include <asm/io.h> | 41 | #include <asm/io.h> |
40 | #include <asm/prom.h> | 42 | #include <asm/prom.h> |
41 | #include <asm/iommu.h> | 43 | #include <asm/iommu.h> |
@@ -44,6 +46,7 @@ | |||
44 | #include <asm/kdump.h> | 46 | #include <asm/kdump.h> |
45 | #include <asm/fadump.h> | 47 | #include <asm/fadump.h> |
46 | #include <asm/vio.h> | 48 | #include <asm/vio.h> |
49 | #include <asm/tce.h> | ||
47 | 50 | ||
48 | #define DBG(...) | 51 | #define DBG(...) |
49 | 52 | ||
@@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) | |||
724 | if (tbl->it_offset == 0) | 727 | if (tbl->it_offset == 0) |
725 | clear_bit(0, tbl->it_map); | 728 | clear_bit(0, tbl->it_map); |
726 | 729 | ||
730 | #ifdef CONFIG_IOMMU_API | ||
731 | if (tbl->it_group) { | ||
732 | iommu_group_put(tbl->it_group); | ||
733 | BUG_ON(tbl->it_group); | ||
734 | } | ||
735 | #endif | ||
736 | |||
727 | /* verify that table contains no entries */ | 737 | /* verify that table contains no entries */ |
728 | if (!bitmap_empty(tbl->it_map, tbl->it_size)) | 738 | if (!bitmap_empty(tbl->it_map, tbl->it_size)) |
729 | pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); | 739 | pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); |
@@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, | |||
860 | free_pages((unsigned long)vaddr, get_order(size)); | 870 | free_pages((unsigned long)vaddr, get_order(size)); |
861 | } | 871 | } |
862 | } | 872 | } |
873 | |||
874 | #ifdef CONFIG_IOMMU_API | ||
875 | /* | ||
876 | * SPAPR TCE API | ||
877 | */ | ||
878 | static void group_release(void *iommu_data) | ||
879 | { | ||
880 | struct iommu_table *tbl = iommu_data; | ||
881 | tbl->it_group = NULL; | ||
882 | } | ||
883 | |||
884 | void iommu_register_group(struct iommu_table *tbl, | ||
885 | int pci_domain_number, unsigned long pe_num) | ||
886 | { | ||
887 | struct iommu_group *grp; | ||
888 | char *name; | ||
889 | |||
890 | grp = iommu_group_alloc(); | ||
891 | if (IS_ERR(grp)) { | ||
892 | pr_warn("powerpc iommu api: cannot create new group, err=%ld\n", | ||
893 | PTR_ERR(grp)); | ||
894 | return; | ||
895 | } | ||
896 | tbl->it_group = grp; | ||
897 | iommu_group_set_iommudata(grp, tbl, group_release); | ||
898 | name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", | ||
899 | pci_domain_number, pe_num); | ||
900 | if (!name) | ||
901 | return; | ||
902 | iommu_group_set_name(grp, name); | ||
903 | kfree(name); | ||
904 | } | ||
905 | |||
906 | enum dma_data_direction iommu_tce_direction(unsigned long tce) | ||
907 | { | ||
908 | if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE)) | ||
909 | return DMA_BIDIRECTIONAL; | ||
910 | else if (tce & TCE_PCI_READ) | ||
911 | return DMA_TO_DEVICE; | ||
912 | else if (tce & TCE_PCI_WRITE) | ||
913 | return DMA_FROM_DEVICE; | ||
914 | else | ||
915 | return DMA_NONE; | ||
916 | } | ||
917 | EXPORT_SYMBOL_GPL(iommu_tce_direction); | ||
918 | |||
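Restating the permission-bit mapping above as a table:

    /* TCE_PCI_READ | TCE_PCI_WRITE -> DMA_BIDIRECTIONAL
     * TCE_PCI_READ  only           -> DMA_TO_DEVICE
     * TCE_PCI_WRITE only           -> DMA_FROM_DEVICE
     * neither                      -> DMA_NONE */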
919 | void iommu_flush_tce(struct iommu_table *tbl) | ||
920 | { | ||
921 | /* Flush/invalidate TLB caches if necessary */ | ||
922 | if (ppc_md.tce_flush) | ||
923 | ppc_md.tce_flush(tbl); | ||
924 | |||
925 | /* Make sure updates are seen by hardware */ | ||
926 | mb(); | ||
927 | } | ||
928 | EXPORT_SYMBOL_GPL(iommu_flush_tce); | ||
929 | |||
930 | int iommu_tce_clear_param_check(struct iommu_table *tbl, | ||
931 | unsigned long ioba, unsigned long tce_value, | ||
932 | unsigned long npages) | ||
933 | { | ||
934 | /* ppc_md.tce_free() does not support any value but 0 */ | ||
935 | if (tce_value) | ||
936 | return -EINVAL; | ||
937 | |||
938 | if (ioba & ~IOMMU_PAGE_MASK) | ||
939 | return -EINVAL; | ||
940 | |||
941 | ioba >>= IOMMU_PAGE_SHIFT; | ||
942 | if (ioba < tbl->it_offset) | ||
943 | return -EINVAL; | ||
944 | |||
945 | if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) | ||
946 | return -EINVAL; | ||
947 | |||
948 | return 0; | ||
949 | } | ||
950 | EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); | ||
951 | |||
952 | int iommu_tce_put_param_check(struct iommu_table *tbl, | ||
953 | unsigned long ioba, unsigned long tce) | ||
954 | { | ||
955 | if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) | ||
956 | return -EINVAL; | ||
957 | |||
958 | if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) | ||
959 | return -EINVAL; | ||
960 | |||
961 | if (ioba & ~IOMMU_PAGE_MASK) | ||
962 | return -EINVAL; | ||
963 | |||
964 | ioba >>= IOMMU_PAGE_SHIFT; | ||
965 | if (ioba < tbl->it_offset) | ||
966 | return -EINVAL; | ||
967 | |||
968 | if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) | ||
969 | return -EINVAL; | ||
970 | |||
971 | return 0; | ||
972 | } | ||
973 | EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); | ||
974 | |||
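After the `ioba >>= IOMMU_PAGE_SHIFT` step the checks work in page-index units. With assumed values (4K IOMMU pages, it_offset = 0, it_size = 0x1000) the window looks like this:

    unsigned long it_offset = 0, it_size = 0x1000; /* illustrative only */
    unsigned long ioba = 0xffful << 12;            /* page index 0xfff  */
    /* 0xfff >= it_offset and 0xfff + 1 <= it_offset + it_size, so the
     * check passes; page 0x1000 is the first index rejected. */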
975 | unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) | ||
976 | { | ||
977 | unsigned long oldtce; | ||
978 | struct iommu_pool *pool = get_pool(tbl, entry); | ||
979 | |||
980 | spin_lock(&(pool->lock)); | ||
981 | |||
982 | oldtce = ppc_md.tce_get(tbl, entry); | ||
983 | if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) | ||
984 | ppc_md.tce_free(tbl, entry, 1); | ||
985 | else | ||
986 | oldtce = 0; | ||
987 | |||
988 | spin_unlock(&(pool->lock)); | ||
989 | |||
990 | return oldtce; | ||
991 | } | ||
992 | EXPORT_SYMBOL_GPL(iommu_clear_tce); | ||
993 | |||
994 | int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, | ||
995 | unsigned long entry, unsigned long pages) | ||
996 | { | ||
997 | unsigned long oldtce; | ||
998 | struct page *page; | ||
999 | |||
1000 | for ( ; pages; --pages, ++entry) { | ||
1001 | oldtce = iommu_clear_tce(tbl, entry); | ||
1002 | if (!oldtce) | ||
1003 | continue; | ||
1004 | |||
1005 | page = pfn_to_page(oldtce >> PAGE_SHIFT); | ||
1006 | WARN_ON(!page); | ||
1007 | if (page) { | ||
1008 | if (oldtce & TCE_PCI_WRITE) | ||
1009 | SetPageDirty(page); | ||
1010 | put_page(page); | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | return 0; | ||
1015 | } | ||
1016 | EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); | ||
1017 | |||
1018 | /* | ||
1019 | * hwaddr is a kernel virtual address here (0xc... bazillion), | ||
1020 | * tce_build converts it to a physical address. | ||
1021 | */ | ||
1022 | int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, | ||
1023 | unsigned long hwaddr, enum dma_data_direction direction) | ||
1024 | { | ||
1025 | int ret = -EBUSY; | ||
1026 | unsigned long oldtce; | ||
1027 | struct iommu_pool *pool = get_pool(tbl, entry); | ||
1028 | |||
1029 | spin_lock(&(pool->lock)); | ||
1030 | |||
1031 | oldtce = ppc_md.tce_get(tbl, entry); | ||
1032 | /* Add new entry if it is not busy */ | ||
1033 | if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) | ||
1034 | ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL); | ||
1035 | |||
1036 | spin_unlock(&(pool->lock)); | ||
1037 | |||
1038 | /* if (unlikely(ret)) | ||
1039 | pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", | ||
1040 | __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, | ||
1041 | hwaddr, ret); */ | ||
1042 | |||
1043 | return ret; | ||
1044 | } | ||
1045 | EXPORT_SYMBOL_GPL(iommu_tce_build); | ||
1046 | |||
1047 | int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, | ||
1048 | unsigned long tce) | ||
1049 | { | ||
1050 | int ret; | ||
1051 | struct page *page = NULL; | ||
1052 | unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; | ||
1053 | enum dma_data_direction direction = iommu_tce_direction(tce); | ||
1054 | |||
1055 | ret = get_user_pages_fast(tce & PAGE_MASK, 1, | ||
1056 | direction != DMA_TO_DEVICE, &page); | ||
1057 | if (unlikely(ret != 1)) { | ||
1058 | /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", | ||
1059 | tce, entry << IOMMU_PAGE_SHIFT, ret); */ | ||
1060 | return -EFAULT; | ||
1061 | } | ||
1062 | hwaddr = (unsigned long) page_address(page) + offset; | ||
1063 | |||
1064 | ret = iommu_tce_build(tbl, entry, hwaddr, direction); | ||
1065 | if (ret) | ||
1066 | put_page(page); | ||
1067 | |||
1068 | if (ret < 0) | ||
1069 | pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", | ||
1070 | __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); | ||
1071 | |||
1072 | return ret; | ||
1073 | } | ||
1074 | EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode); | ||
1075 | |||
1076 | int iommu_take_ownership(struct iommu_table *tbl) | ||
1077 | { | ||
1078 | unsigned long sz = (tbl->it_size + 7) >> 3; | ||
1079 | |||
1080 | if (tbl->it_offset == 0) | ||
1081 | clear_bit(0, tbl->it_map); | ||
1082 | |||
1083 | if (!bitmap_empty(tbl->it_map, tbl->it_size)) { | ||
1084 | pr_err("iommu_tce: it_map is not empty"); | ||
1085 | return -EBUSY; | ||
1086 | } | ||
1087 | |||
1088 | memset(tbl->it_map, 0xff, sz); | ||
1089 | iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); | ||
1090 | |||
1091 | return 0; | ||
1092 | } | ||
1093 | EXPORT_SYMBOL_GPL(iommu_take_ownership); | ||
1094 | |||
1095 | void iommu_release_ownership(struct iommu_table *tbl) | ||
1096 | { | ||
1097 | unsigned long sz = (tbl->it_size + 7) >> 3; | ||
1098 | |||
1099 | iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); | ||
1100 | memset(tbl->it_map, 0, sz); | ||
1101 | |||
1102 | /* Restore bit#0 set by iommu_init_table() */ | ||
1103 | if (tbl->it_offset == 0) | ||
1104 | set_bit(0, tbl->it_map); | ||
1105 | } | ||
1106 | EXPORT_SYMBOL_GPL(iommu_release_ownership); | ||
1107 | |||
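A sketch of the intended pairing, with a VFIO-style caller assumed for illustration:

    /* Take the table away from the kernel before exposing it to user
     * space; iommu_take_ownership() returns -EBUSY if it_map still
     * holds live kernel mappings. */
    if (iommu_take_ownership(tbl))
            return -EBUSY;
    /* ... user-driven iommu_put_tce_user_mode() / iommu_clear_tce() ... */
    iommu_release_ownership(tbl);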
1108 | static int iommu_add_device(struct device *dev) | ||
1109 | { | ||
1110 | struct iommu_table *tbl; | ||
1111 | int ret = 0; | ||
1112 | |||
1113 | if (WARN_ON(dev->iommu_group)) { | ||
1114 | pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", | ||
1115 | dev_name(dev), | ||
1116 | iommu_group_id(dev->iommu_group)); | ||
1117 | return -EBUSY; | ||
1118 | } | ||
1119 | |||
1120 | tbl = get_iommu_table_base(dev); | ||
1121 | if (!tbl || !tbl->it_group) { | ||
1122 | pr_debug("iommu_tce: skipping device %s with no tbl\n", | ||
1123 | dev_name(dev)); | ||
1124 | return 0; | ||
1125 | } | ||
1126 | |||
1127 | pr_debug("iommu_tce: adding %s to iommu group %d\n", | ||
1128 | dev_name(dev), iommu_group_id(tbl->it_group)); | ||
1129 | |||
1130 | ret = iommu_group_add_device(tbl->it_group, dev); | ||
1131 | if (ret < 0) | ||
1132 | pr_err("iommu_tce: %s has not been added, ret=%d\n", | ||
1133 | dev_name(dev), ret); | ||
1134 | |||
1135 | return ret; | ||
1136 | } | ||
1137 | |||
1138 | static void iommu_del_device(struct device *dev) | ||
1139 | { | ||
1140 | iommu_group_remove_device(dev); | ||
1141 | } | ||
1142 | |||
1143 | static int iommu_bus_notifier(struct notifier_block *nb, | ||
1144 | unsigned long action, void *data) | ||
1145 | { | ||
1146 | struct device *dev = data; | ||
1147 | |||
1148 | switch (action) { | ||
1149 | case BUS_NOTIFY_ADD_DEVICE: | ||
1150 | return iommu_add_device(dev); | ||
1151 | case BUS_NOTIFY_DEL_DEVICE: | ||
1152 | iommu_del_device(dev); | ||
1153 | return 0; | ||
1154 | default: | ||
1155 | return 0; | ||
1156 | } | ||
1157 | } | ||
1158 | |||
1159 | static struct notifier_block tce_iommu_bus_nb = { | ||
1160 | .notifier_call = iommu_bus_notifier, | ||
1161 | }; | ||
1162 | |||
1163 | static int __init tce_iommu_init(void) | ||
1164 | { | ||
1165 | struct pci_dev *pdev = NULL; | ||
1166 | |||
1167 | BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); | ||
1168 | |||
1169 | for_each_pci_dev(pdev) | ||
1170 | iommu_add_device(&pdev->dev); | ||
1171 | |||
1172 | bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); | ||
1173 | return 0; | ||
1174 | } | ||
1175 | |||
1176 | subsys_initcall_sync(tce_iommu_init); | ||
1177 | |||
1178 | #else | ||
1179 | |||
1180 | void iommu_register_group(struct iommu_table *tbl, | ||
1181 | int pci_domain_number, unsigned long pe_num) | ||
1182 | { | ||
1183 | } | ||
1184 | |||
1185 | #endif /* CONFIG_IOMMU_API */ | ||
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ea185e0b3cae..2e51cde616d2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void) | |||
116 | u64 now = get_tb_or_rtc(); | 116 | u64 now = get_tb_or_rtc(); |
117 | u64 *next_tb = &__get_cpu_var(decrementers_next_tb); | 117 | u64 *next_tb = &__get_cpu_var(decrementers_next_tb); |
118 | 118 | ||
119 | if (now >= *next_tb) | ||
120 | set_dec(1); | ||
121 | return now >= *next_tb; | 119 | return now >= *next_tb; |
122 | } | 120 | } |
123 | 121 | ||
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 11f5b03a0b06..2156ea90eb54 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c | |||
@@ -36,12 +36,6 @@ | |||
36 | #include <asm/sstep.h> | 36 | #include <asm/sstep.h> |
37 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
38 | 38 | ||
39 | #ifdef CONFIG_PPC_ADV_DEBUG_REGS | ||
40 | #define MSR_SINGLESTEP (MSR_DE) | ||
41 | #else | ||
42 | #define MSR_SINGLESTEP (MSR_SE) | ||
43 | #endif | ||
44 | |||
45 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 39 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
46 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 40 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
47 | 41 | ||
@@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) | |||
104 | 98 | ||
105 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | 99 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) |
106 | { | 100 | { |
107 | /* We turn off async exceptions to ensure that the single step will | 101 | enable_single_step(regs); |
108 | * be for the instruction we have the kprobe on, if we dont its | ||
109 | * possible we'd get the single step reported for an exception handler | ||
110 | * like Decrementer or External Interrupt */ | ||
111 | regs->msr &= ~MSR_EE; | ||
112 | regs->msr |= MSR_SINGLESTEP; | ||
113 | #ifdef CONFIG_PPC_ADV_DEBUG_REGS | ||
114 | regs->msr &= ~MSR_CE; | ||
115 | mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); | ||
116 | #ifdef CONFIG_PPC_47x | ||
117 | isync(); | ||
118 | #endif | ||
119 | #endif | ||
120 | 102 | ||
121 | /* | 103 | /* |
122 | * On powerpc we should single step on the original | 104 | * On powerpc we should single step on the original |
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 48fbc2b97e95..8213ee1eb05a 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c | |||
@@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf, | |||
84 | char *tmp = NULL; | 84 | char *tmp = NULL; |
85 | ssize_t size; | 85 | ssize_t size; |
86 | 86 | ||
87 | ret = -ENODEV; | 87 | if (!ppc_md.nvram_size) { |
88 | if (!ppc_md.nvram_size) | 88 | ret = -ENODEV; |
89 | goto out; | 89 | goto out; |
90 | } | ||
90 | 91 | ||
91 | ret = 0; | ||
92 | size = ppc_md.nvram_size(); | 92 | size = ppc_md.nvram_size(); |
93 | if (*ppos >= size || size < 0) | 93 | if (size < 0) { |
94 | ret = size; | ||
95 | goto out; | ||
96 | } | ||
97 | |||
98 | if (*ppos >= size) { | ||
99 | ret = 0; | ||
94 | goto out; | 100 | goto out; |
101 | } | ||
95 | 102 | ||
96 | count = min_t(size_t, count, size - *ppos); | 103 | count = min_t(size_t, count, size - *ppos); |
97 | count = min(count, PAGE_SIZE); | 104 | count = min(count, PAGE_SIZE); |
98 | 105 | ||
99 | ret = -ENOMEM; | ||
100 | tmp = kmalloc(count, GFP_KERNEL); | 106 | tmp = kmalloc(count, GFP_KERNEL); |
101 | if (!tmp) | 107 | if (!tmp) { |
108 | ret = -ENOMEM; | ||
102 | goto out; | 109 | goto out; |
110 | } | ||
103 | 111 | ||
104 | ret = ppc_md.nvram_read(tmp, count, ppos); | 112 | ret = ppc_md.nvram_read(tmp, count, ppos); |
105 | if (ret <= 0) | 113 | if (ret <= 0) |
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c new file mode 100644 index 000000000000..3f608800c06b --- /dev/null +++ b/arch/powerpc/kernel/pci-hotplug.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c" | ||
3 | * | ||
4 | * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com> | ||
5 | * Copyright (C) 2005 International Business Machines | ||
6 | * | ||
7 | * Updates, 2005, John Rose <johnrose@austin.ibm.com> | ||
8 | * Updates, 2005, Linas Vepstas <linas@austin.ibm.com> | ||
9 | * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #include <linux/pci.h> | ||
18 | #include <linux/export.h> | ||
19 | #include <asm/pci-bridge.h> | ||
20 | #include <asm/ppc-pci.h> | ||
21 | #include <asm/firmware.h> | ||
22 | #include <asm/eeh.h> | ||
23 | |||
24 | /** | ||
25 | * __pcibios_remove_pci_devices - remove all devices under this bus | ||
26 | * @bus: the indicated PCI bus | ||
27 | * @purge_pe: destroy the PE on removal of PCI devices | ||
28 | * | ||
29 | * Remove all of the PCI devices under this bus both from the | ||
30 | * linux pci device tree, and from the powerpc EEH address cache. | ||
31 | * By default, the corresponding PE will be destroyed during the | ||
32 | * normal PCI hotplug path. For PCI hotplug during EEH recovery, | ||
33 | * the corresponding PE won't be destroyed and deallocated. | ||
34 | */ | ||
35 | void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) | ||
36 | { | ||
37 | struct pci_dev *dev, *tmp; | ||
38 | struct pci_bus *child_bus; | ||
39 | |||
40 | /* First go down child busses */ | ||
41 | list_for_each_entry(child_bus, &bus->children, node) | ||
42 | __pcibios_remove_pci_devices(child_bus, purge_pe); | ||
43 | |||
44 | pr_debug("PCI: Removing devices on bus %04x:%02x\n", | ||
45 | pci_domain_nr(bus), bus->number); | ||
46 | list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { | ||
47 | pr_debug(" * Removing %s...\n", pci_name(dev)); | ||
48 | eeh_remove_bus_device(dev, purge_pe); | ||
49 | pci_stop_and_remove_bus_device(dev); | ||
50 | } | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * pcibios_remove_pci_devices - remove all devices under this bus | ||
55 | * @bus: the indicated PCI bus | ||
56 | * | ||
57 | * Remove all of the PCI devices under this bus both from the | ||
58 | * linux pci device tree, and from the powerpc EEH address cache. | ||
59 | */ | ||
60 | void pcibios_remove_pci_devices(struct pci_bus *bus) | ||
61 | { | ||
62 | __pcibios_remove_pci_devices(bus, 1); | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); | ||
65 | |||
66 | /** | ||
67 | * pcibios_add_pci_devices - adds new pci devices to bus | ||
68 | * @bus: the indicated PCI bus | ||
69 | * | ||
70 | * This routine will find and fixup new pci devices under | ||
71 | * the indicated bus. This routine presumes that there | ||
72 | * might already be some devices under this bridge, so | ||
73 | * it carefully tries to add only new devices. (And that | ||
74 | * is how this routine differs from other, similar pcibios | ||
75 | * routines.) | ||
76 | */ | ||
77 | void pcibios_add_pci_devices(struct pci_bus * bus) | ||
78 | { | ||
79 | int slotno, num, mode, pass, max; | ||
80 | struct pci_dev *dev; | ||
81 | struct device_node *dn = pci_bus_to_OF_node(bus); | ||
82 | |||
83 | eeh_add_device_tree_early(dn); | ||
84 | |||
85 | mode = PCI_PROBE_NORMAL; | ||
86 | if (ppc_md.pci_probe_mode) | ||
87 | mode = ppc_md.pci_probe_mode(bus); | ||
88 | |||
89 | if (mode == PCI_PROBE_DEVTREE) { | ||
90 | /* use ofdt-based probe */ | ||
91 | of_rescan_bus(dn, bus); | ||
92 | } else if (mode == PCI_PROBE_NORMAL) { | ||
93 | /* use legacy probe */ | ||
94 | slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); | ||
95 | num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); | ||
96 | if (!num) | ||
97 | return; | ||
98 | pcibios_setup_bus_devices(bus); | ||
99 | max = bus->busn_res.start; | ||
100 | for (pass = 0; pass < 2; pass++) { | ||
101 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
102 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || | ||
103 | dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) | ||
104 | max = pci_scan_bridge(bus, dev, | ||
105 | max, pass); | ||
106 | } | ||
107 | } | ||
108 | } | ||
109 | pcibios_finish_adding_to_bus(bus); | ||
110 | } | ||
111 | EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); | ||
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 076d1242507a..c517dbe705fd 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c | |||
@@ -916,7 +916,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
916 | flush_altivec_to_thread(src); | 916 | flush_altivec_to_thread(src); |
917 | flush_vsx_to_thread(src); | 917 | flush_vsx_to_thread(src); |
918 | flush_spe_to_thread(src); | 918 | flush_spe_to_thread(src); |
919 | |||
919 | *dst = *src; | 920 | *dst = *src; |
921 | |||
922 | clear_task_ebb(dst); | ||
923 | |||
920 | return 0; | 924 | return 0; |
921 | } | 925 | } |
922 | 926 | ||
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..eb23ac92abb9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c | |||
@@ -559,6 +559,35 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start, | |||
559 | } | 559 | } |
560 | #endif | 560 | #endif |
561 | 561 | ||
562 | static void __init early_reserve_mem_dt(void) | ||
563 | { | ||
564 | unsigned long i, len, dt_root; | ||
565 | const __be32 *prop; | ||
566 | |||
567 | dt_root = of_get_flat_dt_root(); | ||
568 | |||
569 | prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); | ||
570 | |||
571 | if (!prop) | ||
572 | return; | ||
573 | |||
574 | DBG("Found new-style reserved-ranges\n"); | ||
575 | |||
576 | /* Each reserved range is an (address,size) pair, 2 cells each, | ||
577 | * totalling 4 cells per range. */ | ||
578 | for (i = 0; i < len / (sizeof(*prop) * 4); i++) { | ||
579 | u64 base, size; | ||
580 | |||
581 | base = of_read_number(prop + (i * 4) + 0, 2); | ||
582 | size = of_read_number(prop + (i * 4) + 2, 2); | ||
583 | |||
584 | if (size) { | ||
585 | DBG("reserving: %llx -> %llx\n", base, size); | ||
586 | memblock_reserve(base, size); | ||
587 | } | ||
588 | } | ||
589 | } | ||
590 | |||
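The loop treats "reserved-ranges" as a flat array of big-endian cells, four per range (two for the address, two for the size). A self-contained check with an assumed one-range property:

    const __be32 prop[4] = { cpu_to_be32(0x0), cpu_to_be32(0x40000000),
                             cpu_to_be32(0x0), cpu_to_be32(0x01000000) };
    u64 base = of_read_number(prop + 0, 2);  /* 0x40000000          */
    u64 size = of_read_number(prop + 2, 2);  /* 0x01000000, 16MiB   */
    /* len would be 4 * sizeof(*prop), so the loop body runs once. */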
562 | static void __init early_reserve_mem(void) | 591 | static void __init early_reserve_mem(void) |
563 | { | 592 | { |
564 | u64 base, size; | 593 | u64 base, size; |
@@ -574,12 +603,16 @@ static void __init early_reserve_mem(void) | |||
574 | self_size = initial_boot_params->totalsize; | 603 | self_size = initial_boot_params->totalsize; |
575 | memblock_reserve(self_base, self_size); | 604 | memblock_reserve(self_base, self_size); |
576 | 605 | ||
606 | /* Look for the new "reserved-ranges" property in the DT */ | ||
607 | early_reserve_mem_dt(); | ||
608 | |||
577 | #ifdef CONFIG_BLK_DEV_INITRD | 609 | #ifdef CONFIG_BLK_DEV_INITRD |
578 | /* then reserve the initrd, if any */ | 610 | /* Then reserve the initrd, if any */ |
579 | if (initrd_start && (initrd_end > initrd_start)) | 611 | if (initrd_start && (initrd_end > initrd_start)) { |
580 | memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), | 612 | memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), |
581 | _ALIGN_UP(initrd_end, PAGE_SIZE) - | 613 | _ALIGN_UP(initrd_end, PAGE_SIZE) - |
582 | _ALIGN_DOWN(initrd_start, PAGE_SIZE)); | 614 | _ALIGN_DOWN(initrd_start, PAGE_SIZE)); |
615 | } | ||
583 | #endif /* CONFIG_BLK_DEV_INITRD */ | 616 | #endif /* CONFIG_BLK_DEV_INITRD */ |
584 | 617 | ||
585 | #ifdef CONFIG_PPC32 | 618 | #ifdef CONFIG_PPC32 |
@@ -591,6 +624,8 @@ static void __init early_reserve_mem(void) | |||
591 | u32 base_32, size_32; | 624 | u32 base_32, size_32; |
592 | u32 *reserve_map_32 = (u32 *)reserve_map; | 625 | u32 *reserve_map_32 = (u32 *)reserve_map; |
593 | 626 | ||
627 | DBG("Found old 32-bit reserve map\n"); | ||
628 | |||
594 | while (1) { | 629 | while (1) { |
595 | base_32 = *(reserve_map_32++); | 630 | base_32 = *(reserve_map_32++); |
596 | size_32 = *(reserve_map_32++); | 631 | size_32 = *(reserve_map_32++); |
@@ -605,6 +640,9 @@ static void __init early_reserve_mem(void) | |||
605 | return; | 640 | return; |
606 | } | 641 | } |
607 | #endif | 642 | #endif |
643 | DBG("Processing reserve map\n"); | ||
644 | |||
645 | /* Handle the reserve map in the fdt blob if it exists */ | ||
608 | while (1) { | 646 | while (1) { |
609 | base = *(reserve_map++); | 647 | base = *(reserve_map++); |
610 | size = *(reserve_map++); | 648 | size = *(reserve_map++); |
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 98c2fc198712..64f7bd5b1b0f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c | |||
@@ -1449,7 +1449,9 @@ static long ppc_set_hwdebug(struct task_struct *child, | |||
1449 | */ | 1449 | */ |
1450 | if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { | 1450 | if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { |
1451 | len = bp_info->addr2 - bp_info->addr; | 1451 | len = bp_info->addr2 - bp_info->addr; |
1452 | } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { | 1452 | } else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) |
1453 | len = 1; | ||
1454 | else { | ||
1453 | ptrace_put_breakpoints(child); | 1455 | ptrace_put_breakpoints(child); |
1454 | return -EINVAL; | 1456 | return -EINVAL; |
1455 | } | 1457 | } |
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S index ef46ba6e094f..f366fedb0872 100644 --- a/arch/powerpc/kernel/reloc_32.S +++ b/arch/powerpc/kernel/reloc_32.S | |||
@@ -166,7 +166,7 @@ ha16: | |||
166 | /* R_PPC_ADDR16_LO */ | 166 | /* R_PPC_ADDR16_LO */ |
167 | lo16: | 167 | lo16: |
168 | cmpwi r4, R_PPC_ADDR16_LO | 168 | cmpwi r4, R_PPC_ADDR16_LO |
169 | bne nxtrela | 169 | bne unknown_type |
170 | lwz r4, 0(r9) /* r_offset */ | 170 | lwz r4, 0(r9) /* r_offset */ |
171 | lwz r0, 8(r9) /* r_addend */ | 171 | lwz r0, 8(r9) /* r_addend */ |
172 | add r0, r0, r3 | 172 | add r0, r0, r3 |
@@ -191,6 +191,7 @@ nxtrela: | |||
191 | dcbst r4,r7 | 191 | dcbst r4,r7 |
192 | sync /* Ensure the data is flushed before icbi */ | 192 | sync /* Ensure the data is flushed before icbi */ |
193 | icbi r4,r7 | 193 | icbi r4,r7 |
194 | unknown_type: | ||
194 | cmpwi r8, 0 /* relasz = 0 ? */ | 195 | cmpwi r8, 0 /* relasz = 0 ? */ |
195 | ble done | 196 | ble done |
196 | add r9, r9, r6 /* move to next entry in the .rela table */ | 197 | add r9, r9, r6 /* move to next entry in the .rela table */ |
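The new unknown_type label sits past the dcbst/sync/icbi sequence, so a relocation type the code does not handle skips cache maintenance for an address it never patched, instead of falling into nxtrela's flush. A C-level sketch of the fixed loop shape (all names illustrative):

    #include <stddef.h>

    struct rela { unsigned long r_offset; unsigned long r_type; long r_addend; };

    static void relocate(struct rela *r, size_t n, long delta,
                         int (*apply)(struct rela *, long),
                         void (*flush)(unsigned long))
    {
        size_t i;

        for (i = 0; i < n; i++) {
            /* flush caches only for entries that were actually patched */
            if (apply(&r[i], delta))
                flush(r[i].r_offset);
            /* unknown type: no flush, just advance to the next entry */
        }
    }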
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 52add6f3e201..80b5ef403f68 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c | |||
@@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node, | |||
1172 | static arch_spinlock_t timebase_lock; | 1172 | static arch_spinlock_t timebase_lock; |
1173 | static u64 timebase = 0; | 1173 | static u64 timebase = 0; |
1174 | 1174 | ||
1175 | void __cpuinit rtas_give_timebase(void) | 1175 | void rtas_give_timebase(void) |
1176 | { | 1176 | { |
1177 | unsigned long flags; | 1177 | unsigned long flags; |
1178 | 1178 | ||
@@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void) | |||
1189 | local_irq_restore(flags); | 1189 | local_irq_restore(flags); |
1190 | } | 1190 | } |
1191 | 1191 | ||
1192 | void __cpuinit rtas_take_timebase(void) | 1192 | void rtas_take_timebase(void) |
1193 | { | 1193 | { |
1194 | while (!timebase) | 1194 | while (!timebase) |
1195 | barrier(); | 1195 | barrier(); |
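rtas_give_timebase()/rtas_take_timebase() implement a boot-time handshake: the boot CPU publishes the frozen timebase under a lock, and the secondary spins until the value appears, then clears it. A reduced model of the protocol using C11 atomics in place of the arch spinlock and barrier() (a sketch of the ordering only, not the RTAS freeze itself):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t timebase;

    static void give_timebase(uint64_t tb)
    {
        /* release: the value is fully visible once the taker sees it */
        atomic_store_explicit(&timebase, tb, memory_order_release);
    }

    static uint64_t take_timebase(void)
    {
        uint64_t tb;

        /* mirrors the kernel's while (!timebase) barrier(); spin */
        while ((tb = atomic_load_explicit(&timebase, memory_order_acquire)) == 0)
            ;
        atomic_store_explicit(&timebase, 0, memory_order_relaxed);
        return tb;
    }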
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e379d3fd1694..389fb8077cc9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c | |||
@@ -76,7 +76,7 @@ | |||
76 | #endif | 76 | #endif |
77 | 77 | ||
78 | int boot_cpuid = 0; | 78 | int boot_cpuid = 0; |
79 | int __initdata spinning_secondaries; | 79 | int spinning_secondaries; |
80 | u64 ppc64_pft_size; | 80 | u64 ppc64_pft_size; |
81 | 81 | ||
82 | /* Pick defaults since we might want to patch instructions | 82 | /* Pick defaults since we might want to patch instructions |
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c3a1ae..0f83122e6676 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c | |||
@@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, | |||
407 | * altivec/spe instructions at some point. | 407 | * altivec/spe instructions at some point. |
408 | */ | 408 | */ |
409 | static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, | 409 | static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, |
410 | int sigret, int ctx_has_vsx_region) | 410 | struct mcontext __user *tm_frame, int sigret, |
411 | int ctx_has_vsx_region) | ||
411 | { | 412 | { |
412 | unsigned long msr = regs->msr; | 413 | unsigned long msr = regs->msr; |
413 | 414 | ||
@@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, | |||
475 | 476 | ||
476 | if (__put_user(msr, &frame->mc_gregs[PT_MSR])) | 477 | if (__put_user(msr, &frame->mc_gregs[PT_MSR])) |
477 | return 1; | 478 | return 1; |
479 | /* We need to write 0 to the MSR top 32 bits in the tm frame so that we | ||
480 | * can check it on the restore to see if TM is active | ||
481 | */ | ||
482 | if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) | ||
483 | return 1; | ||
484 | |||
478 | if (sigret) { | 485 | if (sigret) { |
479 | /* Set up the sigreturn trampoline: li r0,sigret; sc */ | 486 | /* Set up the sigreturn trampoline: li r0,sigret; sc */ |
480 | if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) | 487 | if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) |
@@ -747,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, | |||
747 | struct mcontext __user *tm_sr) | 754 | struct mcontext __user *tm_sr) |
748 | { | 755 | { |
749 | long err; | 756 | long err; |
750 | unsigned long msr; | 757 | unsigned long msr, msr_hi; |
751 | #ifdef CONFIG_VSX | 758 | #ifdef CONFIG_VSX |
752 | int i; | 759 | int i; |
753 | #endif | 760 | #endif |
@@ -852,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, | |||
852 | tm_enable(); | 859 | tm_enable(); |
853 | /* This loads the checkpointed FP/VEC state, if used */ | 860 | /* This loads the checkpointed FP/VEC state, if used */ |
854 | tm_recheckpoint(¤t->thread, msr); | 861 | tm_recheckpoint(¤t->thread, msr); |
855 | /* The task has moved into TM state S, so ensure MSR reflects this */ | 862 | /* Get the top half of the MSR */ |
856 | regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; | 863 | if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) |
864 | return 1; | ||
865 | /* Pull in MSR TM from user context */ | ||
866 | regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); | ||
857 | 867 | ||
858 | /* This loads the speculative FP/VEC state, if used */ | 868 | /* This loads the speculative FP/VEC state, if used */ |
859 | if (msr & MSR_FP) { | 869 | if (msr & MSR_FP) { |
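A 32-bit task's mcontext only holds 32-bit gregs, so the frame's PT_MSR slot carries the top half of the 64-bit MSR; the restore path above shifts it back up and keeps only the transaction-state field. A sketch of the reconstruction (the TS bit positions are an assumption for illustration, matching __MASK(33)/__MASK(34)):

    #include <stdint.h>

    #define MSR_TS_MASK (3ULL << 33)    /* assumed TS field location */

    /* rebuild the TS bits from the 32-bit value saved in the tm frame */
    static uint64_t restore_ts(uint64_t regs_msr, uint32_t msr_hi)
    {
        return (regs_msr & ~MSR_TS_MASK) |
               (((uint64_t)msr_hi << 32) & MSR_TS_MASK);
    }

Because save_user_regs() now writes 0 into the tm frame's PT_MSR, a non-transactional frame restores with the TS field cleared.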
@@ -952,6 +962,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, | |||
952 | { | 962 | { |
953 | struct rt_sigframe __user *rt_sf; | 963 | struct rt_sigframe __user *rt_sf; |
954 | struct mcontext __user *frame; | 964 | struct mcontext __user *frame; |
965 | struct mcontext __user *tm_frame = NULL; | ||
955 | void __user *addr; | 966 | void __user *addr; |
956 | unsigned long newsp = 0; | 967 | unsigned long newsp = 0; |
957 | int sigret; | 968 | int sigret; |
@@ -985,23 +996,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, | |||
985 | } | 996 | } |
986 | 997 | ||
987 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 998 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
999 | tm_frame = &rt_sf->uc_transact.uc_mcontext; | ||
988 | if (MSR_TM_ACTIVE(regs->msr)) { | 1000 | if (MSR_TM_ACTIVE(regs->msr)) { |
989 | if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, | 1001 | if (save_tm_user_regs(regs, frame, tm_frame, sigret)) |
990 | &rt_sf->uc_transact.uc_mcontext, sigret)) | ||
991 | goto badframe; | 1002 | goto badframe; |
992 | } | 1003 | } |
993 | else | 1004 | else |
994 | #endif | 1005 | #endif |
995 | if (save_user_regs(regs, frame, sigret, 1)) | 1006 | { |
1007 | if (save_user_regs(regs, frame, tm_frame, sigret, 1)) | ||
996 | goto badframe; | 1008 | goto badframe; |
1009 | } | ||
997 | regs->link = tramp; | 1010 | regs->link = tramp; |
998 | 1011 | ||
999 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 1012 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
1000 | if (MSR_TM_ACTIVE(regs->msr)) { | 1013 | if (MSR_TM_ACTIVE(regs->msr)) { |
1001 | if (__put_user((unsigned long)&rt_sf->uc_transact, | 1014 | if (__put_user((unsigned long)&rt_sf->uc_transact, |
1002 | &rt_sf->uc.uc_link) | 1015 | &rt_sf->uc.uc_link) |
1003 | || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), | 1016 | || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) |
1004 | &rt_sf->uc_transact.uc_regs)) | ||
1005 | goto badframe; | 1017 | goto badframe; |
1006 | } | 1018 | } |
1007 | else | 1019 | else |
@@ -1170,7 +1182,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, | |||
1170 | mctx = (struct mcontext __user *) | 1182 | mctx = (struct mcontext __user *) |
1171 | ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); | 1183 | ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); |
1172 | if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) | 1184 | if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) |
1173 | || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) | 1185 | || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) |
1174 | || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) | 1186 | || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) |
1175 | || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) | 1187 | || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) |
1176 | return -EFAULT; | 1188 | return -EFAULT; |
@@ -1233,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, | |||
1233 | if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) | 1245 | if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) |
1234 | goto bad; | 1246 | goto bad; |
1235 | 1247 | ||
1236 | if (MSR_TM_SUSPENDED(msr_hi<<32)) { | 1248 | if (MSR_TM_ACTIVE(msr_hi<<32)) { |
1237 | /* We only recheckpoint on return if we're | 1249 | /* We only recheckpoint on return if we're |
1238 | * transaction. | 1250 | * transaction. |
1239 | */ | 1251 | */ |
@@ -1392,6 +1404,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, | |||
1392 | { | 1404 | { |
1393 | struct sigcontext __user *sc; | 1405 | struct sigcontext __user *sc; |
1394 | struct sigframe __user *frame; | 1406 | struct sigframe __user *frame; |
1407 | struct mcontext __user *tm_mctx = NULL; | ||
1395 | unsigned long newsp = 0; | 1408 | unsigned long newsp = 0; |
1396 | int sigret; | 1409 | int sigret; |
1397 | unsigned long tramp; | 1410 | unsigned long tramp; |
@@ -1425,6 +1438,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, | |||
1425 | } | 1438 | } |
1426 | 1439 | ||
1427 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 1440 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
1441 | tm_mctx = &frame->mctx_transact; | ||
1428 | if (MSR_TM_ACTIVE(regs->msr)) { | 1442 | if (MSR_TM_ACTIVE(regs->msr)) { |
1429 | if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, | 1443 | if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, |
1430 | sigret)) | 1444 | sigret)) |
@@ -1432,8 +1446,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, | |||
1432 | } | 1446 | } |
1433 | else | 1447 | else |
1434 | #endif | 1448 | #endif |
1435 | if (save_user_regs(regs, &frame->mctx, sigret, 1)) | 1449 | { |
1450 | if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) | ||
1436 | goto badframe; | 1451 | goto badframe; |
1452 | } | ||
1437 | 1453 | ||
1438 | regs->link = tramp; | 1454 | regs->link = tramp; |
1439 | 1455 | ||
@@ -1481,16 +1497,22 @@ badframe: | |||
1481 | long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, | 1497 | long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, |
1482 | struct pt_regs *regs) | 1498 | struct pt_regs *regs) |
1483 | { | 1499 | { |
1500 | struct sigframe __user *sf; | ||
1484 | struct sigcontext __user *sc; | 1501 | struct sigcontext __user *sc; |
1485 | struct sigcontext sigctx; | 1502 | struct sigcontext sigctx; |
1486 | struct mcontext __user *sr; | 1503 | struct mcontext __user *sr; |
1487 | void __user *addr; | 1504 | void __user *addr; |
1488 | sigset_t set; | 1505 | sigset_t set; |
1506 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | ||
1507 | struct mcontext __user *mcp, *tm_mcp; | ||
1508 | unsigned long msr_hi; | ||
1509 | #endif | ||
1489 | 1510 | ||
1490 | /* Always make any pending restarted system calls return -EINTR */ | 1511 | /* Always make any pending restarted system calls return -EINTR */ |
1491 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 1512 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
1492 | 1513 | ||
1493 | sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); | 1514 | sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); |
1515 | sc = &sf->sctx; | ||
1494 | addr = sc; | 1516 | addr = sc; |
1495 | if (copy_from_user(&sigctx, sc, sizeof(sigctx))) | 1517 | if (copy_from_user(&sigctx, sc, sizeof(sigctx))) |
1496 | goto badframe; | 1518 | goto badframe; |
@@ -1507,11 +1529,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, | |||
1507 | #endif | 1529 | #endif |
1508 | set_current_blocked(&set); | 1530 | set_current_blocked(&set); |
1509 | 1531 | ||
1510 | sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); | 1532 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
1511 | addr = sr; | 1533 | mcp = (struct mcontext __user *)&sf->mctx; |
1512 | if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) | 1534 | tm_mcp = (struct mcontext __user *)&sf->mctx_transact; |
1513 | || restore_user_regs(regs, sr, 1)) | 1535 | if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) |
1514 | goto badframe; | 1536 | goto badframe; |
1537 | if (MSR_TM_ACTIVE(msr_hi<<32)) { | ||
1538 | if (!cpu_has_feature(CPU_FTR_TM)) | ||
1539 | goto badframe; | ||
1540 | if (restore_tm_user_regs(regs, mcp, tm_mcp)) | ||
1541 | goto badframe; | ||
1542 | } else | ||
1543 | #endif | ||
1544 | { | ||
1545 | sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); | ||
1546 | addr = sr; | ||
1547 | if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) | ||
1548 | || restore_user_regs(regs, sr, 1)) | ||
1549 | goto badframe; | ||
1550 | } | ||
1515 | 1551 | ||
1516 | set_thread_flag(TIF_RESTOREALL); | 1552 | set_thread_flag(TIF_RESTOREALL); |
1517 | return 0; | 1553 | return 0; |
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 345947367ec0..887e99d85bc2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c | |||
@@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, | |||
410 | 410 | ||
411 | /* get MSR separately, transfer the LE bit if doing signal return */ | 411 | /* get MSR separately, transfer the LE bit if doing signal return */ |
412 | err |= __get_user(msr, &sc->gp_regs[PT_MSR]); | 412 | err |= __get_user(msr, &sc->gp_regs[PT_MSR]); |
413 | /* pull in MSR TM from user context */ | ||
414 | regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); | ||
415 | |||
416 | /* pull in MSR LE from user context */ | ||
413 | regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); | 417 | regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); |
414 | 418 | ||
415 | /* The following non-GPR non-FPR non-VR state is also checkpointed: */ | 419 | /* The following non-GPR non-FPR non-VR state is also checkpointed: */ |
@@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, | |||
505 | tm_enable(); | 509 | tm_enable(); |
506 | /* This loads the checkpointed FP/VEC state, if used */ | 510 | /* This loads the checkpointed FP/VEC state, if used */ |
507 | tm_recheckpoint(¤t->thread, msr); | 511 | tm_recheckpoint(¤t->thread, msr); |
508 | /* The task has moved into TM state S, so ensure MSR reflects this: */ | ||
509 | regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); | ||
510 | 512 | ||
511 | /* This loads the speculative FP/VEC state, if used */ | 513 | /* This loads the speculative FP/VEC state, if used */ |
512 | if (msr & MSR_FP) { | 514 | if (msr & MSR_FP) { |
@@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, | |||
654 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 656 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
655 | if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) | 657 | if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) |
656 | goto badframe; | 658 | goto badframe; |
657 | if (MSR_TM_SUSPENDED(msr)) { | 659 | if (MSR_TM_ACTIVE(msr)) { |
658 | /* We recheckpoint on return. */ | 660 | /* We recheckpoint on return. */ |
659 | struct ucontext __user *uc_transact; | 661 | struct ucontext __user *uc_transact; |
660 | if (__get_user(uc_transact, &uc->uc_link)) | 662 | if (__get_user(uc_transact, &uc->uc_link)) |
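Both the 32-bit and 64-bit sigreturn paths now test MSR_TM_ACTIVE() rather than MSR_TM_SUSPENDED(): a signal can interrupt a transaction in either the transactional or the suspended state, and both need a recheckpoint on return. A sketch of the two predicates over the TS field (encodings assumed for illustration: 01 suspended, 10 transactional):

    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_TS_S (1ULL << 33)       /* suspended */
    #define MSR_TS_T (1ULL << 34)       /* transactional */
    #define MSR_TS_MASK (MSR_TS_T | MSR_TS_S)

    static bool tm_suspended(uint64_t msr)
    {
        return (msr & MSR_TS_MASK) == MSR_TS_S;
    }

    static bool tm_active(uint64_t msr)  /* suspended or transactional */
    {
        return (msr & MSR_TS_MASK) != 0;
    }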
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ee7ac5e6e28a..38b0ba65a735 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c | |||
@@ -480,7 +480,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) | |||
480 | secondary_ti = current_set[cpu] = ti; | 480 | secondary_ti = current_set[cpu] = ti; |
481 | } | 481 | } |
482 | 482 | ||
483 | int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) | 483 | int __cpu_up(unsigned int cpu, struct task_struct *tidle) |
484 | { | 484 | { |
485 | int rc, c; | 485 | int rc, c; |
486 | 486 | ||
@@ -610,7 +610,7 @@ static struct device_node *cpu_to_l2cache(int cpu) | |||
610 | } | 610 | } |
611 | 611 | ||
612 | /* Activate a secondary processor. */ | 612 | /* Activate a secondary processor. */ |
613 | __cpuinit void start_secondary(void *unused) | 613 | void start_secondary(void *unused) |
614 | { | 614 | { |
615 | unsigned int cpu = smp_processor_id(); | 615 | unsigned int cpu = smp_processor_id(); |
616 | struct device_node *l2_cache; | 616 | struct device_node *l2_cache; |
@@ -637,12 +637,10 @@ __cpuinit void start_secondary(void *unused) | |||
637 | 637 | ||
638 | vdso_getcpu_init(); | 638 | vdso_getcpu_init(); |
639 | #endif | 639 | #endif |
640 | notify_cpu_starting(cpu); | ||
641 | set_cpu_online(cpu, true); | ||
642 | /* Update sibling maps */ | 640 | /* Update sibling maps */ |
643 | base = cpu_first_thread_sibling(cpu); | 641 | base = cpu_first_thread_sibling(cpu); |
644 | for (i = 0; i < threads_per_core; i++) { | 642 | for (i = 0; i < threads_per_core; i++) { |
645 | if (cpu_is_offline(base + i)) | 643 | if (cpu_is_offline(base + i) && (cpu != base + i)) |
646 | continue; | 644 | continue; |
647 | cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); | 645 | cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); |
648 | cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); | 646 | cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); |
@@ -667,6 +665,10 @@ __cpuinit void start_secondary(void *unused) | |||
667 | } | 665 | } |
668 | of_node_put(l2_cache); | 666 | of_node_put(l2_cache); |
669 | 667 | ||
668 | smp_wmb(); | ||
669 | notify_cpu_starting(cpu); | ||
670 | set_cpu_online(cpu, true); | ||
671 | |||
670 | local_irq_enable(); | 672 | local_irq_enable(); |
671 | 673 | ||
672 | cpu_startup_entry(CPUHP_ONLINE); | 674 | cpu_startup_entry(CPUHP_ONLINE); |
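The reorder in start_secondary() publishes the CPU's sibling and core masks before set_cpu_online() makes the CPU visible, with smp_wmb() ordering the two stores; any reader that sees the online bit is then guaranteed to see complete topology. A reduced sketch of the publish side, using a C11 release store where the kernel pairs smp_wmb() with the online write:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct cpu_state {
        unsigned long sibling_mask;  /* stand-in for the cpumask updates */
        _Atomic bool online;
    };

    static void bring_up(struct cpu_state *c, unsigned long siblings)
    {
        c->sibling_mask = siblings;  /* topology first */
        /* release ordering plays the role of smp_wmb() before the
         * online bit becomes visible to other CPUs */
        atomic_store_explicit(&c->online, true, memory_order_release);
    }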
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e68a84568b8b..27a90b99ef67 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c | |||
@@ -341,7 +341,7 @@ static struct device_attribute pa6t_attrs[] = { | |||
341 | #endif /* HAS_PPC_PMC_PA6T */ | 341 | #endif /* HAS_PPC_PMC_PA6T */ |
342 | #endif /* HAS_PPC_PMC_CLASSIC */ | 342 | #endif /* HAS_PPC_PMC_CLASSIC */ |
343 | 343 | ||
344 | static void __cpuinit register_cpu_online(unsigned int cpu) | 344 | static void register_cpu_online(unsigned int cpu) |
345 | { | 345 | { |
346 | struct cpu *c = &per_cpu(cpu_devices, cpu); | 346 | struct cpu *c = &per_cpu(cpu_devices, cpu); |
347 | struct device *s = &c->dev; | 347 | struct device *s = &c->dev; |
@@ -502,7 +502,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count) | |||
502 | 502 | ||
503 | #endif /* CONFIG_HOTPLUG_CPU */ | 503 | #endif /* CONFIG_HOTPLUG_CPU */ |
504 | 504 | ||
505 | static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, | 505 | static int sysfs_cpu_notify(struct notifier_block *self, |
506 | unsigned long action, void *hcpu) | 506 | unsigned long action, void *hcpu) |
507 | { | 507 | { |
508 | unsigned int cpu = (unsigned int)(long)hcpu; | 508 | unsigned int cpu = (unsigned int)(long)hcpu; |
@@ -522,7 +522,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, | |||
522 | return NOTIFY_OK; | 522 | return NOTIFY_OK; |
523 | } | 523 | } |
524 | 524 | ||
525 | static struct notifier_block __cpuinitdata sysfs_cpu_nb = { | 525 | static struct notifier_block sysfs_cpu_nb = { |
526 | .notifier_call = sysfs_cpu_notify, | 526 | .notifier_call = sysfs_cpu_notify, |
527 | }; | 527 | }; |
528 | 528 | ||
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5fc29ad7e26f..65ab9e909377 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val) | |||
631 | return found; | 631 | return found; |
632 | } | 632 | } |
633 | 633 | ||
634 | /* should become __cpuinit when secondary_cpu_time_init also is */ | ||
635 | void start_cpu_decrementer(void) | 634 | void start_cpu_decrementer(void) |
636 | { | 635 | { |
637 | #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) | 636 | #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) |
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2da67e7a16d5..51be8fb24803 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S | |||
@@ -112,9 +112,18 @@ _GLOBAL(tm_reclaim) | |||
112 | std r3, STACK_PARAM(0)(r1) | 112 | std r3, STACK_PARAM(0)(r1) |
113 | SAVE_NVGPRS(r1) | 113 | SAVE_NVGPRS(r1) |
114 | 114 | ||
115 | /* We need to set up MSR for VSX register save instructions. Here we | ||
116 | * also clear the MSR RI since when we do the treclaim, we won't have a | ||
117 | * valid kernel pointer for a while. We clear RI here as it avoids | ||
118 | * adding another mtmsr closer to the treclaim. This makes the region | ||
119 | * maked as non-recoverable wider than it needs to be but it saves on | ||
120 | * inserting another mtmsrd later. | ||
121 | */ | ||
115 | mfmsr r14 | 122 | mfmsr r14 |
116 | mr r15, r14 | 123 | mr r15, r14 |
117 | ori r15, r15, MSR_FP | 124 | ori r15, r15, MSR_FP |
125 | li r16, MSR_RI | ||
126 | andc r15, r15, r16 | ||
118 | oris r15, r15, MSR_VEC@h | 127 | oris r15, r15, MSR_VEC@h |
119 | #ifdef CONFIG_VSX | 128 | #ifdef CONFIG_VSX |
120 | BEGIN_FTR_SECTION | 129 | BEGIN_FTR_SECTION |
@@ -349,9 +358,10 @@ restore_gprs: | |||
349 | mtcr r5 | 358 | mtcr r5 |
350 | mtxer r6 | 359 | mtxer r6 |
351 | 360 | ||
352 | /* MSR and flags: We don't change CRs, and we don't need to alter | 361 | /* Clear the MSR RI since we are about to change R1. EE is already off |
353 | * MSR. | ||
354 | */ | 362 | */ |
363 | li r4, 0 | ||
364 | mtmsrd r4, 1 | ||
355 | 365 | ||
356 | REST_4GPRS(0, r7) /* GPR0-3 */ | 366 | REST_4GPRS(0, r7) /* GPR0-3 */ |
357 | REST_GPR(4, r7) /* GPR4-6 */ | 367 | REST_GPR(4, r7) /* GPR4-6 */ |
@@ -377,6 +387,10 @@ restore_gprs: | |||
377 | GET_PACA(r13) | 387 | GET_PACA(r13) |
378 | GET_SCRATCH0(r1) | 388 | GET_SCRATCH0(r1) |
379 | 389 | ||
390 | /* R1 is restored, so we are recoverable again. EE is still off */ | ||
391 | li r4, MSR_RI | ||
392 | mtmsrd r4, 1 | ||
393 | |||
380 | REST_NVGPRS(r1) | 394 | REST_NVGPRS(r1) |
381 | 395 | ||
382 | addi r1, r1, TM_FRAME_SIZE | 396 | addi r1, r1, TM_FRAME_SIZE |
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c0e5caf8ccc7..bf33c22e38a4 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c | |||
@@ -866,6 +866,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) | |||
866 | u8 val; | 866 | u8 val; |
867 | u32 shift = 8 * (3 - (pos & 0x3)); | 867 | u32 shift = 8 * (3 - (pos & 0x3)); |
868 | 868 | ||
869 | /* if process is 32-bit, clear upper 32 bits of EA */ | ||
870 | if ((regs->msr & MSR_64BIT) == 0) | ||
871 | EA &= 0xFFFFFFFF; | ||
872 | |||
869 | switch ((instword & PPC_INST_STRING_MASK)) { | 873 | switch ((instword & PPC_INST_STRING_MASK)) { |
870 | case PPC_INST_LSWX: | 874 | case PPC_INST_LSWX: |
871 | case PPC_INST_LSWI: | 875 | case PPC_INST_LSWI: |
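String-instruction emulation now masks the effective address to 32 bits when the task runs with MSR_64BIT clear, matching how the hardware computes EAs in 32-bit mode. The operation reduces to a conditional truncation (the bit position shown is an assumption; MSR_64BIT corresponds to the SF bit):

    #include <stdint.h>

    #define MSR_64BIT (1ULL << 63)   /* assumed SF bit position */

    static uint64_t effective_addr(uint64_t msr, uint64_t ea)
    {
        if (!(msr & MSR_64BIT))
            ea &= 0xFFFFFFFFULL;     /* 32-bit task: upper word ignored */
        return ea;
    }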
@@ -1125,7 +1129,17 @@ void __kprobes program_check_exception(struct pt_regs *regs) | |||
1125 | * ESR_DST (!?) or 0. In the process of chasing this with the | 1129 | * ESR_DST (!?) or 0. In the process of chasing this with the |
1126 | * hardware people - not sure if it can happen on any illegal | 1130 | * hardware people - not sure if it can happen on any illegal |
1127 | * instruction or only on FP instructions, whether there is a | 1131 | * instruction or only on FP instructions, whether there is a |
1128 | * pattern to occurrences etc. -dgibson 31/Mar/2003 */ | 1132 | * pattern to occurrences etc. -dgibson 31/Mar/2003 |
1133 | */ | ||
1134 | |||
1135 | /* | ||
1136 | * If we support a HW FPU, we need to ensure the FP state | ||
1137 | * if flushed into the thread_struct before attempting | ||
1138 | * emulation | ||
1139 | */ | ||
1140 | #ifdef CONFIG_PPC_FPU | ||
1141 | flush_fp_to_thread(current); | ||
1142 | #endif | ||
1129 | switch (do_mathemu(regs)) { | 1143 | switch (do_mathemu(regs)) { |
1130 | case 0: | 1144 | case 0: |
1131 | emulate_single_step(regs); | 1145 | emulate_single_step(regs); |
@@ -1282,25 +1296,50 @@ void vsx_unavailable_exception(struct pt_regs *regs) | |||
1282 | die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); | 1296 | die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); |
1283 | } | 1297 | } |
1284 | 1298 | ||
1285 | void tm_unavailable_exception(struct pt_regs *regs) | 1299 | void facility_unavailable_exception(struct pt_regs *regs) |
1286 | { | 1300 | { |
1301 | static char *facility_strings[] = { | ||
1302 | "FPU", | ||
1303 | "VMX/VSX", | ||
1304 | "DSCR", | ||
1305 | "PMU SPRs", | ||
1306 | "BHRB", | ||
1307 | "TM", | ||
1308 | "AT", | ||
1309 | "EBB", | ||
1310 | "TAR", | ||
1311 | }; | ||
1312 | char *facility, *prefix; | ||
1313 | u64 value; | ||
1314 | |||
1315 | if (regs->trap == 0xf60) { | ||
1316 | value = mfspr(SPRN_FSCR); | ||
1317 | prefix = ""; | ||
1318 | } else { | ||
1319 | value = mfspr(SPRN_HFSCR); | ||
1320 | prefix = "Hypervisor "; | ||
1321 | } | ||
1322 | |||
1323 | value = value >> 56; | ||
1324 | |||
1287 | /* We restore the interrupt state now */ | 1325 | /* We restore the interrupt state now */ |
1288 | if (!arch_irq_disabled_regs(regs)) | 1326 | if (!arch_irq_disabled_regs(regs)) |
1289 | local_irq_enable(); | 1327 | local_irq_enable(); |
1290 | 1328 | ||
1291 | /* Currently we never expect a TMU exception. Catch | 1329 | if (value < ARRAY_SIZE(facility_strings)) |
1292 | * this and kill the process! | 1330 | facility = facility_strings[value]; |
1293 | */ | 1331 | else |
1294 | printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " | 1332 | facility = "unknown"; |
1295 | "(msr %lx)\n", | 1333 | |
1296 | regs->nip, regs->msr); | 1334 | pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", |
1335 | prefix, facility, regs->nip, regs->msr); | ||
1297 | 1336 | ||
1298 | if (user_mode(regs)) { | 1337 | if (user_mode(regs)) { |
1299 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); | 1338 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); |
1300 | return; | 1339 | return; |
1301 | } | 1340 | } |
1302 | 1341 | ||
1303 | die("Unexpected TM unavailable exception", regs, SIGABRT); | 1342 | die("Unexpected facility unavailable exception", regs, SIGABRT); |
1304 | } | 1343 | } |
1305 | 1344 | ||
1306 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 1345 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
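The unified handler decodes the failing facility from the top byte (the IC field, bits 63:56) of FSCR, or of HFSCR for the hypervisor variant, and uses it to index the name table. A sketch of that decode:

    static const char *facility_strings[] = {
        "FPU", "VMX/VSX", "DSCR", "PMU SPRs", "BHRB", "TM", "AT", "EBB", "TAR",
    };

    static const char *facility_name(unsigned long long fscr)
    {
        unsigned long long ic = fscr >> 56;   /* IC field in the top byte */

        if (ic < sizeof(facility_strings) / sizeof(facility_strings[0]))
            return facility_strings[ic];
        return "unknown";
    }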
@@ -1396,8 +1435,7 @@ void performance_monitor_exception(struct pt_regs *regs) | |||
1396 | void SoftwareEmulation(struct pt_regs *regs) | 1435 | void SoftwareEmulation(struct pt_regs *regs) |
1397 | { | 1436 | { |
1398 | extern int do_mathemu(struct pt_regs *); | 1437 | extern int do_mathemu(struct pt_regs *); |
1399 | extern int Soft_emulate_8xx(struct pt_regs *); | 1438 | #if defined(CONFIG_MATH_EMULATION) |
1400 | #if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU) | ||
1401 | int errcode; | 1439 | int errcode; |
1402 | #endif | 1440 | #endif |
1403 | 1441 | ||
@@ -1430,23 +1468,6 @@ void SoftwareEmulation(struct pt_regs *regs) | |||
1430 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); | 1468 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); |
1431 | return; | 1469 | return; |
1432 | } | 1470 | } |
1433 | |||
1434 | #elif defined(CONFIG_8XX_MINIMAL_FPEMU) | ||
1435 | errcode = Soft_emulate_8xx(regs); | ||
1436 | if (errcode >= 0) | ||
1437 | PPC_WARN_EMULATED(8xx, regs); | ||
1438 | |||
1439 | switch (errcode) { | ||
1440 | case 0: | ||
1441 | emulate_single_step(regs); | ||
1442 | return; | ||
1443 | case 1: | ||
1444 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); | ||
1445 | return; | ||
1446 | case -EFAULT: | ||
1447 | _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); | ||
1448 | return; | ||
1449 | } | ||
1450 | #else | 1471 | #else |
1451 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); | 1472 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); |
1452 | #endif | 1473 | #endif |
@@ -1796,8 +1817,6 @@ struct ppc_emulated ppc_emulated = { | |||
1796 | WARN_EMULATED_SETUP(unaligned), | 1817 | WARN_EMULATED_SETUP(unaligned), |
1797 | #ifdef CONFIG_MATH_EMULATION | 1818 | #ifdef CONFIG_MATH_EMULATION |
1798 | WARN_EMULATED_SETUP(math), | 1819 | WARN_EMULATED_SETUP(math), |
1799 | #elif defined(CONFIG_8XX_MINIMAL_FPEMU) | ||
1800 | WARN_EMULATED_SETUP(8xx), | ||
1801 | #endif | 1820 | #endif |
1802 | #ifdef CONFIG_VSX | 1821 | #ifdef CONFIG_VSX |
1803 | WARN_EMULATED_SETUP(vsx), | 1822 | WARN_EMULATED_SETUP(vsx), |
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 9d3fdcd66290..a15837519dca 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c | |||
@@ -50,7 +50,7 @@ void __init udbg_early_init(void) | |||
50 | udbg_init_debug_beat(); | 50 | udbg_init_debug_beat(); |
51 | #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) | 51 | #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) |
52 | udbg_init_pas_realmode(); | 52 | udbg_init_pas_realmode(); |
53 | #elif defined(CONFIG_BOOTX_TEXT) | 53 | #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) |
54 | udbg_init_btext(); | 54 | udbg_init_btext(); |
55 | #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) | 55 | #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) |
56 | /* PPC44x debug */ | 56 | /* PPC44x debug */ |
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d4f463ac65b1..1d9c92621b36 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c | |||
@@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void) | |||
711 | } | 711 | } |
712 | 712 | ||
713 | #ifdef CONFIG_PPC64 | 713 | #ifdef CONFIG_PPC64 |
714 | int __cpuinit vdso_getcpu_init(void) | 714 | int vdso_getcpu_init(void) |
715 | { | 715 | { |
716 | unsigned long cpu, node, val; | 716 | unsigned long cpu, node, val; |
717 | 717 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index b350d9494b26..e5240524bf6c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
@@ -34,7 +34,7 @@ | |||
34 | void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | 34 | void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) |
35 | { | 35 | { |
36 | ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, | 36 | ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, |
37 | MMU_PAGE_4K, MMU_SEGSIZE_256M, | 37 | MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, |
38 | false); | 38 | false); |
39 | } | 39 | } |
40 | 40 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 5880dfb31074..710d31317d81 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
675 | } | 675 | } |
676 | /* if the guest wants write access, see if that is OK */ | 676 | /* if the guest wants write access, see if that is OK */ |
677 | if (!writing && hpte_is_writable(r)) { | 677 | if (!writing && hpte_is_writable(r)) { |
678 | unsigned int hugepage_shift; | ||
678 | pte_t *ptep, pte; | 679 | pte_t *ptep, pte; |
679 | 680 | ||
680 | /* | 681 | /* |
@@ -683,9 +684,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
683 | */ | 684 | */ |
684 | rcu_read_lock_sched(); | 685 | rcu_read_lock_sched(); |
685 | ptep = find_linux_pte_or_hugepte(current->mm->pgd, | 686 | ptep = find_linux_pte_or_hugepte(current->mm->pgd, |
686 | hva, NULL); | 687 | hva, &hugepage_shift); |
687 | if (ptep && pte_present(*ptep)) { | 688 | if (ptep) { |
688 | pte = kvmppc_read_update_linux_pte(ptep, 1); | 689 | pte = kvmppc_read_update_linux_pte(ptep, 1, |
690 | hugepage_shift); | ||
689 | if (pte_write(pte)) | 691 | if (pte_write(pte)) |
690 | write_ok = 1; | 692 | write_ok = 1; |
691 | } | 693 | } |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6dcbb49105a4..fc25689a9f35 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x) | |||
27 | unsigned long addr = (unsigned long) x; | 27 | unsigned long addr = (unsigned long) x; |
28 | pte_t *p; | 28 | pte_t *p; |
29 | 29 | ||
30 | p = find_linux_pte(swapper_pg_dir, addr); | 30 | p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); |
31 | if (!p || !pte_present(*p)) | 31 | if (!p || !pte_present(*p)) |
32 | return NULL; | 32 | return NULL; |
33 | /* assume we don't have huge pages in vmalloc space... */ | 33 | /* assume we don't have huge pages in vmalloc space... */ |
@@ -139,20 +139,18 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, | |||
139 | { | 139 | { |
140 | pte_t *ptep; | 140 | pte_t *ptep; |
141 | unsigned long ps = *pte_sizep; | 141 | unsigned long ps = *pte_sizep; |
142 | unsigned int shift; | 142 | unsigned int hugepage_shift; |
143 | 143 | ||
144 | ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); | 144 | ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift); |
145 | if (!ptep) | 145 | if (!ptep) |
146 | return __pte(0); | 146 | return __pte(0); |
147 | if (shift) | 147 | if (hugepage_shift) |
148 | *pte_sizep = 1ul << shift; | 148 | *pte_sizep = 1ul << hugepage_shift; |
149 | else | 149 | else |
150 | *pte_sizep = PAGE_SIZE; | 150 | *pte_sizep = PAGE_SIZE; |
151 | if (ps > *pte_sizep) | 151 | if (ps > *pte_sizep) |
152 | return __pte(0); | 152 | return __pte(0); |
153 | if (!pte_present(*ptep)) | 153 | return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); |
154 | return __pte(0); | ||
155 | return kvmppc_read_update_linux_pte(ptep, writing); | ||
156 | } | 154 | } |
157 | 155 | ||
158 | static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) | 156 | static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) |
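In both KVM hunks above, find_linux_pte_or_hugepte() now reports the hugepage shift to its caller, so the mapping size falls straight out of the walk instead of being re-derived. The size computation reduces to this sketch:

    #define PAGE_SIZE 4096UL

    /* shift == 0 means an ordinary PTE; otherwise it names the huge size */
    static unsigned long pte_size(unsigned int hugepage_shift)
    {
        return hugepage_shift ? 1UL << hugepage_shift : PAGE_SIZE;
    }

The separate pte_present() test in lookup_linux_pte() is dropped in favour of letting kvmppc_read_update_linux_pte() judge the entry itself.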
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e15c521846ca..99c7fc16dc0d 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c | |||
@@ -580,7 +580,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) | |||
580 | if (instr & 1) | 580 | if (instr & 1) |
581 | regs->link = regs->nip; | 581 | regs->link = regs->nip; |
582 | if (branch_taken(instr, regs)) | 582 | if (branch_taken(instr, regs)) |
583 | regs->nip = imm; | 583 | regs->nip = truncate_if_32bit(regs->msr, imm); |
584 | return 1; | 584 | return 1; |
585 | #ifdef CONFIG_PPC64 | 585 | #ifdef CONFIG_PPC64 |
586 | case 17: /* sc */ | 586 | case 17: /* sc */ |
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index 7d1dba0d57f9..8d035d2d42a6 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile | |||
@@ -4,7 +4,8 @@ obj-$(CONFIG_MATH_EMULATION) += fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \ | |||
4 | fmadd.o fmadds.o fmsub.o fmsubs.o \ | 4 | fmadd.o fmadds.o fmsub.o fmsubs.o \ |
5 | fmul.o fmuls.o fnabs.o fneg.o \ | 5 | fmul.o fmuls.o fnabs.o fneg.o \ |
6 | fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ | 6 | fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ |
7 | fres.o frsp.o frsqrte.o fsel.o lfs.o \ | 7 | fres.o fre.o frsp.o fsel.o lfs.o \ |
8 | frsqrte.o frsqrtes.o \ | ||
8 | fsqrt.o fsqrts.o fsub.o fsubs.o \ | 9 | fsqrt.o fsqrts.o fsub.o fsubs.o \ |
9 | mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ | 10 | mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ |
10 | mtfsf.o mtfsfi.o stfiwx.o stfs.o \ | 11 | mtfsf.o mtfsfi.o stfiwx.o stfs.o \ |
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c new file mode 100644 index 000000000000..49ccf2cc6a5a --- /dev/null +++ b/arch/powerpc/math-emu/fre.c | |||
@@ -0,0 +1,11 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <asm/uaccess.h> | ||
4 | |||
5 | int fre(void *frD, void *frB) | ||
6 | { | ||
7 | #ifdef DEBUG | ||
8 | printk("%s: %p %p\n", __func__, frD, frB); | ||
9 | #endif | ||
10 | return -ENOSYS; | ||
11 | } | ||
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c new file mode 100644 index 000000000000..7e838e380314 --- /dev/null +++ b/arch/powerpc/math-emu/frsqrtes.c | |||
@@ -0,0 +1,11 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <asm/uaccess.h> | ||
4 | |||
5 | int frsqrtes(void *frD, void *frB) | ||
6 | { | ||
7 | #ifdef DEBUG | ||
8 | printk("%s: %p %p\n", __func__, frD, frB); | ||
9 | #endif | ||
10 | return 0; | ||
11 | } | ||
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 164d55935bd8..0328e66e0799 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c | |||
@@ -58,8 +58,10 @@ FLOATFUNC(fnabs); | |||
58 | FLOATFUNC(fneg); | 58 | FLOATFUNC(fneg); |
59 | 59 | ||
60 | /* Optional */ | 60 | /* Optional */ |
61 | FLOATFUNC(fre); | ||
61 | FLOATFUNC(fres); | 62 | FLOATFUNC(fres); |
62 | FLOATFUNC(frsqrte); | 63 | FLOATFUNC(frsqrte); |
64 | FLOATFUNC(frsqrtes); | ||
63 | FLOATFUNC(fsel); | 65 | FLOATFUNC(fsel); |
64 | FLOATFUNC(fsqrt); | 66 | FLOATFUNC(fsqrt); |
65 | FLOATFUNC(fsqrts); | 67 | FLOATFUNC(fsqrts); |
@@ -97,6 +99,7 @@ FLOATFUNC(fsqrts); | |||
97 | #define FSQRTS 0x016 /* 22 */ | 99 | #define FSQRTS 0x016 /* 22 */ |
98 | #define FRES 0x018 /* 24 */ | 100 | #define FRES 0x018 /* 24 */ |
99 | #define FMULS 0x019 /* 25 */ | 101 | #define FMULS 0x019 /* 25 */ |
102 | #define FRSQRTES 0x01a /* 26 */ | ||
100 | #define FMSUBS 0x01c /* 28 */ | 103 | #define FMSUBS 0x01c /* 28 */ |
101 | #define FMADDS 0x01d /* 29 */ | 104 | #define FMADDS 0x01d /* 29 */ |
102 | #define FNMSUBS 0x01e /* 30 */ | 105 | #define FNMSUBS 0x01e /* 30 */ |
@@ -109,6 +112,7 @@ FLOATFUNC(fsqrts); | |||
109 | #define FADD 0x015 /* 21 */ | 112 | #define FADD 0x015 /* 21 */ |
110 | #define FSQRT 0x016 /* 22 */ | 113 | #define FSQRT 0x016 /* 22 */ |
111 | #define FSEL 0x017 /* 23 */ | 114 | #define FSEL 0x017 /* 23 */ |
115 | #define FRE 0x018 /* 24 */ | ||
112 | #define FMUL 0x019 /* 25 */ | 116 | #define FMUL 0x019 /* 25 */ |
113 | #define FRSQRTE 0x01a /* 26 */ | 117 | #define FRSQRTE 0x01a /* 26 */ |
114 | #define FMSUB 0x01c /* 28 */ | 118 | #define FMSUB 0x01c /* 28 */ |
@@ -299,9 +303,10 @@ do_mathemu(struct pt_regs *regs) | |||
299 | case FDIVS: func = fdivs; type = AB; break; | 303 | case FDIVS: func = fdivs; type = AB; break; |
300 | case FSUBS: func = fsubs; type = AB; break; | 304 | case FSUBS: func = fsubs; type = AB; break; |
301 | case FADDS: func = fadds; type = AB; break; | 305 | case FADDS: func = fadds; type = AB; break; |
302 | case FSQRTS: func = fsqrts; type = AB; break; | 306 | case FSQRTS: func = fsqrts; type = XB; break; |
303 | case FRES: func = fres; type = AB; break; | 307 | case FRES: func = fres; type = XB; break; |
304 | case FMULS: func = fmuls; type = AC; break; | 308 | case FMULS: func = fmuls; type = AC; break; |
309 | case FRSQRTES: func = frsqrtes; type = XB; break; | ||
305 | case FMSUBS: func = fmsubs; type = ABC; break; | 310 | case FMSUBS: func = fmsubs; type = ABC; break; |
306 | case FMADDS: func = fmadds; type = ABC; break; | 311 | case FMADDS: func = fmadds; type = ABC; break; |
307 | case FNMSUBS: func = fnmsubs; type = ABC; break; | 312 | case FNMSUBS: func = fnmsubs; type = ABC; break; |
@@ -317,10 +322,11 @@ do_mathemu(struct pt_regs *regs) | |||
317 | case FDIV: func = fdiv; type = AB; break; | 322 | case FDIV: func = fdiv; type = AB; break; |
318 | case FSUB: func = fsub; type = AB; break; | 323 | case FSUB: func = fsub; type = AB; break; |
319 | case FADD: func = fadd; type = AB; break; | 324 | case FADD: func = fadd; type = AB; break; |
320 | case FSQRT: func = fsqrt; type = AB; break; | 325 | case FSQRT: func = fsqrt; type = XB; break; |
326 | case FRE: func = fre; type = XB; break; | ||
321 | case FSEL: func = fsel; type = ABC; break; | 327 | case FSEL: func = fsel; type = ABC; break; |
322 | case FMUL: func = fmul; type = AC; break; | 328 | case FMUL: func = fmul; type = AC; break; |
323 | case FRSQRTE: func = frsqrte; type = AB; break; | 329 | case FRSQRTE: func = frsqrte; type = XB; break; |
324 | case FMSUB: func = fmsub; type = ABC; break; | 330 | case FMSUB: func = fmsub; type = ABC; break; |
325 | case FMADD: func = fmadd; type = ABC; break; | 331 | case FMADD: func = fmadd; type = ABC; break; |
326 | case FNMSUB: func = fnmsub; type = ABC; break; | 332 | case FNMSUB: func = fnmsub; type = ABC; break; |
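The table fix matters because the emulator decodes operands by type: XB instructions (fsqrt, fres, frsqrte, and the new fre/frsqrtes) read only frB, while AB forms also read frA, so classifying them as AB made the emulator decode a register field the instruction does not have. A simplified sketch of type-driven decode (field offsets per the standard encoding; the dispatch itself is illustrative):

    #include <stdint.h>

    enum op_type { AB, AC, ABC, XB };

    static void decode_operands(enum op_type type, uint32_t insn,
                                unsigned *frd, unsigned *fra, unsigned *frb)
    {
        *frd = (insn >> 21) & 0x1f;
        *frb = (insn >> 11) & 0x1f;
        if (type == XB)
            *fra = 0;                   /* no frA field: ignore bits 16-20 */
        else
            *fra = (insn >> 16) & 0x1f; /* AB/AC/ABC read frA as well */
    }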
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 2c9441ee6bb8..82b1ff759e26 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c | |||
@@ -41,7 +41,7 @@ int icache_44x_need_flush; | |||
41 | 41 | ||
42 | unsigned long tlb_47x_boltmap[1024/8]; | 42 | unsigned long tlb_47x_boltmap[1024/8]; |
43 | 43 | ||
44 | static void __cpuinit ppc44x_update_tlb_hwater(void) | 44 | static void ppc44x_update_tlb_hwater(void) |
45 | { | 45 | { |
46 | extern unsigned int tlb_44x_patch_hwater_D[]; | 46 | extern unsigned int tlb_44x_patch_hwater_D[]; |
47 | extern unsigned int tlb_44x_patch_hwater_I[]; | 47 | extern unsigned int tlb_44x_patch_hwater_I[]; |
@@ -134,7 +134,7 @@ static void __init ppc47x_update_boltmap(void) | |||
134 | /* | 134 | /* |
135 | * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU | 135 | * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU |
136 | */ | 136 | */ |
137 | static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) | 137 | static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) |
138 | { | 138 | { |
139 | unsigned int rA; | 139 | unsigned int rA; |
140 | int bolted; | 140 | int bolted; |
@@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, | |||
229 | } | 229 | } |
230 | 230 | ||
231 | #ifdef CONFIG_SMP | 231 | #ifdef CONFIG_SMP |
232 | void __cpuinit mmu_init_secondary(int cpu) | 232 | void mmu_init_secondary(int cpu) |
233 | { | 233 | { |
234 | unsigned long addr; | 234 | unsigned long addr; |
235 | unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); | 235 | unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); |
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index cf16b5733eaa..51230ee6a407 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile | |||
@@ -6,17 +6,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror | |||
6 | 6 | ||
7 | ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) | 7 | ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) |
8 | 8 | ||
9 | obj-y := fault.o mem.o pgtable.o gup.o \ | 9 | obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ |
10 | init_$(CONFIG_WORD_SIZE).o \ | 10 | init_$(CONFIG_WORD_SIZE).o \ |
11 | pgtable_$(CONFIG_WORD_SIZE).o | 11 | pgtable_$(CONFIG_WORD_SIZE).o |
12 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ | 12 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ |
13 | tlb_nohash_low.o | 13 | tlb_nohash_low.o |
14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o | 14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o |
15 | obj-$(CONFIG_PPC64) += mmap_64.o | ||
16 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o | 15 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o |
17 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ | 16 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ |
18 | slb_low.o slb.o stab.o \ | 17 | slb_low.o slb.o stab.o \ |
19 | mmap_64.o $(hash64-y) | 18 | $(hash64-y) |
20 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o | 19 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o |
21 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ | 20 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ |
22 | tlb_hash$(CONFIG_WORD_SIZE).o \ | 21 | tlb_hash$(CONFIG_WORD_SIZE).o \ |
@@ -28,11 +27,12 @@ obj-$(CONFIG_44x) += 44x_mmu.o | |||
28 | obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o | 27 | obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o |
29 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o | 28 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o |
30 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o | 29 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o |
31 | ifeq ($(CONFIG_HUGETLB_PAGE),y) | ||
32 | obj-y += hugetlbpage.o | 30 | obj-y += hugetlbpage.o |
31 | ifeq ($(CONFIG_HUGETLB_PAGE),y) | ||
33 | obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o | 32 | obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o |
34 | obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o | 33 | obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o |
35 | endif | 34 | endif |
35 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o | ||
36 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o | 36 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o |
37 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o | 37 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o |
38 | obj-$(CONFIG_HIGHMEM) += highmem.o | 38 | obj-$(CONFIG_HIGHMEM) += highmem.o |
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 4b921affa495..49822d90ea96 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c | |||
@@ -34,7 +34,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
34 | 34 | ||
35 | ptep = pte_offset_kernel(&pmd, addr); | 35 | ptep = pte_offset_kernel(&pmd, addr); |
36 | do { | 36 | do { |
37 | pte_t pte = *ptep; | 37 | pte_t pte = ACCESS_ONCE(*ptep); |
38 | struct page *page; | 38 | struct page *page; |
39 | 39 | ||
40 | if ((pte_val(pte) & mask) != result) | 40 | if ((pte_val(pte) & mask) != result) |
@@ -63,12 +63,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
63 | 63 | ||
64 | pmdp = pmd_offset(&pud, addr); | 64 | pmdp = pmd_offset(&pud, addr); |
65 | do { | 65 | do { |
66 | pmd_t pmd = *pmdp; | 66 | pmd_t pmd = ACCESS_ONCE(*pmdp); |
67 | 67 | ||
68 | next = pmd_addr_end(addr, end); | 68 | next = pmd_addr_end(addr, end); |
69 | if (pmd_none(pmd)) | 69 | /* |
70 | * If we find a splitting transparent hugepage we | ||
71 | * return zero. That will result in taking the slow | ||
72 | * path which will call wait_split_huge_page() | ||
73 | * if the pmd is still in splitting state | ||
74 | */ | ||
75 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
70 | return 0; | 76 | return 0; |
71 | if (pmd_huge(pmd)) { | 77 | if (pmd_huge(pmd) || pmd_large(pmd)) { |
72 | if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, | 78 | if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, |
73 | write, pages, nr)) | 79 | write, pages, nr)) |
74 | return 0; | 80 | return 0; |
@@ -91,7 +97,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
91 | 97 | ||
92 | pudp = pud_offset(&pgd, addr); | 98 | pudp = pud_offset(&pgd, addr); |
93 | do { | 99 | do { |
94 | pud_t pud = *pudp; | 100 | pud_t pud = ACCESS_ONCE(*pudp); |
95 | 101 | ||
96 | next = pud_addr_end(addr, end); | 102 | next = pud_addr_end(addr, end); |
97 | if (pud_none(pud)) | 103 | if (pud_none(pud)) |
@@ -154,7 +160,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
154 | 160 | ||
155 | pgdp = pgd_offset(mm, addr); | 161 | pgdp = pgd_offset(mm, addr); |
156 | do { | 162 | do { |
157 | pgd_t pgd = *pgdp; | 163 | pgd_t pgd = ACCESS_ONCE(*pgdp); |
158 | 164 | ||
159 | pr_devel(" %016lx: normal pgd %p\n", addr, | 165 | pr_devel(" %016lx: normal pgd %p\n", addr, |
160 | (void *)pgd_val(pgd)); | 166 | (void *)pgd_val(pgd)); |
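Reading each level through ACCESS_ONCE() forces a single load, so every subsequent test in the lockless walk inspects one consistent snapshot of the entry; without it the compiler may legally re-read *ptep after a concurrent update. A stand-alone sketch of the idiom:

    /* local stand-in for the kernel's ACCESS_ONCE(): a volatile read
     * the compiler cannot reload behind our back */
    #define READ_ONCE_UL(p) (*(volatile unsigned long *)(p))

    static int walk_entry(unsigned long *pmdp)
    {
        unsigned long pmd = READ_ONCE_UL(pmdp);  /* one snapshot */

        /* all later checks use the snapshot, never *pmdp, so a
         * concurrent clear or THP split cannot change the answer
         * halfway through; splitting entries bail to the slow path */
        if (pmd == 0)
            return 0;
        return 1;
    }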
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 0e980acae67c..d3cbda62857b 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S | |||
@@ -289,9 +289,10 @@ htab_modify_pte: | |||
289 | 289 | ||
290 | /* Call ppc_md.hpte_updatepp */ | 290 | /* Call ppc_md.hpte_updatepp */ |
291 | mr r5,r29 /* vpn */ | 291 | mr r5,r29 /* vpn */ |
292 | li r6,MMU_PAGE_4K /* page size */ | 292 | li r6,MMU_PAGE_4K /* base page size */ |
293 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 293 | li r7,MMU_PAGE_4K /* actual page size */ |
294 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 294 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
295 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
295 | _GLOBAL(htab_call_hpte_updatepp) | 296 | _GLOBAL(htab_call_hpte_updatepp) |
296 | bl . /* Patched by htab_finish_init() */ | 297 | bl . /* Patched by htab_finish_init() */ |
297 | 298 | ||
@@ -649,9 +650,10 @@ htab_modify_pte: | |||
649 | 650 | ||
650 | /* Call ppc_md.hpte_updatepp */ | 651 | /* Call ppc_md.hpte_updatepp */ |
651 | mr r5,r29 /* vpn */ | 652 | mr r5,r29 /* vpn */ |
652 | li r6,MMU_PAGE_4K /* page size */ | 653 | li r6,MMU_PAGE_4K /* base page size */ |
653 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 654 | li r7,MMU_PAGE_4K /* actual page size */ |
654 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 655 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
656 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
655 | _GLOBAL(htab_call_hpte_updatepp) | 657 | _GLOBAL(htab_call_hpte_updatepp) |
656 | bl . /* patched by htab_finish_init() */ | 658 | bl . /* patched by htab_finish_init() */ |
657 | 659 | ||
@@ -937,9 +939,10 @@ ht64_modify_pte: | |||
937 | 939 | ||
938 | /* Call ppc_md.hpte_updatepp */ | 940 | /* Call ppc_md.hpte_updatepp */ |
939 | mr r5,r29 /* vpn */ | 941 | mr r5,r29 /* vpn */ |
940 | li r6,MMU_PAGE_64K | 942 | li r6,MMU_PAGE_64K /* base page size */ |
941 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 943 | li r7,MMU_PAGE_64K /* actual page size */ |
942 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 944 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
945 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
943 | _GLOBAL(ht64_call_hpte_updatepp) | 946 | _GLOBAL(ht64_call_hpte_updatepp) |
944 | bl . /* patched by htab_finish_init() */ | 947 | bl . /* patched by htab_finish_init() */ |
945 | 948 | ||
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 4c122c3f1623..3f0c30ae4791 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c | |||
@@ -273,61 +273,15 @@ static long native_hpte_remove(unsigned long hpte_group) | |||
273 | return i; | 273 | return i; |
274 | } | 274 | } |
275 | 275 | ||
276 | static inline int __hpte_actual_psize(unsigned int lp, int psize) | ||
277 | { | ||
278 | int i, shift; | ||
279 | unsigned int mask; | ||
280 | |||
281 | /* start from 1 ignoring MMU_PAGE_4K */ | ||
282 | for (i = 1; i < MMU_PAGE_COUNT; i++) { | ||
283 | |||
284 | /* invalid penc */ | ||
285 | if (mmu_psize_defs[psize].penc[i] == -1) | ||
286 | continue; | ||
287 | /* | ||
288 | * encoding bits per actual page size | ||
289 | * PTE LP actual page size | ||
290 | * rrrr rrrz >=8KB | ||
291 | * rrrr rrzz >=16KB | ||
292 | * rrrr rzzz >=32KB | ||
293 | * rrrr zzzz >=64KB | ||
294 | * ....... | ||
295 | */ | ||
296 | shift = mmu_psize_defs[i].shift - LP_SHIFT; | ||
297 | if (shift > LP_BITS) | ||
298 | shift = LP_BITS; | ||
299 | mask = (1 << shift) - 1; | ||
300 | if ((lp & mask) == mmu_psize_defs[psize].penc[i]) | ||
301 | return i; | ||
302 | } | ||
303 | return -1; | ||
304 | } | ||
305 | |||
306 | static inline int hpte_actual_psize(struct hash_pte *hptep, int psize) | ||
307 | { | ||
308 | /* Look at the 8 bit LP value */ | ||
309 | unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); | ||
310 | |||
311 | if (!(hptep->v & HPTE_V_VALID)) | ||
312 | return -1; | ||
313 | |||
314 | /* First check if it is large page */ | ||
315 | if (!(hptep->v & HPTE_V_LARGE)) | ||
316 | return MMU_PAGE_4K; | ||
317 | |||
318 | return __hpte_actual_psize(lp, psize); | ||
319 | } | ||
320 | |||
321 | static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | 276 | static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, |
322 | unsigned long vpn, int psize, int ssize, | 277 | unsigned long vpn, int bpsize, |
323 | int local) | 278 | int apsize, int ssize, int local) |
324 | { | 279 | { |
325 | struct hash_pte *hptep = htab_address + slot; | 280 | struct hash_pte *hptep = htab_address + slot; |
326 | unsigned long hpte_v, want_v; | 281 | unsigned long hpte_v, want_v; |
327 | int ret = 0; | 282 | int ret = 0; |
328 | int actual_psize; | ||
329 | 283 | ||
330 | want_v = hpte_encode_avpn(vpn, psize, ssize); | 284 | want_v = hpte_encode_avpn(vpn, bpsize, ssize); |
331 | 285 | ||
332 | DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", | 286 | DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", |
333 | vpn, want_v & HPTE_V_AVPN, slot, newpp); | 287 | vpn, want_v & HPTE_V_AVPN, slot, newpp); |
@@ -335,7 +289,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
335 | native_lock_hpte(hptep); | 289 | native_lock_hpte(hptep); |
336 | 290 | ||
337 | hpte_v = hptep->v; | 291 | hpte_v = hptep->v; |
338 | actual_psize = hpte_actual_psize(hptep, psize); | ||
339 | /* | 292 | /* |
340 | * We need to invalidate the TLB always because hpte_remove doesn't do | 293 | * We need to invalidate the TLB always because hpte_remove doesn't do |
341 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less | 294 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less |
@@ -343,12 +296,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
343 | * (hpte_remove) because we assume the old translation is still | 296 | * (hpte_remove) because we assume the old translation is still |
344 | * technically "valid". | 297 | * technically "valid". |
345 | */ | 298 | */ |
346 | if (actual_psize < 0) { | 299 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { |
347 | actual_psize = psize; | ||
348 | ret = -1; | ||
349 | goto err_out; | ||
350 | } | ||
351 | if (!HPTE_V_COMPARE(hpte_v, want_v)) { | ||
352 | DBG_LOW(" -> miss\n"); | 300 | DBG_LOW(" -> miss\n"); |
353 | ret = -1; | 301 | ret = -1; |
354 | } else { | 302 | } else { |
@@ -357,11 +305,10 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
357 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | | 305 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | |
358 | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); | 306 | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); |
359 | } | 307 | } |
360 | err_out: | ||
361 | native_unlock_hpte(hptep); | 308 | native_unlock_hpte(hptep); |
362 | 309 | ||
363 | /* Ensure it is out of the tlb too. */ | 310 | /* Ensure it is out of the tlb too. */ |
364 | tlbie(vpn, psize, actual_psize, ssize, local); | 311 | tlbie(vpn, bpsize, apsize, ssize, local); |
365 | 312 | ||
366 | return ret; | 313 | return ret; |
367 | } | 314 | } |
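With callers now passing both the base page size (which fixes the AVPN encoding to match) and the actual page size (which selects the tlbie form), the per-call HPTE re-decode is gone and a miss is simply a wrong AVPN or an invalid entry. A sketch of the combined check (bit layout illustrative; the real HPTE_V_COMPARE ignores the low-order bits):

    #include <stdbool.h>
    #include <stdint.h>

    #define HPTE_V_VALID     (1ULL << 0)    /* illustrative positions */
    #define HPTE_V_AVPN_MASK (~0x7fULL)

    static bool hpte_miss(uint64_t hpte_v, uint64_t want_v)
    {
        return ((hpte_v ^ want_v) & HPTE_V_AVPN_MASK) ||
               !(hpte_v & HPTE_V_VALID);
    }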
@@ -402,7 +349,6 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) | |||
402 | static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, | 349 | static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, |
403 | int psize, int ssize) | 350 | int psize, int ssize) |
404 | { | 351 | { |
405 | int actual_psize; | ||
406 | unsigned long vpn; | 352 | unsigned long vpn; |
407 | unsigned long vsid; | 353 | unsigned long vsid; |
408 | long slot; | 354 | long slot; |
@@ -415,36 +361,33 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, | |||
415 | if (slot == -1) | 361 | if (slot == -1) |
416 | panic("could not find page to bolt\n"); | 362 | panic("could not find page to bolt\n"); |
417 | hptep = htab_address + slot; | 363 | hptep = htab_address + slot; |
418 | actual_psize = hpte_actual_psize(hptep, psize); | ||
419 | if (actual_psize < 0) | ||
420 | actual_psize = psize; | ||
421 | 364 | ||
422 | /* Update the HPTE */ | 365 | /* Update the HPTE */ |
423 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | | 366 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | |
424 | (newpp & (HPTE_R_PP | HPTE_R_N)); | 367 | (newpp & (HPTE_R_PP | HPTE_R_N)); |
425 | 368 | /* | |
426 | /* Ensure it is out of the tlb too. */ | 369 | * Ensure it is out of the tlb too. For bolted entries the base and |
427 | tlbie(vpn, psize, actual_psize, ssize, 0); | 370 | * actual page size will be the same. |
371 | */ | ||
372 | tlbie(vpn, psize, psize, ssize, 0); | ||
428 | } | 373 | } |
429 | 374 | ||
430 | static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, | 375 | static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, |
431 | int psize, int ssize, int local) | 376 | int bpsize, int apsize, int ssize, int local) |
432 | { | 377 | { |
433 | struct hash_pte *hptep = htab_address + slot; | 378 | struct hash_pte *hptep = htab_address + slot; |
434 | unsigned long hpte_v; | 379 | unsigned long hpte_v; |
435 | unsigned long want_v; | 380 | unsigned long want_v; |
436 | unsigned long flags; | 381 | unsigned long flags; |
437 | int actual_psize; | ||
438 | 382 | ||
439 | local_irq_save(flags); | 383 | local_irq_save(flags); |
440 | 384 | ||
441 | DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); | 385 | DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); |
442 | 386 | ||
443 | want_v = hpte_encode_avpn(vpn, psize, ssize); | 387 | want_v = hpte_encode_avpn(vpn, bpsize, ssize); |
444 | native_lock_hpte(hptep); | 388 | native_lock_hpte(hptep); |
445 | hpte_v = hptep->v; | 389 | hpte_v = hptep->v; |
446 | 390 | ||
447 | actual_psize = hpte_actual_psize(hptep, psize); | ||
448 | /* | 391 | /* |
449 | * We need to invalidate the TLB always because hpte_remove doesn't do | 392 | * We need to invalidate the TLB always because hpte_remove doesn't do |
450 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less | 393 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less |
@@ -452,23 +395,120 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, | |||
452 | * (hpte_remove) because we assume the old translation is still | 395 | * (hpte_remove) because we assume the old translation is still |
453 | * technically "valid". | 396 | * technically "valid". |
454 | */ | 397 | */ |
455 | if (actual_psize < 0) { | 398 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) |
456 | actual_psize = psize; | ||
457 | native_unlock_hpte(hptep); | ||
458 | goto err_out; | ||
459 | } | ||
460 | if (!HPTE_V_COMPARE(hpte_v, want_v)) | ||
461 | native_unlock_hpte(hptep); | 399 | native_unlock_hpte(hptep); |
462 | else | 400 | else |
463 | /* Invalidate the hpte. NOTE: this also unlocks it */ | 401 | /* Invalidate the hpte. NOTE: this also unlocks it */ |
464 | hptep->v = 0; | 402 | hptep->v = 0; |
465 | 403 | ||
466 | err_out: | ||
467 | /* Invalidate the TLB */ | 404 | /* Invalidate the TLB */ |
468 | tlbie(vpn, psize, actual_psize, ssize, local); | 405 | tlbie(vpn, bpsize, apsize, ssize, local); |
406 | |||
407 | local_irq_restore(flags); | ||
408 | } | ||
409 | |||
410 | static void native_hugepage_invalidate(struct mm_struct *mm, | ||
411 | unsigned char *hpte_slot_array, | ||
412 | unsigned long addr, int psize) | ||
413 | { | ||
414 | int ssize = 0, i; | ||
415 | int lock_tlbie; | ||
416 | struct hash_pte *hptep; | ||
417 | int actual_psize = MMU_PAGE_16M; | ||
418 | unsigned int max_hpte_count, valid; | ||
419 | unsigned long flags, s_addr = addr; | ||
420 | unsigned long hpte_v, want_v, shift; | ||
421 | unsigned long hidx, vpn = 0, vsid, hash, slot; | ||
422 | |||
423 | shift = mmu_psize_defs[psize].shift; | ||
424 | max_hpte_count = 1U << (PMD_SHIFT - shift); | ||
425 | |||
426 | local_irq_save(flags); | ||
427 | for (i = 0; i < max_hpte_count; i++) { | ||
428 | valid = hpte_valid(hpte_slot_array, i); | ||
429 | if (!valid) | ||
430 | continue; | ||
431 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
432 | |||
433 | /* get the vpn */ | ||
434 | addr = s_addr + (i * (1ul << shift)); | ||
435 | if (!is_kernel_addr(addr)) { | ||
436 | ssize = user_segment_size(addr); | ||
437 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
438 | WARN_ON(vsid == 0); | ||
439 | } else { | ||
440 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
441 | ssize = mmu_kernel_ssize; | ||
442 | } | ||
443 | |||
444 | vpn = hpt_vpn(addr, vsid, ssize); | ||
445 | hash = hpt_hash(vpn, shift, ssize); | ||
446 | if (hidx & _PTEIDX_SECONDARY) | ||
447 | hash = ~hash; | ||
448 | |||
449 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
450 | slot += hidx & _PTEIDX_GROUP_IX; | ||
451 | |||
452 | hptep = htab_address + slot; | ||
453 | want_v = hpte_encode_avpn(vpn, psize, ssize); | ||
454 | native_lock_hpte(hptep); | ||
455 | hpte_v = hptep->v; | ||
456 | |||
457 | /* Even if we miss, we need to invalidate the TLB */ | ||
458 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) | ||
459 | native_unlock_hpte(hptep); | ||
460 | else | ||
461 | /* Invalidate the hpte. NOTE: this also unlocks it */ | ||
462 | hptep->v = 0; | ||
463 | } | ||
464 | /* | ||
465 | * Since this is a hugepage, we just need a single tlbie. | ||
466 | * Use the last vpn. | ||
467 | */ | ||
468 | lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | ||
469 | if (lock_tlbie) | ||
470 | raw_spin_lock(&native_tlbie_lock); | ||
471 | |||
472 | asm volatile("ptesync":::"memory"); | ||
473 | __tlbie(vpn, psize, actual_psize, ssize); | ||
474 | asm volatile("eieio; tlbsync; ptesync":::"memory"); | ||
475 | |||
476 | if (lock_tlbie) | ||
477 | raw_spin_unlock(&native_tlbie_lock); | ||
478 | |||
469 | local_irq_restore(flags); | 479 | local_irq_restore(flags); |
470 | } | 480 | } |
471 | 481 | ||
482 | static inline int __hpte_actual_psize(unsigned int lp, int psize) | ||
483 | { | ||
484 | int i, shift; | ||
485 | unsigned int mask; | ||
486 | |||
487 | /* start from 1 ignoring MMU_PAGE_4K */ | ||
488 | for (i = 1; i < MMU_PAGE_COUNT; i++) { | ||
489 | |||
490 | /* invalid penc */ | ||
491 | if (mmu_psize_defs[psize].penc[i] == -1) | ||
492 | continue; | ||
493 | /* | ||
494 | * encoding bits per actual page size | ||
495 | * PTE LP actual page size | ||
496 | * rrrr rrrz >=8KB | ||
497 | * rrrr rrzz >=16KB | ||
498 | * rrrr rzzz >=32KB | ||
499 | * rrrr zzzz >=64KB | ||
500 | * ....... | ||
501 | */ | ||
502 | shift = mmu_psize_defs[i].shift - LP_SHIFT; | ||
503 | if (shift > LP_BITS) | ||
504 | shift = LP_BITS; | ||
505 | mask = (1 << shift) - 1; | ||
506 | if ((lp & mask) == mmu_psize_defs[psize].penc[i]) | ||
507 | return i; | ||
508 | } | ||
509 | return -1; | ||
510 | } | ||
511 | |||
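The match in __hpte_actual_psize() above follows the bit layout in its comment: for an actual page size of 2^shift, the low (shift - LP_SHIFT) bits of the LP field carry the encoding and the bits above them are RPN bits. A minimal user-space sketch of that decode, assuming made-up shift/penc values rather than the real mmu_psize_defs tables:

    /* Sketch of the LP decode above; the size table here is illustrative. */
    #include <stdio.h>

    #define LP_SHIFT 12
    #define LP_BITS  8
    #define NPSIZE   3

    static const int psize_shift[NPSIZE] = { 12, 16, 24 };   /* 4K, 64K, 16M */
    static const int psize_penc[NPSIZE]  = { -1, 0x1, 0x0 }; /* hypothetical */

    static int decode_actual_psize(unsigned int lp)
    {
            int i, shift;
            unsigned int mask;

            /* start from 1, ignoring the 4K base size, as the patch does */
            for (i = 1; i < NPSIZE; i++) {
                    if (psize_penc[i] == -1)
                            continue;
                    shift = psize_shift[i] - LP_SHIFT;
                    if (shift > LP_BITS)
                            shift = LP_BITS;
                    mask = (1u << shift) - 1;  /* low bits holding the penc */
                    if ((lp & mask) == (unsigned int)psize_penc[i])
                            return i;
            }
            return -1;
    }

    int main(void)
    {
            printf("lp=0x1 decodes to psize index %d\n",
                   decode_actual_psize(0x1));
            return 0;
    }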
472 | static void hpte_decode(struct hash_pte *hpte, unsigned long slot, | 512 | static void hpte_decode(struct hash_pte *hpte, unsigned long slot, |
473 | int *psize, int *apsize, int *ssize, unsigned long *vpn) | 513 | int *psize, int *apsize, int *ssize, unsigned long *vpn) |
474 | { | 514 | { |
@@ -672,4 +712,5 @@ void __init hpte_init_native(void) | |||
672 | ppc_md.hpte_remove = native_hpte_remove; | 712 | ppc_md.hpte_remove = native_hpte_remove; |
673 | ppc_md.hpte_clear_all = native_hpte_clear; | 713 | ppc_md.hpte_clear_all = native_hpte_clear; |
674 | ppc_md.flush_hash_range = native_flush_hash_range; | 714 | ppc_md.flush_hash_range = native_flush_hash_range; |
715 | ppc_md.hugepage_invalidate = native_hugepage_invalidate; | ||
675 | } | 716 | } |
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index e303a6d74e3a..6ecc38bd5b24 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -807,7 +807,7 @@ void __init early_init_mmu(void) | |||
807 | } | 807 | } |
808 | 808 | ||
809 | #ifdef CONFIG_SMP | 809 | #ifdef CONFIG_SMP |
810 | void __cpuinit early_init_mmu_secondary(void) | 810 | void early_init_mmu_secondary(void) |
811 | { | 811 | { |
812 | /* Initialize hash table for that CPU */ | 812 | /* Initialize hash table for that CPU */ |
813 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 813 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
@@ -1050,13 +1050,26 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1050 | goto bail; | 1050 | goto bail; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | #ifdef CONFIG_HUGETLB_PAGE | ||
1054 | if (hugeshift) { | 1053 | if (hugeshift) { |
1055 | rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, | 1054 | if (pmd_trans_huge(*(pmd_t *)ptep)) |
1056 | ssize, hugeshift, psize); | 1055 | rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, |
1056 | trap, local, ssize, psize); | ||
1057 | #ifdef CONFIG_HUGETLB_PAGE | ||
1058 | else | ||
1059 | rc = __hash_page_huge(ea, access, vsid, ptep, trap, | ||
1060 | local, ssize, hugeshift, psize); | ||
1061 | #else | ||
1062 | else { | ||
1063 | /* | ||
1064 | * If we have hugeshift and it is not transhuge while | ||
1065 | * hugetlb is disabled, something is really wrong. | ||
1066 | */ | ||
1067 | rc = 1; | ||
1068 | WARN_ON(1); | ||
1069 | } | ||
1070 | #endif | ||
1057 | goto bail; | 1071 | goto bail; |
1058 | } | 1072 | } |
1059 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
1060 | 1073 | ||
1061 | #ifndef CONFIG_PPC_64K_PAGES | 1074 | #ifndef CONFIG_PPC_64K_PAGES |
1062 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); | 1075 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); |
@@ -1145,6 +1158,7 @@ EXPORT_SYMBOL_GPL(hash_page); | |||
1145 | void hash_preload(struct mm_struct *mm, unsigned long ea, | 1158 | void hash_preload(struct mm_struct *mm, unsigned long ea, |
1146 | unsigned long access, unsigned long trap) | 1159 | unsigned long access, unsigned long trap) |
1147 | { | 1160 | { |
1161 | int hugepage_shift; | ||
1148 | unsigned long vsid; | 1162 | unsigned long vsid; |
1149 | pgd_t *pgdir; | 1163 | pgd_t *pgdir; |
1150 | pte_t *ptep; | 1164 | pte_t *ptep; |
@@ -1166,10 +1180,27 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1166 | pgdir = mm->pgd; | 1180 | pgdir = mm->pgd; |
1167 | if (pgdir == NULL) | 1181 | if (pgdir == NULL) |
1168 | return; | 1182 | return; |
1169 | ptep = find_linux_pte(pgdir, ea); | 1183 | |
1170 | if (!ptep) | 1184 | /* Get VSID */ |
1185 | ssize = user_segment_size(ea); | ||
1186 | vsid = get_vsid(mm->context.id, ea, ssize); | ||
1187 | if (!vsid) | ||
1171 | return; | 1188 | return; |
1189 | /* | ||
1190 | * Hash doesn't like irqs. Walking the linux page tables with irqs | ||
1191 | * disabled saves us from holding multiple locks. | ||
1192 | */ | ||
1193 | local_irq_save(flags); | ||
1194 | |||
1195 | /* | ||
1196 | * THP pages use update_mmu_cache_pmd. We don't do | ||
1197 | * hash preload there. Hence we can ignore THP here. | ||
1198 | */ | ||
1199 | ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift); | ||
1200 | if (!ptep) | ||
1201 | goto out_exit; | ||
1172 | 1202 | ||
1203 | WARN_ON(hugepage_shift); | ||
1173 | #ifdef CONFIG_PPC_64K_PAGES | 1204 | #ifdef CONFIG_PPC_64K_PAGES |
1174 | /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on | 1205 | /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on |
1175 | * a 64K kernel), then we don't preload, hash_page() will take | 1206 | * a 64K kernel), then we don't preload, hash_page() will take |
@@ -1178,18 +1209,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1178 | * page size demotion here | 1209 | * page size demotion here |
1179 | */ | 1210 | */ |
1180 | if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) | 1211 | if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) |
1181 | return; | 1212 | goto out_exit; |
1182 | #endif /* CONFIG_PPC_64K_PAGES */ | 1213 | #endif /* CONFIG_PPC_64K_PAGES */ |
1183 | 1214 | ||
1184 | /* Get VSID */ | ||
1185 | ssize = user_segment_size(ea); | ||
1186 | vsid = get_vsid(mm->context.id, ea, ssize); | ||
1187 | if (!vsid) | ||
1188 | return; | ||
1189 | |||
1190 | /* Hash doesn't like irqs */ | ||
1191 | local_irq_save(flags); | ||
1192 | |||
1193 | /* Is that local to this CPU ? */ | 1215 | /* Is that local to this CPU ? */ |
1194 | if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) | 1216 | if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) |
1195 | local = 1; | 1217 | local = 1; |
@@ -1211,7 +1233,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1211 | mm->context.user_psize, | 1233 | mm->context.user_psize, |
1212 | mm->context.user_psize, | 1234 | mm->context.user_psize, |
1213 | pte_val(*ptep)); | 1235 | pte_val(*ptep)); |
1214 | 1236 | out_exit: | |
1215 | local_irq_restore(flags); | 1237 | local_irq_restore(flags); |
1216 | } | 1238 | } |
1217 | 1239 | ||
@@ -1232,7 +1254,11 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, | |||
1232 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1254 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
1233 | slot += hidx & _PTEIDX_GROUP_IX; | 1255 | slot += hidx & _PTEIDX_GROUP_IX; |
1234 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); | 1256 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); |
1235 | ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); | 1257 | /* |
1258 | * We use the same base page size and actual psize, because we don't | ||
1259 | * use these functions for hugepages | ||
1260 | */ | ||
1261 | ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); | ||
1236 | } pte_iterate_hashed_end(); | 1262 | } pte_iterate_hashed_end(); |
1237 | 1263 | ||
1238 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 1264 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
@@ -1365,7 +1391,8 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) | |||
1365 | hash = ~hash; | 1391 | hash = ~hash; |
1366 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1392 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
1367 | slot += hidx & _PTEIDX_GROUP_IX; | 1393 | slot += hidx & _PTEIDX_GROUP_IX; |
1368 | ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0); | 1394 | ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, |
1395 | mmu_kernel_ssize, 0); | ||
1369 | } | 1396 | } |
1370 | 1397 | ||
1371 | void kernel_map_pages(struct page *page, int numpages, int enable) | 1398 | void kernel_map_pages(struct page *page, int numpages, int enable) |
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c new file mode 100644 index 000000000000..34de9e0cdc34 --- /dev/null +++ b/arch/powerpc/mm/hugepage-hash64.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corporation, 2013 | ||
3 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * PPC64 THP Support for hash based MMUs | ||
17 | */ | ||
18 | #include <linux/mm.h> | ||
19 | #include <asm/machdep.h> | ||
20 | |||
21 | int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | ||
22 | pmd_t *pmdp, unsigned long trap, int local, int ssize, | ||
23 | unsigned int psize) | ||
24 | { | ||
25 | unsigned int index, valid; | ||
26 | unsigned char *hpte_slot_array; | ||
27 | unsigned long rflags, pa, hidx; | ||
28 | unsigned long old_pmd, new_pmd; | ||
29 | int ret, lpsize = MMU_PAGE_16M; | ||
30 | unsigned long vpn, hash, shift, slot; | ||
31 | |||
32 | /* | ||
33 | * atomically mark the linux large page PMD busy and dirty | ||
34 | */ | ||
35 | do { | ||
36 | old_pmd = pmd_val(*pmdp); | ||
37 | /* If PMD busy, retry the access */ | ||
38 | if (unlikely(old_pmd & _PAGE_BUSY)) | ||
39 | return 0; | ||
40 | /* If PMD is trans splitting retry the access */ | ||
41 | if (unlikely(old_pmd & _PAGE_SPLITTING)) | ||
42 | return 0; | ||
43 | /* If PMD permissions don't match, take page fault */ | ||
44 | if (unlikely(access & ~old_pmd)) | ||
45 | return 1; | ||
46 | /* | ||
47 | * Try to lock the PTE, add ACCESSED and DIRTY if it was | ||
48 | * a write access | ||
49 | */ | ||
50 | new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED; | ||
51 | if (access & _PAGE_RW) | ||
52 | new_pmd |= _PAGE_DIRTY; | ||
53 | } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp, | ||
54 | old_pmd, new_pmd)); | ||
55 | /* | ||
56 | * PP bits. _PAGE_USER is already PP bit 0x2, so we only | ||
57 | * need to add in 0x1 if it's a read-only user page | ||
58 | */ | ||
59 | rflags = new_pmd & _PAGE_USER; | ||
60 | if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) && | ||
61 | (new_pmd & _PAGE_DIRTY))) | ||
62 | rflags |= 0x1; | ||
63 | /* | ||
64 | * _PAGE_EXEC -> HW_NO_EXEC since it's inverted | ||
65 | */ | ||
66 | rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
67 | |||
68 | #if 0 | ||
69 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { | ||
70 | |||
71 | /* | ||
72 | * No CPU has hugepages but lacks no execute, so we | ||
73 | * don't need to worry about that case | ||
74 | */ | ||
75 | rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); | ||
76 | } | ||
77 | #endif | ||
78 | /* | ||
79 | * Find the slot index details for this ea, using base page size. | ||
80 | */ | ||
81 | shift = mmu_psize_defs[psize].shift; | ||
82 | index = (ea & ~HPAGE_PMD_MASK) >> shift; | ||
83 | BUG_ON(index >= 4096); | ||
84 | |||
85 | vpn = hpt_vpn(ea, vsid, ssize); | ||
86 | hash = hpt_hash(vpn, shift, ssize); | ||
87 | hpte_slot_array = get_hpte_slot_array(pmdp); | ||
88 | |||
89 | valid = hpte_valid(hpte_slot_array, index); | ||
90 | if (valid) { | ||
91 | /* update the hpte bits */ | ||
92 | hidx = hpte_hash_index(hpte_slot_array, index); | ||
93 | if (hidx & _PTEIDX_SECONDARY) | ||
94 | hash = ~hash; | ||
95 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
96 | slot += hidx & _PTEIDX_GROUP_IX; | ||
97 | |||
98 | ret = ppc_md.hpte_updatepp(slot, rflags, vpn, | ||
99 | psize, lpsize, ssize, local); | ||
100 | /* | ||
101 | * We failed to update, try to insert a new entry. | ||
102 | */ | ||
103 | if (ret == -1) { | ||
104 | /* | ||
105 | * large pte is marked busy, so we can be sure | ||
106 | * nobody is looking at hpte_slot_array. Hence we can | ||
107 | * safely update this here. | ||
108 | */ | ||
109 | valid = 0; | ||
110 | new_pmd &= ~_PAGE_HPTEFLAGS; | ||
111 | hpte_slot_array[index] = 0; | ||
112 | } else | ||
113 | /* clear the busy bits and set the hash pte bits */ | ||
114 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
115 | } | ||
116 | |||
117 | if (!valid) { | ||
118 | unsigned long hpte_group; | ||
119 | |||
120 | /* insert new entry */ | ||
121 | pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; | ||
122 | repeat: | ||
123 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | ||
124 | |||
125 | /* clear the busy bits and set the hash pte bits */ | ||
126 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
127 | |||
128 | /* Add in WIMG bits */ | ||
129 | rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
130 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
131 | |||
132 | /* Insert into the hash table, primary slot */ | ||
133 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, | ||
134 | psize, lpsize, ssize); | ||
135 | /* | ||
136 | * Primary is full, try the secondary | ||
137 | */ | ||
138 | if (unlikely(slot == -1)) { | ||
139 | hpte_group = ((~hash & htab_hash_mask) * | ||
140 | HPTES_PER_GROUP) & ~0x7UL; | ||
141 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, | ||
142 | rflags, HPTE_V_SECONDARY, | ||
143 | psize, lpsize, ssize); | ||
144 | if (slot == -1) { | ||
145 | if (mftb() & 0x1) | ||
146 | hpte_group = ((hash & htab_hash_mask) * | ||
147 | HPTES_PER_GROUP) & ~0x7UL; | ||
148 | |||
149 | ppc_md.hpte_remove(hpte_group); | ||
150 | goto repeat; | ||
151 | } | ||
152 | } | ||
153 | /* | ||
154 | * Hypervisor failure. Restore old pmd and return -1 | ||
155 | * similar to __hash_page_* | ||
156 | */ | ||
157 | if (unlikely(slot == -2)) { | ||
158 | *pmdp = __pmd(old_pmd); | ||
159 | hash_failure_debug(ea, access, vsid, trap, ssize, | ||
160 | psize, lpsize, old_pmd); | ||
161 | return -1; | ||
162 | } | ||
163 | /* | ||
164 | * large pte is marked busy, so we can be sure | ||
165 | * nobody is looking at hpte_slot_array. hence we can | ||
166 | * safely update this here. | ||
167 | */ | ||
168 | mark_hpte_slot_valid(hpte_slot_array, index, slot); | ||
169 | } | ||
170 | /* | ||
171 | * No need to use ldarx/stdcx here | ||
172 | */ | ||
173 | *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); | ||
174 | return 0; | ||
175 | } | ||
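One constant worth unpacking in __hash_page_thp() above is the BUG_ON(index >= 4096): index walks the base pages inside the hugepage, and 4096 is the worst case of a 16MB hugepage over 4KB base pages (16M / 4K). A sketch of the index arithmetic, assuming exactly those two sizes:

    /* Sketch: base-page index inside a 16MB hugepage, 4KB base assumed. */
    #define HPAGE_SHIFT_16M 24
    #define BASE_SHIFT_4K   12

    static unsigned int subpage_index(unsigned long ea)
    {
            unsigned long off = ea & ((1UL << HPAGE_SHIFT_16M) - 1);

            return off >> BASE_SHIFT_4K;    /* 0 .. 4095 */
    }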
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0f1d94a1fb82..0b7fb6761015 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c | |||
@@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, | |||
81 | slot += (old_pte & _PAGE_F_GIX) >> 12; | 81 | slot += (old_pte & _PAGE_F_GIX) >> 12; |
82 | 82 | ||
83 | if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, | 83 | if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, |
84 | ssize, local) == -1) | 84 | mmu_psize, ssize, local) == -1) |
85 | old_pte &= ~_PAGE_HPTEFLAGS; | 85 | old_pte &= ~_PAGE_HPTEFLAGS; |
86 | } | 86 | } |
87 | 87 | ||
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 4210549ac95e..834ca8eb38f2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -21,6 +21,9 @@ | |||
21 | #include <asm/pgalloc.h> | 21 | #include <asm/pgalloc.h> |
22 | #include <asm/tlb.h> | 22 | #include <asm/tlb.h> |
23 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
24 | #include <asm/hugetlb.h> | ||
25 | |||
26 | #ifdef CONFIG_HUGETLB_PAGE | ||
24 | 27 | ||
25 | #define PAGE_SHIFT_64K 16 | 28 | #define PAGE_SHIFT_64K 16 |
26 | #define PAGE_SHIFT_16M 24 | 29 | #define PAGE_SHIFT_16M 24 |
@@ -100,68 +103,9 @@ int pgd_huge(pgd_t pgd) | |||
100 | } | 103 | } |
101 | #endif | 104 | #endif |
102 | 105 | ||
103 | /* | ||
104 | * We have 4 cases for pgds and pmds: | ||
105 | * (1) invalid (all zeroes) | ||
106 | * (2) pointer to next table, as normal; bottom 6 bits == 0 | ||
107 | * (3) leaf pte for huge page, bottom two bits != 00 | ||
108 | * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table | ||
109 | */ | ||
110 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
111 | { | ||
112 | pgd_t *pg; | ||
113 | pud_t *pu; | ||
114 | pmd_t *pm; | ||
115 | pte_t *ret_pte; | ||
116 | hugepd_t *hpdp = NULL; | ||
117 | unsigned pdshift = PGDIR_SHIFT; | ||
118 | |||
119 | if (shift) | ||
120 | *shift = 0; | ||
121 | |||
122 | pg = pgdir + pgd_index(ea); | ||
123 | |||
124 | if (pgd_huge(*pg)) { | ||
125 | ret_pte = (pte_t *) pg; | ||
126 | goto out; | ||
127 | } else if (is_hugepd(pg)) | ||
128 | hpdp = (hugepd_t *)pg; | ||
129 | else if (!pgd_none(*pg)) { | ||
130 | pdshift = PUD_SHIFT; | ||
131 | pu = pud_offset(pg, ea); | ||
132 | |||
133 | if (pud_huge(*pu)) { | ||
134 | ret_pte = (pte_t *) pu; | ||
135 | goto out; | ||
136 | } else if (is_hugepd(pu)) | ||
137 | hpdp = (hugepd_t *)pu; | ||
138 | else if (!pud_none(*pu)) { | ||
139 | pdshift = PMD_SHIFT; | ||
140 | pm = pmd_offset(pu, ea); | ||
141 | |||
142 | if (pmd_huge(*pm)) { | ||
143 | ret_pte = (pte_t *) pm; | ||
144 | goto out; | ||
145 | } else if (is_hugepd(pm)) | ||
146 | hpdp = (hugepd_t *)pm; | ||
147 | else if (!pmd_none(*pm)) | ||
148 | return pte_offset_kernel(pm, ea); | ||
149 | } | ||
150 | } | ||
151 | if (!hpdp) | ||
152 | return NULL; | ||
153 | |||
154 | ret_pte = hugepte_offset(hpdp, ea, pdshift); | ||
155 | pdshift = hugepd_shift(*hpdp); | ||
156 | out: | ||
157 | if (shift) | ||
158 | *shift = pdshift; | ||
159 | return ret_pte; | ||
160 | } | ||
161 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | ||
162 | |||
163 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 106 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
164 | { | 107 | { |
108 | /* Only called for hugetlbfs pages, hence we can ignore THP */ | ||
165 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | 109 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); |
166 | } | 110 | } |
167 | 111 | ||
@@ -736,11 +680,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |||
736 | struct page *page; | 680 | struct page *page; |
737 | unsigned shift; | 681 | unsigned shift; |
738 | unsigned long mask; | 682 | unsigned long mask; |
739 | 683 | /* | |
684 | * Transparent hugepages are handled by generic code. We can skip them | ||
685 | * here. | ||
686 | */ | ||
740 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | 687 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); |
741 | 688 | ||
742 | /* Verify it is a huge page else bail. */ | 689 | /* Verify it is a huge page else bail. */ |
743 | if (!ptep || !shift) | 690 | if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) |
744 | return ERR_PTR(-EINVAL); | 691 | return ERR_PTR(-EINVAL); |
745 | 692 | ||
746 | mask = (1UL << shift) - 1; | 693 | mask = (1UL << shift) - 1; |
@@ -759,69 +706,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
759 | return NULL; | 706 | return NULL; |
760 | } | 707 | } |
761 | 708 | ||
762 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
763 | unsigned long end, int write, struct page **pages, int *nr) | ||
764 | { | ||
765 | unsigned long mask; | ||
766 | unsigned long pte_end; | ||
767 | struct page *head, *page, *tail; | ||
768 | pte_t pte; | ||
769 | int refs; | ||
770 | |||
771 | pte_end = (addr + sz) & ~(sz-1); | ||
772 | if (pte_end < end) | ||
773 | end = pte_end; | ||
774 | |||
775 | pte = *ptep; | ||
776 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
777 | if (write) | ||
778 | mask |= _PAGE_RW; | ||
779 | |||
780 | if ((pte_val(pte) & mask) != mask) | ||
781 | return 0; | ||
782 | |||
783 | /* hugepages are never "special" */ | ||
784 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
785 | |||
786 | refs = 0; | ||
787 | head = pte_page(pte); | ||
788 | |||
789 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
790 | tail = page; | ||
791 | do { | ||
792 | VM_BUG_ON(compound_head(page) != head); | ||
793 | pages[*nr] = page; | ||
794 | (*nr)++; | ||
795 | page++; | ||
796 | refs++; | ||
797 | } while (addr += PAGE_SIZE, addr != end); | ||
798 | |||
799 | if (!page_cache_add_speculative(head, refs)) { | ||
800 | *nr -= refs; | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
805 | /* Could be optimized better */ | ||
806 | *nr -= refs; | ||
807 | while (refs--) | ||
808 | put_page(head); | ||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * Any tail page need their mapcount reference taken before we | ||
814 | * return. | ||
815 | */ | ||
816 | while (refs--) { | ||
817 | if (PageTail(tail)) | ||
818 | get_huge_page_tail(tail); | ||
819 | tail++; | ||
820 | } | ||
821 | |||
822 | return 1; | ||
823 | } | ||
824 | |||
825 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, | 709 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, |
826 | unsigned long sz) | 710 | unsigned long sz) |
827 | { | 711 | { |
@@ -1038,3 +922,168 @@ void flush_dcache_icache_hugepage(struct page *page) | |||
1038 | } | 922 | } |
1039 | } | 923 | } |
1040 | } | 924 | } |
925 | |||
926 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
927 | |||
928 | /* | ||
929 | * We have 4 cases for pgds and pmds: | ||
930 | * (1) invalid (all zeroes) | ||
931 | * (2) pointer to next table, as normal; bottom 6 bits == 0 | ||
932 | * (3) leaf pte for huge page, bottom two bits != 00 | ||
933 | * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table | ||
934 | * | ||
935 | * So long as we atomically load page table pointers we are safe against teardown, | ||
936 | * and we can follow the address down to the page and take a ref on it. | ||
937 | */ | ||
938 | |||
939 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
940 | { | ||
941 | pgd_t pgd, *pgdp; | ||
942 | pud_t pud, *pudp; | ||
943 | pmd_t pmd, *pmdp; | ||
944 | pte_t *ret_pte; | ||
945 | hugepd_t *hpdp = NULL; | ||
946 | unsigned pdshift = PGDIR_SHIFT; | ||
947 | |||
948 | if (shift) | ||
949 | *shift = 0; | ||
950 | |||
951 | pgdp = pgdir + pgd_index(ea); | ||
952 | pgd = ACCESS_ONCE(*pgdp); | ||
953 | /* | ||
954 | * Always operate on the local stack value. This makes sure the | ||
955 | * value doesn't get updated by a parallel THP split/collapse, | ||
956 | * page fault or page unmap. The returned pte_t * is still not | ||
957 | * stable, so callers must recheck it for the above conditions. | ||
958 | */ | ||
959 | if (pgd_none(pgd)) | ||
960 | return NULL; | ||
961 | else if (pgd_huge(pgd)) { | ||
962 | ret_pte = (pte_t *) pgdp; | ||
963 | goto out; | ||
964 | } else if (is_hugepd(&pgd)) | ||
965 | hpdp = (hugepd_t *)&pgd; | ||
966 | else { | ||
967 | /* | ||
968 | * Even if we end up with an unmap, the pgtable will not | ||
969 | * be freed, because we do an RCU free and we run here | ||
970 | * with irqs disabled | ||
971 | */ | ||
972 | pdshift = PUD_SHIFT; | ||
973 | pudp = pud_offset(&pgd, ea); | ||
974 | pud = ACCESS_ONCE(*pudp); | ||
975 | |||
976 | if (pud_none(pud)) | ||
977 | return NULL; | ||
978 | else if (pud_huge(pud)) { | ||
979 | ret_pte = (pte_t *) pudp; | ||
980 | goto out; | ||
981 | } else if (is_hugepd(&pud)) | ||
982 | hpdp = (hugepd_t *)&pud; | ||
983 | else { | ||
984 | pdshift = PMD_SHIFT; | ||
985 | pmdp = pmd_offset(&pud, ea); | ||
986 | pmd = ACCESS_ONCE(*pmdp); | ||
987 | /* | ||
988 | * A hugepage collapse is captured by pmd_none, because | ||
989 | * it marks the pmd none and does a hpte invalidate. | ||
990 | * | ||
991 | * A hugepage split is captured by pmd_trans_splitting | ||
992 | * because we mark the pmd trans splitting and do a | ||
993 | * hpte invalidate. | ||
994 | * | ||
995 | */ | ||
996 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
997 | return NULL; | ||
998 | |||
999 | if (pmd_huge(pmd) || pmd_large(pmd)) { | ||
1000 | ret_pte = (pte_t *) pmdp; | ||
1001 | goto out; | ||
1002 | } else if (is_hugepd(&pmd)) | ||
1003 | hpdp = (hugepd_t *)&pmd; | ||
1004 | else | ||
1005 | return pte_offset_kernel(&pmd, ea); | ||
1006 | } | ||
1007 | } | ||
1008 | if (!hpdp) | ||
1009 | return NULL; | ||
1010 | |||
1011 | ret_pte = hugepte_offset(hpdp, ea, pdshift); | ||
1012 | pdshift = hugepd_shift(*hpdp); | ||
1013 | out: | ||
1014 | if (shift) | ||
1015 | *shift = pdshift; | ||
1016 | return ret_pte; | ||
1017 | } | ||
1018 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | ||
1019 | |||
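The comment in find_linux_pte_or_hugepte() above only holds in a context where the RCU-freed page tables cannot disappear, i.e. with interrupts off. A sketch of the expected calling pattern, mirroring the hash_preload() change earlier in this patch (everything but find_linux_pte_or_hugepte itself is illustrative):

    /* Sketch of a caller; the irq-off region pins the RCU-freed tables. */
    static int ea_is_mapped(struct mm_struct *mm, unsigned long ea)
    {
            unsigned int hugepage_shift;
            unsigned long flags;
            pte_t *ptep;
            int ret = 0;

            local_irq_save(flags);
            ptep = find_linux_pte_or_hugepte(mm->pgd, ea, &hugepage_shift);
            if (ptep)
                    ret = 1;        /* still must recheck *ptep before use */
            local_irq_restore(flags);
            return ret;
    }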
1020 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
1021 | unsigned long end, int write, struct page **pages, int *nr) | ||
1022 | { | ||
1023 | unsigned long mask; | ||
1024 | unsigned long pte_end; | ||
1025 | struct page *head, *page, *tail; | ||
1026 | pte_t pte; | ||
1027 | int refs; | ||
1028 | |||
1029 | pte_end = (addr + sz) & ~(sz-1); | ||
1030 | if (pte_end < end) | ||
1031 | end = pte_end; | ||
1032 | |||
1033 | pte = ACCESS_ONCE(*ptep); | ||
1034 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
1035 | if (write) | ||
1036 | mask |= _PAGE_RW; | ||
1037 | |||
1038 | if ((pte_val(pte) & mask) != mask) | ||
1039 | return 0; | ||
1040 | |||
1041 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
1042 | /* | ||
1043 | * check for splitting here | ||
1044 | */ | ||
1045 | if (pmd_trans_splitting(pte_pmd(pte))) | ||
1046 | return 0; | ||
1047 | #endif | ||
1048 | |||
1049 | /* hugepages are never "special" */ | ||
1050 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
1051 | |||
1052 | refs = 0; | ||
1053 | head = pte_page(pte); | ||
1054 | |||
1055 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
1056 | tail = page; | ||
1057 | do { | ||
1058 | VM_BUG_ON(compound_head(page) != head); | ||
1059 | pages[*nr] = page; | ||
1060 | (*nr)++; | ||
1061 | page++; | ||
1062 | refs++; | ||
1063 | } while (addr += PAGE_SIZE, addr != end); | ||
1064 | |||
1065 | if (!page_cache_add_speculative(head, refs)) { | ||
1066 | *nr -= refs; | ||
1067 | return 0; | ||
1068 | } | ||
1069 | |||
1070 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
1071 | /* Could be optimized better */ | ||
1072 | *nr -= refs; | ||
1073 | while (refs--) | ||
1074 | put_page(head); | ||
1075 | return 0; | ||
1076 | } | ||
1077 | |||
1078 | /* | ||
1079 | * Any tail pages need their mapcount references taken before we | ||
1080 | * return. | ||
1081 | */ | ||
1082 | while (refs--) { | ||
1083 | if (PageTail(tail)) | ||
1084 | get_huge_page_tail(tail); | ||
1085 | tail++; | ||
1086 | } | ||
1087 | |||
1088 | return 1; | ||
1089 | } | ||
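gup_hugepte() above follows the standard lockless-GUP discipline: snapshot the pte, take speculative references, then re-read the pte and back the references out if it changed. Stripped of the tail-page bookkeeping, the skeleton is roughly this (pte_allows() is a made-up stand-in for the _PAGE_PRESENT/_PAGE_USER/_PAGE_RW mask test):

    /* Condensed skeleton of the lockless-GUP pattern used above. */
    pte = ACCESS_ONCE(*ptep);                 /* 1. snapshot */
    if (!pte_allows(pte, write))              /* 2. check the snapshot only */
            return 0;
    if (!page_cache_add_speculative(head, refs))
            return 0;                         /* 3. page was being freed */
    if (pte_val(pte) != pte_val(*ptep)) {     /* 4. raced with unmap/split */
            while (refs--)
                    put_page(head);
            return 0;
    }
    return 1;                                 /* refs are now stable */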
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a90b9c458990..d0cd9e4c6837 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -88,7 +88,11 @@ static void pgd_ctor(void *addr) | |||
88 | 88 | ||
89 | static void pmd_ctor(void *addr) | 89 | static void pmd_ctor(void *addr) |
90 | { | 90 | { |
91 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
92 | memset(addr, 0, PMD_TABLE_SIZE * 2); | ||
93 | #else | ||
91 | memset(addr, 0, PMD_TABLE_SIZE); | 94 | memset(addr, 0, PMD_TABLE_SIZE); |
95 | #endif | ||
92 | } | 96 | } |
93 | 97 | ||
94 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; | 98 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; |
@@ -137,10 +141,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) | |||
137 | void pgtable_cache_init(void) | 141 | void pgtable_cache_init(void) |
138 | { | 142 | { |
139 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); | 143 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); |
140 | pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); | 144 | pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); |
141 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) | 145 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) |
142 | panic("Couldn't allocate pgtable caches"); | 146 | panic("Couldn't allocate pgtable caches"); |
143 | |||
144 | /* In all current configs, when the PUD index exists it's the | 147 | /* In all current configs, when the PUD index exists it's the |
145 | * same size as either the pgd or pmd index. Verify that the | 148 | * same size as either the pgd or pmd index. Verify that the |
146 | * initialization above has also created a PUD cache. This | 149 | * initialization above has also created a PUD cache. This |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1cb1ea133a2c..7f4bea162026 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
@@ -461,6 +461,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, | |||
461 | pte_t *ptep) | 461 | pte_t *ptep) |
462 | { | 462 | { |
463 | #ifdef CONFIG_PPC_STD_MMU | 463 | #ifdef CONFIG_PPC_STD_MMU |
464 | /* | ||
465 | * We don't need to worry about _PAGE_PRESENT here because we are | ||
466 | * called with either mm->page_table_lock held or ptl lock held | ||
467 | */ | ||
464 | unsigned long access = 0, trap; | 468 | unsigned long access = 0, trap; |
465 | 469 | ||
466 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | 470 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ |
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap.c index 67a42ed0d2fc..67a42ed0d2fc 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap.c | |||
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index e779642c25e5..af3d78e19302 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c | |||
@@ -112,8 +112,10 @@ static unsigned int steal_context_smp(unsigned int id) | |||
112 | */ | 112 | */ |
113 | for_each_cpu(cpu, mm_cpumask(mm)) { | 113 | for_each_cpu(cpu, mm_cpumask(mm)) { |
114 | for (i = cpu_first_thread_sibling(cpu); | 114 | for (i = cpu_first_thread_sibling(cpu); |
115 | i <= cpu_last_thread_sibling(cpu); i++) | 115 | i <= cpu_last_thread_sibling(cpu); i++) { |
116 | __set_bit(id, stale_map[i]); | 116 | if (stale_map[i]) |
117 | __set_bit(id, stale_map[i]); | ||
118 | } | ||
117 | cpu = i - 1; | 119 | cpu = i - 1; |
118 | } | 120 | } |
119 | return id; | 121 | return id; |
@@ -272,7 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
272 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ | 274 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ |
273 | for (i = cpu_first_thread_sibling(cpu); | 275 | for (i = cpu_first_thread_sibling(cpu); |
274 | i <= cpu_last_thread_sibling(cpu); i++) { | 276 | i <= cpu_last_thread_sibling(cpu); i++) { |
275 | __clear_bit(id, stale_map[i]); | 277 | if (stale_map[i]) |
278 | __clear_bit(id, stale_map[i]); | ||
276 | } | 279 | } |
277 | } | 280 | } |
278 | 281 | ||
@@ -329,8 +332,8 @@ void destroy_context(struct mm_struct *mm) | |||
329 | 332 | ||
330 | #ifdef CONFIG_SMP | 333 | #ifdef CONFIG_SMP |
331 | 334 | ||
332 | static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | 335 | static int mmu_context_cpu_notify(struct notifier_block *self, |
333 | unsigned long action, void *hcpu) | 336 | unsigned long action, void *hcpu) |
334 | { | 337 | { |
335 | unsigned int cpu = (unsigned int)(long)hcpu; | 338 | unsigned int cpu = (unsigned int)(long)hcpu; |
336 | 339 | ||
@@ -363,7 +366,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
363 | return NOTIFY_OK; | 366 | return NOTIFY_OK; |
364 | } | 367 | } |
365 | 368 | ||
366 | static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { | 369 | static struct notifier_block mmu_context_cpu_nb = { |
367 | .notifier_call = mmu_context_cpu_notify, | 370 | .notifier_call = mmu_context_cpu_notify, |
368 | }; | 371 | }; |
369 | 372 | ||
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 88c0425dc0a8..08397217e8ac 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -516,7 +516,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, | |||
516 | * Figure out to which domain a cpu belongs and stick it there. | 516 | * Figure out to which domain a cpu belongs and stick it there. |
517 | * Return the id of the domain used. | 517 | * Return the id of the domain used. |
518 | */ | 518 | */ |
519 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) | 519 | static int numa_setup_cpu(unsigned long lcpu) |
520 | { | 520 | { |
521 | int nid = 0; | 521 | int nid = 0; |
522 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); | 522 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); |
@@ -538,8 +538,7 @@ out: | |||
538 | return nid; | 538 | return nid; |
539 | } | 539 | } |
540 | 540 | ||
541 | static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, | 541 | static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, |
542 | unsigned long action, | ||
543 | void *hcpu) | 542 | void *hcpu) |
544 | { | 543 | { |
545 | unsigned long lcpu = (unsigned long)hcpu; | 544 | unsigned long lcpu = (unsigned long)hcpu; |
@@ -919,7 +918,7 @@ static void __init *careful_zallocation(int nid, unsigned long size, | |||
919 | return ret; | 918 | return ret; |
920 | } | 919 | } |
921 | 920 | ||
922 | static struct notifier_block __cpuinitdata ppc64_numa_nb = { | 921 | static struct notifier_block ppc64_numa_nb = { |
923 | .notifier_call = cpu_numa_callback, | 922 | .notifier_call = cpu_numa_callback, |
924 | .priority = 1 /* Must run before sched domains notifier. */ | 923 | .priority = 1 /* Must run before sched domains notifier. */ |
925 | }; | 924 | }; |
@@ -1433,11 +1432,9 @@ static int update_cpu_topology(void *data) | |||
1433 | if (cpu != update->cpu) | 1432 | if (cpu != update->cpu) |
1434 | continue; | 1433 | continue; |
1435 | 1434 | ||
1436 | unregister_cpu_under_node(update->cpu, update->old_nid); | ||
1437 | unmap_cpu_from_node(update->cpu); | 1435 | unmap_cpu_from_node(update->cpu); |
1438 | map_cpu_to_node(update->cpu, update->new_nid); | 1436 | map_cpu_to_node(update->cpu, update->new_nid); |
1439 | vdso_getcpu_init(); | 1437 | vdso_getcpu_init(); |
1440 | register_cpu_under_node(update->cpu, update->new_nid); | ||
1441 | } | 1438 | } |
1442 | 1439 | ||
1443 | return 0; | 1440 | return 0; |
@@ -1485,6 +1482,9 @@ int arch_update_cpu_topology(void) | |||
1485 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); | 1482 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); |
1486 | 1483 | ||
1487 | for (ud = &updates[0]; ud; ud = ud->next) { | 1484 | for (ud = &updates[0]; ud; ud = ud->next) { |
1485 | unregister_cpu_under_node(ud->cpu, ud->old_nid); | ||
1486 | register_cpu_under_node(ud->cpu, ud->new_nid); | ||
1487 | |||
1488 | dev = get_cpu_device(ud->cpu); | 1488 | dev = get_cpu_device(ud->cpu); |
1489 | if (dev) | 1489 | if (dev) |
1490 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); | 1490 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); |
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 214130a4edc6..edda589795c3 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -235,6 +235,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) | |||
235 | pud = pud_offset(pgd, addr); | 235 | pud = pud_offset(pgd, addr); |
236 | BUG_ON(pud_none(*pud)); | 236 | BUG_ON(pud_none(*pud)); |
237 | pmd = pmd_offset(pud, addr); | 237 | pmd = pmd_offset(pud, addr); |
238 | /* | ||
239 | * khugepaged to collapse normal pages to hugepage, first set | ||
240 | * pmd to none to force page fault/gup to take mmap_sem. After | ||
241 | * pmd is set to none, we do a pte_clear which does this assertion | ||
242 | * so if we find pmd none, return. | ||
243 | */ | ||
244 | if (pmd_none(*pmd)) | ||
245 | return; | ||
238 | BUG_ON(!pmd_present(*pmd)); | 246 | BUG_ON(!pmd_present(*pmd)); |
239 | assert_spin_locked(pte_lockptr(mm, pmd)); | 247 | assert_spin_locked(pte_lockptr(mm, pmd)); |
240 | } | 248 | } |
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index a854096e1023..536eec72c0f7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap); | |||
338 | EXPORT_SYMBOL(__iounmap); | 338 | EXPORT_SYMBOL(__iounmap); |
339 | EXPORT_SYMBOL(__iounmap_at); | 339 | EXPORT_SYMBOL(__iounmap_at); |
340 | 340 | ||
341 | /* | ||
342 | * For a hugepage we have the pfn in the pmd; bits below PTE_RPN_SHIFT hold flags. | ||
343 | * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. | ||
344 | */ | ||
345 | struct page *pmd_page(pmd_t pmd) | ||
346 | { | ||
347 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
348 | if (pmd_trans_huge(pmd)) | ||
349 | return pfn_to_page(pmd_pfn(pmd)); | ||
350 | #endif | ||
351 | return virt_to_page(pmd_page_vaddr(pmd)); | ||
352 | } | ||
353 | |||
341 | #ifdef CONFIG_PPC_64K_PAGES | 354 | #ifdef CONFIG_PPC_64K_PAGES |
342 | static pte_t *get_from_cache(struct mm_struct *mm) | 355 | static pte_t *get_from_cache(struct mm_struct *mm) |
343 | { | 356 | { |
@@ -455,3 +468,404 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) | |||
455 | } | 468 | } |
456 | #endif | 469 | #endif |
457 | #endif /* CONFIG_PPC_64K_PAGES */ | 470 | #endif /* CONFIG_PPC_64K_PAGES */ |
471 | |||
472 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
473 | |||
474 | /* | ||
475 | * This is called when relaxing access to a hugepage. It's also called in the page | ||
476 | * fault path when we don't hit any of the major fault cases, ie, a minor | ||
477 | * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have | ||
478 | * handled those two for us; we additionally deal with missing execute | ||
479 | * permission here on some processors. | ||
480 | */ | ||
481 | int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, | ||
482 | pmd_t *pmdp, pmd_t entry, int dirty) | ||
483 | { | ||
484 | int changed; | ||
485 | #ifdef CONFIG_DEBUG_VM | ||
486 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
487 | assert_spin_locked(&vma->vm_mm->page_table_lock); | ||
488 | #endif | ||
489 | changed = !pmd_same(*(pmdp), entry); | ||
490 | if (changed) { | ||
491 | __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); | ||
492 | /* | ||
493 | * Since we are not supporting SW TLB systems, we don't | ||
494 | * have anything similar to flush_tlb_page_nohash() | ||
495 | */ | ||
496 | } | ||
497 | return changed; | ||
498 | } | ||
499 | |||
500 | unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, | ||
501 | pmd_t *pmdp, unsigned long clr) | ||
502 | { | ||
503 | |||
504 | unsigned long old, tmp; | ||
505 | |||
506 | #ifdef CONFIG_DEBUG_VM | ||
507 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
508 | assert_spin_locked(&mm->page_table_lock); | ||
509 | #endif | ||
510 | |||
511 | #ifdef PTE_ATOMIC_UPDATES | ||
512 | __asm__ __volatile__( | ||
513 | "1: ldarx %0,0,%3\n\ | ||
514 | andi. %1,%0,%6\n\ | ||
515 | bne- 1b \n\ | ||
516 | andc %1,%0,%4 \n\ | ||
517 | stdcx. %1,0,%3 \n\ | ||
518 | bne- 1b" | ||
519 | : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) | ||
520 | : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY) | ||
521 | : "cc" ); | ||
522 | #else | ||
523 | old = pmd_val(*pmdp); | ||
524 | *pmdp = __pmd(old & ~clr); | ||
525 | #endif | ||
526 | if (old & _PAGE_HASHPTE) | ||
527 | hpte_do_hugepage_flush(mm, addr, pmdp); | ||
528 | return old; | ||
529 | } | ||
530 | |||
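The inline assembly in pmd_hugepage_update() above is a reservation loop: retry while _PAGE_BUSY is set, then atomically clear the requested bits. A pseudo-C restatement, where load_reserved()/store_conditional() are hypothetical wrappers for the ldarx/stdcx. pair:

    /* Pseudo-C restatement of the atomic clear; helpers are hypothetical. */
    for (;;) {
            old = load_reserved(pmdp);                /* ldarx */
            if (old & _PAGE_BUSY)
                    continue;                         /* pte locked, retry */
            if (store_conditional(pmdp, old & ~clr))  /* stdcx. */
                    break;                            /* no intervening store */
    }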
531 | pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, | ||
532 | pmd_t *pmdp) | ||
533 | { | ||
534 | pmd_t pmd; | ||
535 | |||
536 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
537 | if (pmd_trans_huge(*pmdp)) { | ||
538 | pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); | ||
539 | } else { | ||
540 | /* | ||
541 | * khugepaged calls this for normal pmd | ||
542 | */ | ||
543 | pmd = *pmdp; | ||
544 | pmd_clear(pmdp); | ||
545 | /* | ||
546 | * Wait for all pending hash_page to finish. This is needed | ||
547 | * in case of subpage collapse. When we collapse normal pages | ||
548 | * to hugepage, we first clear the pmd, then invalidate all | ||
549 | * the PTE entries. The assumption here is that any low level | ||
550 | * page fault will see a none pmd and take the slow path that | ||
551 | * will wait on mmap_sem. But we could very well be in a | ||
552 | * hash_page with local ptep pointer value. Such a hash page | ||
553 | * can result in adding new HPTE entries for normal subpages. | ||
554 | * That means we could be modifying the page content as we | ||
555 | * copy them to a huge page. So wait for parallel hash_page | ||
556 | * to finish before invalidating HPTE entries. We can do this | ||
557 | * by sending an IPI to all the cpus and executing a dummy | ||
558 | * function there. | ||
559 | */ | ||
560 | kick_all_cpus_sync(); | ||
561 | /* | ||
562 | * Now invalidate the hpte entries in the range | ||
563 | * covered by pmd. This makes sure we take a | ||
564 | * fault and will find the pmd as none, which will | ||
565 | * result in a major fault which takes mmap_sem and | ||
566 | * hence waits for collapse to complete. Without this | ||
567 | * the __collapse_huge_page_copy can result in copying | ||
568 | * the old content. | ||
569 | */ | ||
570 | flush_tlb_pmd_range(vma->vm_mm, &pmd, address); | ||
571 | } | ||
572 | return pmd; | ||
573 | } | ||
574 | |||
575 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
576 | unsigned long address, pmd_t *pmdp) | ||
577 | { | ||
578 | return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); | ||
579 | } | ||
580 | |||
581 | /* | ||
582 | * We currently remove entries from the hashtable regardless of whether | ||
583 | * the entry was young or dirty. The generic routines only flush if the | ||
584 | * entry was young or dirty, which is not good enough. | ||
585 | * | ||
586 | * We should be more intelligent about this but for the moment we override | ||
587 | * these functions and force a tlb flush unconditionally | ||
588 | */ | ||
589 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
590 | unsigned long address, pmd_t *pmdp) | ||
591 | { | ||
592 | return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * We mark the pmd splitting and invalidate all the hpte | ||
597 | * entries for this hugepage. | ||
598 | */ | ||
599 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
600 | unsigned long address, pmd_t *pmdp) | ||
601 | { | ||
602 | unsigned long old, tmp; | ||
603 | |||
604 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
605 | |||
606 | #ifdef CONFIG_DEBUG_VM | ||
607 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
608 | assert_spin_locked(&vma->vm_mm->page_table_lock); | ||
609 | #endif | ||
610 | |||
611 | #ifdef PTE_ATOMIC_UPDATES | ||
612 | |||
613 | __asm__ __volatile__( | ||
614 | "1: ldarx %0,0,%3\n\ | ||
615 | andi. %1,%0,%6\n\ | ||
616 | bne- 1b \n\ | ||
617 | ori %1,%0,%4 \n\ | ||
618 | stdcx. %1,0,%3 \n\ | ||
619 | bne- 1b" | ||
620 | : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) | ||
621 | : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) | ||
622 | : "cc" ); | ||
623 | #else | ||
624 | old = pmd_val(*pmdp); | ||
625 | *pmdp = __pmd(old | _PAGE_SPLITTING); | ||
626 | #endif | ||
627 | /* | ||
628 | * If we didn't have the splitting flag set, go and flush the | ||
629 | * HPTE entries. | ||
630 | */ | ||
631 | if (!(old & _PAGE_SPLITTING)) { | ||
632 | /* We need to flush the hpte */ | ||
633 | if (old & _PAGE_HASHPTE) | ||
634 | hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | * We want to put the pgtable in pmd and use pgtable for tracking | ||
640 | * the base page size hptes | ||
641 | */ | ||
642 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, | ||
643 | pgtable_t pgtable) | ||
644 | { | ||
645 | pgtable_t *pgtable_slot; | ||
646 | assert_spin_locked(&mm->page_table_lock); | ||
647 | /* | ||
648 | * we store the pgtable in the second half of PMD | ||
649 | */ | ||
650 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
651 | *pgtable_slot = pgtable; | ||
652 | /* | ||
653 | * Expose the deposited pgtable to other cpus | ||
654 | * before we set the hugepage PTE at pmd level. | ||
655 | * The hash fault code looks at the deposited pgtable | ||
656 | * to store hash index values. | ||
657 | */ | ||
658 | smp_wmb(); | ||
659 | } | ||
660 | |||
661 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) | ||
662 | { | ||
663 | pgtable_t pgtable; | ||
664 | pgtable_t *pgtable_slot; | ||
665 | |||
666 | assert_spin_locked(&mm->page_table_lock); | ||
667 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
668 | pgtable = *pgtable_slot; | ||
669 | /* | ||
670 | * Once we withdraw, mark the entry NULL. | ||
671 | */ | ||
672 | *pgtable_slot = NULL; | ||
673 | /* | ||
674 | * We store HPTE information in the deposited PTE fragment. | ||
675 | * zero out the content on withdraw. | ||
676 | */ | ||
677 | memset(pgtable, 0, PTE_FRAG_SIZE); | ||
678 | return pgtable; | ||
679 | } | ||
680 | |||
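The deposit/withdraw pair above relies on the doubled PMD page from the pmd_ctor() change earlier in this patch (PMD_TABLE_SIZE * 2 under CONFIG_TRANSPARENT_HUGEPAGE): real PMD entries fill the first half, and each entry's deposit slot mirrors it PTRS_PER_PMD entries further on. A one-function sketch of that addressing, under the assumption that pgtable_t and pmd_t are pointer-sized:

    /* Sketch: pmdp points into the first half of the doubled PMD page;
     * the matching deposit slot sits PTRS_PER_PMD entries beyond it. */
    static inline pgtable_t *deposit_slot(pmd_t *pmdp)
    {
            return (pgtable_t *)pmdp + PTRS_PER_PMD;
    }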
681 | /* | ||
682 | * set a new huge pmd. We should not be called for updating | ||
683 | * an existing pmd entry. That should go via pmd_hugepage_update. | ||
684 | */ | ||
685 | void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
686 | pmd_t *pmdp, pmd_t pmd) | ||
687 | { | ||
688 | #ifdef CONFIG_DEBUG_VM | ||
689 | WARN_ON(!pmd_none(*pmdp)); | ||
690 | assert_spin_locked(&mm->page_table_lock); | ||
691 | WARN_ON(!pmd_trans_huge(pmd)); | ||
692 | #endif | ||
693 | return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); | ||
694 | } | ||
695 | |||
696 | void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, | ||
697 | pmd_t *pmdp) | ||
698 | { | ||
699 | pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT); | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * A linux hugepage PMD was changed and the corresponding hash table entries | ||
704 | * need to be flushed. | ||
705 | */ | ||
706 | void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | ||
707 | pmd_t *pmdp) | ||
708 | { | ||
709 | int ssize, i; | ||
710 | unsigned long s_addr; | ||
711 | int max_hpte_count; | ||
712 | unsigned int psize, valid; | ||
713 | unsigned char *hpte_slot_array; | ||
714 | unsigned long hidx, vpn, vsid, hash, shift, slot; | ||
715 | |||
716 | /* | ||
717 | * Flush all the hptes mapping this hugepage | ||
718 | */ | ||
719 | s_addr = addr & HPAGE_PMD_MASK; | ||
720 | hpte_slot_array = get_hpte_slot_array(pmdp); | ||
721 | /* | ||
722 | * IF we try to do a HUGE PTE update after a withdraw is done. | ||
723 | * we will find the below NULL. This happens when we do | ||
724 | * split_huge_page_pmd | ||
725 | */ | ||
726 | if (!hpte_slot_array) | ||
727 | return; | ||
728 | |||
729 | /* get the base page size */ | ||
730 | psize = get_slice_psize(mm, s_addr); | ||
731 | |||
732 | if (ppc_md.hugepage_invalidate) | ||
733 | return ppc_md.hugepage_invalidate(mm, hpte_slot_array, | ||
734 | s_addr, psize); | ||
735 | /* | ||
736 | * No bulk hpte removal support, invalidate each entry | ||
737 | */ | ||
738 | shift = mmu_psize_defs[psize].shift; | ||
739 | max_hpte_count = HPAGE_PMD_SIZE >> shift; | ||
740 | for (i = 0; i < max_hpte_count; i++) { | ||
741 | /* | ||
742 | * 8 bits per hpte entry | ||
743 | * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] | ||
744 | */ | ||
745 | valid = hpte_valid(hpte_slot_array, i); | ||
746 | if (!valid) | ||
747 | continue; | ||
748 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
749 | |||
750 | /* get the vpn */ | ||
751 | addr = s_addr + (i * (1ul << shift)); | ||
752 | if (!is_kernel_addr(addr)) { | ||
753 | ssize = user_segment_size(addr); | ||
754 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
755 | WARN_ON(vsid == 0); | ||
756 | } else { | ||
757 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
758 | ssize = mmu_kernel_ssize; | ||
759 | } | ||
760 | |||
761 | vpn = hpt_vpn(addr, vsid, ssize); | ||
762 | hash = hpt_hash(vpn, shift, ssize); | ||
763 | if (hidx & _PTEIDX_SECONDARY) | ||
764 | hash = ~hash; | ||
765 | |||
766 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
767 | slot += hidx & _PTEIDX_GROUP_IX; | ||
768 | ppc_md.hpte_invalidate(slot, vpn, psize, | ||
769 | MMU_PAGE_16M, ssize, 0); | ||
770 | } | ||
771 | } | ||
772 | |||
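The byte format decoded in the loop above is the one its comment spells out: bit 0 is the valid bit, bits 1-3 the hash-group index, bit 4 the secondary-group flag. A hedged sketch of matching accessors (the real hpte_valid()/hpte_hash_index() helpers are defined in headers elsewhere in this series):

    /* Sketch of the per-subpage byte: 000 | S | hhh | V */
    static inline unsigned int slot_is_valid(unsigned char e)
    {
            return e & 0x1;         /* V: an hpte exists for this subpage */
    }

    static inline unsigned int slot_hash_index(unsigned char e)
    {
            return (e >> 1) & 0xf;  /* S+hhh, tested via _PTEIDX_* masks */
    }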
773 | static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) | ||
774 | { | ||
775 | pmd_val(pmd) |= pgprot_val(pgprot); | ||
776 | return pmd; | ||
777 | } | ||
778 | |||
779 | pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) | ||
780 | { | ||
781 | pmd_t pmd; | ||
782 | /* | ||
783 | * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always | ||
784 | * set. We use this to check for a THP page at the pmd level: | ||
785 | * a leaf pte for a huge page has bottom two bits != 00. | ||
786 | */ | ||
787 | pmd_val(pmd) = pfn << PTE_RPN_SHIFT; | ||
788 | pmd_val(pmd) |= _PAGE_THP_HUGE; | ||
789 | pmd = pmd_set_protbits(pmd, pgprot); | ||
790 | return pmd; | ||
791 | } | ||
792 | |||
793 | pmd_t mk_pmd(struct page *page, pgprot_t pgprot) | ||
794 | { | ||
795 | return pfn_pmd(page_to_pfn(page), pgprot); | ||
796 | } | ||
797 | |||
798 | pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
799 | { | ||
800 | |||
801 | pmd_val(pmd) &= _HPAGE_CHG_MASK; | ||
802 | pmd = pmd_set_protbits(pmd, newprot); | ||
803 | return pmd; | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * This is called at the end of handling a user page fault, when the | ||
808 | * fault has been handled by updating a HUGE PMD entry in the linux page tables. | ||
809 | * We use it to preload an HPTE into the hash table corresponding to | ||
810 | * the updated linux HUGE PMD entry. | ||
811 | */ | ||
812 | void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | ||
813 | pmd_t *pmd) | ||
814 | { | ||
815 | return; | ||
816 | } | ||
817 | |||
818 | pmd_t pmdp_get_and_clear(struct mm_struct *mm, | ||
819 | unsigned long addr, pmd_t *pmdp) | ||
820 | { | ||
821 | pmd_t old_pmd; | ||
822 | pgtable_t pgtable; | ||
823 | unsigned long old; | ||
824 | pgtable_t *pgtable_slot; | ||
825 | |||
826 | old = pmd_hugepage_update(mm, addr, pmdp, ~0UL); | ||
827 | old_pmd = __pmd(old); | ||
828 | /* | ||
829 | * We have pmd == none and we are holding page_table_lock. | ||
830 | * So we can safely go and clear the pgtable hash | ||
831 | * index info. | ||
832 | */ | ||
833 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
834 | pgtable = *pgtable_slot; | ||
835 | /* | ||
836 | * Let's zero out the old valid and hash index details | ||
837 | * so that the hash fault code won't look at them. | ||
838 | */ | ||
839 | memset(pgtable, 0, PTE_FRAG_SIZE); | ||
840 | return old_pmd; | ||
841 | } | ||
842 | |||
843 | int has_transparent_hugepage(void) | ||
844 | { | ||
845 | if (!mmu_has_feature(MMU_FTR_16M_PAGE)) | ||
846 | return 0; | ||
847 | /* | ||
848 | * We support THP only if PMD_SIZE is 16MB. | ||
849 | */ | ||
850 | if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT) | ||
851 | return 0; | ||
852 | /* | ||
853 | * We need to make sure that we support 16MB hugepages in a segment | ||
854 | * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE | ||
855 | * of 64K. | ||
856 | */ | ||
857 | /* | ||
858 | * If we have 64K HPTE, we will be using that by default | ||
859 | */ | ||
860 | if (mmu_psize_defs[MMU_PAGE_64K].shift && | ||
861 | (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1)) | ||
862 | return 0; | ||
863 | /* | ||
864 | * Ok we only have 4K HPTE | ||
865 | */ | ||
866 | if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1) | ||
867 | return 0; | ||
868 | |||
869 | return 1; | ||
870 | } | ||
871 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 7c415ddde948..aa74acb0fdfc 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c | |||
@@ -130,6 +130,53 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) | |||
130 | up_write(&mm->mmap_sem); | 130 | up_write(&mm->mmap_sem); |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
134 | static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, | ||
135 | unsigned long end, struct mm_walk *walk) | ||
136 | { | ||
137 | struct vm_area_struct *vma = walk->private; | ||
138 | split_huge_page_pmd(vma, addr, pmd); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | ||
143 | unsigned long len) | ||
144 | { | ||
145 | struct vm_area_struct *vma; | ||
146 | struct mm_walk subpage_proto_walk = { | ||
147 | .mm = mm, | ||
148 | .pmd_entry = subpage_walk_pmd_entry, | ||
149 | }; | ||
150 | |||
151 | /* | ||
152 | * We don't try too hard, we just mark all the VMAs in that range | ||
153 | * VM_NOHUGEPAGE and split them. | ||
154 | */ | ||
155 | vma = find_vma(mm, addr); | ||
156 | /* | ||
157 | * If the range falls entirely in an unmapped area, just return | ||
158 | */ | ||
159 | if (vma && ((addr + len) <= vma->vm_start)) | ||
160 | return; | ||
161 | |||
162 | while (vma) { | ||
163 | if (vma->vm_start >= (addr + len)) | ||
164 | break; | ||
165 | vma->vm_flags |= VM_NOHUGEPAGE; | ||
166 | subpage_proto_walk.private = vma; | ||
167 | walk_page_range(vma->vm_start, vma->vm_end, | ||
168 | &subpage_proto_walk); | ||
169 | vma = vma->vm_next; | ||
170 | } | ||
171 | } | ||
172 | #else | ||
173 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | ||
174 | unsigned long len) | ||
175 | { | ||
176 | return; | ||
177 | } | ||
178 | #endif | ||
179 | |||
133 | /* | 180 | /* |
134 | * Copy in a subpage protection map for an address range. | 181 | * Copy in a subpage protection map for an address range. |
135 | * The map has 2 bits per 4k subpage, so 32 bits per 64k page. | 182 | * The map has 2 bits per 4k subpage, so 32 bits per 64k page. |
@@ -168,6 +215,7 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) | |||
168 | return -EFAULT; | 215 | return -EFAULT; |
169 | 216 | ||
170 | down_write(&mm->mmap_sem); | 217 | down_write(&mm->mmap_sem); |
218 | subpage_mark_vma_nohuge(mm, addr, len); | ||
171 | for (limit = addr + len; addr < limit; addr = next) { | 219 | for (limit = addr + len; addr < limit; addr = next) { |
172 | next = pmd_addr_end(addr, limit); | 220 | next = pmd_addr_end(addr, limit); |
173 | err = -ENOMEM; | 221 | err = -ENOMEM; |
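
The subpage-prot change above relies on the generic page-walk API of this kernel era: fill in a struct mm_walk with a .pmd_entry callback, stash caller context in .private, and hand a [start, end) range to walk_page_range(). A minimal sketch of the same pattern (the counting helper is hypothetical; the caller must hold mmap_sem, as sys_subpage_prot does above):

static int count_pmd_entry(pmd_t *pmd, unsigned long addr,
			   unsigned long end, struct mm_walk *walk)
{
	(*(int *)walk->private)++;
	return 0;		/* returning non-zero aborts the walk */
}

static int count_vma_pmds(struct vm_area_struct *vma)
{
	int n = 0;
	struct mm_walk w = {
		.mm = vma->vm_mm,
		.pmd_entry = count_pmd_entry,
		.private = &n,
	};

	walk_page_range(vma->vm_start, vma->vm_end, &w);
	return n;
}
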
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 7df1c5edda87..36e44b4260eb 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c | |||
@@ -189,6 +189,7 @@ void tlb_flush(struct mmu_gather *tlb) | |||
189 | void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | 189 | void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, |
190 | unsigned long end) | 190 | unsigned long end) |
191 | { | 191 | { |
192 | int hugepage_shift; | ||
192 | unsigned long flags; | 193 | unsigned long flags; |
193 | 194 | ||
194 | start = _ALIGN_DOWN(start, PAGE_SIZE); | 195 | start = _ALIGN_DOWN(start, PAGE_SIZE); |
@@ -206,7 +207,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | |||
206 | local_irq_save(flags); | 207 | local_irq_save(flags); |
207 | arch_enter_lazy_mmu_mode(); | 208 | arch_enter_lazy_mmu_mode(); |
208 | for (; start < end; start += PAGE_SIZE) { | 209 | for (; start < end; start += PAGE_SIZE) { |
209 | pte_t *ptep = find_linux_pte(mm->pgd, start); | 210 | pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, |
211 | &hugepage_shift); | ||
210 | unsigned long pte; | 212 | unsigned long pte; |
211 | 213 | ||
212 | if (ptep == NULL) | 214 | if (ptep == NULL) |
@@ -214,7 +216,37 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | |||
214 | pte = pte_val(*ptep); | 216 | pte = pte_val(*ptep); |
215 | if (!(pte & _PAGE_HASHPTE)) | 217 | if (!(pte & _PAGE_HASHPTE)) |
216 | continue; | 218 | continue; |
217 | hpte_need_flush(mm, start, ptep, pte, 0); | 219 | if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) |
220 | hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); | ||
221 | else | ||
222 | hpte_need_flush(mm, start, ptep, pte, 0); | ||
223 | } | ||
224 | arch_leave_lazy_mmu_mode(); | ||
225 | local_irq_restore(flags); | ||
226 | } | ||
227 | |||
228 | void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) | ||
229 | { | ||
230 | pte_t *pte; | ||
231 | pte_t *start_pte; | ||
232 | unsigned long flags; | ||
233 | |||
234 | addr = _ALIGN_DOWN(addr, PMD_SIZE); | ||
235 | /* Note: Normally, we should only ever use a batch within a | ||
236 | * PTE locked section. This violates the rule, but will work | ||
237 | * since we don't actually modify the PTEs, we just flush the | ||
238 | * hash while leaving the PTEs intact (including their reference | ||
239 | * to being hashed). This is not the most performance oriented | ||
240 | * way to do things but is fine for our needs here. | ||
241 | */ | ||
242 | local_irq_save(flags); | ||
243 | arch_enter_lazy_mmu_mode(); | ||
244 | start_pte = pte_offset_map(pmd, addr); | ||
245 | for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) { | ||
246 | unsigned long pteval = pte_val(*pte); | ||
247 | if (pteval & _PAGE_HASHPTE) | ||
248 | hpte_need_flush(mm, addr, pte, pteval, 0); | ||
249 | addr += PAGE_SIZE; | ||
218 | } | 250 | } |
219 | arch_leave_lazy_mmu_mode(); | 251 | arch_leave_lazy_mmu_mode(); |
220 | local_irq_restore(flags); | 252 | local_irq_restore(flags); |
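
The new flush_tlb_pmd_range() walks every PTE beneath one PMD and flushes any with _PAGE_HASHPTE set; since the address is rounded down to PMD_SIZE internally, callers may pass any address in the range. A hypothetical caller, purely for illustration of the intended use:

/*
 * Hypothetical (illustration only): after a huge PMD has been demoted
 * back to a normal PTE page, the hash slots of the 4K/64K entries
 * under it must be invalidated before those PTEs are reused.
 */
static void demote_and_flush(struct mm_struct *mm, pmd_t *pmdp,
			     unsigned long addr)
{
	/* ... demote *pmdp so it points at a PTE page again ... */
	flush_tlb_pmd_range(mm, pmdp, addr);
}
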
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 6888cad5103d..41cd68dee681 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -648,7 +648,7 @@ void __init early_init_mmu(void) | |||
648 | __early_init_mmu(1); | 648 | __early_init_mmu(1); |
649 | } | 649 | } |
650 | 650 | ||
651 | void __cpuinit early_init_mmu_secondary(void) | 651 | void early_init_mmu_secondary(void) |
652 | { | 652 | { |
653 | __early_init_mmu(0); | 653 | __early_init_mmu(0); |
654 | } | 654 | } |
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29c6482890c8..a3985aee77fe 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c | |||
@@ -75,6 +75,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; | |||
75 | 75 | ||
76 | #define MMCR0_FCHV 0 | 76 | #define MMCR0_FCHV 0 |
77 | #define MMCR0_PMCjCE MMCR0_PMCnCE | 77 | #define MMCR0_PMCjCE MMCR0_PMCnCE |
78 | #define MMCR0_FC56 0 | ||
79 | #define MMCR0_PMAO 0 | ||
80 | #define MMCR0_EBE 0 | ||
81 | #define MMCR0_PMCC 0 | ||
82 | #define MMCR0_PMCC_U6 0 | ||
78 | 83 | ||
79 | #define SPRN_MMCRA SPRN_MMCR2 | 84 | #define SPRN_MMCRA SPRN_MMCR2 |
80 | #define MMCRA_SAMPLE_ENABLE 0 | 85 | #define MMCRA_SAMPLE_ENABLE 0 |
@@ -102,6 +107,15 @@ static inline int siar_valid(struct pt_regs *regs) | |||
102 | return 1; | 107 | return 1; |
103 | } | 108 | } |
104 | 109 | ||
110 | static bool is_ebb_event(struct perf_event *event) { return false; } | ||
111 | static int ebb_event_check(struct perf_event *event) { return 0; } | ||
112 | static void ebb_event_add(struct perf_event *event) { } | ||
113 | static void ebb_switch_out(unsigned long mmcr0) { } | ||
114 | static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) | ||
115 | { | ||
116 | return mmcr0; | ||
117 | } | ||
118 | |||
105 | static inline void power_pmu_bhrb_enable(struct perf_event *event) {} | 119 | static inline void power_pmu_bhrb_enable(struct perf_event *event) {} |
106 | static inline void power_pmu_bhrb_disable(struct perf_event *event) {} | 120 | static inline void power_pmu_bhrb_disable(struct perf_event *event) {} |
107 | void power_pmu_flush_branch_stack(void) {} | 121 | void power_pmu_flush_branch_stack(void) {} |
@@ -462,6 +476,89 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) | |||
462 | return; | 476 | return; |
463 | } | 477 | } |
464 | 478 | ||
479 | static bool is_ebb_event(struct perf_event *event) | ||
480 | { | ||
481 | /* | ||
482 | * This could be a per-PMU callback, but we'd rather avoid the cost. We | ||
483 | * check that the PMU supports EBB, meaning those that don't can still | ||
484 | * use bit 63 of the event code for something else if they wish. | ||
485 | */ | ||
486 | return (ppmu->flags & PPMU_EBB) && | ||
487 | ((event->attr.config >> EVENT_CONFIG_EBB_SHIFT) & 1); | ||
488 | } | ||
489 | |||
490 | static int ebb_event_check(struct perf_event *event) | ||
491 | { | ||
492 | struct perf_event *leader = event->group_leader; | ||
493 | |||
494 | /* Event and group leader must agree on EBB */ | ||
495 | if (is_ebb_event(leader) != is_ebb_event(event)) | ||
496 | return -EINVAL; | ||
497 | |||
498 | if (is_ebb_event(event)) { | ||
499 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
500 | return -EINVAL; | ||
501 | |||
502 | if (!leader->attr.pinned || !leader->attr.exclusive) | ||
503 | return -EINVAL; | ||
504 | |||
505 | if (event->attr.inherit || event->attr.sample_period || | ||
506 | event->attr.enable_on_exec || event->attr.freq) | ||
507 | return -EINVAL; | ||
508 | } | ||
509 | |||
510 | return 0; | ||
511 | } | ||
512 | |||
513 | static void ebb_event_add(struct perf_event *event) | ||
514 | { | ||
515 | if (!is_ebb_event(event) || current->thread.used_ebb) | ||
516 | return; | ||
517 | |||
518 | /* | ||
519 | * IFF this is the first time we've added an EBB event, set | ||
520 | * PMXE in the user MMCR0 so we can detect when it's cleared by | ||
521 | * userspace. We need this so that we can context switch while | ||
522 | * userspace is in the EBB handler (where PMXE is 0). | ||
523 | */ | ||
524 | current->thread.used_ebb = 1; | ||
525 | current->thread.mmcr0 |= MMCR0_PMXE; | ||
526 | } | ||
527 | |||
528 | static void ebb_switch_out(unsigned long mmcr0) | ||
529 | { | ||
530 | if (!(mmcr0 & MMCR0_EBE)) | ||
531 | return; | ||
532 | |||
533 | current->thread.siar = mfspr(SPRN_SIAR); | ||
534 | current->thread.sier = mfspr(SPRN_SIER); | ||
535 | current->thread.sdar = mfspr(SPRN_SDAR); | ||
536 | current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; | ||
537 | current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; | ||
538 | } | ||
539 | |||
540 | static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) | ||
541 | { | ||
542 | if (!ebb) | ||
543 | goto out; | ||
544 | |||
545 | /* Enable EBB and read/write to all 6 PMCs for userspace */ | ||
546 | mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6; | ||
547 | |||
548 | /* Add any bits from the user reg, FC or PMAO */ | ||
549 | mmcr0 |= current->thread.mmcr0; | ||
550 | |||
551 | /* Be careful not to set PMXE if userspace had it cleared */ | ||
552 | if (!(current->thread.mmcr0 & MMCR0_PMXE)) | ||
553 | mmcr0 &= ~MMCR0_PMXE; | ||
554 | |||
555 | mtspr(SPRN_SIAR, current->thread.siar); | ||
556 | mtspr(SPRN_SIER, current->thread.sier); | ||
557 | mtspr(SPRN_SDAR, current->thread.sdar); | ||
558 | mtspr(SPRN_MMCR2, current->thread.mmcr2); | ||
559 | out: | ||
560 | return mmcr0; | ||
561 | } | ||
465 | #endif /* CONFIG_PPC64 */ | 562 | #endif /* CONFIG_PPC64 */ |
466 | 563 | ||
467 | static void perf_event_interrupt(struct pt_regs *regs); | 564 | static void perf_event_interrupt(struct pt_regs *regs); |
@@ -732,6 +829,13 @@ static void power_pmu_read(struct perf_event *event) | |||
732 | 829 | ||
733 | if (!event->hw.idx) | 830 | if (!event->hw.idx) |
734 | return; | 831 | return; |
832 | |||
833 | if (is_ebb_event(event)) { | ||
834 | val = read_pmc(event->hw.idx); | ||
835 | local64_set(&event->hw.prev_count, val); | ||
836 | return; | ||
837 | } | ||
838 | |||
735 | /* | 839 | /* |
736 | * Performance monitor interrupts come even when interrupts | 840 | * Performance monitor interrupts come even when interrupts |
737 | * are soft-disabled, as long as interrupts are hard-enabled. | 841 | * are soft-disabled, as long as interrupts are hard-enabled. |
@@ -852,7 +956,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) | |||
852 | static void power_pmu_disable(struct pmu *pmu) | 956 | static void power_pmu_disable(struct pmu *pmu) |
853 | { | 957 | { |
854 | struct cpu_hw_events *cpuhw; | 958 | struct cpu_hw_events *cpuhw; |
855 | unsigned long flags; | 959 | unsigned long flags, mmcr0, val; |
856 | 960 | ||
857 | if (!ppmu) | 961 | if (!ppmu) |
858 | return; | 962 | return; |
@@ -860,9 +964,6 @@ static void power_pmu_disable(struct pmu *pmu) | |||
860 | cpuhw = &__get_cpu_var(cpu_hw_events); | 964 | cpuhw = &__get_cpu_var(cpu_hw_events); |
861 | 965 | ||
862 | if (!cpuhw->disabled) { | 966 | if (!cpuhw->disabled) { |
863 | cpuhw->disabled = 1; | ||
864 | cpuhw->n_added = 0; | ||
865 | |||
866 | /* | 967 | /* |
867 | * Check if we ever enabled the PMU on this cpu. | 968 | * Check if we ever enabled the PMU on this cpu. |
868 | */ | 969 | */ |
@@ -872,6 +973,21 @@ static void power_pmu_disable(struct pmu *pmu) | |||
872 | } | 973 | } |
873 | 974 | ||
874 | /* | 975 | /* |
976 | * Set the 'freeze counters' bit, clear EBE/PMCC/PMAO/FC56. | ||
977 | */ | ||
978 | val = mmcr0 = mfspr(SPRN_MMCR0); | ||
979 | val |= MMCR0_FC; | ||
980 | val &= ~(MMCR0_EBE | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); | ||
981 | |||
982 | /* | ||
983 | * The barrier is to make sure the mtspr has been | ||
984 | * executed and the PMU has frozen the events etc. | ||
985 | * before we return. | ||
986 | */ | ||
987 | write_mmcr0(cpuhw, val); | ||
988 | mb(); | ||
989 | |||
990 | /* | ||
875 | * Disable instruction sampling if it was enabled | 991 | * Disable instruction sampling if it was enabled |
876 | */ | 992 | */ |
877 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | 993 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { |
@@ -880,15 +996,12 @@ static void power_pmu_disable(struct pmu *pmu) | |||
880 | mb(); | 996 | mb(); |
881 | } | 997 | } |
882 | 998 | ||
883 | /* | 999 | cpuhw->disabled = 1; |
884 | * Set the 'freeze counters' bit. | 1000 | cpuhw->n_added = 0; |
885 | * The barrier is to make sure the mtspr has been | 1001 | |
886 | * executed and the PMU has frozen the events | 1002 | ebb_switch_out(mmcr0); |
887 | * before we return. | ||
888 | */ | ||
889 | write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); | ||
890 | mb(); | ||
891 | } | 1003 | } |
1004 | |||
892 | local_irq_restore(flags); | 1005 | local_irq_restore(flags); |
893 | } | 1006 | } |
894 | 1007 | ||
@@ -903,23 +1016,36 @@ static void power_pmu_enable(struct pmu *pmu) | |||
903 | struct cpu_hw_events *cpuhw; | 1016 | struct cpu_hw_events *cpuhw; |
904 | unsigned long flags; | 1017 | unsigned long flags; |
905 | long i; | 1018 | long i; |
906 | unsigned long val; | 1019 | unsigned long val, mmcr0; |
907 | s64 left; | 1020 | s64 left; |
908 | unsigned int hwc_index[MAX_HWEVENTS]; | 1021 | unsigned int hwc_index[MAX_HWEVENTS]; |
909 | int n_lim; | 1022 | int n_lim; |
910 | int idx; | 1023 | int idx; |
1024 | bool ebb; | ||
911 | 1025 | ||
912 | if (!ppmu) | 1026 | if (!ppmu) |
913 | return; | 1027 | return; |
914 | local_irq_save(flags); | 1028 | local_irq_save(flags); |
1029 | |||
915 | cpuhw = &__get_cpu_var(cpu_hw_events); | 1030 | cpuhw = &__get_cpu_var(cpu_hw_events); |
916 | if (!cpuhw->disabled) { | 1031 | if (!cpuhw->disabled) |
917 | local_irq_restore(flags); | 1032 | goto out; |
918 | return; | 1033 | |
1034 | if (cpuhw->n_events == 0) { | ||
1035 | ppc_set_pmu_inuse(0); | ||
1036 | goto out; | ||
919 | } | 1037 | } |
1038 | |||
920 | cpuhw->disabled = 0; | 1039 | cpuhw->disabled = 0; |
921 | 1040 | ||
922 | /* | 1041 | /* |
1042 | * EBB requires an exclusive group and all events must have the EBB | ||
1043 | * flag set, or not set, so we can just check a single event. Also we | ||
1044 | * know we have at least one event. | ||
1045 | */ | ||
1046 | ebb = is_ebb_event(cpuhw->event[0]); | ||
1047 | |||
1048 | /* | ||
923 | * If we didn't change anything, or only removed events, | 1049 | * If we didn't change anything, or only removed events, |
924 | * no need to recalculate MMCR* settings and reset the PMCs. | 1050 | * no need to recalculate MMCR* settings and reset the PMCs. |
925 | * Just reenable the PMU with the current MMCR* settings | 1051 | * Just reenable the PMU with the current MMCR* settings |
@@ -928,8 +1054,6 @@ static void power_pmu_enable(struct pmu *pmu) | |||
928 | if (!cpuhw->n_added) { | 1054 | if (!cpuhw->n_added) { |
929 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | 1055 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); |
930 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); | 1056 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); |
931 | if (cpuhw->n_events == 0) | ||
932 | ppc_set_pmu_inuse(0); | ||
933 | goto out_enable; | 1057 | goto out_enable; |
934 | } | 1058 | } |
935 | 1059 | ||
@@ -996,25 +1120,34 @@ static void power_pmu_enable(struct pmu *pmu) | |||
996 | ++n_lim; | 1120 | ++n_lim; |
997 | continue; | 1121 | continue; |
998 | } | 1122 | } |
999 | val = 0; | 1123 | |
1000 | if (event->hw.sample_period) { | 1124 | if (ebb) |
1001 | left = local64_read(&event->hw.period_left); | 1125 | val = local64_read(&event->hw.prev_count); |
1002 | if (left < 0x80000000L) | 1126 | else { |
1003 | val = 0x80000000L - left; | 1127 | val = 0; |
1128 | if (event->hw.sample_period) { | ||
1129 | left = local64_read(&event->hw.period_left); | ||
1130 | if (left < 0x80000000L) | ||
1131 | val = 0x80000000L - left; | ||
1132 | } | ||
1133 | local64_set(&event->hw.prev_count, val); | ||
1004 | } | 1134 | } |
1005 | local64_set(&event->hw.prev_count, val); | 1135 | |
1006 | event->hw.idx = idx; | 1136 | event->hw.idx = idx; |
1007 | if (event->hw.state & PERF_HES_STOPPED) | 1137 | if (event->hw.state & PERF_HES_STOPPED) |
1008 | val = 0; | 1138 | val = 0; |
1009 | write_pmc(idx, val); | 1139 | write_pmc(idx, val); |
1140 | |||
1010 | perf_event_update_userpage(event); | 1141 | perf_event_update_userpage(event); |
1011 | } | 1142 | } |
1012 | cpuhw->n_limited = n_lim; | 1143 | cpuhw->n_limited = n_lim; |
1013 | cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; | 1144 | cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; |
1014 | 1145 | ||
1015 | out_enable: | 1146 | out_enable: |
1147 | mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); | ||
1148 | |||
1016 | mb(); | 1149 | mb(); |
1017 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | 1150 | write_mmcr0(cpuhw, mmcr0); |
1018 | 1151 | ||
1019 | /* | 1152 | /* |
1020 | * Enable instruction sampling if necessary | 1153 | * Enable instruction sampling if necessary |
@@ -1112,6 +1245,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) | |||
1112 | event->hw.config = cpuhw->events[n0]; | 1245 | event->hw.config = cpuhw->events[n0]; |
1113 | 1246 | ||
1114 | nocheck: | 1247 | nocheck: |
1248 | ebb_event_add(event); | ||
1249 | |||
1115 | ++cpuhw->n_events; | 1250 | ++cpuhw->n_events; |
1116 | ++cpuhw->n_added; | 1251 | ++cpuhw->n_added; |
1117 | 1252 | ||
@@ -1472,6 +1607,11 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1472 | } | 1607 | } |
1473 | } | 1608 | } |
1474 | 1609 | ||
1610 | /* Extra checks for EBB */ | ||
1611 | err = ebb_event_check(event); | ||
1612 | if (err) | ||
1613 | return err; | ||
1614 | |||
1475 | /* | 1615 | /* |
1476 | * If this is in a group, check if it can go on with all the | 1616 | * If this is in a group, check if it can go on with all the |
1477 | * other hardware events in the group. We assume the event | 1617 | * other hardware events in the group. We assume the event |
@@ -1511,6 +1651,13 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1511 | local64_set(&event->hw.period_left, event->hw.last_period); | 1651 | local64_set(&event->hw.period_left, event->hw.last_period); |
1512 | 1652 | ||
1513 | /* | 1653 | /* |
1654 | * For EBB events we just context switch the PMC value, we don't do any | ||
1655 | * of the sample_period logic. We use hw.prev_count for this. | ||
1656 | */ | ||
1657 | if (is_ebb_event(event)) | ||
1658 | local64_set(&event->hw.prev_count, 0); | ||
1659 | |||
1660 | /* | ||
1514 | * See if we need to reserve the PMU. | 1661 | * See if we need to reserve the PMU. |
1515 | * If no events are currently in use, then we have to take a | 1662 | * If no events are currently in use, then we have to take a |
1516 | * mutex to ensure that we don't race with another task doing | 1663 | * mutex to ensure that we don't race with another task doing |
@@ -1786,7 +1933,7 @@ static void power_pmu_setup(int cpu) | |||
1786 | cpuhw->mmcr[0] = MMCR0_FC; | 1933 | cpuhw->mmcr[0] = MMCR0_FC; |
1787 | } | 1934 | } |
1788 | 1935 | ||
1789 | static int __cpuinit | 1936 | static int |
1790 | power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | 1937 | power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) |
1791 | { | 1938 | { |
1792 | unsigned int cpu = (long)hcpu; | 1939 | unsigned int cpu = (long)hcpu; |
@@ -1803,7 +1950,7 @@ power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu | |||
1803 | return NOTIFY_OK; | 1950 | return NOTIFY_OK; |
1804 | } | 1951 | } |
1805 | 1952 | ||
1806 | int __cpuinit register_power_pmu(struct power_pmu *pmu) | 1953 | int register_power_pmu(struct power_pmu *pmu) |
1807 | { | 1954 | { |
1808 | if (ppmu) | 1955 | if (ppmu) |
1809 | return -EBUSY; /* something's already registered */ | 1956 | return -EBUSY; /* something's already registered */ |
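
ebb_event_check() above encodes the userspace contract: an EBB event must be attached to a task, its group leader must be pinned and exclusive, and inherit, sample_period, enable_on_exec and freq must all be zero. A hedged userspace sketch of opening such an event (the EBB bit position, bit 63 of the raw config, follows the "bit 63 of the event code" comment in is_ebb_event(); the helper itself is not from this patch):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_ebb_event(unsigned long long raw_event)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type      = PERF_TYPE_RAW;
	attr.size      = sizeof(attr);
	attr.config    = raw_event | (1ULL << 63);	/* request EBB */
	attr.pinned    = 1;	/* group leader must be pinned... */
	attr.exclusive = 1;	/* ...and exclusive */
	/* inherit, sample_period, enable_on_exec and freq must stay 0 */

	/* pid == 0, cpu == -1: per-task, so PERF_ATTACH_TASK is set */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
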
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4fff303..96a64d6a8bdf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c | |||
@@ -31,9 +31,9 @@ | |||
31 | * | 31 | * |
32 | * 60 56 52 48 44 40 36 32 | 32 | * 60 56 52 48 44 40 36 32 |
33 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | | 33 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | |
34 | * [ thresh_cmp ] [ thresh_ctl ] | 34 | * | [ thresh_cmp ] [ thresh_ctl ] |
35 | * | | 35 | * | | |
36 | * thresh start/stop OR FAB match -* | 36 | * *- EBB (Linux) thresh start/stop OR FAB match -* |
37 | * | 37 | * |
38 | * 28 24 20 16 12 8 4 0 | 38 | * 28 24 20 16 12 8 4 0 |
39 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | | 39 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | |
@@ -85,6 +85,7 @@ | |||
85 | * | 85 | * |
86 | */ | 86 | */ |
87 | 87 | ||
88 | #define EVENT_EBB_MASK 1ull | ||
88 | #define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ | 89 | #define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ |
89 | #define EVENT_THR_CMP_MASK 0x3ff | 90 | #define EVENT_THR_CMP_MASK 0x3ff |
90 | #define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ | 91 | #define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ |
@@ -109,6 +110,17 @@ | |||
109 | #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | 110 | #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) |
110 | #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ | 111 | #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ |
111 | 112 | ||
113 | #define EVENT_VALID_MASK \ | ||
114 | ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ | ||
115 | (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ | ||
116 | (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ | ||
117 | (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ | ||
118 | (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ | ||
119 | (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ | ||
120 | (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ | ||
121 | (EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT) | \ | ||
122 | EVENT_PSEL_MASK) | ||
123 | |||
112 | /* MMCRA IFM bits - POWER8 */ | 124 | /* MMCRA IFM bits - POWER8 */ |
113 | #define POWER8_MMCRA_IFM1 0x0000000040000000UL | 125 | #define POWER8_MMCRA_IFM1 0x0000000040000000UL |
114 | #define POWER8_MMCRA_IFM2 0x0000000080000000UL | 126 | #define POWER8_MMCRA_IFM2 0x0000000080000000UL |
@@ -130,10 +142,10 @@ | |||
130 | * | 142 | * |
131 | * 28 24 20 16 12 8 4 0 | 143 | * 28 24 20 16 12 8 4 0 |
132 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | | 144 | * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | |
133 | * [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] | 145 | * | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] |
134 | * | | | 146 | * EBB -* | | |
135 | * L1 I/D qualifier -* | Count of events for each PMC. | 147 | * | | Count of events for each PMC. |
136 | * | p1, p2, p3, p4, p5, p6. | 148 | * L1 I/D qualifier -* | p1, p2, p3, p4, p5, p6. |
137 | * nc - number of counters -* | 149 | * nc - number of counters -* |
138 | * | 150 | * |
139 | * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints | 151 | * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints |
@@ -149,6 +161,9 @@ | |||
149 | #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) | 161 | #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) |
150 | #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) | 162 | #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) |
151 | 163 | ||
164 | #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) | ||
165 | #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) | ||
166 | |||
152 | #define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) | 167 | #define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) |
153 | #define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) | 168 | #define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) |
154 | 169 | ||
@@ -207,14 +222,21 @@ static inline bool event_is_fab_match(u64 event) | |||
207 | 222 | ||
208 | static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) | 223 | static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) |
209 | { | 224 | { |
210 | unsigned int unit, pmc, cache; | 225 | unsigned int unit, pmc, cache, ebb; |
211 | unsigned long mask, value; | 226 | unsigned long mask, value; |
212 | 227 | ||
213 | mask = value = 0; | 228 | mask = value = 0; |
214 | 229 | ||
215 | pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; | 230 | if (event & ~EVENT_VALID_MASK) |
216 | unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; | 231 | return -1; |
217 | cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; | 232 | |
233 | pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; | ||
234 | unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; | ||
235 | cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; | ||
236 | ebb = (event >> EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; | ||
237 | |||
238 | /* Clear the EBB bit in the event, so event checks work below */ | ||
239 | event &= ~(EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT); | ||
218 | 240 | ||
219 | if (pmc) { | 241 | if (pmc) { |
220 | if (pmc > 6) | 242 | if (pmc > 6) |
@@ -284,6 +306,18 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long | |||
284 | value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); | 306 | value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); |
285 | } | 307 | } |
286 | 308 | ||
309 | if (!pmc && ebb) | ||
310 | /* EBB events must specify the PMC */ | ||
311 | return -1; | ||
312 | |||
313 | /* | ||
314 | * All events must agree on EBB, either all request it or none. | ||
315 | * EBB events are pinned & exclusive, so this should never actually | ||
316 | * hit, but we leave it as a fallback just in case. | ||
317 | */ | ||
318 | mask |= CNST_EBB_VAL(ebb); | ||
319 | value |= CNST_EBB_MASK; | ||
320 | |||
287 | *maskp = mask; | 321 | *maskp = mask; |
288 | *valp = value; | 322 | *valp = value; |
289 | 323 | ||
@@ -378,6 +412,10 @@ static int power8_compute_mmcr(u64 event[], int n_ev, | |||
378 | if (pmc_inuse & 0x7c) | 412 | if (pmc_inuse & 0x7c) |
379 | mmcr[0] |= MMCR0_PMCjCE; | 413 | mmcr[0] |= MMCR0_PMCjCE; |
380 | 414 | ||
415 | /* If we're not using PMC 5 or 6, freeze them */ | ||
416 | if (!(pmc_inuse & 0x60)) | ||
417 | mmcr[0] |= MMCR0_FC56; | ||
418 | |||
381 | mmcr[1] = mmcr1; | 419 | mmcr[1] = mmcr1; |
382 | mmcr[2] = mmcra; | 420 | mmcr[2] = mmcra; |
383 | 421 | ||
@@ -574,7 +612,7 @@ static struct power_pmu power8_pmu = { | |||
574 | .get_constraint = power8_get_constraint, | 612 | .get_constraint = power8_get_constraint, |
575 | .get_alternatives = power8_get_alternatives, | 613 | .get_alternatives = power8_get_alternatives, |
576 | .disable_pmc = power8_disable_pmc, | 614 | .disable_pmc = power8_disable_pmc, |
577 | .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB, | 615 | .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, |
578 | .n_generic = ARRAY_SIZE(power8_generic_events), | 616 | .n_generic = ARRAY_SIZE(power8_generic_events), |
579 | .generic_events = power8_generic_events, | 617 | .generic_events = power8_generic_events, |
580 | .attr_groups = power8_pmu_attr_groups, | 618 | .attr_groups = power8_pmu_attr_groups, |
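
The power8 constraint code above first rejects any event with bits outside EVENT_VALID_MASK, then decodes the remaining fields with the shifts and masks defined in this file. A small illustrative decoder using those same macros (the helper itself is a sketch, not kernel code):

static void decode_power8_event(u64 event)
{
	unsigned int pmc, unit, ebb, psel;

	if (event & ~EVENT_VALID_MASK) {
		pr_info("event has bits outside EVENT_VALID_MASK set\n");
		return;
	}

	pmc  = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
	unit = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
	ebb  = (event >> EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
	psel =  event                            & EVENT_PSEL_MASK;

	pr_info("pmc=%u unit=%u ebb=%u psel=0x%x\n", pmc, unit, ebb, psel);
}
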
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index ecd3890c40d7..7f1b71a01c6a 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c | |||
@@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void) | |||
91 | } | 91 | } |
92 | 92 | ||
93 | #ifdef CONFIG_SMP | 93 | #ifdef CONFIG_SMP |
94 | static void __cpuinit smp_ppc47x_setup_cpu(int cpu) | 94 | static void smp_ppc47x_setup_cpu(int cpu) |
95 | { | 95 | { |
96 | mpic_setup_this_cpu(); | 96 | mpic_setup_this_cpu(); |
97 | } | 97 | } |
98 | 98 | ||
99 | static int __cpuinit smp_ppc47x_kick_cpu(int cpu) | 99 | static int smp_ppc47x_kick_cpu(int cpu) |
100 | { | 100 | { |
101 | struct device_node *cpunode = of_get_cpu_node(cpu, NULL); | 101 | struct device_node *cpunode = of_get_cpu_node(cpu, NULL); |
102 | const u64 *spin_table_addr_prop; | 102 | const u64 *spin_table_addr_prop; |
@@ -176,13 +176,48 @@ static int __init ppc47x_probe(void) | |||
176 | return 1; | 176 | return 1; |
177 | } | 177 | } |
178 | 178 | ||
179 | static int board_rev = -1; | ||
180 | static int __init ppc47x_get_board_rev(void) | ||
181 | { | ||
182 | u8 fpga_reg0; | ||
183 | void *fpga; | ||
184 | struct device_node *np; | ||
185 | |||
186 | np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga"); | ||
187 | if (!np) | ||
188 | goto fail; | ||
189 | |||
190 | fpga = of_iomap(np, 0); | ||
191 | of_node_put(np); | ||
192 | if (!fpga) | ||
193 | goto fail; | ||
194 | |||
195 | fpga_reg0 = ioread8(fpga); | ||
196 | board_rev = fpga_reg0 & 0x03; | ||
197 | pr_info("%s: Found board revision %d\n", __func__, board_rev); | ||
198 | iounmap(fpga); | ||
199 | return 0; | ||
200 | |||
201 | fail: | ||
202 | pr_info("%s: Unable to find board revision\n", __func__); | ||
203 | return 0; | ||
204 | } | ||
205 | machine_arch_initcall(ppc47x, ppc47x_get_board_rev); | ||
206 | |||
179 | /* The USB controller should have been hardware swizzled but it wasn't :( */ | 207 | /* The USB controller should have been hardware swizzled but it wasn't :( */
180 | static void ppc47x_pci_irq_fixup(struct pci_dev *dev) | 208 | static void ppc47x_pci_irq_fixup(struct pci_dev *dev) |
181 | { | 209 | { |
182 | if (dev->vendor == 0x1033 && (dev->device == 0x0035 || | 210 | if (dev->vendor == 0x1033 && (dev->device == 0x0035 || |
183 | dev->device == 0x00e0)) { | 211 | dev->device == 0x00e0)) { |
184 | dev->irq = irq_create_mapping(NULL, 47); | 212 | if (board_rev == 0) { |
185 | pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq); | 213 | dev->irq = irq_create_mapping(NULL, 47); |
214 | pr_info("%s: Mapping irq %d\n", __func__, dev->irq); | ||
215 | } else if (board_rev == 2) { | ||
216 | dev->irq = irq_create_mapping(NULL, 49); | ||
217 | pr_info("%s: Mapping irq %d\n", __func__, dev->irq); | ||
218 | } else { | ||
219 | pr_alert("%s: Unknown board revision\n", __func__); | ||
220 | } | ||
186 | } | 221 | } |
187 | } | 222 | } |
188 | 223 | ||
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index a28a8629727e..4241bc825800 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c | |||
@@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | #ifdef CONFIG_SMP | 83 | #ifdef CONFIG_SMP |
84 | static void __cpuinit smp_iss4xx_setup_cpu(int cpu) | 84 | static void smp_iss4xx_setup_cpu(int cpu) |
85 | { | 85 | { |
86 | mpic_setup_this_cpu(); | 86 | mpic_setup_this_cpu(); |
87 | } | 87 | } |
88 | 88 | ||
89 | static int __cpuinit smp_iss4xx_kick_cpu(int cpu) | 89 | static int smp_iss4xx_kick_cpu(int cpu) |
90 | { | 90 | { |
91 | struct device_node *cpunode = of_get_cpu_node(cpu, NULL); | 91 | struct device_node *cpunode = of_get_cpu_node(cpu, NULL); |
92 | const u64 *spin_table_addr_prop; | 92 | const u64 *spin_table_addr_prop; |
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 0a134e0469ef..3e90ece10ae9 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c | |||
@@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void) | |||
43 | mpc83xx_add_bridge(np); | 43 | mpc83xx_add_bridge(np); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) | 46 | mpc512x_setup_arch(); |
47 | mpc512x_setup_diu(); | ||
48 | #endif | ||
49 | } | 47 | } |
50 | 48 | ||
51 | static void __init mpc5121_ads_init_IRQ(void) | 49 | static void __init mpc5121_ads_init_IRQ(void) |
@@ -69,7 +67,7 @@ define_machine(mpc5121_ads) { | |||
69 | .probe = mpc5121_ads_probe, | 67 | .probe = mpc5121_ads_probe, |
70 | .setup_arch = mpc5121_ads_setup_arch, | 68 | .setup_arch = mpc5121_ads_setup_arch, |
71 | .init = mpc512x_init, | 69 | .init = mpc512x_init, |
72 | .init_early = mpc512x_init_diu, | 70 | .init_early = mpc512x_init_early, |
73 | .init_IRQ = mpc5121_ads_init_IRQ, | 71 | .init_IRQ = mpc5121_ads_init_IRQ, |
74 | .get_irq = ipic_get_irq, | 72 | .get_irq = ipic_get_irq, |
75 | .calibrate_decr = generic_calibrate_decr, | 73 | .calibrate_decr = generic_calibrate_decr, |
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index 0a8e60023944..cc97f022d028 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h | |||
@@ -12,18 +12,12 @@ | |||
12 | #ifndef __MPC512X_H__ | 12 | #ifndef __MPC512X_H__ |
13 | #define __MPC512X_H__ | 13 | #define __MPC512X_H__ |
14 | extern void __init mpc512x_init_IRQ(void); | 14 | extern void __init mpc512x_init_IRQ(void); |
15 | extern void __init mpc512x_init_early(void); | ||
15 | extern void __init mpc512x_init(void); | 16 | extern void __init mpc512x_init(void); |
17 | extern void __init mpc512x_setup_arch(void); | ||
16 | extern int __init mpc5121_clk_init(void); | 18 | extern int __init mpc5121_clk_init(void); |
17 | void __init mpc512x_declare_of_platform_devices(void); | ||
18 | extern const char *mpc512x_select_psc_compat(void); | 19 | extern const char *mpc512x_select_psc_compat(void); |
20 | extern const char *mpc512x_select_reset_compat(void); | ||
19 | extern void mpc512x_restart(char *cmd); | 21 | extern void mpc512x_restart(char *cmd); |
20 | 22 | ||
21 | #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) | ||
22 | void mpc512x_init_diu(void); | ||
23 | void mpc512x_setup_diu(void); | ||
24 | #else | ||
25 | #define mpc512x_init_diu NULL | ||
26 | #define mpc512x_setup_diu NULL | ||
27 | #endif | ||
28 | |||
29 | #endif /* __MPC512X_H__ */ | 23 | #endif /* __MPC512X_H__ */ |
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 5fb919b30924..ce71408781a0 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c | |||
@@ -45,8 +45,8 @@ define_machine(mpc512x_generic) { | |||
45 | .name = "MPC512x generic", | 45 | .name = "MPC512x generic", |
46 | .probe = mpc512x_generic_probe, | 46 | .probe = mpc512x_generic_probe, |
47 | .init = mpc512x_init, | 47 | .init = mpc512x_init, |
48 | .init_early = mpc512x_init_diu, | 48 | .init_early = mpc512x_init_early, |
49 | .setup_arch = mpc512x_setup_diu, | 49 | .setup_arch = mpc512x_setup_arch, |
50 | .init_IRQ = mpc512x_init_IRQ, | 50 | .init_IRQ = mpc512x_init_IRQ, |
51 | .get_irq = ipic_get_irq, | 51 | .get_irq = ipic_get_irq, |
52 | .calibrate_decr = generic_calibrate_decr, | 52 | .calibrate_decr = generic_calibrate_decr, |
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 6eb94ab99d39..a82a41b4fd91 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c | |||
@@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base; | |||
35 | static void __init mpc512x_restart_init(void) | 35 | static void __init mpc512x_restart_init(void) |
36 | { | 36 | { |
37 | struct device_node *np; | 37 | struct device_node *np; |
38 | const char *reset_compat; | ||
38 | 39 | ||
39 | np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset"); | 40 | reset_compat = mpc512x_select_reset_compat(); |
41 | np = of_find_compatible_node(NULL, NULL, reset_compat); | ||
40 | if (!np) | 42 | if (!np) |
41 | return; | 43 | return; |
42 | 44 | ||
@@ -58,7 +60,7 @@ void mpc512x_restart(char *cmd) | |||
58 | ; | 60 | ; |
59 | } | 61 | } |
60 | 62 | ||
61 | #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) | 63 | #if IS_ENABLED(CONFIG_FB_FSL_DIU) |
62 | 64 | ||
63 | struct fsl_diu_shared_fb { | 65 | struct fsl_diu_shared_fb { |
64 | u8 gamma[0x300]; /* 32-bit aligned! */ | 66 | u8 gamma[0x300]; /* 32-bit aligned! */ |
@@ -355,6 +357,17 @@ const char *mpc512x_select_psc_compat(void) | |||
355 | return NULL; | 357 | return NULL; |
356 | } | 358 | } |
357 | 359 | ||
360 | const char *mpc512x_select_reset_compat(void) | ||
361 | { | ||
362 | if (of_machine_is_compatible("fsl,mpc5121")) | ||
363 | return "fsl,mpc5121-reset"; | ||
364 | |||
365 | if (of_machine_is_compatible("fsl,mpc5125")) | ||
366 | return "fsl,mpc5125-reset"; | ||
367 | |||
368 | return NULL; | ||
369 | } | ||
370 | |||
358 | static unsigned int __init get_fifo_size(struct device_node *np, | 371 | static unsigned int __init get_fifo_size(struct device_node *np, |
359 | char *prop_name) | 372 | char *prop_name) |
360 | { | 373 | { |
@@ -436,14 +449,26 @@ void __init mpc512x_psc_fifo_init(void) | |||
436 | } | 449 | } |
437 | } | 450 | } |
438 | 451 | ||
452 | void __init mpc512x_init_early(void) | ||
453 | { | ||
454 | mpc512x_restart_init(); | ||
455 | if (IS_ENABLED(CONFIG_FB_FSL_DIU)) | ||
456 | mpc512x_init_diu(); | ||
457 | } | ||
458 | |||
439 | void __init mpc512x_init(void) | 459 | void __init mpc512x_init(void) |
440 | { | 460 | { |
441 | mpc5121_clk_init(); | 461 | mpc5121_clk_init(); |
442 | mpc512x_declare_of_platform_devices(); | 462 | mpc512x_declare_of_platform_devices(); |
443 | mpc512x_restart_init(); | ||
444 | mpc512x_psc_fifo_init(); | 463 | mpc512x_psc_fifo_init(); |
445 | } | 464 | } |
446 | 465 | ||
466 | void __init mpc512x_setup_arch(void) | ||
467 | { | ||
468 | if (IS_ENABLED(CONFIG_FB_FSL_DIU)) | ||
469 | mpc512x_setup_diu(); | ||
470 | } | ||
471 | |||
447 | /** | 472 | /** |
448 | * mpc512x_cs_config - Setup chip select configuration | 473 | * mpc512x_cs_config - Setup chip select configuration |
449 | * @cs: chip select number | 474 | * @cs: chip select number |
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 0575e858291c..24b314d7bd5f 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c | |||
@@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void) | |||
119 | define_machine(pdm360ng) { | 119 | define_machine(pdm360ng) { |
120 | .name = "PDM360NG", | 120 | .name = "PDM360NG", |
121 | .probe = pdm360ng_probe, | 121 | .probe = pdm360ng_probe, |
122 | .setup_arch = mpc512x_setup_diu, | 122 | .setup_arch = mpc512x_setup_arch, |
123 | .init = pdm360ng_init, | 123 | .init = pdm360ng_init, |
124 | .init_early = mpc512x_init_diu, | 124 | .init_early = mpc512x_init_early, |
125 | .init_IRQ = mpc512x_init_IRQ, | 125 | .init_IRQ = mpc512x_init_IRQ, |
126 | .get_irq = ipic_get_irq, | 126 | .get_irq = ipic_get_irq, |
127 | .calibrate_decr = generic_calibrate_decr, | 127 | .calibrate_decr = generic_calibrate_decr, |
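
The mpc512x rework above replaces the #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) guards with IS_ENABLED(CONFIG_FB_FSL_DIU), which covers both =y and =m while keeping the guarded code visible to the compiler. The shape of the idiom, as a sketch:

void init_optional_feature(void)
{
	/*
	 * IS_ENABLED(CONFIG_FOO) is 1 when CONFIG_FOO=y or =m, 0 otherwise.
	 * Unlike #ifdef, the untaken branch is still parsed and
	 * type-checked before being discarded as dead code, so a
	 * declaration of mpc512x_init_diu() must stay visible either way.
	 */
	if (IS_ENABLED(CONFIG_FB_FSL_DIU))
		mpc512x_init_diu();
}
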
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 624cb51d19c9..7bc315822935 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | |||
@@ -231,17 +231,7 @@ static struct i2c_driver mcu_driver = { | |||
231 | .id_table = mcu_ids, | 231 | .id_table = mcu_ids, |
232 | }; | 232 | }; |
233 | 233 | ||
234 | static int __init mcu_init(void) | 234 | module_i2c_driver(mcu_driver); |
235 | { | ||
236 | return i2c_add_driver(&mcu_driver); | ||
237 | } | ||
238 | module_init(mcu_init); | ||
239 | |||
240 | static void __exit mcu_exit(void) | ||
241 | { | ||
242 | i2c_del_driver(&mcu_driver); | ||
243 | } | ||
244 | module_exit(mcu_exit); | ||
245 | 235 | ||
246 | MODULE_DESCRIPTION("Power Management and GPIO expander driver for " | 236 | MODULE_DESCRIPTION("Power Management and GPIO expander driver for " |
247 | "MPC8349E-mITX-compatible MCU"); | 237 | "MPC8349E-mITX-compatible MCU"); |
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c index 753a42c29d4d..39cfa4044e6c 100644 --- a/arch/powerpc/platforms/85xx/p5020_ds.c +++ b/arch/powerpc/platforms/85xx/p5020_ds.c | |||
@@ -75,12 +75,7 @@ define_machine(p5020_ds) { | |||
75 | #ifdef CONFIG_PCI | 75 | #ifdef CONFIG_PCI |
76 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, | 76 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, |
77 | #endif | 77 | #endif |
78 | /* coreint doesn't play nice with lazy EE, use legacy mpic for now */ | ||
79 | #ifdef CONFIG_PPC64 | ||
80 | .get_irq = mpic_get_irq, | ||
81 | #else | ||
82 | .get_irq = mpic_get_coreint_irq, | 78 | .get_irq = mpic_get_coreint_irq, |
83 | #endif | ||
84 | .restart = fsl_rstcr_restart, | 79 | .restart = fsl_rstcr_restart, |
85 | .calibrate_decr = generic_calibrate_decr, | 80 | .calibrate_decr = generic_calibrate_decr, |
86 | .progress = udbg_progress, | 81 | .progress = udbg_progress, |
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c index 11381851828e..f70e74cddf97 100644 --- a/arch/powerpc/platforms/85xx/p5040_ds.c +++ b/arch/powerpc/platforms/85xx/p5040_ds.c | |||
@@ -66,12 +66,7 @@ define_machine(p5040_ds) { | |||
66 | #ifdef CONFIG_PCI | 66 | #ifdef CONFIG_PCI |
67 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, | 67 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, |
68 | #endif | 68 | #endif |
69 | /* coreint doesn't play nice with lazy EE, use legacy mpic for now */ | ||
70 | #ifdef CONFIG_PPC64 | ||
71 | .get_irq = mpic_get_irq, | ||
72 | #else | ||
73 | .get_irq = mpic_get_coreint_irq, | 69 | .get_irq = mpic_get_coreint_irq, |
74 | #endif | ||
75 | .restart = fsl_rstcr_restart, | 70 | .restart = fsl_rstcr_restart, |
76 | .calibrate_decr = generic_calibrate_decr, | 71 | .calibrate_decr = generic_calibrate_decr, |
77 | .progress = udbg_progress, | 72 | .progress = udbg_progress, |
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 6a1759939c6b..5ced4f5bb2b2 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c | |||
@@ -99,7 +99,7 @@ static void mpc85xx_take_timebase(void) | |||
99 | } | 99 | } |
100 | 100 | ||
101 | #ifdef CONFIG_HOTPLUG_CPU | 101 | #ifdef CONFIG_HOTPLUG_CPU |
102 | static void __cpuinit smp_85xx_mach_cpu_die(void) | 102 | static void smp_85xx_mach_cpu_die(void) |
103 | { | 103 | { |
104 | unsigned int cpu = smp_processor_id(); | 104 | unsigned int cpu = smp_processor_id(); |
105 | u32 tmp; | 105 | u32 tmp; |
@@ -141,7 +141,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table) | |||
141 | return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); | 141 | return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); |
142 | } | 142 | } |
143 | 143 | ||
144 | static int __cpuinit smp_85xx_kick_cpu(int nr) | 144 | static int smp_85xx_kick_cpu(int nr) |
145 | { | 145 | { |
146 | unsigned long flags; | 146 | unsigned long flags; |
147 | const u64 *cpu_rel_addr; | 147 | const u64 *cpu_rel_addr; |
@@ -362,7 +362,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) | |||
362 | } | 362 | } |
363 | #endif /* CONFIG_KEXEC */ | 363 | #endif /* CONFIG_KEXEC */ |
364 | 364 | ||
365 | static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) | 365 | static void smp_85xx_setup_cpu(int cpu_nr) |
366 | { | 366 | { |
367 | if (smp_85xx_ops.probe == smp_mpic_probe) | 367 | if (smp_85xx_ops.probe == smp_mpic_probe) |
368 | mpic_setup_this_cpu(); | 368 | mpic_setup_this_cpu(); |
diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c index 5998e9f33304..91ead6b1b8af 100644 --- a/arch/powerpc/platforms/85xx/t4240_qds.c +++ b/arch/powerpc/platforms/85xx/t4240_qds.c | |||
@@ -75,12 +75,7 @@ define_machine(t4240_qds) { | |||
75 | #ifdef CONFIG_PCI | 75 | #ifdef CONFIG_PCI |
76 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, | 76 | .pcibios_fixup_bus = fsl_pcibios_fixup_bus, |
77 | #endif | 77 | #endif |
78 | /* coreint doesn't play nice with lazy EE, use legacy mpic for now */ | ||
79 | #ifdef CONFIG_PPC64 | ||
80 | .get_irq = mpic_get_irq, | ||
81 | #else | ||
82 | .get_irq = mpic_get_coreint_irq, | 78 | .get_irq = mpic_get_coreint_irq, |
83 | #endif | ||
84 | .restart = fsl_rstcr_restart, | 79 | .restart = fsl_rstcr_restart, |
85 | .calibrate_decr = generic_calibrate_decr, | 80 | .calibrate_decr = generic_calibrate_decr, |
86 | .progress = udbg_progress, | 81 | .progress = udbg_progress, |
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 1e121088826f..587a2828b06c 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c | |||
@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) | |||
43 | 43 | ||
44 | static struct irqaction tbint_irqaction = { | 44 | static struct irqaction tbint_irqaction = { |
45 | .handler = timebase_interrupt, | 45 | .handler = timebase_interrupt, |
46 | .flags = IRQF_NO_THREAD, | ||
46 | .name = "tbint", | 47 | .name = "tbint", |
47 | }; | 48 | }; |
48 | 49 | ||
@@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd) | |||
218 | 219 | ||
219 | static void cpm_cascade(unsigned int irq, struct irq_desc *desc) | 220 | static void cpm_cascade(unsigned int irq, struct irq_desc *desc) |
220 | { | 221 | { |
221 | struct irq_chip *chip; | 222 | struct irq_chip *chip = irq_desc_get_chip(desc); |
222 | int cascade_irq; | 223 | int cascade_irq = cpm_get_irq(); |
223 | |||
224 | if ((cascade_irq = cpm_get_irq()) >= 0) { | ||
225 | struct irq_desc *cdesc = irq_to_desc(cascade_irq); | ||
226 | 224 | ||
225 | if (cascade_irq >= 0) | ||
227 | generic_handle_irq(cascade_irq); | 226 | generic_handle_irq(cascade_irq); |
228 | 227 | ||
229 | chip = irq_desc_get_chip(cdesc); | ||
230 | chip->irq_eoi(&cdesc->irq_data); | ||
231 | } | ||
232 | |||
233 | chip = irq_desc_get_chip(desc); | ||
234 | chip->irq_eoi(&desc->irq_data); | 228 | chip->irq_eoi(&desc->irq_data); |
235 | } | 229 | } |
236 | 230 | ||
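
The rewritten cpm_cascade() follows the standard chained-handler shape: decode the child interrupt, dispatch it with generic_handle_irq(), and EOI only the parent descriptor (the old code also EOI'd the child's descriptor). The generic pattern, sketched with a hypothetical hardware query standing in for cpm_get_irq():

/* Hypothetical secondary-PIC query; returns a mapped irq or -1 */
static int read_child_irq(void)
{
	return -1;
}

static void example_cascade(unsigned int irq, struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	int child = read_child_irq();

	if (child >= 0)
		generic_handle_irq(child);

	/* EOI the parent only; the child needs no EOI of its own here */
	chip->irq_eoi(&desc->irq_data);
}
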
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e17cdfc5ba40..d703775bda30 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig | |||
@@ -86,6 +86,27 @@ config MPIC | |||
86 | bool | 86 | bool |
87 | default n | 87 | default n |
88 | 88 | ||
89 | config MPIC_TIMER | ||
90 | bool "MPIC Global Timer" | ||
91 | depends on MPIC && FSL_SOC | ||
92 | default n | ||
93 | help | ||
94 | The MPIC global timer is a hardware timer inside the | ||
95 | Freescale PIC complying with OpenPIC standard. When the | ||
96 | specified interval times out, the hardware timer generates | ||
97 | an interrupt. The driver is currently only tested on Freescale | ||
98 | chips, but it can potentially support other global timers | ||
99 | complying with the OpenPIC standard. | ||
100 | |||
101 | config FSL_MPIC_TIMER_WAKEUP | ||
102 | tristate "Freescale MPIC global timer wakeup driver" | ||
103 | depends on FSL_SOC && MPIC_TIMER && PM | ||
104 | default n | ||
105 | help | ||
106 | The driver provides a way to wake up the system by the | ||
107 | MPIC timer. | ||
108 | e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup" | ||
109 | |||
89 | config PPC_EPAPR_HV_PIC | 110 | config PPC_EPAPR_HV_PIC |
90 | bool | 111 | bool |
91 | default n | 112 | default n |
@@ -164,6 +185,11 @@ config IBMEBUS | |||
164 | help | 185 | help |
165 | Bus device driver for GX bus based adapters. | 186 | Bus device driver for GX bus based adapters. |
166 | 187 | ||
188 | config EEH | ||
189 | bool | ||
190 | depends on (PPC_POWERNV || PPC_PSERIES) && PCI | ||
191 | default y | ||
192 | |||
167 | config PPC_MPC106 | 193 | config PPC_MPC106 |
168 | bool | 194 | bool |
169 | default n | 195 | default n |
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7819c40a6bc3..47d9a03dd415 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64 | |||
71 | select PPC_FPU | 71 | select PPC_FPU |
72 | select PPC_HAVE_PMU_SUPPORT | 72 | select PPC_HAVE_PMU_SUPPORT |
73 | select SYS_SUPPORTS_HUGETLBFS | 73 | select SYS_SUPPORTS_HUGETLBFS |
74 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES | ||
74 | 75 | ||
75 | config PPC_BOOK3E_64 | 76 | config PPC_BOOK3E_64 |
76 | bool "Embedded processors" | 77 | bool "Embedded processors" |
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 246e1d8b3af3..c34ee4e60873 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c | |||
@@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void) | |||
185 | static long beat_lpar_hpte_updatepp(unsigned long slot, | 185 | static long beat_lpar_hpte_updatepp(unsigned long slot, |
186 | unsigned long newpp, | 186 | unsigned long newpp, |
187 | unsigned long vpn, | 187 | unsigned long vpn, |
188 | int psize, int ssize, int local) | 188 | int psize, int apsize, |
189 | int ssize, int local) | ||
189 | { | 190 | { |
190 | unsigned long lpar_rc; | 191 | unsigned long lpar_rc; |
191 | u64 dummy0, dummy1; | 192 | u64 dummy0, dummy1; |
@@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, | |||
274 | } | 275 | } |
275 | 276 | ||
276 | static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, | 277 | static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, |
277 | int psize, int ssize, int local) | 278 | int psize, int apsize, |
279 | int ssize, int local) | ||
278 | { | 280 | { |
279 | unsigned long want_v; | 281 | unsigned long want_v; |
280 | unsigned long lpar_rc; | 282 | unsigned long lpar_rc; |
@@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, | |||
364 | * already zero. For now I am paranoid. | 366 | * already zero. For now I am paranoid. |
365 | */ | 367 | */ |
366 | static long beat_lpar_hpte_updatepp_v3(unsigned long slot, | 368 | static long beat_lpar_hpte_updatepp_v3(unsigned long slot, |
367 | unsigned long newpp, | 369 | unsigned long newpp, |
368 | unsigned long vpn, | 370 | unsigned long vpn, |
369 | int psize, int ssize, int local) | 371 | int psize, int apsize, |
372 | int ssize, int local) | ||
370 | { | 373 | { |
371 | unsigned long lpar_rc; | 374 | unsigned long lpar_rc; |
372 | unsigned long want_v; | 375 | unsigned long want_v; |
@@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, | |||
394 | } | 397 | } |
395 | 398 | ||
396 | static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, | 399 | static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, |
397 | int psize, int ssize, int local) | 400 | int psize, int apsize, |
401 | int ssize, int local) | ||
398 | { | 402 | { |
399 | unsigned long want_v; | 403 | unsigned long want_v; |
400 | unsigned long lpar_rc; | 404 | unsigned long lpar_rc; |
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index d35dbbc8ec79..f75f6fcac729 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c | |||
@@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) | |||
142 | * during boot if the user requests it. Odd-numbered | 142 | * during boot if the user requests it. Odd-numbered |
143 | * cpus are assumed to be secondary threads. | 143 | * cpus are assumed to be secondary threads. |
144 | */ | 144 | */ |
145 | if (system_state < SYSTEM_RUNNING && | 145 | if (system_state == SYSTEM_BOOTING && |
146 | cpu_has_feature(CPU_FTR_SMT) && | 146 | cpu_has_feature(CPU_FTR_SMT) && |
147 | !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) | 147 | !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) |
148 | return 0; | 148 | return 0; |
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdb738a69e41..49c9f9501c21 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c | |||
@@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self, | |||
885 | return NOTIFY_OK; | 885 | return NOTIFY_OK; |
886 | } | 886 | } |
887 | 887 | ||
888 | static struct notifier_block __cpuinitdata smp_core99_cpu_nb = { | 888 | static struct notifier_block smp_core99_cpu_nb = { |
889 | .notifier_call = smp_core99_cpu_notify, | 889 | .notifier_call = smp_core99_cpu_notify, |
890 | }; | 890 | }; |
891 | #endif /* CONFIG_HOTPLUG_CPU */ | 891 | #endif /* CONFIG_HOTPLUG_CPU */ |
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bcc3cb48a44e..7fe595152478 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile | |||
@@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o | |||
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
5 | obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o | 5 | obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o |
6 | obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o | ||
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c new file mode 100644 index 000000000000..0cd1c4a71755 --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c | |||
@@ -0,0 +1,916 @@ | |||
1 | /* | ||
2 | * This file implements the functions needed by EEH on IODA-compliant | ||
3 | * chips. Most of the EEH functionality here is built on top of the | ||
4 | * OPAL APIs. | ||
5 | * | ||
6 | * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/bootmem.h> | ||
15 | #include <linux/debugfs.h> | ||
16 | #include <linux/delay.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/io.h> | ||
19 | #include <linux/irq.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/msi.h> | ||
22 | #include <linux/notifier.h> | ||
23 | #include <linux/pci.h> | ||
24 | #include <linux/string.h> | ||
25 | |||
26 | #include <asm/eeh.h> | ||
27 | #include <asm/eeh_event.h> | ||
28 | #include <asm/io.h> | ||
29 | #include <asm/iommu.h> | ||
30 | #include <asm/msi_bitmap.h> | ||
31 | #include <asm/opal.h> | ||
32 | #include <asm/pci-bridge.h> | ||
33 | #include <asm/ppc-pci.h> | ||
34 | #include <asm/tce.h> | ||
35 | |||
36 | #include "powernv.h" | ||
37 | #include "pci.h" | ||
38 | |||
39 | /* Debugging option */ | ||
40 | #ifdef IODA_EEH_DBG_ON | ||
41 | #define IODA_EEH_DBG(args...) pr_info(args) | ||
42 | #else | ||
43 | #define IODA_EEH_DBG(args...) | ||
44 | #endif | ||
45 | |||
46 | static char *hub_diag = NULL; | ||
47 | static int ioda_eeh_nb_init = 0; | ||
48 | |||
49 | static int ioda_eeh_event(struct notifier_block *nb, | ||
50 | unsigned long events, void *change) | ||
51 | { | ||
52 | uint64_t changed_evts = (uint64_t)change; | ||
53 | |||
54 | /* We simply send special EEH event */ | ||
55 | if ((changed_evts & OPAL_EVENT_PCI_ERROR) && | ||
56 | (events & OPAL_EVENT_PCI_ERROR)) | ||
57 | eeh_send_failure_event(NULL); | ||
58 | |||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static struct notifier_block ioda_eeh_nb = { | ||
63 | .notifier_call = ioda_eeh_event, | ||
64 | .next = NULL, | ||
65 | .priority = 0 | ||
66 | }; | ||
67 | |||
68 | #ifdef CONFIG_DEBUG_FS | ||
69 | static int ioda_eeh_dbgfs_set(void *data, u64 val) | ||
70 | { | ||
71 | struct pci_controller *hose = data; | ||
72 | struct pnv_phb *phb = hose->private_data; | ||
73 | |||
74 | out_be64(phb->regs + 0xD10, val); | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static int ioda_eeh_dbgfs_get(void *data, u64 *val) | ||
79 | { | ||
80 | struct pci_controller *hose = data; | ||
81 | struct pnv_phb *phb = hose->private_data; | ||
82 | |||
83 | *val = in_be64(phb->regs + 0xD10); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get, | ||
88 | ioda_eeh_dbgfs_set, "0x%llx\n"); | ||
89 | #endif /* CONFIG_DEBUG_FS */ | ||
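/*
 * Illustration (an editorial sketch, not part of the patch): with the
 * "err_injct" file created from ioda_eeh_post_init() below, and
 * assuming debugfs is mounted at /sys/kernel/debug with the powerpc
 * root named "powerpc" and the PHB directory named "PCI%04x" as in
 * pnv_pci_ioda_create_dbgfs() further down, an error could be injected
 * from user space with, e.g.:
 *
 *	echo 0x8000000000000000 > /sys/kernel/debug/powerpc/PCI0000/err_injct
 *
 * The value is written straight to PHB register 0xD10 by
 * ioda_eeh_dbgfs_set(); the bit layout of that register is chip
 * specific and the value above is a placeholder.
 */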
90 | |||
91 | /** | ||
92 | * ioda_eeh_post_init - Chip dependent post initialization | ||
93 | * @hose: PCI controller | ||
94 | * | ||
95 | * The function is called after the EEH PEs and devices | ||
96 | * have been built, which means EEH is ready to supply | ||
97 | * service, including the I/O address cache. | ||
98 | */ | ||
99 | static int ioda_eeh_post_init(struct pci_controller *hose) | ||
100 | { | ||
101 | struct pnv_phb *phb = hose->private_data; | ||
102 | int ret; | ||
103 | |||
104 | /* Register OPAL event notifier */ | ||
105 | if (!ioda_eeh_nb_init) { | ||
106 | ret = opal_notifier_register(&ioda_eeh_nb); | ||
107 | if (ret) { | ||
108 | pr_err("%s: Can't register OPAL event notifier (%d)\n", | ||
109 | __func__, ret); | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | ioda_eeh_nb_init = 1; | ||
114 | } | ||
115 | |||
116 | /* FIXME: Enable it for PHB3 later */ | ||
117 | if (phb->type == PNV_PHB_IODA1) { | ||
118 | if (!hub_diag) { | ||
119 | hub_diag = (char *)__get_free_page(GFP_KERNEL | | ||
120 | __GFP_ZERO); | ||
121 | if (!hub_diag) { | ||
122 | pr_err("%s: Out of memory !\n", | ||
123 | __func__); | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | #ifdef CONFIG_DEBUG_FS | ||
129 | if (phb->dbgfs) | ||
130 | debugfs_create_file("err_injct", 0600, | ||
131 | phb->dbgfs, hose, | ||
132 | &ioda_eeh_dbgfs_ops); | ||
133 | #endif | ||
134 | |||
135 | phb->eeh_state |= PNV_EEH_STATE_ENABLED; | ||
136 | } | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /** | ||
142 | * ioda_eeh_set_option - Set EEH operation or I/O setting | ||
143 | * @pe: EEH PE | ||
144 | * @option: options | ||
145 | * | ||
146 | * Enable or disable the EEH option for the indicated PE. The | ||
147 | * function can also be used to enable I/O or DMA for the | ||
148 | * PE. | ||
149 | */ | ||
150 | static int ioda_eeh_set_option(struct eeh_pe *pe, int option) | ||
151 | { | ||
152 | s64 ret; | ||
153 | u32 pe_no; | ||
154 | struct pci_controller *hose = pe->phb; | ||
155 | struct pnv_phb *phb = hose->private_data; | ||
156 | |||
157 | /* Check on PE number */ | ||
158 | if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { | ||
159 | pr_err("%s: PE address %x out of range [0, %x] " | ||
160 | "on PHB#%x\n", | ||
161 | __func__, pe->addr, phb->ioda.total_pe, | ||
162 | hose->global_number); | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | pe_no = pe->addr; | ||
167 | switch (option) { | ||
168 | case EEH_OPT_DISABLE: | ||
169 | ret = -EEXIST; | ||
170 | break; | ||
171 | case EEH_OPT_ENABLE: | ||
172 | ret = 0; | ||
173 | break; | ||
174 | case EEH_OPT_THAW_MMIO: | ||
175 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, | ||
176 | OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); | ||
177 | if (ret) { | ||
178 | pr_warning("%s: Failed to enable MMIO for " | ||
179 | "PHB#%x-PE#%x, err=%lld\n", | ||
180 | __func__, hose->global_number, pe_no, ret); | ||
181 | return -EIO; | ||
182 | } | ||
183 | |||
184 | break; | ||
185 | case EEH_OPT_THAW_DMA: | ||
186 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, | ||
187 | OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); | ||
188 | if (ret) { | ||
189 | pr_warning("%s: Failed to enable DMA for " | ||
190 | "PHB#%x-PE#%x, err=%lld\n", | ||
191 | __func__, hose->global_number, pe_no, ret); | ||
192 | return -EIO; | ||
193 | } | ||
194 | |||
195 | break; | ||
196 | default: | ||
197 | pr_warning("%s: Invalid option %d\n", __func__, option); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | return ret; | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * ioda_eeh_get_state - Retrieve the state of PE | ||
206 | * @pe: EEH PE | ||
207 | * | ||
208 | * The PE's state should be retrieved from the IODA PEEV and | ||
209 | * PEST tables. Since OPAL exports a call for exactly that, | ||
210 | * we use it. | ||
211 | */ | ||
212 | static int ioda_eeh_get_state(struct eeh_pe *pe) | ||
213 | { | ||
214 | s64 ret = 0; | ||
215 | u8 fstate; | ||
216 | u16 pcierr; | ||
217 | u32 pe_no; | ||
218 | int result; | ||
219 | struct pci_controller *hose = pe->phb; | ||
220 | struct pnv_phb *phb = hose->private_data; | ||
221 | |||
222 | /* | ||
223 | * Sanity check on PE address. The PHB PE address should | ||
224 | * be zero. | ||
225 | */ | ||
226 | if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { | ||
227 | pr_err("%s: PE address %x out of range [0, %x] " | ||
228 | "on PHB#%x\n", | ||
229 | __func__, pe->addr, phb->ioda.total_pe, | ||
230 | hose->global_number); | ||
231 | return EEH_STATE_NOT_SUPPORT; | ||
232 | } | ||
233 | |||
234 | /* Retrieve PE status through OPAL */ | ||
235 | pe_no = pe->addr; | ||
236 | ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, | ||
237 | &fstate, &pcierr, NULL); | ||
238 | if (ret) { | ||
239 | pr_err("%s: Failed to get EEH status on " | ||
240 | "PHB#%x-PE#%x\n, err=%lld\n", | ||
241 | __func__, hose->global_number, pe_no, ret); | ||
242 | return EEH_STATE_NOT_SUPPORT; | ||
243 | } | ||
244 | |||
245 | /* Check PHB status */ | ||
246 | if (pe->type & EEH_PE_PHB) { | ||
247 | result = 0; | ||
248 | result &= ~EEH_STATE_RESET_ACTIVE; | ||
249 | |||
250 | if (pcierr != OPAL_EEH_PHB_ERROR) { | ||
251 | result |= EEH_STATE_MMIO_ACTIVE; | ||
252 | result |= EEH_STATE_DMA_ACTIVE; | ||
253 | result |= EEH_STATE_MMIO_ENABLED; | ||
254 | result |= EEH_STATE_DMA_ENABLED; | ||
255 | } | ||
256 | |||
257 | return result; | ||
258 | } | ||
259 | |||
260 | /* Parse result out */ | ||
261 | result = 0; | ||
262 | switch (fstate) { | ||
263 | case OPAL_EEH_STOPPED_NOT_FROZEN: | ||
264 | result &= ~EEH_STATE_RESET_ACTIVE; | ||
265 | result |= EEH_STATE_MMIO_ACTIVE; | ||
266 | result |= EEH_STATE_DMA_ACTIVE; | ||
267 | result |= EEH_STATE_MMIO_ENABLED; | ||
268 | result |= EEH_STATE_DMA_ENABLED; | ||
269 | break; | ||
270 | case OPAL_EEH_STOPPED_MMIO_FREEZE: | ||
271 | result &= ~EEH_STATE_RESET_ACTIVE; | ||
272 | result |= EEH_STATE_DMA_ACTIVE; | ||
273 | result |= EEH_STATE_DMA_ENABLED; | ||
274 | break; | ||
275 | case OPAL_EEH_STOPPED_DMA_FREEZE: | ||
276 | result &= ~EEH_STATE_RESET_ACTIVE; | ||
277 | result |= EEH_STATE_MMIO_ACTIVE; | ||
278 | result |= EEH_STATE_MMIO_ENABLED; | ||
279 | break; | ||
280 | case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: | ||
281 | result &= ~EEH_STATE_RESET_ACTIVE; | ||
282 | break; | ||
283 | case OPAL_EEH_STOPPED_RESET: | ||
284 | result |= EEH_STATE_RESET_ACTIVE; | ||
285 | break; | ||
286 | case OPAL_EEH_STOPPED_TEMP_UNAVAIL: | ||
287 | result |= EEH_STATE_UNAVAILABLE; | ||
288 | break; | ||
289 | case OPAL_EEH_STOPPED_PERM_UNAVAIL: | ||
290 | result |= EEH_STATE_NOT_SUPPORT; | ||
291 | break; | ||
292 | default: | ||
293 | pr_warning("%s: Unexpected EEH status 0x%x " | ||
294 | "on PHB#%x-PE#%x\n", | ||
295 | __func__, fstate, hose->global_number, pe_no); | ||
296 | } | ||
297 | |||
298 | return result; | ||
299 | } | ||
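/*
 * Illustration (editorial, not part of the patch): the return value
 * above is a bitmask, so callers should test individual bits rather
 * than compare for equality, e.g.:
 *
 *	int state = ioda_eeh_get_state(pe);
 *
 *	if ((state & EEH_STATE_MMIO_ACTIVE) &&
 *	    (state & EEH_STATE_DMA_ACTIVE))
 *		pr_info("PE#%x fully functional\n", pe->addr);
 */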
300 | |||
301 | static int ioda_eeh_pe_clear(struct eeh_pe *pe) | ||
302 | { | ||
303 | struct pci_controller *hose; | ||
304 | struct pnv_phb *phb; | ||
305 | u32 pe_no; | ||
306 | u8 fstate; | ||
307 | u16 pcierr; | ||
308 | s64 ret; | ||
309 | |||
310 | pe_no = pe->addr; | ||
311 | hose = pe->phb; | ||
312 | phb = pe->phb->private_data; | ||
313 | |||
314 | /* Clear the EEH error on the PE */ | ||
315 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, | ||
316 | pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); | ||
317 | if (ret) { | ||
318 | pr_err("%s: Failed to clear EEH error for " | ||
319 | "PHB#%x-PE#%x, err=%lld\n", | ||
320 | __func__, hose->global_number, pe_no, ret); | ||
321 | return -EIO; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Read the PE state back and verify that the frozen | ||
326 | * state has been removed. | ||
327 | */ | ||
328 | ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, | ||
329 | &fstate, &pcierr, NULL); | ||
330 | if (ret) { | ||
331 | pr_err("%s: Failed to get EEH status on " | ||
332 | "PHB#%x-PE#%x\n, err=%lld\n", | ||
333 | __func__, hose->global_number, pe_no, ret); | ||
334 | return -EIO; | ||
335 | } | ||
336 | |||
337 | if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { | ||
338 | pr_err("%s: Frozen state not cleared on " | ||
339 | "PHB#%x-PE#%x, sts=%x\n", | ||
340 | __func__, hose->global_number, pe_no, fstate); | ||
341 | return -EIO; | ||
342 | } | ||
343 | |||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) | ||
348 | { | ||
349 | s64 rc = OPAL_HARDWARE; | ||
350 | |||
351 | while (1) { | ||
352 | rc = opal_pci_poll(phb->opal_id); | ||
353 | if (rc <= 0) | ||
354 | break; | ||
355 | |||
356 | msleep(rc); | ||
357 | } | ||
358 | |||
359 | return rc; | ||
360 | } | ||
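/*
 * Note the opal_pci_poll() convention the loop above relies on: a
 * positive return value is the number of milliseconds to sleep before
 * polling again, zero means the request has completed, and a negative
 * value is an OPAL error code.
 */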
361 | |||
362 | static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) | ||
363 | { | ||
364 | struct pnv_phb *phb = hose->private_data; | ||
365 | s64 rc = OPAL_HARDWARE; | ||
366 | |||
367 | pr_debug("%s: Reset PHB#%x, option=%d\n", | ||
368 | __func__, hose->global_number, option); | ||
369 | |||
370 | /* Issue PHB complete reset request */ | ||
371 | if (option == EEH_RESET_FUNDAMENTAL || | ||
372 | option == EEH_RESET_HOT) | ||
373 | rc = opal_pci_reset(phb->opal_id, | ||
374 | OPAL_PHB_COMPLETE, | ||
375 | OPAL_ASSERT_RESET); | ||
376 | else if (option == EEH_RESET_DEACTIVATE) | ||
377 | rc = opal_pci_reset(phb->opal_id, | ||
378 | OPAL_PHB_COMPLETE, | ||
379 | OPAL_DEASSERT_RESET); | ||
380 | if (rc < 0) | ||
381 | goto out; | ||
382 | |||
383 | /* | ||
384 | * Poll state of the PHB until the request is done | ||
385 | * successfully. | ||
386 | */ | ||
387 | rc = ioda_eeh_phb_poll(phb); | ||
388 | out: | ||
389 | if (rc != OPAL_SUCCESS) | ||
390 | return -EIO; | ||
391 | |||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | static int ioda_eeh_root_reset(struct pci_controller *hose, int option) | ||
396 | { | ||
397 | struct pnv_phb *phb = hose->private_data; | ||
398 | s64 rc = OPAL_SUCCESS; | ||
399 | |||
400 | pr_debug("%s: Reset PHB#%x, option=%d\n", | ||
401 | __func__, hose->global_number, option); | ||
402 | |||
403 | /* | ||
404 | * During reset deassert, we needn't care about the reset | ||
405 | * scope, because the firmware does nothing for fundamental | ||
406 | * or hot resets during the deassert phase. | ||
407 | */ | ||
408 | if (option == EEH_RESET_FUNDAMENTAL) | ||
409 | rc = opal_pci_reset(phb->opal_id, | ||
410 | OPAL_PCI_FUNDAMENTAL_RESET, | ||
411 | OPAL_ASSERT_RESET); | ||
412 | else if (option == EEH_RESET_HOT) | ||
413 | rc = opal_pci_reset(phb->opal_id, | ||
414 | OPAL_PCI_HOT_RESET, | ||
415 | OPAL_ASSERT_RESET); | ||
416 | else if (option == EEH_RESET_DEACTIVATE) | ||
417 | rc = opal_pci_reset(phb->opal_id, | ||
418 | OPAL_PCI_HOT_RESET, | ||
419 | OPAL_DEASSERT_RESET); | ||
420 | if (rc < 0) | ||
421 | goto out; | ||
422 | |||
423 | /* Poll state of the PHB until the request is done */ | ||
424 | rc = ioda_eeh_phb_poll(phb); | ||
425 | out: | ||
426 | if (rc != OPAL_SUCCESS) | ||
427 | return -EIO; | ||
428 | |||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | static int ioda_eeh_bridge_reset(struct pci_controller *hose, | ||
433 | struct pci_dev *dev, int option) | ||
434 | { | ||
435 | u16 ctrl; | ||
436 | |||
437 | pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", | ||
438 | __func__, hose->global_number, dev->bus->number, | ||
439 | PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); | ||
440 | |||
441 | switch (option) { | ||
442 | case EEH_RESET_FUNDAMENTAL: | ||
443 | case EEH_RESET_HOT: | ||
444 | pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); | ||
445 | ctrl |= PCI_BRIDGE_CTL_BUS_RESET; | ||
446 | pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); | ||
447 | break; | ||
448 | case EEH_RESET_DEACTIVATE: | ||
449 | pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); | ||
450 | ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; | ||
451 | pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); | ||
452 | break; | ||
453 | } | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | /** | ||
459 | * ioda_eeh_reset - Reset the indicated PE | ||
460 | * @pe: EEH PE | ||
461 | * @option: reset option | ||
462 | * | ||
463 | * Do reset on the indicated PE. For a PCI bus sensitive PE, | ||
464 | * we need to reset the parent p2p bridge. The PHB has to be | ||
465 | * reinitialized if the p2p bridge is the root bridge. For a | ||
466 | * PCI device sensitive PE, we try to reset the device through | ||
467 | * an FLR. For now, we don't have OPAL APIs to do a HARD reset | ||
468 | * yet, so all resets are SOFT (HOT) resets. | ||
469 | */ | ||
470 | static int ioda_eeh_reset(struct eeh_pe *pe, int option) | ||
471 | { | ||
472 | struct pci_controller *hose = pe->phb; | ||
473 | struct eeh_dev *edev; | ||
474 | struct pci_dev *dev; | ||
475 | int ret; | ||
476 | |||
477 | /* | ||
478 | * In general, we have to clear the problematic state of the | ||
479 | * corresponding PE first. However, we needn't do it if the PE | ||
480 | * is PHB associated: that means the PHB has fatal errors and | ||
481 | * needs a reset, and furthermore the AIB interface isn't | ||
482 | * reliable any more. | ||
483 | */ | ||
484 | if (!(pe->type & EEH_PE_PHB) && | ||
485 | (option == EEH_RESET_HOT || | ||
486 | option == EEH_RESET_FUNDAMENTAL)) { | ||
487 | ret = ioda_eeh_pe_clear(pe); | ||
488 | if (ret) | ||
489 | return -EIO; | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * The rules applied to resets, either fundamental or hot: | ||
494 | * | ||
495 | * We always reset the direct upstream bridge of the PE. If that | ||
496 | * bridge isn't the root bridge, we always take a hot reset no | ||
497 | * matter which option (fundamental or hot) was requested. | ||
498 | * Otherwise, we do the reset according to the requested option. | ||
499 | */ | ||
500 | if (pe->type & EEH_PE_PHB) { | ||
501 | ret = ioda_eeh_phb_reset(hose, option); | ||
502 | } else { | ||
503 | if (pe->type & EEH_PE_DEVICE) { | ||
504 | /* | ||
505 | * If it's a device PE, we don't hold a reference to the | ||
506 | * parent PCI bus yet, so we have to find it indirectly. | ||
507 | */ | ||
508 | edev = list_first_entry(&pe->edevs, | ||
509 | struct eeh_dev, list); | ||
510 | dev = eeh_dev_to_pci_dev(edev); | ||
511 | dev = dev->bus->self; | ||
512 | } else { | ||
513 | /* | ||
514 | * If it's a bus PE, the parent PCI bus is already | ||
515 | * there, so just pick it up. | ||
516 | */ | ||
517 | dev = pe->bus->self; | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Do the reset based on whether the direct upstream bridge | ||
522 | * is the root bridge (port) or not. | ||
523 | */ | ||
524 | if (dev->bus->number == 0) | ||
525 | ret = ioda_eeh_root_reset(hose, option); | ||
526 | else | ||
527 | ret = ioda_eeh_bridge_reset(hose, dev, option); | ||
528 | } | ||
529 | |||
530 | return ret; | ||
531 | } | ||
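/*
 * Illustration (editorial, not part of the patch): the EEH core is
 * expected to drive a reset as assert / settle / deassert, roughly:
 *
 *	ioda_eeh_reset(pe, EEH_RESET_HOT);
 *	msleep(1800);	<- settle delay owned by the caller, example value
 *	ioda_eeh_reset(pe, EEH_RESET_DEACTIVATE);
 */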
532 | |||
533 | /** | ||
534 | * ioda_eeh_get_log - Retrieve error log | ||
535 | * @pe: EEH PE | ||
536 | * @severity: Severity level of the log | ||
537 | * @drv_log: buffer to store the log | ||
538 | * @len: space of the log buffer | ||
539 | * | ||
540 | * The function is used to retrieve error log from P7IOC. | ||
541 | */ | ||
542 | static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, | ||
543 | char *drv_log, unsigned long len) | ||
544 | { | ||
545 | s64 ret; | ||
546 | unsigned long flags; | ||
547 | struct pci_controller *hose = pe->phb; | ||
548 | struct pnv_phb *phb = hose->private_data; | ||
549 | |||
550 | spin_lock_irqsave(&phb->lock, flags); | ||
551 | |||
552 | ret = opal_pci_get_phb_diag_data2(phb->opal_id, | ||
553 | phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); | ||
554 | if (ret) { | ||
555 | spin_unlock_irqrestore(&phb->lock, flags); | ||
556 | pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n", | ||
557 | __func__, hose->global_number, pe->addr); | ||
558 | return -EIO; | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * FIXME: We probably need to log the error somewhere. | ||
563 | * Let's sort that out in the future. | ||
564 | */ | ||
565 | /* pr_info("%s", phb->diag.blob); */ | ||
566 | |||
567 | spin_unlock_irqrestore(&phb->lock, flags); | ||
568 | |||
569 | return 0; | ||
570 | } | ||
571 | |||
572 | /** | ||
573 | * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE | ||
574 | * @pe: EEH PE | ||
575 | * | ||
576 | * A particular PE might include PCI bridges. In order to make | ||
577 | * the PE work properly, those PCI bridges should be configured | ||
578 | * correctly. However, nothing needs to be done on P7IOC since | ||
579 | * the reset function already covers everything this would do. | ||
580 | */ | ||
581 | static int ioda_eeh_configure_bridge(struct eeh_pe *pe) | ||
582 | { | ||
583 | return 0; | ||
584 | } | ||
585 | |||
586 | static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) | ||
587 | { | ||
588 | /* GEM */ | ||
589 | pr_info(" GEM XFIR: %016llx\n", data->gemXfir); | ||
590 | pr_info(" GEM RFIR: %016llx\n", data->gemRfir); | ||
591 | pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); | ||
592 | pr_info(" GEM Mask: %016llx\n", data->gemMask); | ||
593 | pr_info(" GEM RWOF: %016llx\n", data->gemRwof); | ||
594 | |||
595 | /* LEM */ | ||
596 | pr_info(" LEM FIR: %016llx\n", data->lemFir); | ||
597 | pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); | ||
598 | pr_info(" LEM Action 0: %016llx\n", data->lemAction0); | ||
599 | pr_info(" LEM Action 1: %016llx\n", data->lemAction1); | ||
600 | pr_info(" LEM WOF: %016llx\n", data->lemWof); | ||
601 | } | ||
602 | |||
603 | static void ioda_eeh_hub_diag(struct pci_controller *hose) | ||
604 | { | ||
605 | struct pnv_phb *phb = hose->private_data; | ||
606 | struct OpalIoP7IOCErrorData *data; | ||
607 | long rc; | ||
608 | |||
609 | data = (struct OpalIoP7IOCErrorData *)hub_diag; | ||
610 | rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); | ||
611 | if (rc != OPAL_SUCCESS) { | ||
612 | pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", | ||
613 | __func__, phb->hub_id, rc); | ||
614 | return; | ||
615 | } | ||
616 | |||
617 | switch (data->type) { | ||
618 | case OPAL_P7IOC_DIAG_TYPE_RGC: | ||
619 | pr_info("P7IOC diag-data for RGC\n\n"); | ||
620 | ioda_eeh_hub_diag_common(data); | ||
621 | pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); | ||
622 | pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); | ||
623 | break; | ||
624 | case OPAL_P7IOC_DIAG_TYPE_BI: | ||
625 | pr_info("P7IOC diag-data for BI %s\n\n", | ||
626 | data->bi.biDownbound ? "Downbound" : "Upbound"); | ||
627 | ioda_eeh_hub_diag_common(data); | ||
628 | pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); | ||
629 | pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); | ||
630 | pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); | ||
631 | pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); | ||
632 | break; | ||
633 | case OPAL_P7IOC_DIAG_TYPE_CI: | ||
634 | pr_info("P7IOC diag-data for CI Port %d\\nn", | ||
635 | data->ci.ciPort); | ||
636 | ioda_eeh_hub_diag_common(data); | ||
637 | pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); | ||
638 | pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); | ||
639 | break; | ||
640 | case OPAL_P7IOC_DIAG_TYPE_MISC: | ||
641 | pr_info("P7IOC diag-data for MISC\n\n"); | ||
642 | ioda_eeh_hub_diag_common(data); | ||
643 | break; | ||
644 | case OPAL_P7IOC_DIAG_TYPE_I2C: | ||
645 | pr_info("P7IOC diag-data for I2C\n\n"); | ||
646 | ioda_eeh_hub_diag_common(data); | ||
647 | break; | ||
648 | default: | ||
649 | pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", | ||
650 | __func__, phb->hub_id, data->type); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose, | ||
655 | struct OpalIoPhbErrorCommon *common) | ||
656 | { | ||
657 | struct OpalIoP7IOCPhbErrorData *data; | ||
658 | int i; | ||
659 | |||
660 | data = (struct OpalIoP7IOCPhbErrorData *)common; | ||
661 | |||
662 | pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n", | ||
663 | hose->global_number, common->version); | ||
664 | |||
665 | pr_info(" brdgCtl: %08x\n", data->brdgCtl); | ||
666 | |||
667 | pr_info(" portStatusReg: %08x\n", data->portStatusReg); | ||
668 | pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); | ||
669 | pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); | ||
670 | |||
671 | pr_info(" deviceStatus: %08x\n", data->deviceStatus); | ||
672 | pr_info(" slotStatus: %08x\n", data->slotStatus); | ||
673 | pr_info(" linkStatus: %08x\n", data->linkStatus); | ||
674 | pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); | ||
675 | pr_info(" devSecStatus: %08x\n", data->devSecStatus); | ||
676 | |||
677 | pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); | ||
678 | pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); | ||
679 | pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); | ||
680 | pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); | ||
681 | pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); | ||
682 | pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); | ||
683 | pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); | ||
684 | pr_info(" sourceId: %08x\n", data->sourceId); | ||
685 | |||
686 | pr_info(" errorClass: %016llx\n", data->errorClass); | ||
687 | pr_info(" correlator: %016llx\n", data->correlator); | ||
688 | pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); | ||
689 | pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); | ||
690 | pr_info(" lemFir: %016llx\n", data->lemFir); | ||
691 | pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); | ||
692 | pr_info(" lemWOF: %016llx\n", data->lemWOF); | ||
693 | pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); | ||
694 | pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); | ||
695 | pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); | ||
696 | pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); | ||
697 | pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); | ||
698 | pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); | ||
699 | pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); | ||
700 | pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); | ||
701 | pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); | ||
702 | pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); | ||
703 | pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); | ||
704 | pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); | ||
705 | pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); | ||
706 | pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); | ||
707 | pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); | ||
708 | pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); | ||
709 | |||
710 | for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { | ||
711 | if ((data->pestA[i] >> 63) == 0 && | ||
712 | (data->pestB[i] >> 63) == 0) | ||
713 | continue; | ||
714 | |||
715 | pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); | ||
716 | pr_info(" PESTB: %016llx\n", data->pestB[i]); | ||
717 | } | ||
718 | } | ||
719 | |||
720 | static void ioda_eeh_phb_diag(struct pci_controller *hose) | ||
721 | { | ||
722 | struct pnv_phb *phb = hose->private_data; | ||
723 | struct OpalIoPhbErrorCommon *common; | ||
724 | long rc; | ||
725 | |||
726 | common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; | ||
727 | rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); | ||
728 | if (rc != OPAL_SUCCESS) { | ||
729 | pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", | ||
730 | __func__, hose->global_number, rc); | ||
731 | return; | ||
732 | } | ||
733 | |||
734 | switch (common->ioType) { | ||
735 | case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: | ||
736 | ioda_eeh_p7ioc_phb_diag(hose, common); | ||
737 | break; | ||
738 | default: | ||
739 | pr_warning("%s: Unrecognized I/O chip %d\n", | ||
740 | __func__, common->ioType); | ||
741 | } | ||
742 | } | ||
743 | |||
744 | static int ioda_eeh_get_phb_pe(struct pci_controller *hose, | ||
745 | struct eeh_pe **pe) | ||
746 | { | ||
747 | struct eeh_pe *phb_pe; | ||
748 | |||
749 | phb_pe = eeh_phb_pe_get(hose); | ||
750 | if (!phb_pe) { | ||
751 | pr_warning("%s Can't find PE for PHB#%d\n", | ||
752 | __func__, hose->global_number); | ||
753 | return -EEXIST; | ||
754 | } | ||
755 | |||
756 | *pe = phb_pe; | ||
757 | return 0; | ||
758 | } | ||
759 | |||
760 | static int ioda_eeh_get_pe(struct pci_controller *hose, | ||
761 | u16 pe_no, struct eeh_pe **pe) | ||
762 | { | ||
763 | struct eeh_pe *phb_pe, *dev_pe; | ||
764 | struct eeh_dev dev; | ||
765 | |||
766 | /* Find the PHB PE */ | ||
767 | if (ioda_eeh_get_phb_pe(hose, &phb_pe)) | ||
768 | return -EEXIST; | ||
769 | |||
770 | /* Find the PE according to PE# */ | ||
771 | memset(&dev, 0, sizeof(struct eeh_dev)); | ||
772 | dev.phb = hose; | ||
773 | dev.pe_config_addr = pe_no; | ||
774 | dev_pe = eeh_pe_get(&dev); | ||
775 | if (!dev_pe) { | ||
776 | pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", | ||
777 | __func__, hose->global_number, pe_no); | ||
778 | return -EEXIST; | ||
779 | } | ||
780 | |||
781 | *pe = dev_pe; | ||
782 | return 0; | ||
783 | } | ||
784 | |||
785 | /** | ||
786 | * ioda_eeh_next_error - Retrieve next error for EEH core to handle | ||
787 | * @pe: The affected PE | ||
788 | * | ||
789 | * The function is expected to be called by the EEH core when it | ||
790 | * gets a special EEH event (one without a bound PE). It calls the | ||
791 | * OPAL APIs for the next error to handle. Informational errors are | ||
792 | * handled internally by the platform; dead IOC, dead PHB, fenced | ||
793 | * PHB and frozen PE errors are eventually handled by the EEH core. | ||
794 | */ | ||
795 | static int ioda_eeh_next_error(struct eeh_pe **pe) | ||
796 | { | ||
797 | struct pci_controller *hose, *tmp; | ||
798 | struct pnv_phb *phb; | ||
799 | u64 frozen_pe_no; | ||
800 | u16 err_type, severity; | ||
801 | long rc; | ||
802 | int ret = 1; | ||
803 | |||
804 | /* | ||
805 | * While running here, it's safe to purge the event queue. | ||
806 | * We should also keep the cached OPAL notifier event | ||
807 | * synchronized between the kernel and the firmware. | ||
808 | */ | ||
809 | eeh_remove_event(NULL); | ||
810 | opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); | ||
811 | |||
812 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { | ||
813 | /* | ||
814 | * If the subordinate PCI buses of the PHB have been | ||
815 | * removed, we needn't take care of it any more. | ||
816 | */ | ||
817 | phb = hose->private_data; | ||
818 | if (phb->eeh_state & PNV_EEH_STATE_REMOVED) | ||
819 | continue; | ||
820 | |||
821 | rc = opal_pci_next_error(phb->opal_id, | ||
822 | &frozen_pe_no, &err_type, &severity); | ||
823 | |||
824 | /* If OPAL API returns error, we needn't proceed */ | ||
825 | if (rc != OPAL_SUCCESS) { | ||
826 | IODA_EEH_DBG("%s: Invalid return value on " | ||
827 | "PHB#%x (0x%lx) from opal_pci_next_error", | ||
828 | __func__, hose->global_number, rc); | ||
829 | continue; | ||
830 | } | ||
831 | |||
832 | /* If the PHB doesn't have error, stop processing */ | ||
833 | if (err_type == OPAL_EEH_NO_ERROR || | ||
834 | severity == OPAL_EEH_SEV_NO_ERROR) { | ||
835 | IODA_EEH_DBG("%s: No error found on PHB#%x\n", | ||
836 | __func__, hose->global_number); | ||
837 | continue; | ||
838 | } | ||
839 | |||
840 | /* | ||
841 | * Process the error. When multiple errors are pending on a | ||
842 | * specific PHB, we expect the one with the highest priority | ||
843 | * to be reported first. | ||
844 | */ | ||
845 | IODA_EEH_DBG("%s: Error (%d, %d, %d) on PHB#%x\n", | ||
846 | err_type, severity, pe_no, hose->global_number); | ||
847 | switch (err_type) { | ||
848 | case OPAL_EEH_IOC_ERROR: | ||
849 | if (severity == OPAL_EEH_SEV_IOC_DEAD) { | ||
850 | list_for_each_entry_safe(hose, tmp, | ||
851 | &hose_list, list_node) { | ||
852 | phb = hose->private_data; | ||
853 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; | ||
854 | } | ||
855 | |||
856 | pr_err("EEH: dead IOC detected\n"); | ||
857 | ret = 4; | ||
858 | goto out; | ||
859 | } else if (severity == OPAL_EEH_SEV_INF) { | ||
860 | pr_info("EEH: IOC informative error " | ||
861 | "detected\n"); | ||
862 | ioda_eeh_hub_diag(hose); | ||
863 | } | ||
864 | |||
865 | break; | ||
866 | case OPAL_EEH_PHB_ERROR: | ||
867 | if (severity == OPAL_EEH_SEV_PHB_DEAD) { | ||
868 | if (ioda_eeh_get_phb_pe(hose, pe)) | ||
869 | break; | ||
870 | |||
871 | pr_err("EEH: dead PHB#%x detected\n", | ||
872 | hose->global_number); | ||
873 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; | ||
874 | ret = 3; | ||
875 | goto out; | ||
876 | } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { | ||
877 | if (ioda_eeh_get_phb_pe(hose, pe)) | ||
878 | break; | ||
879 | |||
880 | pr_err("EEH: fenced PHB#%x detected\n", | ||
881 | hose->global_number); | ||
882 | ret = 2; | ||
883 | goto out; | ||
884 | } else if (severity == OPAL_EEH_SEV_INF) { | ||
885 | pr_info("EEH: PHB#%x informative error " | ||
886 | "detected\n", | ||
887 | hose->global_number); | ||
888 | ioda_eeh_phb_diag(hose); | ||
889 | } | ||
890 | |||
891 | break; | ||
892 | case OPAL_EEH_PE_ERROR: | ||
893 | if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) | ||
894 | break; | ||
895 | |||
896 | pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", | ||
897 | (*pe)->addr, (*pe)->phb->global_number); | ||
898 | ret = 1; | ||
899 | goto out; | ||
900 | } | ||
901 | } | ||
902 | |||
903 | ret = 0; | ||
904 | out: | ||
905 | return ret; | ||
906 | } | ||
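/*
 * Summary of the return codes produced above: 0 - no error pending,
 * 1 - frozen PE, 2 - fenced PHB, 3 - dead PHB, 4 - dead IOC. For
 * cases 1 to 3 the affected PE is also handed back through @pe.
 */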
907 | |||
908 | struct pnv_eeh_ops ioda_eeh_ops = { | ||
909 | .post_init = ioda_eeh_post_init, | ||
910 | .set_option = ioda_eeh_set_option, | ||
911 | .get_state = ioda_eeh_get_state, | ||
912 | .reset = ioda_eeh_reset, | ||
913 | .get_log = ioda_eeh_get_log, | ||
914 | .configure_bridge = ioda_eeh_configure_bridge, | ||
915 | .next_error = ioda_eeh_next_error | ||
916 | }; | ||
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 000000000000..969cce73055a --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c | |||
@@ -0,0 +1,379 @@ | |||
1 | /* | ||
2 | * This file implements the platform dependent EEH operations for the | ||
3 | * powernv platform, which was created to run with full hypervisor | ||
4 | * support. | ||
5 | * | ||
6 | * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/atomic.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/export.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/msi.h> | ||
20 | #include <linux/of.h> | ||
21 | #include <linux/pci.h> | ||
22 | #include <linux/proc_fs.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | |||
28 | #include <asm/eeh.h> | ||
29 | #include <asm/eeh_event.h> | ||
30 | #include <asm/firmware.h> | ||
31 | #include <asm/io.h> | ||
32 | #include <asm/iommu.h> | ||
33 | #include <asm/machdep.h> | ||
34 | #include <asm/msi_bitmap.h> | ||
35 | #include <asm/opal.h> | ||
36 | #include <asm/ppc-pci.h> | ||
37 | |||
38 | #include "powernv.h" | ||
39 | #include "pci.h" | ||
40 | |||
41 | /** | ||
42 | * powernv_eeh_init - EEH platform dependent initialization | ||
43 | * | ||
44 | * EEH platform dependent initialization on powernv | ||
45 | */ | ||
46 | static int powernv_eeh_init(void) | ||
47 | { | ||
48 | /* We require OPALv3 */ | ||
49 | if (!firmware_has_feature(FW_FEATURE_OPALv3)) { | ||
50 | pr_warning("%s: OPALv3 is required !\n", __func__); | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | |||
54 | /* Set EEH probe mode */ | ||
55 | eeh_probe_mode_set(EEH_PROBE_MODE_DEV); | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | /** | ||
61 | * powernv_eeh_post_init - EEH platform dependent post initialization | ||
62 | * | ||
63 | * EEH platform dependent post initialization on powernv. When | ||
64 | * the function is called, the EEH PEs and devices should have | ||
65 | * been built. Once the I/O address cache has been built as well, | ||
66 | * EEH is ready to supply service. | ||
67 | */ | ||
68 | static int powernv_eeh_post_init(void) | ||
69 | { | ||
70 | struct pci_controller *hose; | ||
71 | struct pnv_phb *phb; | ||
72 | int ret = 0; | ||
73 | |||
74 | list_for_each_entry(hose, &hose_list, list_node) { | ||
75 | phb = hose->private_data; | ||
76 | |||
77 | if (phb->eeh_ops && phb->eeh_ops->post_init) { | ||
78 | ret = phb->eeh_ops->post_init(hose); | ||
79 | if (ret) | ||
80 | break; | ||
81 | } | ||
82 | } | ||
83 | |||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | /** | ||
88 | * powernv_eeh_dev_probe - Do probe on PCI device | ||
89 | * @dev: PCI device | ||
90 | * @flag: unused | ||
91 | * | ||
92 | * When the EEH module is installed during system boot, all PCI | ||
93 | * devices are checked one by one to see whether they support EEH. | ||
94 | * This function is introduced for that purpose. By default, EEH is | ||
95 | * enabled on all PCI devices, so we only need to do the necessary | ||
96 | * initialization on the corresponding eeh device and create the PE | ||
97 | * accordingly. | ||
98 | * | ||
99 | * Note that it's unsafe to retrieve the EEH device through the | ||
100 | * corresponding PCI device: during a PCI device hotplug, which was | ||
101 | * possibly triggered by the EEH core, the binding between the EEH | ||
102 | * device and the PCI device hasn't been built yet. | ||
103 | */ | ||
104 | static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) | ||
105 | { | ||
106 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
107 | struct pnv_phb *phb = hose->private_data; | ||
108 | struct device_node *dn = pci_device_to_OF_node(dev); | ||
109 | struct eeh_dev *edev = of_node_to_eeh_dev(dn); | ||
110 | |||
111 | /* | ||
112 | * When probing the root bridge, which doesn't have any | ||
113 | * subordinate PCI devices of its own, we don't have an | ||
114 | * OF node for it either. So it's not reasonable to | ||
115 | * continue the probing. | ||
116 | */ | ||
117 | if (!dn || !edev) | ||
118 | return 0; | ||
119 | |||
120 | /* Skip for PCI-ISA bridge */ | ||
121 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) | ||
122 | return 0; | ||
123 | |||
124 | /* Initialize eeh device */ | ||
125 | edev->class_code = dev->class; | ||
126 | edev->mode = 0; | ||
127 | edev->config_addr = ((dev->bus->number << 8) | dev->devfn); | ||
128 | edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); | ||
129 | |||
130 | /* Create PE */ | ||
131 | eeh_add_to_parent_pe(edev); | ||
132 | |||
133 | /* | ||
134 | * Enable EEH explicitly so that we will do EEH check | ||
135 | * while accessing I/O stuff | ||
136 | * | ||
137 | * FIXME: Enable that for PHB3 later | ||
138 | */ | ||
139 | if (phb->type == PNV_PHB_IODA1) | ||
140 | eeh_subsystem_enabled = 1; | ||
141 | |||
142 | /* Save memory bars */ | ||
143 | eeh_save_bars(edev); | ||
144 | |||
145 | return 0; | ||
146 | } | ||
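/*
 * Worked example (hypothetical device, editorial): for a device at
 * bus 0x01, devfn 0x09 (slot 1, function 1) the code above computes
 *
 *	config_addr    = (0x01 << 8) | 0x09 = 0x0109
 *	pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, 0x09)
 *
 * i.e. the former is the traditional BDF encoding while the latter is
 * whatever RID-to-PE mapping the PHB implements.
 */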
147 | |||
148 | /** | ||
149 | * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable | ||
150 | * @pe: EEH PE | ||
151 | * @option: operation to be issued | ||
152 | * | ||
153 | * The function is used to control the EEH functionality globally. | ||
154 | * Currently, the following options are supported according to PAPR: | ||
155 | * Enable EEH, Disable EEH, Enable MMIO and Enable DMA | ||
156 | */ | ||
157 | static int powernv_eeh_set_option(struct eeh_pe *pe, int option) | ||
158 | { | ||
159 | struct pci_controller *hose = pe->phb; | ||
160 | struct pnv_phb *phb = hose->private_data; | ||
161 | int ret = -EEXIST; | ||
162 | |||
163 | /* | ||
164 | * What we need do is pass it down for hardware | ||
165 | * implementation to handle it. | ||
166 | */ | ||
167 | if (phb->eeh_ops && phb->eeh_ops->set_option) | ||
168 | ret = phb->eeh_ops->set_option(pe, option); | ||
169 | |||
170 | return ret; | ||
171 | } | ||
172 | |||
173 | /** | ||
174 | * powernv_eeh_get_pe_addr - Retrieve PE address | ||
175 | * @pe: EEH PE | ||
176 | * | ||
177 | * Retrieve the PE address according to the given traditional | ||
178 | * PCI BDF (Bus/Device/Function) address. | ||
179 | */ | ||
180 | static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) | ||
181 | { | ||
182 | return pe->addr; | ||
183 | } | ||
184 | |||
185 | /** | ||
186 | * powernv_eeh_get_state - Retrieve PE state | ||
187 | * @pe: EEH PE | ||
188 | * @delay: delay while PE state is temporarily unavailable | ||
189 | * | ||
190 | * Retrieve the state of the specified PE. On an IODA-compatible | ||
191 | * platform, it should be retrieved from the IODA table, so we | ||
192 | * prefer passing it down to the hardware implementation to | ||
193 | * handle. | ||
194 | */ | ||
195 | static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) | ||
196 | { | ||
197 | struct pci_controller *hose = pe->phb; | ||
198 | struct pnv_phb *phb = hose->private_data; | ||
199 | int ret = EEH_STATE_NOT_SUPPORT; | ||
200 | |||
201 | if (phb->eeh_ops && phb->eeh_ops->get_state) { | ||
202 | ret = phb->eeh_ops->get_state(pe); | ||
203 | |||
204 | /* | ||
205 | * If the PE state is temporarily unavailable, | ||
206 | * tell the EEH core to delay for the default | ||
207 | * period (1 second). | ||
208 | */ | ||
209 | if (delay) { | ||
210 | *delay = 0; | ||
211 | if (ret & EEH_STATE_UNAVAILABLE) | ||
212 | *delay = 1000; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * powernv_eeh_reset - Reset the specified PE | ||
221 | * @pe: EEH PE | ||
222 | * @option: reset option | ||
223 | * | ||
224 | * Reset the specified PE | ||
225 | */ | ||
226 | static int powernv_eeh_reset(struct eeh_pe *pe, int option) | ||
227 | { | ||
228 | struct pci_controller *hose = pe->phb; | ||
229 | struct pnv_phb *phb = hose->private_data; | ||
230 | int ret = -EEXIST; | ||
231 | |||
232 | if (phb->eeh_ops && phb->eeh_ops->reset) | ||
233 | ret = phb->eeh_ops->reset(pe, option); | ||
234 | |||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * powernv_eeh_wait_state - Wait for PE state | ||
240 | * @pe: EEH PE | ||
241 | * @max_wait: maximal wait period in milliseconds | ||
242 | * | ||
243 | * Wait for the state of the associated PE. It might take some | ||
244 | * time to retrieve the PE's state. | ||
245 | */ | ||
246 | static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) | ||
247 | { | ||
248 | int ret; | ||
249 | int mwait; | ||
250 | |||
251 | while (1) { | ||
252 | ret = powernv_eeh_get_state(pe, &mwait); | ||
253 | |||
254 | /* | ||
255 | * If the PE's state is temporarily unavailable, | ||
256 | * we have to wait for the specified time. Otherwise, | ||
257 | * the PE's state will be returned immediately. | ||
258 | */ | ||
259 | if (ret != EEH_STATE_UNAVAILABLE) | ||
260 | return ret; | ||
261 | |||
262 | max_wait -= mwait; | ||
263 | if (max_wait <= 0) { | ||
264 | pr_warning("%s: Timeout getting PE#%x's state (%d)\n", | ||
265 | __func__, pe->addr, max_wait); | ||
266 | return EEH_STATE_NOT_SUPPORT; | ||
267 | } | ||
268 | |||
269 | msleep(mwait); | ||
270 | } | ||
271 | |||
272 | return EEH_STATE_NOT_SUPPORT; | ||
273 | } | ||
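/*
 * Illustrative call (editorial; values are examples only): wait up to
 * five seconds for the PE state to become available:
 *
 *	int state = powernv_eeh_wait_state(pe, 5000);
 *
 * On timeout the function warns and returns EEH_STATE_NOT_SUPPORT;
 * otherwise it returns the first state that isn't exactly
 * EEH_STATE_UNAVAILABLE.
 */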
274 | |||
275 | /** | ||
276 | * powernv_eeh_get_log - Retrieve error log | ||
277 | * @pe: EEH PE | ||
278 | * @severity: temporary or permanent error log | ||
279 | * @drv_log: driver log to be combined with retrieved error log | ||
280 | * @len: length of driver log | ||
281 | * | ||
282 | * Retrieve the temporary or permanent error from the PE. | ||
283 | */ | ||
284 | static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, | ||
285 | char *drv_log, unsigned long len) | ||
286 | { | ||
287 | struct pci_controller *hose = pe->phb; | ||
288 | struct pnv_phb *phb = hose->private_data; | ||
289 | int ret = -EEXIST; | ||
290 | |||
291 | if (phb->eeh_ops && phb->eeh_ops->get_log) | ||
292 | ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); | ||
293 | |||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | /** | ||
298 | * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE | ||
299 | * @pe: EEH PE | ||
300 | * | ||
301 | * The function will be called to reconfigure the bridges included | ||
302 | * in the specified PE so that the malfunctioning PE can be | ||
303 | * recovered again. | ||
304 | */ | ||
305 | static int powernv_eeh_configure_bridge(struct eeh_pe *pe) | ||
306 | { | ||
307 | struct pci_controller *hose = pe->phb; | ||
308 | struct pnv_phb *phb = hose->private_data; | ||
309 | int ret = 0; | ||
310 | |||
311 | if (phb->eeh_ops && phb->eeh_ops->configure_bridge) | ||
312 | ret = phb->eeh_ops->configure_bridge(pe); | ||
313 | |||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | /** | ||
318 | * powernv_eeh_next_error - Retrieve next EEH error to handle | ||
319 | * @pe: Affected PE | ||
320 | * | ||
321 | * Use the OPAL API to retrieve the next EEH error for the EEH core to handle. | ||
322 | */ | ||
323 | static int powernv_eeh_next_error(struct eeh_pe **pe) | ||
324 | { | ||
325 | struct pci_controller *hose; | ||
326 | struct pnv_phb *phb = NULL; | ||
327 | |||
328 | list_for_each_entry(hose, &hose_list, list_node) { | ||
329 | phb = hose->private_data; | ||
330 | break; | ||
331 | } | ||
332 | |||
333 | if (phb && phb->eeh_ops && phb->eeh_ops->next_error) | ||
334 | return phb->eeh_ops->next_error(pe); | ||
335 | |||
336 | return -EEXIST; | ||
337 | } | ||
338 | |||
339 | static struct eeh_ops powernv_eeh_ops = { | ||
340 | .name = "powernv", | ||
341 | .init = powernv_eeh_init, | ||
342 | .post_init = powernv_eeh_post_init, | ||
343 | .of_probe = NULL, | ||
344 | .dev_probe = powernv_eeh_dev_probe, | ||
345 | .set_option = powernv_eeh_set_option, | ||
346 | .get_pe_addr = powernv_eeh_get_pe_addr, | ||
347 | .get_state = powernv_eeh_get_state, | ||
348 | .reset = powernv_eeh_reset, | ||
349 | .wait_state = powernv_eeh_wait_state, | ||
350 | .get_log = powernv_eeh_get_log, | ||
351 | .configure_bridge = powernv_eeh_configure_bridge, | ||
352 | .read_config = pnv_pci_cfg_read, | ||
353 | .write_config = pnv_pci_cfg_write, | ||
354 | .next_error = powernv_eeh_next_error | ||
355 | }; | ||
356 | |||
357 | /** | ||
358 | * eeh_powernv_init - Register platform dependent EEH operations | ||
359 | * | ||
360 | * EEH initialization on powernv platform. This function should be | ||
361 | * called before any EEH-related functions. | ||
362 | */ | ||
363 | static int __init eeh_powernv_init(void) | ||
364 | { | ||
365 | int ret = -EINVAL; | ||
366 | |||
367 | if (!machine_is(powernv)) | ||
368 | return ret; | ||
369 | |||
370 | ret = eeh_ops_register(&powernv_eeh_ops); | ||
371 | if (!ret) | ||
372 | pr_info("EEH: PowerNV platform initialized\n"); | ||
373 | else | ||
374 | pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); | ||
375 | |||
376 | return ret; | ||
377 | } | ||
378 | |||
379 | early_initcall(eeh_powernv_init); | ||
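Taken together with eeh-ioda.c above, the boot-time flow implied by this
patch is roughly the following (a sketch inferred from the code, not a
normative contract):

	eeh_powernv_init()             /* early_initcall: registers powernv_eeh_ops */
	  eeh_init()                   /* invoked from pnv_pci_ioda_fixup() below */
	    powernv_eeh_dev_probe()    /* per device: init the eeh_dev, create PEs */
	    powernv_eeh_post_init()    /* per PHB: ioda_eeh_post_init() registers
	                                  the OPAL event notifier */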
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6fabe92eafb6..e88863ffb135 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S | |||
@@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); | |||
107 | OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); | 107 | OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); |
108 | OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); | 108 | OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); |
109 | OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); | 109 | OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); |
110 | OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); | ||
111 | OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); | ||
110 | OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); | 112 | OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); |
113 | OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); | ||
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 628c564ceadb..106301fd2fa5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/of.h> | 15 | #include <linux/of.h> |
16 | #include <linux/of_platform.h> | 16 | #include <linux/of_platform.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/notifier.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include <asm/opal.h> | 20 | #include <asm/opal.h> |
20 | #include <asm/firmware.h> | 21 | #include <asm/firmware.h> |
@@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock); | |||
31 | extern u64 opal_mc_secondary_handler[]; | 32 | extern u64 opal_mc_secondary_handler[]; |
32 | static unsigned int *opal_irqs; | 33 | static unsigned int *opal_irqs; |
33 | static unsigned int opal_irq_count; | 34 | static unsigned int opal_irq_count; |
35 | static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); | ||
36 | static DEFINE_SPINLOCK(opal_notifier_lock); | ||
37 | static uint64_t last_notified_mask = 0x0ul; | ||
38 | static atomic_t opal_notifier_hold = ATOMIC_INIT(0); | ||
34 | 39 | ||
35 | int __init early_init_dt_scan_opal(unsigned long node, | 40 | int __init early_init_dt_scan_opal(unsigned long node, |
36 | const char *uname, int depth, void *data) | 41 | const char *uname, int depth, void *data) |
@@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void) | |||
95 | 100 | ||
96 | early_initcall(opal_register_exception_handlers); | 101 | early_initcall(opal_register_exception_handlers); |
97 | 102 | ||
103 | int opal_notifier_register(struct notifier_block *nb) | ||
104 | { | ||
105 | if (!nb) { | ||
106 | pr_warning("%s: Invalid argument (%p)\n", | ||
107 | __func__, nb); | ||
108 | return -EINVAL; | ||
109 | } | ||
110 | |||
111 | atomic_notifier_chain_register(&opal_notifier_head, nb); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static void opal_do_notifier(uint64_t events) | ||
116 | { | ||
117 | unsigned long flags; | ||
118 | uint64_t changed_mask; | ||
119 | |||
120 | if (atomic_read(&opal_notifier_hold)) | ||
121 | return; | ||
122 | |||
123 | spin_lock_irqsave(&opal_notifier_lock, flags); | ||
124 | changed_mask = last_notified_mask ^ events; | ||
125 | last_notified_mask = events; | ||
126 | spin_unlock_irqrestore(&opal_notifier_lock, flags); | ||
127 | |||
128 | /* | ||
129 | * We feed the callback with both the event bits and the | ||
130 | * changed bits so it has enough information. | ||
131 | */ | ||
132 | atomic_notifier_call_chain(&opal_notifier_head, | ||
133 | events, (void *)changed_mask); | ||
134 | } | ||
135 | |||
136 | void opal_notifier_update_evt(uint64_t evt_mask, | ||
137 | uint64_t evt_val) | ||
138 | { | ||
139 | unsigned long flags; | ||
140 | |||
141 | spin_lock_irqsave(&opal_notifier_lock, flags); | ||
142 | last_notified_mask &= ~evt_mask; | ||
143 | last_notified_mask |= evt_val; | ||
144 | spin_unlock_irqrestore(&opal_notifier_lock, flags); | ||
145 | } | ||
146 | |||
147 | void opal_notifier_enable(void) | ||
148 | { | ||
149 | int64_t rc; | ||
150 | uint64_t evt = 0; | ||
151 | |||
152 | atomic_set(&opal_notifier_hold, 0); | ||
153 | |||
154 | /* Process pending events */ | ||
155 | rc = opal_poll_events(&evt); | ||
156 | if (rc == OPAL_SUCCESS && evt) | ||
157 | opal_do_notifier(evt); | ||
158 | } | ||
159 | |||
160 | void opal_notifier_disable(void) | ||
161 | { | ||
162 | atomic_set(&opal_notifier_hold, 1); | ||
163 | } | ||
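/*
 * Consumer sketch (editorial; mirrors ioda_eeh_event() in eeh-ioda.c):
 * the chain is called with the complete event word in @events, while
 * the bits that changed since the last notification arrive through the
 * opaque pointer:
 *
 *	static int my_notify(struct notifier_block *nb,
 *			     unsigned long events, void *change)
 *	{
 *		uint64_t changed = (uint64_t)change;
 *
 *		if ((changed & OPAL_EVENT_PCI_ERROR) &&
 *		    (events & OPAL_EVENT_PCI_ERROR))
 *			handle_pci_error();	<- hypothetical handler
 *		return 0;
 *	}
 */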
164 | |||
98 | int opal_get_chars(uint32_t vtermno, char *buf, int count) | 165 | int opal_get_chars(uint32_t vtermno, char *buf, int count) |
99 | { | 166 | { |
100 | s64 len, rc; | 167 | s64 len, rc; |
@@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data) | |||
297 | 364 | ||
298 | opal_handle_interrupt(virq_to_hw(irq), &events); | 365 | opal_handle_interrupt(virq_to_hw(irq), &events); |
299 | 366 | ||
300 | /* XXX TODO: Do something with the events */ | 367 | opal_do_notifier(events); |
301 | 368 | ||
302 | return IRQ_HANDLED; | 369 | return IRQ_HANDLED; |
303 | } | 370 | } |
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e4cdf2..49b57b9f835d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | #include <linux/debugfs.h> | ||
16 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
17 | #include <linux/string.h> | 18 | #include <linux/string.h> |
18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include <asm/iommu.h> | 33 | #include <asm/iommu.h> |
33 | #include <asm/tce.h> | 34 | #include <asm/tce.h> |
34 | #include <asm/xics.h> | 35 | #include <asm/xics.h> |
36 | #include <asm/debug.h> | ||
35 | 37 | ||
36 | #include "powernv.h" | 38 | #include "powernv.h" |
37 | #include "pci.h" | 39 | #include "pci.h" |
@@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev | |||
441 | set_iommu_table_base(&pdev->dev, &pe->tce32_table); | 443 | set_iommu_table_base(&pdev->dev, &pe->tce32_table); |
442 | } | 444 | } |
443 | 445 | ||
446 | static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) | ||
447 | { | ||
448 | struct pci_dev *dev; | ||
449 | |||
450 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
451 | set_iommu_table_base(&dev->dev, &pe->tce32_table); | ||
452 | if (dev->subordinate) | ||
453 | pnv_ioda_setup_bus_dma(pe, dev->subordinate); | ||
454 | } | ||
455 | } | ||
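/*
 * Note: the recursion above visits every device on the bus and then
 * descends into any subordinate bus behind a bridge, so the whole PCI
 * subtree owned by the PE ends up sharing the PE's 32-bit TCE table.
 */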
456 | |||
444 | static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, | 457 | static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, |
445 | u64 *startp, u64 *endp) | 458 | u64 *startp, u64 *endp) |
446 | { | 459 | { |
@@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, | |||
595 | TCE_PCI_SWINV_PAIR; | 608 | TCE_PCI_SWINV_PAIR; |
596 | } | 609 | } |
597 | iommu_init_table(tbl, phb->hose->node); | 610 | iommu_init_table(tbl, phb->hose->node); |
611 | iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); | ||
612 | |||
613 | if (pe->pdev) | ||
614 | set_iommu_table_base(&pe->pdev->dev, tbl); | ||
615 | else | ||
616 | pnv_ioda_setup_bus_dma(pe, pe->pbus); | ||
598 | 617 | ||
599 | return; | 618 | return; |
600 | fail: | 619 | fail: |
@@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, | |||
667 | } | 686 | } |
668 | iommu_init_table(tbl, phb->hose->node); | 687 | iommu_init_table(tbl, phb->hose->node); |
669 | 688 | ||
689 | if (pe->pdev) | ||
690 | set_iommu_table_base(&pe->pdev->dev, tbl); | ||
691 | else | ||
692 | pnv_ioda_setup_bus_dma(pe, pe->pbus); | ||
693 | |||
670 | return; | 694 | return; |
671 | fail: | 695 | fail: |
672 | if (pe->tce32_seg >= 0) | 696 | if (pe->tce32_seg >= 0) |
@@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void) | |||
968 | } | 992 | } |
969 | } | 993 | } |
970 | 994 | ||
995 | static void pnv_pci_ioda_create_dbgfs(void) | ||
996 | { | ||
997 | #ifdef CONFIG_DEBUG_FS | ||
998 | struct pci_controller *hose, *tmp; | ||
999 | struct pnv_phb *phb; | ||
1000 | char name[16]; | ||
1001 | |||
1002 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { | ||
1003 | phb = hose->private_data; | ||
1004 | |||
1005 | sprintf(name, "PCI%04x", hose->global_number); | ||
1006 | phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); | ||
1007 | if (!phb->dbgfs) | ||
1008 | pr_warning("%s: Error on creating debugfs on PHB#%x\n", | ||
1009 | __func__, hose->global_number); | ||
1010 | } | ||
1011 | #endif /* CONFIG_DEBUG_FS */ | ||
1012 | } | ||
1013 | |||
971 | static void pnv_pci_ioda_fixup(void) | 1014 | static void pnv_pci_ioda_fixup(void) |
972 | { | 1015 | { |
973 | pnv_pci_ioda_setup_PEs(); | 1016 | pnv_pci_ioda_setup_PEs(); |
974 | pnv_pci_ioda_setup_seg(); | 1017 | pnv_pci_ioda_setup_seg(); |
975 | pnv_pci_ioda_setup_DMA(); | 1018 | pnv_pci_ioda_setup_DMA(); |
1019 | |||
1020 | pnv_pci_ioda_create_dbgfs(); | ||
1021 | |||
1022 | #ifdef CONFIG_EEH | ||
1023 | eeh_probe_mode_set(EEH_PROBE_MODE_DEV); | ||
1024 | eeh_addr_cache_build(); | ||
1025 | eeh_init(); | ||
1026 | #endif | ||
976 | } | 1027 | } |
977 | 1028 | ||
978 | /* | 1029 | /* |
@@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) | |||
1049 | OPAL_ASSERT_RESET); | 1100 | OPAL_ASSERT_RESET); |
1050 | } | 1101 | } |
1051 | 1102 | ||
1052 | void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) | 1103 | void __init pnv_pci_init_ioda_phb(struct device_node *np, |
1104 | u64 hub_id, int ioda_type) | ||
1053 | { | 1105 | { |
1054 | struct pci_controller *hose; | 1106 | struct pci_controller *hose; |
1055 | static int primary = 1; | 1107 | static int primary = 1; |
@@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) | |||
1087 | hose->first_busno = 0; | 1139 | hose->first_busno = 0; |
1088 | hose->last_busno = 0xff; | 1140 | hose->last_busno = 0xff; |
1089 | hose->private_data = phb; | 1141 | hose->private_data = phb; |
1142 | phb->hub_id = hub_id; | ||
1090 | phb->opal_id = phb_id; | 1143 | phb->opal_id = phb_id; |
1091 | phb->type = ioda_type; | 1144 | phb->type = ioda_type; |
1092 | 1145 | ||
@@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) | |||
1172 | phb->ioda.io_size, phb->ioda.io_segsize); | 1225 | phb->ioda.io_size, phb->ioda.io_segsize); |
1173 | 1226 | ||
1174 | phb->hose->ops = &pnv_pci_ops; | 1227 | phb->hose->ops = &pnv_pci_ops; |
1228 | #ifdef CONFIG_EEH | ||
1229 | phb->eeh_ops = &ioda_eeh_ops; | ||
1230 | #endif | ||
1175 | 1231 | ||
1176 | /* Setup RID -> PE mapping function */ | 1232 | /* Setup RID -> PE mapping function */ |
1177 | phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; | 1233 | phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; |
@@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) | |||
1212 | 1268 | ||
1213 | void pnv_pci_init_ioda2_phb(struct device_node *np) | 1269 | void pnv_pci_init_ioda2_phb(struct device_node *np) |
1214 | { | 1270 | { |
1215 | pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); | 1271 | pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); |
1216 | } | 1272 | } |
1217 | 1273 | ||
1218 | void __init pnv_pci_init_ioda_hub(struct device_node *np) | 1274 | void __init pnv_pci_init_ioda_hub(struct device_node *np) |
@@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np) | |||
1235 | for_each_child_of_node(np, phbn) { | 1291 | for_each_child_of_node(np, phbn) { |
1236 | /* Look for IODA1 PHBs */ | 1292 | /* Look for IODA1 PHBs */ |
1237 | if (of_device_is_compatible(phbn, "ibm,ioda-phb")) | 1293 | if (of_device_is_compatible(phbn, "ibm,ioda-phb")) |
1238 | pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); | 1294 | pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); |
1239 | } | 1295 | } |
1240 | } | 1296 | } |
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 92b37a0186c9..b68db6325c1b 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c | |||
@@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } | |||
86 | static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, | 86 | static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, |
87 | struct pci_dev *pdev) | 87 | struct pci_dev *pdev) |
88 | { | 88 | { |
89 | if (phb->p5ioc2.iommu_table.it_map == NULL) | 89 | if (phb->p5ioc2.iommu_table.it_map == NULL) { |
90 | iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); | 90 | iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); |
91 | iommu_register_group(&phb->p5ioc2.iommu_table, | ||
92 | pci_domain_nr(phb->hose->bus), phb->opal_id); | ||
93 | } | ||
91 | 94 | ||
92 | set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); | 95 | set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); |
93 | } | 96 | } |
94 | 97 | ||
95 | static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, | 98 | static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, |
96 | void *tce_mem, u64 tce_size) | 99 | void *tce_mem, u64 tce_size) |
97 | { | 100 | { |
98 | struct pnv_phb *phb; | 101 | struct pnv_phb *phb; |
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, | |||
133 | phb->hose->first_busno = 0; | 136 | phb->hose->first_busno = 0; |
134 | phb->hose->last_busno = 0xff; | 137 | phb->hose->last_busno = 0xff; |
135 | phb->hose->private_data = phb; | 138 | phb->hose->private_data = phb; |
139 | phb->hub_id = hub_id; | ||
136 | phb->opal_id = phb_id; | 140 | phb->opal_id = phb_id; |
137 | phb->type = PNV_PHB_P5IOC2; | 141 | phb->type = PNV_PHB_P5IOC2; |
138 | phb->model = PNV_PHB_MODEL_P5IOC2; | 142 | phb->model = PNV_PHB_MODEL_P5IOC2; |
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) | |||
226 | for_each_child_of_node(np, phbn) { | 230 | for_each_child_of_node(np, phbn) { |
227 | if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || | 231 | if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || |
228 | of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { | 232 | of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { |
229 | pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); | 233 | pnv_pci_init_p5ioc2_phb(phbn, hub_id, |
234 | tce_mem, tce_per_phb); | ||
230 | tce_mem += tce_per_phb; | 235 | tce_mem += tce_per_phb; |
231 | } | 236 | } |
232 | } | 237 | } |
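
The braces added around the it_map check above are load-bearing: iommu_register_group() must run exactly once, when the shared table is first initialized, not on every device add. A standalone sketch of the same once-only guard, with illustrative names:

    #include <stdio.h>

    static int table_initialized;

    /* Once-only guard, as in pnv_pci_p5ioc2_dma_dev_setup(): both the table
     * init and the new group registration must sit inside the braces, or the
     * second statement would run again for every device. */
    static void dma_dev_setup(const char *dev)
    {
            if (!table_initialized) {
                    table_initialized = 1;
                    printf("init table + register IOMMU group once\n");
            }
            printf("attach %s to the shared table\n", dev);
    }

    int main(void)
    {
            dma_dev_setup("0000:00:01.0");
            dma_dev_setup("0000:00:02.0");
            return 0;
    }
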
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 277343cc6a3d..a28d3b5e6393 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/irq.h> | 20 | #include <linux/irq.h> |
21 | #include <linux/io.h> | 21 | #include <linux/io.h> |
22 | #include <linux/msi.h> | 22 | #include <linux/msi.h> |
23 | #include <linux/iommu.h> | ||
23 | 24 | ||
24 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
25 | #include <asm/io.h> | 26 | #include <asm/io.h> |
@@ -32,6 +33,8 @@ | |||
32 | #include <asm/iommu.h> | 33 | #include <asm/iommu.h> |
33 | #include <asm/tce.h> | 34 | #include <asm/tce.h> |
34 | #include <asm/firmware.h> | 35 | #include <asm/firmware.h> |
36 | #include <asm/eeh_event.h> | ||
37 | #include <asm/eeh.h> | ||
35 | 38 | ||
36 | #include "powernv.h" | 39 | #include "powernv.h" |
37 | #include "pci.h" | 40 | #include "pci.h" |
@@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) | |||
202 | 205 | ||
203 | spin_lock_irqsave(&phb->lock, flags); | 206 | spin_lock_irqsave(&phb->lock, flags); |
204 | 207 | ||
205 | rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); | 208 | rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, |
209 | PNV_PCI_DIAG_BUF_SIZE); | ||
206 | has_diag = (rc == OPAL_SUCCESS); | 210 | has_diag = (rc == OPAL_SUCCESS); |
207 | 211 | ||
208 | rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, | 212 | rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, |
@@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) | |||
227 | spin_unlock_irqrestore(&phb->lock, flags); | 231 | spin_unlock_irqrestore(&phb->lock, flags); |
228 | } | 232 | } |
229 | 233 | ||
230 | static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, | 234 | static void pnv_pci_config_check_eeh(struct pnv_phb *phb, |
231 | u32 bdfn) | 235 | struct device_node *dn) |
232 | { | 236 | { |
233 | s64 rc; | 237 | s64 rc; |
234 | u8 fstate; | 238 | u8 fstate; |
235 | u16 pcierr; | 239 | u16 pcierr; |
236 | u32 pe_no; | 240 | u32 pe_no; |
237 | 241 | ||
238 | /* Get PE# if we support IODA */ | 242 | /* |
239 | pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; | 243 | * Get the PE#. During the PCI probe stage, we might not |
244 | * have set that up yet, so all ER errors are mapped ||
245 | * to PE#0. ||
246 | */ | ||
247 | pe_no = PCI_DN(dn)->pe_number; | ||
248 | if (pe_no == IODA_INVALID_PE) | ||
249 | pe_no = 0; | ||
240 | 250 | ||
241 | /* Read freeze status */ | 251 | /* Read freeze status */ |
242 | rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, | 252 | rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, |
243 | NULL); | 253 | NULL); |
244 | if (rc) { | 254 | if (rc) { |
245 | pr_warning("PCI %d: Failed to read EEH status for PE#%d," | 255 | pr_warning("%s: Can't read EEH status (PE#%d) for " |
246 | " err %lld\n", phb->hose->global_number, pe_no, rc); | 256 | "%s, err %lld\n", |
257 | __func__, pe_no, dn->full_name, rc); | ||
247 | return; | 258 | return; |
248 | } | 259 | } |
249 | cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", | 260 | cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", |
250 | bdfn, pe_no, fstate); | 261 | (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), |
262 | pe_no, fstate); | ||
251 | if (fstate != 0) | 263 | if (fstate != 0) |
252 | pnv_pci_handle_eeh_config(phb, pe_no); | 264 | pnv_pci_handle_eeh_config(phb, pe_no); |
253 | } | 265 | } |
254 | 266 | ||
255 | static int pnv_pci_read_config(struct pci_bus *bus, | 267 | int pnv_pci_cfg_read(struct device_node *dn, |
256 | unsigned int devfn, | 268 | int where, int size, u32 *val) |
257 | int where, int size, u32 *val) | ||
258 | { | 269 | { |
259 | struct pci_controller *hose = pci_bus_to_host(bus); | 270 | struct pci_dn *pdn = PCI_DN(dn); |
260 | struct pnv_phb *phb = hose->private_data; | 271 | struct pnv_phb *phb = pdn->phb->private_data; |
261 | u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; | 272 | u32 bdfn = (pdn->busno << 8) | pdn->devfn; |
273 | #ifdef CONFIG_EEH | ||
274 | struct eeh_pe *phb_pe = NULL; | ||
275 | #endif | ||
262 | s64 rc; | 276 | s64 rc; |
263 | 277 | ||
264 | if (hose == NULL) | ||
265 | return PCIBIOS_DEVICE_NOT_FOUND; | ||
266 | |||
267 | switch (size) { | 278 | switch (size) { |
268 | case 1: { | 279 | case 1: { |
269 | u8 v8; | 280 | u8 v8; |
@@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus, | |||
287 | default: | 298 | default: |
288 | return PCIBIOS_FUNC_NOT_SUPPORTED; | 299 | return PCIBIOS_FUNC_NOT_SUPPORTED; |
289 | } | 300 | } |
290 | cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", | 301 | cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", |
291 | bus->number, devfn, where, size, *val); | 302 | __func__, pdn->busno, pdn->devfn, where, size, *val); |
292 | 303 | ||
293 | /* Check if the PHB got frozen due to an error (no response) */ | 304 | /* |
294 | pnv_pci_config_check_eeh(phb, bus, bdfn); | 305 | * Check if the specified PE has been put into frozen |
306 | * state. We needn't bother when the whole PHB has ||
307 | * already been put into frozen state because of ||
308 | * PHB-fatal errors. ||
309 | */ | ||
310 | #ifdef CONFIG_EEH | ||
311 | phb_pe = eeh_phb_pe_get(pdn->phb); | ||
312 | if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) | ||
313 | return PCIBIOS_SUCCESSFUL; | ||
314 | |||
315 | if (phb->eeh_state & PNV_EEH_STATE_ENABLED) { | ||
316 | if (*val == EEH_IO_ERROR_VALUE(size) && | ||
317 | eeh_dev_check_failure(of_node_to_eeh_dev(dn))) | ||
318 | return PCIBIOS_DEVICE_NOT_FOUND; | ||
319 | } else { | ||
320 | pnv_pci_config_check_eeh(phb, dn); | ||
321 | } | ||
322 | #else | ||
323 | pnv_pci_config_check_eeh(phb, dn); | ||
324 | #endif | ||
295 | 325 | ||
296 | return PCIBIOS_SUCCESSFUL; | 326 | return PCIBIOS_SUCCESSFUL; |
297 | } | 327 | } |
298 | 328 | ||
299 | static int pnv_pci_write_config(struct pci_bus *bus, | 329 | int pnv_pci_cfg_write(struct device_node *dn, |
300 | unsigned int devfn, | 330 | int where, int size, u32 val) |
301 | int where, int size, u32 val) | ||
302 | { | 331 | { |
303 | struct pci_controller *hose = pci_bus_to_host(bus); | 332 | struct pci_dn *pdn = PCI_DN(dn); |
304 | struct pnv_phb *phb = hose->private_data; | 333 | struct pnv_phb *phb = pdn->phb->private_data; |
305 | u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; | 334 | u32 bdfn = (pdn->busno << 8) | pdn->devfn; |
306 | |||
307 | if (hose == NULL) | ||
308 | return PCIBIOS_DEVICE_NOT_FOUND; | ||
309 | 335 | ||
310 | cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", | 336 | cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", |
311 | bus->number, devfn, where, size, val); | 337 | __func__, pdn->busno, pdn->devfn, where, size, val); |
312 | switch (size) { | 338 | switch (size) { |
313 | case 1: | 339 | case 1: |
314 | opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); | 340 | opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); |
@@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus, | |||
322 | default: | 348 | default: |
323 | return PCIBIOS_FUNC_NOT_SUPPORTED; | 349 | return PCIBIOS_FUNC_NOT_SUPPORTED; |
324 | } | 350 | } |
351 | |||
325 | /* Check if the PHB got frozen due to an error (no response) */ | 352 | /* Check if the PHB got frozen due to an error (no response) */ |
326 | pnv_pci_config_check_eeh(phb, bus, bdfn); | 353 | #ifdef CONFIG_EEH |
354 | if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED)) | ||
355 | pnv_pci_config_check_eeh(phb, dn); | ||
356 | #else | ||
357 | pnv_pci_config_check_eeh(phb, dn); | ||
358 | #endif | ||
327 | 359 | ||
328 | return PCIBIOS_SUCCESSFUL; | 360 | return PCIBIOS_SUCCESSFUL; |
329 | } | 361 | } |
330 | 362 | ||
363 | static int pnv_pci_read_config(struct pci_bus *bus, | ||
364 | unsigned int devfn, | ||
365 | int where, int size, u32 *val) | ||
366 | { | ||
367 | struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); | ||
368 | struct pci_dn *pdn; | ||
369 | |||
370 | for (dn = busdn->child; dn; dn = dn->sibling) { | ||
371 | pdn = PCI_DN(dn); | ||
372 | if (pdn && pdn->devfn == devfn) | ||
373 | return pnv_pci_cfg_read(dn, where, size, val); | ||
374 | } | ||
375 | |||
376 | *val = 0xFFFFFFFF; | ||
377 | return PCIBIOS_DEVICE_NOT_FOUND; | ||
378 | |||
379 | } | ||
380 | |||
381 | static int pnv_pci_write_config(struct pci_bus *bus, | ||
382 | unsigned int devfn, | ||
383 | int where, int size, u32 val) | ||
384 | { | ||
385 | struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); | ||
386 | struct pci_dn *pdn; | ||
387 | |||
388 | for (dn = busdn->child; dn; dn = dn->sibling) { | ||
389 | pdn = PCI_DN(dn); | ||
390 | if (pdn && pdn->devfn == devfn) | ||
391 | return pnv_pci_cfg_write(dn, where, size, val); | ||
392 | } | ||
393 | |||
394 | return PCIBIOS_DEVICE_NOT_FOUND; | ||
395 | } | ||
396 | |||
331 | struct pci_ops pnv_pci_ops = { | 397 | struct pci_ops pnv_pci_ops = { |
332 | .read = pnv_pci_read_config, | 398 | .read = pnv_pci_read_config, |
333 | .write = pnv_pci_write_config, | 399 | .write = pnv_pci_write_config, |
334 | }; | 400 | }; |
335 | 401 | ||
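
When EEH is enabled, the read path above treats an all-ones value of the access width as a possible sign of a frozen PE and hands it to eeh_dev_check_failure(). A standalone sketch of the sentinel values being compared against; the shift expression is an assumption about how EEH_IO_ERROR_VALUE(size) is defined, not taken from this patch:

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed stand-in for the kernel's EEH_IO_ERROR_VALUE(size):
     * all-ones for the access width (1, 2 or 4 bytes). */
    static uint32_t eeh_io_error_value(int size)
    {
            return ~0U >> ((4 - size) * 8);
    }

    int main(void)
    {
            int size;

            for (size = 1; size <= 4; size <<= 1)
                    printf("size %d -> 0x%08x\n", size, eeh_io_error_value(size));
            return 0;  /* prints 0xff, 0xffff, 0xffffffff */
    }
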
@@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) | |||
412 | pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), | 478 | pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), |
413 | be32_to_cpup(sizep), 0); | 479 | be32_to_cpup(sizep), 0); |
414 | iommu_init_table(tbl, hose->node); | 480 | iommu_init_table(tbl, hose->node); |
481 | iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); | ||
415 | 482 | ||
416 | /* Deal with SW invalidated TCEs when needed (BML way) */ | 483 | /* Deal with SW invalidated TCEs when needed (BML way) */ |
417 | swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", | 484 | swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", |
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 25d76c4df50b..d633c64e05a1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h | |||
@@ -66,15 +66,43 @@ struct pnv_ioda_pe { | |||
66 | struct list_head list; | 66 | struct list_head list; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | /* IOC dependent EEH operations */ | ||
70 | #ifdef CONFIG_EEH | ||
71 | struct pnv_eeh_ops { | ||
72 | int (*post_init)(struct pci_controller *hose); | ||
73 | int (*set_option)(struct eeh_pe *pe, int option); | ||
74 | int (*get_state)(struct eeh_pe *pe); | ||
75 | int (*reset)(struct eeh_pe *pe, int option); | ||
76 | int (*get_log)(struct eeh_pe *pe, int severity, | ||
77 | char *drv_log, unsigned long len); | ||
78 | int (*configure_bridge)(struct eeh_pe *pe); | ||
79 | int (*next_error)(struct eeh_pe **pe); | ||
80 | }; | ||
81 | |||
82 | #define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */ | ||
83 | #define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */ | ||
84 | |||
85 | #endif /* CONFIG_EEH */ | ||
86 | |||
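
The pnv_eeh_ops table makes the EEH backend IOC-dependent: the generic layer calls through phb->eeh_ops, which pnv_pci_init_ioda_phb() points at ioda_eeh_ops. A minimal standalone sketch of that dispatch shape, with illustrative stand-in names:

    #include <stdio.h>

    /* Per-IOC backend vtable, analogous to struct pnv_eeh_ops. */
    struct eeh_backend {
            int (*reset)(int pe, int option);
    };

    static int ioda_reset(int pe, int option)
    {
            printf("IODA reset of PE#%d, option %d\n", pe, option);
            return 0;
    }

    static struct eeh_backend ioda_backend = { .reset = ioda_reset };

    int main(void)
    {
            struct eeh_backend *ops = &ioda_backend;  /* phb->eeh_ops analogue */
            return ops->reset(2, 1);
    }
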
69 | struct pnv_phb { | 87 | struct pnv_phb { |
70 | struct pci_controller *hose; | 88 | struct pci_controller *hose; |
71 | enum pnv_phb_type type; | 89 | enum pnv_phb_type type; |
72 | enum pnv_phb_model model; | 90 | enum pnv_phb_model model; |
91 | u64 hub_id; | ||
73 | u64 opal_id; | 92 | u64 opal_id; |
74 | void __iomem *regs; | 93 | void __iomem *regs; |
75 | int initialized; | 94 | int initialized; |
76 | spinlock_t lock; | 95 | spinlock_t lock; |
77 | 96 | ||
97 | #ifdef CONFIG_EEH | ||
98 | struct pnv_eeh_ops *eeh_ops; | ||
99 | int eeh_state; | ||
100 | #endif | ||
101 | |||
102 | #ifdef CONFIG_DEBUG_FS | ||
103 | struct dentry *dbgfs; | ||
104 | #endif | ||
105 | |||
78 | #ifdef CONFIG_PCI_MSI | 106 | #ifdef CONFIG_PCI_MSI |
79 | unsigned int msi_base; | 107 | unsigned int msi_base; |
80 | unsigned int msi32_support; | 108 | unsigned int msi32_support; |
@@ -150,7 +178,14 @@ struct pnv_phb { | |||
150 | }; | 178 | }; |
151 | 179 | ||
152 | extern struct pci_ops pnv_pci_ops; | 180 | extern struct pci_ops pnv_pci_ops; |
181 | #ifdef CONFIG_EEH | ||
182 | extern struct pnv_eeh_ops ioda_eeh_ops; | ||
183 | #endif | ||
153 | 184 | ||
185 | int pnv_pci_cfg_read(struct device_node *dn, | ||
186 | int where, int size, u32 *val); | ||
187 | int pnv_pci_cfg_write(struct device_node *dn, | ||
188 | int where, int size, u32 val); | ||
154 | extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, | 189 | extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, |
155 | void *tce_mem, u64 tce_size, | 190 | void *tce_mem, u64 tce_size, |
156 | u64 dma_offset); | 191 | u64 dma_offset); |
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d4459bfc92f7..84438af96c05 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c | |||
@@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd) | |||
93 | { | 93 | { |
94 | long rc = OPAL_BUSY; | 94 | long rc = OPAL_BUSY; |
95 | 95 | ||
96 | opal_notifier_disable(); | ||
97 | |||
96 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { | 98 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
97 | rc = opal_cec_reboot(); | 99 | rc = opal_cec_reboot(); |
98 | if (rc == OPAL_BUSY_EVENT) | 100 | if (rc == OPAL_BUSY_EVENT) |
@@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void) | |||
108 | { | 110 | { |
109 | long rc = OPAL_BUSY; | 111 | long rc = OPAL_BUSY; |
110 | 112 | ||
113 | opal_notifier_disable(); | ||
114 | |||
111 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { | 115 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
112 | rc = opal_cec_power_down(0); | 116 | rc = opal_cec_power_down(0); |
113 | if (rc == OPAL_BUSY_EVENT) | 117 | if (rc == OPAL_BUSY_EVENT) |
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 88c9459c3e07..89e3857af4e0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c | |||
@@ -40,7 +40,7 @@ | |||
40 | #define DBG(fmt...) | 40 | #define DBG(fmt...) |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | static void __cpuinit pnv_smp_setup_cpu(int cpu) | 43 | static void pnv_smp_setup_cpu(int cpu) |
44 | { | 44 | { |
45 | if (cpu != boot_cpuid) | 45 | if (cpu != boot_cpuid) |
46 | xics_setup_cpu(); | 46 | xics_setup_cpu(); |
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr) | |||
51 | /* Special case - we inhibit secondary thread startup | 51 | /* Special case - we inhibit secondary thread startup |
52 | * during boot if the user requests it. | 52 | * during boot if the user requests it. |
53 | */ | 53 | */ |
54 | if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { | 54 | if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { |
55 | if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) | 55 | if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) |
56 | return 0; | 56 | return 0; |
57 | if (smt_enabled_at_boot | 57 | if (smt_enabled_at_boot |
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 177a2f70700c..3e270e3412ae 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c | |||
@@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group) | |||
109 | } | 109 | } |
110 | 110 | ||
111 | static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, | 111 | static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, |
112 | unsigned long vpn, int psize, int ssize, int local) | 112 | unsigned long vpn, int psize, int apsize, |
113 | int ssize, int local) | ||
113 | { | 114 | { |
114 | int result; | 115 | int result; |
115 | u64 hpte_v, want_v, hpte_rs; | 116 | u64 hpte_v, want_v, hpte_rs; |
@@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, | |||
162 | } | 163 | } |
163 | 164 | ||
164 | static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, | 165 | static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, |
165 | int psize, int ssize, int local) | 166 | int psize, int apsize, int ssize, int local) |
166 | { | 167 | { |
167 | unsigned long flags; | 168 | unsigned long flags; |
168 | int result; | 169 | int result; |
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 4459eff7a75a..1bd3399146ed 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig | |||
@@ -33,11 +33,6 @@ config PPC_SPLPAR | |||
33 | processors, that is, which share physical processors between | 33 | processors, that is, which share physical processors between |
34 | two or more partitions. | 34 | two or more partitions. |
35 | 35 | ||
36 | config EEH | ||
37 | bool | ||
38 | depends on PPC_PSERIES && PCI | ||
39 | default y | ||
40 | |||
41 | config PSERIES_MSI | 36 | config PSERIES_MSI |
42 | bool | 37 | bool |
43 | depends on PCI_MSI && EEH | 38 | depends on PCI_MSI && EEH |
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 53866e537a92..8ae010381316 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile | |||
@@ -6,9 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ | |||
6 | firmware.o power.o dlpar.o mobility.o | 6 | firmware.o power.o dlpar.o mobility.o |
7 | obj-$(CONFIG_SMP) += smp.o | 7 | obj-$(CONFIG_SMP) += smp.o |
8 | obj-$(CONFIG_SCANLOG) += scanlog.o | 8 | obj-$(CONFIG_SCANLOG) += scanlog.o |
9 | obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ | 9 | obj-$(CONFIG_EEH) += eeh_pseries.o |
10 | eeh_driver.o eeh_event.o eeh_sysfs.o \ | ||
11 | eeh_pseries.o | ||
12 | obj-$(CONFIG_KEXEC) += kexec.o | 10 | obj-$(CONFIG_KEXEC) += kexec.o |
13 | obj-$(CONFIG_PCI) += pci.o pci_dlpar.o | 11 | obj-$(CONFIG_PCI) += pci.o pci_dlpar.o |
14 | obj-$(CONFIG_PSERIES_MSI) += msi.o | 12 | obj-$(CONFIG_PSERIES_MSI) += msi.o |
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index ef9d9d84c7d5..5ea88d1541f7 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c | |||
@@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) | |||
115 | * by scope or event type alone. For example, Torrent ISR route change | 115 | * by scope or event type alone. For example, Torrent ISR route change |
116 | * event is reported with scope 0x00 (Not Applicable) rather than | 116 | * event is reported with scope 0x00 (Not Applicable) rather than |
117 | * 0x3B (Torrent-hub). It is better to let the clients identify | 117 | * 0x3B (Torrent-hub). It is better to let the clients identify |
118 | * who owns the the event. | 118 | * who owns the event. |
119 | */ | 119 | */ |
120 | 120 | ||
121 | static irqreturn_t ioei_interrupt(int irq, void *dev_id) | 121 | static irqreturn_t ioei_interrupt(int irq, void *dev_id) |
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 86ae364900d6..23fc1dcf4434 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c | |||
@@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) | |||
614 | 614 | ||
615 | iommu_table_setparms(pci->phb, dn, tbl); | 615 | iommu_table_setparms(pci->phb, dn, tbl); |
616 | pci->iommu_table = iommu_init_table(tbl, pci->phb->node); | 616 | pci->iommu_table = iommu_init_table(tbl, pci->phb->node); |
617 | iommu_register_group(tbl, pci_domain_nr(bus), 0); | ||
617 | 618 | ||
618 | /* Divide the rest (1.75GB) among the children */ | 619 | /* Divide the rest (1.75GB) among the children */ |
619 | pci->phb->dma_window_size = 0x80000000ul; | 620 | pci->phb->dma_window_size = 0x80000000ul; |
@@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) | |||
658 | ppci->phb->node); | 659 | ppci->phb->node); |
659 | iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); | 660 | iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); |
660 | ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); | 661 | ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); |
662 | iommu_register_group(tbl, pci_domain_nr(bus), 0); | ||
661 | pr_debug(" created table: %p\n", ppci->iommu_table); | 663 | pr_debug(" created table: %p\n", ppci->iommu_table); |
662 | } | 664 | } |
663 | } | 665 | } |
@@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) | |||
684 | phb->node); | 686 | phb->node); |
685 | iommu_table_setparms(phb, dn, tbl); | 687 | iommu_table_setparms(phb, dn, tbl); |
686 | PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); | 688 | PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); |
689 | iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); | ||
687 | set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); | 690 | set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); |
688 | return; | 691 | return; |
689 | } | 692 | } |
@@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) | |||
1184 | pci->phb->node); | 1187 | pci->phb->node); |
1185 | iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); | 1188 | iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); |
1186 | pci->iommu_table = iommu_init_table(tbl, pci->phb->node); | 1189 | pci->iommu_table = iommu_init_table(tbl, pci->phb->node); |
1190 | iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); | ||
1187 | pr_debug(" created table: %p\n", pci->iommu_table); | 1191 | pr_debug(" created table: %p\n", pci->iommu_table); |
1188 | } else { | 1192 | } else { |
1189 | pr_debug(" found DMA window, table: %p\n", pci->iommu_table); | 1193 | pr_debug(" found DMA window, table: %p\n", pci->iommu_table); |
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6d62072a7d5a..02d6e21619bb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -45,6 +45,13 @@ | |||
45 | #include "plpar_wrappers.h" | 45 | #include "plpar_wrappers.h" |
46 | #include "pseries.h" | 46 | #include "pseries.h" |
47 | 47 | ||
48 | /* Flag bits for H_BULK_REMOVE */ | ||
49 | #define HBR_REQUEST 0x4000000000000000UL | ||
50 | #define HBR_RESPONSE 0x8000000000000000UL | ||
51 | #define HBR_END 0xc000000000000000UL | ||
52 | #define HBR_AVPN 0x0200000000000000UL | ||
53 | #define HBR_ANDCOND 0x0100000000000000UL | ||
54 | |||
48 | 55 | ||
49 | /* in hvCall.S */ | 56 | /* in hvCall.S */ |
50 | EXPORT_SYMBOL(plpar_hcall); | 57 | EXPORT_SYMBOL(plpar_hcall); |
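
These flag bits get packed, two quadwords per entry, into the H_BULK_REMOVE hcall used further down: up to four (flags|slot, encoded-AVPN) pairs, eight parameters, per plpar_hcall9() invocation, with HBR_END terminating a partial batch. A standalone sketch of the packing loop, with the hcall stubbed out and fake AVPN values:

    #include <stdio.h>
    #include <inttypes.h>

    #define HBR_REQUEST 0x4000000000000000ULL
    #define HBR_AVPN    0x0200000000000000ULL
    #define HBR_END     0xc000000000000000ULL

    /* Stand-in for plpar_hcall9(): just prints the queued pairs. */
    static void fake_bulk_remove(const uint64_t *param, int nparams)
    {
            printf("H_BULK_REMOVE, %d entries:\n", nparams / 2);
            for (int i = 0; i < nparams; i += 2)
                    printf("  flags=%016" PRIx64 " avpn=%016" PRIx64 "\n",
                           param[i], param[i + 1]);
    }

    int main(void)
    {
            uint64_t param[8];
            int pix = 0;

            /* A PPC64_HUGE_HPTE_BATCH of 12 entries, 4 per call -> 3 calls,
             * which is where the "limit ... to 3" figure below comes from. */
            for (uint64_t slot = 0; slot < 12; slot++) {
                    param[pix]     = HBR_REQUEST | HBR_AVPN | slot;
                    param[pix + 1] = 0x1000 + slot;        /* fake AVPN */
                    pix += 2;
                    if (pix == 8) {        /* 8 params = 4 remove requests */
                            fake_bulk_remove(param, pix);
                            pix = 0;
                    }
            }
            if (pix) {
                    param[pix] = HBR_END;  /* terminate a partial batch */
                    fake_bulk_remove(param, pix);
            }
            return 0;
    }
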
@@ -64,6 +71,9 @@ void vpa_init(int cpu) | |||
64 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) | 71 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) |
65 | lppaca_of(cpu).vmxregs_in_use = 1; | 72 | lppaca_of(cpu).vmxregs_in_use = 1; |
66 | 73 | ||
74 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) | ||
75 | lppaca_of(cpu).ebb_regs_in_use = 1; | ||
76 | |||
67 | addr = __pa(&lppaca_of(cpu)); | 77 | addr = __pa(&lppaca_of(cpu)); |
68 | ret = register_vpa(hwcpu, addr); | 78 | ret = register_vpa(hwcpu, addr); |
69 | 79 | ||
@@ -240,7 +250,8 @@ static void pSeries_lpar_hptab_clear(void) | |||
240 | static long pSeries_lpar_hpte_updatepp(unsigned long slot, | 250 | static long pSeries_lpar_hpte_updatepp(unsigned long slot, |
241 | unsigned long newpp, | 251 | unsigned long newpp, |
242 | unsigned long vpn, | 252 | unsigned long vpn, |
243 | int psize, int ssize, int local) | 253 | int psize, int apsize, |
254 | int ssize, int local) | ||
244 | { | 255 | { |
245 | unsigned long lpar_rc; | 256 | unsigned long lpar_rc; |
246 | unsigned long flags = (newpp & 7) | H_AVPN; | 257 | unsigned long flags = (newpp & 7) | H_AVPN; |
@@ -328,7 +339,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, | |||
328 | } | 339 | } |
329 | 340 | ||
330 | static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, | 341 | static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, |
331 | int psize, int ssize, int local) | 342 | int psize, int apsize, |
343 | int ssize, int local) | ||
332 | { | 344 | { |
333 | unsigned long want_v; | 345 | unsigned long want_v; |
334 | unsigned long lpar_rc; | 346 | unsigned long lpar_rc; |
@@ -345,6 +357,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, | |||
345 | BUG_ON(lpar_rc != H_SUCCESS); | 357 | BUG_ON(lpar_rc != H_SUCCESS); |
346 | } | 358 | } |
347 | 359 | ||
360 | /* | ||
361 | * Limit iterations holding pSeries_lpar_tlbie_lock to 3; we also need ||
362 | * to avoid bouncing the hypervisor tlbie lock. ||
363 | */ | ||
364 | #define PPC64_HUGE_HPTE_BATCH 12 | ||
365 | |||
366 | static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, | ||
367 | unsigned long *vpn, int count, | ||
368 | int psize, int ssize) | ||
369 | { | ||
370 | unsigned long param[8]; | ||
371 | int i = 0, pix = 0, rc; | ||
372 | unsigned long flags = 0; | ||
373 | int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | ||
374 | |||
375 | if (lock_tlbie) | ||
376 | spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); | ||
377 | |||
378 | for (i = 0; i < count; i++) { | ||
379 | |||
380 | if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { | ||
381 | pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0, | ||
382 | ssize, 0); | ||
383 | } else { | ||
384 | param[pix] = HBR_REQUEST | HBR_AVPN | slot[i]; | ||
385 | param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize); | ||
386 | pix += 2; | ||
387 | if (pix == 8) { | ||
388 | rc = plpar_hcall9(H_BULK_REMOVE, param, | ||
389 | param[0], param[1], param[2], | ||
390 | param[3], param[4], param[5], | ||
391 | param[6], param[7]); | ||
392 | BUG_ON(rc != H_SUCCESS); | ||
393 | pix = 0; | ||
394 | } | ||
395 | } | ||
396 | } | ||
397 | if (pix) { | ||
398 | param[pix] = HBR_END; | ||
399 | rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], | ||
400 | param[2], param[3], param[4], param[5], | ||
401 | param[6], param[7]); | ||
402 | BUG_ON(rc != H_SUCCESS); | ||
403 | } | ||
404 | |||
405 | if (lock_tlbie) | ||
406 | spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); | ||
407 | } | ||
408 | |||
409 | static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, | ||
410 | unsigned char *hpte_slot_array, | ||
411 | unsigned long addr, int psize) | ||
412 | { | ||
413 | int ssize = 0, i, index = 0; | ||
414 | unsigned long s_addr = addr; | ||
415 | unsigned int max_hpte_count, valid; | ||
416 | unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; | ||
417 | unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; | ||
418 | unsigned long shift, hidx, vpn = 0, vsid, hash, slot; | ||
419 | |||
420 | shift = mmu_psize_defs[psize].shift; | ||
421 | max_hpte_count = 1U << (PMD_SHIFT - shift); | ||
422 | |||
423 | for (i = 0; i < max_hpte_count; i++) { | ||
424 | valid = hpte_valid(hpte_slot_array, i); | ||
425 | if (!valid) | ||
426 | continue; | ||
427 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
428 | |||
429 | /* get the vpn */ | ||
430 | addr = s_addr + (i * (1ul << shift)); | ||
431 | if (!is_kernel_addr(addr)) { | ||
432 | ssize = user_segment_size(addr); | ||
433 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
434 | WARN_ON(vsid == 0); | ||
435 | } else { | ||
436 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
437 | ssize = mmu_kernel_ssize; | ||
438 | } | ||
439 | |||
440 | vpn = hpt_vpn(addr, vsid, ssize); | ||
441 | hash = hpt_hash(vpn, shift, ssize); | ||
442 | if (hidx & _PTEIDX_SECONDARY) | ||
443 | hash = ~hash; | ||
444 | |||
445 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
446 | slot += hidx & _PTEIDX_GROUP_IX; | ||
447 | |||
448 | slot_array[index] = slot; | ||
449 | vpn_array[index] = vpn; | ||
450 | if (index == PPC64_HUGE_HPTE_BATCH - 1) { | ||
451 | /* | ||
452 | * Now do a bulk invalidate ||
453 | */ | ||
454 | __pSeries_lpar_hugepage_invalidate(slot_array, | ||
455 | vpn_array, | ||
456 | PPC64_HUGE_HPTE_BATCH, | ||
457 | psize, ssize); | ||
458 | index = 0; | ||
459 | } else | ||
460 | index++; | ||
461 | } | ||
462 | if (index) | ||
463 | __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, | ||
464 | index, psize, ssize); | ||
465 | } | ||
466 | |||
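
Each valid subpage HPTE in the loop above is located by recomputing its hash: the secondary bit in hidx flips the hash, and the group-index bits select a slot within the 8-entry hash group. A standalone sketch of that slot arithmetic; the mask and input values are made up, while HPTES_PER_GROUP and the _PTEIDX_* bits match the kernel's definitions:

    #include <stdio.h>
    #include <stdint.h>

    #define HPTES_PER_GROUP   8
    #define _PTEIDX_SECONDARY 0x8
    #define _PTEIDX_GROUP_IX  0x7

    int main(void)
    {
            uint64_t hash = 0x123456;            /* from hpt_hash() in reality */
            uint64_t htab_hash_mask = 0xfffff;   /* e.g. 2^20 hash groups */
            unsigned int hidx = 0xb;             /* secondary bit + index 3 */
            uint64_t slot;

            if (hidx & _PTEIDX_SECONDARY)
                    hash = ~hash;                /* use the secondary hash */

            slot  = (hash & htab_hash_mask) * HPTES_PER_GROUP;
            slot += hidx & _PTEIDX_GROUP_IX;
            printf("slot = %llu\n", (unsigned long long)slot);
            return 0;
    }
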
348 | static void pSeries_lpar_hpte_removebolted(unsigned long ea, | 467 | static void pSeries_lpar_hpte_removebolted(unsigned long ea, |
349 | int psize, int ssize) | 468 | int psize, int ssize) |
350 | { | 469 | { |
@@ -356,17 +475,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, | |||
356 | 475 | ||
357 | slot = pSeries_lpar_hpte_find(vpn, psize, ssize); | 476 | slot = pSeries_lpar_hpte_find(vpn, psize, ssize); |
358 | BUG_ON(slot == -1); | 477 | BUG_ON(slot == -1); |
359 | 478 | /* | |
360 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); | 479 | * lpar doesn't use the passed actual page size |
480 | */ | ||
481 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); | ||
361 | } | 482 | } |
362 | 483 | ||
363 | /* Flag bits for H_BULK_REMOVE */ | ||
364 | #define HBR_REQUEST 0x4000000000000000UL | ||
365 | #define HBR_RESPONSE 0x8000000000000000UL | ||
366 | #define HBR_END 0xc000000000000000UL | ||
367 | #define HBR_AVPN 0x0200000000000000UL | ||
368 | #define HBR_ANDCOND 0x0100000000000000UL | ||
369 | |||
370 | /* | 484 | /* |
371 | * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie | 485 | * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie |
372 | * lock. | 486 | * lock. |
@@ -400,8 +514,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) | |||
400 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 514 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
401 | slot += hidx & _PTEIDX_GROUP_IX; | 515 | slot += hidx & _PTEIDX_GROUP_IX; |
402 | if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { | 516 | if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { |
517 | /* | ||
518 | * lpar doesn't use the passed actual page size | ||
519 | */ | ||
403 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, | 520 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, |
404 | ssize, local); | 521 | 0, ssize, local); |
405 | } else { | 522 | } else { |
406 | param[pix] = HBR_REQUEST | HBR_AVPN | slot; | 523 | param[pix] = HBR_REQUEST | HBR_AVPN | slot; |
407 | param[pix+1] = hpte_encode_avpn(vpn, psize, | 524 | param[pix+1] = hpte_encode_avpn(vpn, psize, |
@@ -452,6 +569,7 @@ void __init hpte_init_lpar(void) | |||
452 | ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; | 569 | ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; |
453 | ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; | 570 | ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; |
454 | ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; | 571 | ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; |
572 | ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; | ||
455 | } | 573 | } |
456 | 574 | ||
457 | #ifdef CONFIG_PPC_SMLPAR | 575 | #ifdef CONFIG_PPC_SMLPAR |
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 8733a86ad52e..9f8671a44551 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/kmsg_dump.h> | 20 | #include <linux/kmsg_dump.h> |
21 | #include <linux/pstore.h> | ||
21 | #include <linux/ctype.h> | 22 | #include <linux/ctype.h> |
22 | #include <linux/zlib.h> | 23 | #include <linux/zlib.h> |
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
@@ -29,6 +30,13 @@ | |||
29 | /* Max bytes to read/write in one go */ | 30 | /* Max bytes to read/write in one go */ |
30 | #define NVRW_CNT 0x20 | 31 | #define NVRW_CNT 0x20 |
31 | 32 | ||
33 | /* | ||
34 | * Set the oops header version to distinguish between old and new format ||
35 | * headers. The lnx,oops-log partition max size is 4000, so a header ||
36 | * version > 4000 identifies the new header. ||
37 | */ | ||
38 | #define OOPS_HDR_VERSION 5000 | ||
39 | |||
32 | static unsigned int nvram_size; | 40 | static unsigned int nvram_size; |
33 | static int nvram_fetch, nvram_store; | 41 | static int nvram_fetch, nvram_store; |
34 | static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ | 42 | static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ |
@@ -45,20 +53,23 @@ struct nvram_os_partition { | |||
45 | int min_size; /* minimum acceptable size (0 means req_size) */ | 53 | int min_size; /* minimum acceptable size (0 means req_size) */ |
46 | long size; /* size of data portion (excluding err_log_info) */ | 54 | long size; /* size of data portion (excluding err_log_info) */ |
47 | long index; /* offset of data portion of partition */ | 55 | long index; /* offset of data portion of partition */ |
56 | bool os_partition; /* partition initialized by OS, not FW */ | ||
48 | }; | 57 | }; |
49 | 58 | ||
50 | static struct nvram_os_partition rtas_log_partition = { | 59 | static struct nvram_os_partition rtas_log_partition = { |
51 | .name = "ibm,rtas-log", | 60 | .name = "ibm,rtas-log", |
52 | .req_size = 2079, | 61 | .req_size = 2079, |
53 | .min_size = 1055, | 62 | .min_size = 1055, |
54 | .index = -1 | 63 | .index = -1, |
64 | .os_partition = true | ||
55 | }; | 65 | }; |
56 | 66 | ||
57 | static struct nvram_os_partition oops_log_partition = { | 67 | static struct nvram_os_partition oops_log_partition = { |
58 | .name = "lnx,oops-log", | 68 | .name = "lnx,oops-log", |
59 | .req_size = 4000, | 69 | .req_size = 4000, |
60 | .min_size = 2000, | 70 | .min_size = 2000, |
61 | .index = -1 | 71 | .index = -1, |
72 | .os_partition = true | ||
62 | }; | 73 | }; |
63 | 74 | ||
64 | static const char *pseries_nvram_os_partitions[] = { | 75 | static const char *pseries_nvram_os_partitions[] = { |
@@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = { | |||
67 | NULL | 78 | NULL |
68 | }; | 79 | }; |
69 | 80 | ||
81 | struct oops_log_info { | ||
82 | u16 version; | ||
83 | u16 report_length; | ||
84 | u64 timestamp; | ||
85 | } __attribute__((packed)); | ||
86 | |||
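
The packed header is 12 bytes (2 + 2 + 8), and the version trick works because any old-format header began with a u16 report length that could never exceed the 4000-byte partition, so a first u16 above 4000 must be a new-style version field. A standalone check of both facts:

    #include <stdio.h>
    #include <stdint.h>

    struct oops_log_info {
            uint16_t version;
            uint16_t report_length;
            uint64_t timestamp;
    } __attribute__((packed));

    int main(void)
    {
            printf("header size = %zu bytes\n", sizeof(struct oops_log_info));
            printf("OOPS_HDR_VERSION = %d (> 4000, cannot be a length)\n", 5000);
            return 0;
    }
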
70 | static void oops_to_nvram(struct kmsg_dumper *dumper, | 87 | static void oops_to_nvram(struct kmsg_dumper *dumper, |
71 | enum kmsg_dump_reason reason); | 88 | enum kmsg_dump_reason reason); |
72 | 89 | ||
@@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */ | |||
83 | 100 | ||
84 | * big_oops_buf[] holds the uncompressed text we're capturing. | 101 | * big_oops_buf[] holds the uncompressed text we're capturing. |
85 | * | 102 | * |
86 | * oops_buf[] holds the compressed text, preceded by a prefix. | 103 | * oops_buf[] holds the compressed text, preceded by a oops header. |
87 | * The prefix is just a u16 holding the length of the compressed* text. | 104 | * oops header has u16 holding the version of oops header (to differentiate |
88 | * (*Or uncompressed, if compression fails.) oops_buf[] gets written | 105 | * between old and new format header) followed by u16 holding the length of |
89 | * to NVRAM. | 106 | * the compressed* text (*Or uncompressed, if compression fails.) and u64 |
107 | * holding the timestamp. oops_buf[] gets written to NVRAM. | ||
90 | * | 108 | * |
91 | * oops_len points to the prefix. oops_data points to the compressed text. | 109 | * oops_log_info points to the header. oops_data points to the compressed text. |
92 | * | 110 | * |
93 | * +- oops_buf | 111 | * +- oops_buf |
94 | * | +- oops_data | 112 | * | +- oops_data |
95 | * v v | 113 | * v v |
96 | * +------------+-----------------------------------------------+ | 114 | * +-----------+-----------+-----------+------------------------+ |
97 | * | length | text | | 115 | * | version | length | timestamp | text | |
98 | * | (2 bytes) | (oops_data_sz bytes) | | 116 | * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | |
99 | * +------------+-----------------------------------------------+ | 117 | * +-----------+-----------+-----------+------------------------+ |
100 | * ^ | 118 | * ^ |
101 | * +- oops_len | 119 | * +- oops_log_info |
102 | * | 120 | * |
103 | * We preallocate these buffers during init to avoid kmalloc during oops/panic. | 121 | * We preallocate these buffers during init to avoid kmalloc during oops/panic. |
104 | */ | 122 | */ |
105 | static size_t big_oops_buf_sz; | 123 | static size_t big_oops_buf_sz; |
106 | static char *big_oops_buf, *oops_buf; | 124 | static char *big_oops_buf, *oops_buf; |
107 | static u16 *oops_len; | ||
108 | static char *oops_data; | 125 | static char *oops_data; |
109 | static size_t oops_data_sz; | 126 | static size_t oops_data_sz; |
110 | 127 | ||
@@ -114,6 +131,30 @@ static size_t oops_data_sz; | |||
114 | #define MEM_LEVEL 4 | 131 | #define MEM_LEVEL 4 |
115 | static struct z_stream_s stream; | 132 | static struct z_stream_s stream; |
116 | 133 | ||
134 | #ifdef CONFIG_PSTORE | ||
135 | static struct nvram_os_partition of_config_partition = { | ||
136 | .name = "of-config", | ||
137 | .index = -1, | ||
138 | .os_partition = false | ||
139 | }; | ||
140 | |||
141 | static struct nvram_os_partition common_partition = { | ||
142 | .name = "common", | ||
143 | .index = -1, | ||
144 | .os_partition = false | ||
145 | }; | ||
146 | |||
147 | static enum pstore_type_id nvram_type_ids[] = { | ||
148 | PSTORE_TYPE_DMESG, | ||
149 | PSTORE_TYPE_PPC_RTAS, | ||
150 | PSTORE_TYPE_PPC_OF, | ||
151 | PSTORE_TYPE_PPC_COMMON, | ||
152 | -1 | ||
153 | }; | ||
154 | static int read_type; | ||
155 | static unsigned long last_rtas_event; | ||
156 | #endif | ||
157 | |||
117 | static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) | 158 | static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) |
118 | { | 159 | { |
119 | unsigned int i; | 160 | unsigned int i; |
@@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length, | |||
275 | { | 316 | { |
276 | int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, | 317 | int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, |
277 | err_type, error_log_cnt); | 318 | err_type, error_log_cnt); |
278 | if (!rc) | 319 | if (!rc) { |
279 | last_unread_rtas_event = get_seconds(); | 320 | last_unread_rtas_event = get_seconds(); |
321 | #ifdef CONFIG_PSTORE | ||
322 | last_rtas_event = get_seconds(); | ||
323 | #endif | ||
324 | } | ||
325 | |||
280 | return rc; | 326 | return rc; |
281 | } | 327 | } |
282 | 328 | ||
283 | /* nvram_read_error_log | 329 | /* nvram_read_partition |
284 | * | 330 | * |
285 | * Reads nvram for error log for at most 'length' | 331 | * Reads nvram partition for at most 'length' |
286 | */ | 332 | */ |
287 | int nvram_read_error_log(char * buff, int length, | 333 | int nvram_read_partition(struct nvram_os_partition *part, char *buff, |
288 | unsigned int * err_type, unsigned int * error_log_cnt) | 334 | int length, unsigned int *err_type, |
335 | unsigned int *error_log_cnt) | ||
289 | { | 336 | { |
290 | int rc; | 337 | int rc; |
291 | loff_t tmp_index; | 338 | loff_t tmp_index; |
292 | struct err_log_info info; | 339 | struct err_log_info info; |
293 | 340 | ||
294 | if (rtas_log_partition.index == -1) | 341 | if (part->index == -1) |
295 | return -1; | 342 | return -1; |
296 | 343 | ||
297 | if (length > rtas_log_partition.size) | 344 | if (length > part->size) |
298 | length = rtas_log_partition.size; | 345 | length = part->size; |
299 | 346 | ||
300 | tmp_index = rtas_log_partition.index; | 347 | tmp_index = part->index; |
301 | 348 | ||
302 | rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); | 349 | if (part->os_partition) { |
303 | if (rc <= 0) { | 350 | rc = ppc_md.nvram_read((char *)&info, |
304 | printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); | 351 | sizeof(struct err_log_info), |
305 | return rc; | 352 | &tmp_index); |
353 | if (rc <= 0) { | ||
354 | pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, | ||
355 | rc); | ||
356 | return rc; | ||
357 | } | ||
306 | } | 358 | } |
307 | 359 | ||
308 | rc = ppc_md.nvram_read(buff, length, &tmp_index); | 360 | rc = ppc_md.nvram_read(buff, length, &tmp_index); |
309 | if (rc <= 0) { | 361 | if (rc <= 0) { |
310 | printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); | 362 | pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); |
311 | return rc; | 363 | return rc; |
312 | } | 364 | } |
313 | 365 | ||
314 | *error_log_cnt = info.seq_num; | 366 | if (part->os_partition) { |
315 | *err_type = info.error_type; | 367 | *error_log_cnt = info.seq_num; |
368 | *err_type = info.error_type; | ||
369 | } | ||
316 | 370 | ||
317 | return 0; | 371 | return 0; |
318 | } | 372 | } |
319 | 373 | ||
374 | /* nvram_read_error_log | ||
375 | * | ||
376 | * Reads nvram for error log for at most 'length' | ||
377 | */ | ||
378 | int nvram_read_error_log(char *buff, int length, | ||
379 | unsigned int *err_type, unsigned int *error_log_cnt) | ||
380 | { | ||
381 | return nvram_read_partition(&rtas_log_partition, buff, length, | ||
382 | err_type, error_log_cnt); | ||
383 | } | ||
384 | |||
320 | /* This doesn't actually zero anything, but it sets the event_logged | 385 | /* This doesn't actually zero anything, but it sets the event_logged |
321 | * word to tell that this event is safely in syslog. | 386 | * word to tell that this event is safely in syslog. |
322 | */ | 387 | */ |
@@ -405,6 +470,349 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition | |||
405 | return 0; | 470 | return 0; |
406 | } | 471 | } |
407 | 472 | ||
473 | /* | ||
474 | * Are we using the ibm,rtas-log for oops/panic reports? And if so, | ||
475 | * would logging this oops/panic overwrite an RTAS event that rtas_errd | ||
476 | * hasn't had a chance to read and process? Return 1 if so, else 0. | ||
477 | * | ||
478 | * We assume that if rtas_errd hasn't read the RTAS event in | ||
479 | * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. | ||
480 | */ | ||
481 | static int clobbering_unread_rtas_event(void) | ||
482 | { | ||
483 | return (oops_log_partition.index == rtas_log_partition.index | ||
484 | && last_unread_rtas_event | ||
485 | && get_seconds() - last_unread_rtas_event <= | ||
486 | NVRAM_RTAS_READ_TIMEOUT); | ||
487 | } | ||
488 | |||
489 | /* Derived from logfs_compress() */ | ||
490 | static int nvram_compress(const void *in, void *out, size_t inlen, | ||
491 | size_t outlen) | ||
492 | { | ||
493 | int err, ret; | ||
494 | |||
495 | ret = -EIO; | ||
496 | err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, | ||
497 | MEM_LEVEL, Z_DEFAULT_STRATEGY); | ||
498 | if (err != Z_OK) | ||
499 | goto error; | ||
500 | |||
501 | stream.next_in = in; | ||
502 | stream.avail_in = inlen; | ||
503 | stream.total_in = 0; | ||
504 | stream.next_out = out; | ||
505 | stream.avail_out = outlen; | ||
506 | stream.total_out = 0; | ||
507 | |||
508 | err = zlib_deflate(&stream, Z_FINISH); | ||
509 | if (err != Z_STREAM_END) | ||
510 | goto error; | ||
511 | |||
512 | err = zlib_deflateEnd(&stream); | ||
513 | if (err != Z_OK) | ||
514 | goto error; | ||
515 | |||
516 | if (stream.total_out >= stream.total_in) | ||
517 | goto error; | ||
518 | |||
519 | ret = stream.total_out; | ||
520 | error: | ||
521 | return ret; | ||
522 | } | ||
523 | |||
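
nvram_compress() is a one-shot zlib deflate that treats a non-shrinking result as failure, so an incompressible oops gets stored uncompressed. A userspace analogue of the same call sequence; COMPR_LEVEL, WINDOW_BITS and MEM_LEVEL are defined elsewhere in nvram.c and the values below are placeholders (build with -lz):

    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    static int compress_once(const void *in, void *out,
                             size_t inlen, size_t outlen)
    {
            z_stream s;

            memset(&s, 0, sizeof(s));
            if (deflateInit2(&s, Z_BEST_COMPRESSION, Z_DEFLATED,
                             12 /* window bits */, 4 /* mem level */,
                             Z_DEFAULT_STRATEGY) != Z_OK)
                    return -1;
            s.next_in  = (Bytef *)in;   s.avail_in  = inlen;
            s.next_out = (Bytef *)out;  s.avail_out = outlen;
            if (deflate(&s, Z_FINISH) != Z_STREAM_END || deflateEnd(&s) != Z_OK)
                    return -1;
            /* like the kernel code, fail unless the output actually shrank */
            return s.total_out < inlen ? (int)s.total_out : -1;
    }

    int main(void)
    {
            char in[512], out[512];

            memset(in, 'A', sizeof(in));   /* highly compressible input */
            printf("compressed %zu -> %d bytes\n", sizeof(in),
                   compress_once(in, out, sizeof(in), sizeof(out)));
            return 0;
    }
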
524 | /* Compress the text from big_oops_buf into oops_buf. */ | ||
525 | static int zip_oops(size_t text_len) | ||
526 | { | ||
527 | struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; | ||
528 | int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, | ||
529 | oops_data_sz); | ||
530 | if (zipped_len < 0) { | ||
531 | pr_err("nvram: compression failed; returned %d\n", zipped_len); | ||
532 | pr_err("nvram: logging uncompressed oops/panic report\n"); | ||
533 | return -1; | ||
534 | } | ||
535 | oops_hdr->version = OOPS_HDR_VERSION; | ||
536 | oops_hdr->report_length = (u16) zipped_len; | ||
537 | oops_hdr->timestamp = get_seconds(); | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | #ifdef CONFIG_PSTORE | ||
542 | /* Derived from logfs_uncompress */ | ||
543 | int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen) | ||
544 | { | ||
545 | int err, ret; | ||
546 | |||
547 | ret = -EIO; | ||
548 | err = zlib_inflateInit(&stream); | ||
549 | if (err != Z_OK) | ||
550 | goto error; | ||
551 | |||
552 | stream.next_in = in; | ||
553 | stream.avail_in = inlen; | ||
554 | stream.total_in = 0; | ||
555 | stream.next_out = out; | ||
556 | stream.avail_out = outlen; | ||
557 | stream.total_out = 0; | ||
558 | |||
559 | err = zlib_inflate(&stream, Z_FINISH); | ||
560 | if (err != Z_STREAM_END) | ||
561 | goto error; | ||
562 | |||
563 | err = zlib_inflateEnd(&stream); | ||
564 | if (err != Z_OK) | ||
565 | goto error; | ||
566 | |||
567 | ret = stream.total_out; | ||
568 | error: | ||
569 | return ret; | ||
570 | } | ||
571 | |||
572 | static int unzip_oops(char *oops_buf, char *big_buf) | ||
573 | { | ||
574 | struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; | ||
575 | u64 timestamp = oops_hdr->timestamp; | ||
576 | char *big_oops_data = NULL; | ||
577 | char *oops_data_buf = NULL; | ||
578 | size_t big_oops_data_sz; | ||
579 | int unzipped_len; | ||
580 | |||
581 | big_oops_data = big_buf + sizeof(struct oops_log_info); | ||
582 | big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info); | ||
583 | oops_data_buf = oops_buf + sizeof(struct oops_log_info); | ||
584 | |||
585 | unzipped_len = nvram_decompress(oops_data_buf, big_oops_data, | ||
586 | oops_hdr->report_length, | ||
587 | big_oops_data_sz); | ||
588 | |||
589 | if (unzipped_len < 0) { | ||
590 | pr_err("nvram: decompression failed; returned %d\n", | ||
591 | unzipped_len); | ||
592 | return -1; | ||
593 | } | ||
594 | oops_hdr = (struct oops_log_info *)big_buf; | ||
595 | oops_hdr->version = OOPS_HDR_VERSION; | ||
596 | oops_hdr->report_length = (u16) unzipped_len; | ||
597 | oops_hdr->timestamp = timestamp; | ||
598 | return 0; | ||
599 | } | ||
600 | |||
601 | static int nvram_pstore_open(struct pstore_info *psi) | ||
602 | { | ||
603 | /* Reset the iterator to start reading partitions again */ | ||
604 | read_type = -1; | ||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | /** | ||
609 | * nvram_pstore_write - pstore write callback for nvram | ||
610 | * @type: Type of message logged | ||
611 | * @reason: reason behind dump (oops/panic) | ||
612 | * @id: identifier to indicate the write performed | ||
613 | * @part: pstore writes data to the registered buffer in parts; ||
614 | * the part number indicates which part this is. ||
615 | * @count: Indicates oops count | ||
616 | * @hsize: Size of header added by pstore | ||
617 | * @size: number of bytes written to the registered buffer | ||
618 | * @psi: registered pstore_info structure | ||
619 | * | ||
620 | * Called by pstore_dump() when an oops or panic report is logged in the | ||
621 | * printk buffer. | ||
622 | * Returns 0 on successful write. | ||
623 | */ | ||
624 | static int nvram_pstore_write(enum pstore_type_id type, | ||
625 | enum kmsg_dump_reason reason, | ||
626 | u64 *id, unsigned int part, int count, | ||
627 | size_t hsize, size_t size, | ||
628 | struct pstore_info *psi) | ||
629 | { | ||
630 | int rc; | ||
631 | unsigned int err_type = ERR_TYPE_KERNEL_PANIC; | ||
632 | struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; | ||
633 | |||
634 | /* part 1 has the recent messages from printk buffer */ | ||
635 | if (part > 1 || type != PSTORE_TYPE_DMESG || | ||
636 | clobbering_unread_rtas_event()) | ||
637 | return -1; | ||
638 | |||
639 | oops_hdr->version = OOPS_HDR_VERSION; | ||
640 | oops_hdr->report_length = (u16) size; | ||
641 | oops_hdr->timestamp = get_seconds(); | ||
642 | |||
643 | if (big_oops_buf) { | ||
644 | rc = zip_oops(size); | ||
645 | /* | ||
646 | * If compression fails copy recent log messages from | ||
647 | * big_oops_buf to oops_data. | ||
648 | */ | ||
649 | if (rc != 0) { | ||
650 | size_t diff = size - oops_data_sz + hsize; | ||
651 | |||
652 | if (size > oops_data_sz) { | ||
653 | memcpy(oops_data, big_oops_buf, hsize); | ||
654 | memcpy(oops_data + hsize, big_oops_buf + diff, | ||
655 | oops_data_sz - hsize); | ||
656 | |||
657 | oops_hdr->report_length = (u16) oops_data_sz; | ||
658 | } else | ||
659 | memcpy(oops_data, big_oops_buf, size); | ||
660 | } else | ||
661 | err_type = ERR_TYPE_KERNEL_PANIC_GZ; | ||
662 | } | ||
663 | |||
664 | rc = nvram_write_os_partition(&oops_log_partition, oops_buf, | ||
665 | (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, | ||
666 | count); | ||
667 | |||
668 | if (rc != 0) | ||
669 | return rc; | ||
670 | |||
671 | *id = part; | ||
672 | return 0; | ||
673 | } | ||
674 | |||
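
The truncation fallback above, taken when compression fails and the report is larger than the NVRAM buffer, keeps the hsize-byte header that pstore prepended plus the most recent tail of the messages. A standalone sketch of that head-plus-tail copy on a plain buffer:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char big[40] = "HDR|0123456789012345678901234567890123";
            char nvram[16];
            size_t size = strlen(big);     /* bytes pstore handed us (38) */
            size_t hsize = 4;              /* header pstore prepended ("HDR|") */
            size_t oops_data_sz = sizeof(nvram);

            if (size > oops_data_sz) {
                    size_t diff = size - oops_data_sz + hsize;

                    memcpy(nvram, big, hsize);            /* keep the header */
                    memcpy(nvram + hsize, big + diff,
                           oops_data_sz - hsize);         /* keep newest tail */
            }
            printf("%.*s\n", (int)oops_data_sz, nvram);   /* HDR|2...3 */
            return 0;
    }
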
675 | /* | ||
676 | * Reads the oops/panic report, rtas, of-config and common partitions. ||
677 | * Returns the length of the data we read from each partition. | ||
678 | * Returns 0 if we've been called before. | ||
679 | */ | ||
680 | static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, | ||
681 | int *count, struct timespec *time, char **buf, | ||
682 | struct pstore_info *psi) | ||
683 | { | ||
684 | struct oops_log_info *oops_hdr; | ||
685 | unsigned int err_type, id_no, size = 0; | ||
686 | struct nvram_os_partition *part = NULL; | ||
687 | char *buff = NULL, *big_buff = NULL; | ||
688 | int rc, sig = 0; | ||
689 | loff_t p; | ||
690 | |||
691 | read_partition: | ||
692 | read_type++; | ||
693 | |||
694 | switch (nvram_type_ids[read_type]) { | ||
695 | case PSTORE_TYPE_DMESG: | ||
696 | part = &oops_log_partition; | ||
697 | *type = PSTORE_TYPE_DMESG; | ||
698 | break; | ||
699 | case PSTORE_TYPE_PPC_RTAS: | ||
700 | part = &rtas_log_partition; | ||
701 | *type = PSTORE_TYPE_PPC_RTAS; | ||
702 | time->tv_sec = last_rtas_event; | ||
703 | time->tv_nsec = 0; | ||
704 | break; | ||
705 | case PSTORE_TYPE_PPC_OF: | ||
706 | sig = NVRAM_SIG_OF; | ||
707 | part = &of_config_partition; | ||
708 | *type = PSTORE_TYPE_PPC_OF; | ||
709 | *id = PSTORE_TYPE_PPC_OF; | ||
710 | time->tv_sec = 0; | ||
711 | time->tv_nsec = 0; | ||
712 | break; | ||
713 | case PSTORE_TYPE_PPC_COMMON: | ||
714 | sig = NVRAM_SIG_SYS; | ||
715 | part = &common_partition; | ||
716 | *type = PSTORE_TYPE_PPC_COMMON; | ||
717 | *id = PSTORE_TYPE_PPC_COMMON; | ||
718 | time->tv_sec = 0; | ||
719 | time->tv_nsec = 0; | ||
720 | break; | ||
721 | default: | ||
722 | return 0; | ||
723 | } | ||
724 | |||
725 | if (!part->os_partition) { | ||
726 | p = nvram_find_partition(part->name, sig, &size); | ||
727 | if (p <= 0) { | ||
728 | pr_err("nvram: Failed to find partition %s, " | ||
729 | "err %d\n", part->name, (int)p); | ||
730 | return 0; | ||
731 | } | ||
732 | part->index = p; | ||
733 | part->size = size; | ||
734 | } | ||
735 | |||
736 | buff = kmalloc(part->size, GFP_KERNEL); | ||
737 | |||
738 | if (!buff) | ||
739 | return -ENOMEM; | ||
740 | |||
741 | if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { | ||
742 | kfree(buff); | ||
743 | return 0; | ||
744 | } | ||
745 | |||
746 | *count = 0; | ||
747 | |||
748 | if (part->os_partition) | ||
749 | *id = id_no; | ||
750 | |||
751 | if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { | ||
752 | oops_hdr = (struct oops_log_info *)buff; | ||
753 | *buf = buff + sizeof(*oops_hdr); | ||
754 | |||
755 | if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) { | ||
756 | big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL); | ||
757 | if (!big_buff) | ||
758 | return -ENOMEM; | ||
759 | |||
760 | rc = unzip_oops(buff, big_buff); | ||
761 | |||
762 | if (rc != 0) { | ||
763 | kfree(buff); | ||
764 | kfree(big_buff); | ||
765 | goto read_partition; | ||
766 | } | ||
767 | |||
768 | oops_hdr = (struct oops_log_info *)big_buff; | ||
769 | *buf = big_buff + sizeof(*oops_hdr); | ||
770 | kfree(buff); | ||
771 | } | ||
772 | |||
773 | time->tv_sec = oops_hdr->timestamp; | ||
774 | time->tv_nsec = 0; | ||
775 | return oops_hdr->report_length; | ||
776 | } | ||
777 | |||
778 | *buf = buff; | ||
779 | return part->size; | ||
780 | } | ||
781 | |||
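
The read callback above is an iterator: read_type starts at -1, is reset by .open, and advances through nvram_type_ids[] until the -1 sentinel falls into the default case, whose 0 return ends the enumeration. A miniature of that state machine, with the printf return standing in for a record size:

    #include <stdio.h>

    enum { T_DMESG = 1, T_RTAS, T_OF, T_COMMON };
    static const int type_ids[] = { T_DMESG, T_RTAS, T_OF, T_COMMON, -1 };
    static int read_type = -1;

    static int next_record(void)
    {
            read_type++;
            switch (type_ids[read_type]) {
            case T_DMESG:  return printf("dmesg partition\n");
            case T_RTAS:   return printf("rtas partition\n");
            case T_OF:     return printf("of-config partition\n");
            case T_COMMON: return printf("common partition\n");
            default:       return 0;   /* sentinel: no more records */
            }
    }

    int main(void)
    {
            while (next_record() > 0)
                    ;
            return 0;
    }
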
782 | static struct pstore_info nvram_pstore_info = { | ||
783 | .owner = THIS_MODULE, | ||
784 | .name = "nvram", | ||
785 | .open = nvram_pstore_open, | ||
786 | .read = nvram_pstore_read, | ||
787 | .write = nvram_pstore_write, | ||
788 | }; | ||
789 | |||
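
For context, a rough sketch of how the pstore core consumes the callbacks registered above; the names and flow here are illustrative assumptions, not the real pstore internals. The core calls .open once to reset the iterator, then calls .read until it returns 0, turning each record into a file under the pstore filesystem:

    #include <stdio.h>

    struct psi_ops {
            int  (*open)(void);
            long (*read)(void);
    };

    static int records_left;
    static int demo_open(void)  { records_left = 3; return 0; }
    static long demo_read(void) { return records_left-- > 0 ? 100 : 0; }

    int main(void)
    {
            struct psi_ops psi = { demo_open, demo_read };
            long size;

            psi.open();
            while ((size = psi.read()) > 0)
                    printf("created pstore record, %ld bytes\n", size);
            return 0;
    }
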
790 | static int nvram_pstore_init(void) | ||
791 | { | ||
792 | int rc = 0; | ||
793 | |||
794 | if (big_oops_buf) { | ||
795 | nvram_pstore_info.buf = big_oops_buf; | ||
796 | nvram_pstore_info.bufsize = big_oops_buf_sz; | ||
797 | } else { | ||
798 | nvram_pstore_info.buf = oops_data; | ||
799 | nvram_pstore_info.bufsize = oops_data_sz; | ||
800 | } | ||
801 | |||
802 | rc = pstore_register(&nvram_pstore_info); | ||
803 | if (rc != 0) | ||
804 | pr_err("nvram: pstore_register() failed, defaults to " | ||
805 | "kmsg_dump; returned %d\n", rc); | ||
806 | |||
807 | return rc; | ||
808 | } | ||
809 | #else | ||
810 | static int nvram_pstore_init(void) | ||
811 | { | ||
812 | return -1; | ||
813 | } | ||
814 | #endif | ||
815 | |||
408 | static void __init nvram_init_oops_partition(int rtas_partition_exists) | 816 | static void __init nvram_init_oops_partition(int rtas_partition_exists) |
409 | { | 817 | { |
410 | int rc; | 818 | int rc; |
@@ -425,9 +833,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) | |||
425 | oops_log_partition.name); | 833 | oops_log_partition.name); |
426 | return; | 834 | return; |
427 | } | 835 | } |
428 | oops_len = (u16*) oops_buf; | 836 | oops_data = oops_buf + sizeof(struct oops_log_info); |
429 | oops_data = oops_buf + sizeof(u16); | 837 | oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); |
430 | oops_data_sz = oops_log_partition.size - sizeof(u16); | ||
431 | 838 | ||
432 | /* | 839 | /* |
433 | * Figure compression (preceded by elimination of each line's <n> | 840 | * Figure compression (preceded by elimination of each line's <n> |
@@ -452,6 +859,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) | |||
452 | stream.workspace = NULL; | 859 | stream.workspace = NULL; |
453 | } | 860 | } |
454 | 861 | ||
862 | rc = nvram_pstore_init(); | ||
863 | |||
864 | if (!rc) | ||
865 | return; | ||
866 | |||
455 | rc = kmsg_dump_register(&nvram_kmsg_dumper); | 867 | rc = kmsg_dump_register(&nvram_kmsg_dumper); |
456 | if (rc != 0) { | 868 | if (rc != 0) { |
457 | pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); | 869 | pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); |
@@ -501,70 +913,6 @@ int __init pSeries_nvram_init(void) | |||
501 | return 0; | 913 | return 0; |
502 | } | 914 | } |
503 | 915 | ||
504 | /* | ||
505 | * Are we using the ibm,rtas-log for oops/panic reports? And if so, | ||
506 | * would logging this oops/panic overwrite an RTAS event that rtas_errd | ||
507 | * hasn't had a chance to read and process? Return 1 if so, else 0. | ||
508 | * | ||
509 | * We assume that if rtas_errd hasn't read the RTAS event in | ||
510 | * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. | ||
511 | */ | ||
512 | static int clobbering_unread_rtas_event(void) | ||
513 | { | ||
514 | return (oops_log_partition.index == rtas_log_partition.index | ||
515 | && last_unread_rtas_event | ||
516 | && get_seconds() - last_unread_rtas_event <= | ||
517 | NVRAM_RTAS_READ_TIMEOUT); | ||
518 | } | ||
519 | |||
520 | /* Derived from logfs_compress() */ | ||
521 | static int nvram_compress(const void *in, void *out, size_t inlen, | ||
522 | size_t outlen) | ||
523 | { | ||
524 | int err, ret; | ||
525 | |||
526 | ret = -EIO; | ||
527 | err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, | ||
528 | MEM_LEVEL, Z_DEFAULT_STRATEGY); | ||
529 | if (err != Z_OK) | ||
530 | goto error; | ||
531 | |||
532 | stream.next_in = in; | ||
533 | stream.avail_in = inlen; | ||
534 | stream.total_in = 0; | ||
535 | stream.next_out = out; | ||
536 | stream.avail_out = outlen; | ||
537 | stream.total_out = 0; | ||
538 | |||
539 | err = zlib_deflate(&stream, Z_FINISH); | ||
540 | if (err != Z_STREAM_END) | ||
541 | goto error; | ||
542 | |||
543 | err = zlib_deflateEnd(&stream); | ||
544 | if (err != Z_OK) | ||
545 | goto error; | ||
546 | |||
547 | if (stream.total_out >= stream.total_in) | ||
548 | goto error; | ||
549 | |||
550 | ret = stream.total_out; | ||
551 | error: | ||
552 | return ret; | ||
553 | } | ||
554 | |||
555 | /* Compress the text from big_oops_buf into oops_buf. */ | ||
556 | static int zip_oops(size_t text_len) | ||
557 | { | ||
558 | int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, | ||
559 | oops_data_sz); | ||
560 | if (zipped_len < 0) { | ||
561 | pr_err("nvram: compression failed; returned %d\n", zipped_len); | ||
562 | pr_err("nvram: logging uncompressed oops/panic report\n"); | ||
563 | return -1; | ||
564 | } | ||
565 | *oops_len = (u16) zipped_len; | ||
566 | return 0; | ||
567 | } | ||
568 | 916 | ||
569 | /* | 917 | /* |
570 | * This is our kmsg_dump callback, called after an oops or panic report | 918 | * This is our kmsg_dump callback, called after an oops or panic report |
@@ -576,6 +924,7 @@ static int zip_oops(size_t text_len) | |||
576 | static void oops_to_nvram(struct kmsg_dumper *dumper, | 924 | static void oops_to_nvram(struct kmsg_dumper *dumper, |
577 | enum kmsg_dump_reason reason) | 925 | enum kmsg_dump_reason reason) |
578 | { | 926 | { |
927 | struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; | ||
579 | static unsigned int oops_count = 0; | 928 | static unsigned int oops_count = 0; |
580 | static bool panicking = false; | 929 | static bool panicking = false; |
581 | static DEFINE_SPINLOCK(lock); | 930 | static DEFINE_SPINLOCK(lock); |
@@ -619,14 +968,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, | |||
619 | } | 968 | } |
620 | if (rc != 0) { | 969 | if (rc != 0) { |
621 | kmsg_dump_rewind(dumper); | 970 | kmsg_dump_rewind(dumper); |
622 | kmsg_dump_get_buffer(dumper, true, | 971 | kmsg_dump_get_buffer(dumper, false, |
623 | oops_data, oops_data_sz, &text_len); | 972 | oops_data, oops_data_sz, &text_len); |
624 | err_type = ERR_TYPE_KERNEL_PANIC; | 973 | err_type = ERR_TYPE_KERNEL_PANIC; |
625 | *oops_len = (u16) text_len; | 974 | oops_hdr->version = OOPS_HDR_VERSION; |
975 | oops_hdr->report_length = (u16) text_len; | ||
976 | oops_hdr->timestamp = get_seconds(); | ||
626 | } | 977 | } |
627 | 978 | ||
628 | (void) nvram_write_os_partition(&oops_log_partition, oops_buf, | 979 | (void) nvram_write_os_partition(&oops_log_partition, oops_buf, |
629 | (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); | 980 | (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, |
981 | ++oops_count); | ||
630 | 982 | ||
631 | spin_unlock_irqrestore(&lock, flags); | 983 | spin_unlock_irqrestore(&lock, flags); |
632 | } | 984 | } |
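Both hunks above move the oops partition from a bare u16 length prefix to a struct oops_log_info header declared outside the lines shown. A minimal sketch of the layout implied by the fields used here (packing is an assumption):

    struct oops_log_info {
            u16 version;            /* OOPS_HDR_VERSION, set in oops_to_nvram() */
            u16 report_length;      /* length of the (possibly zipped) report */
            u64 timestamp;          /* get_seconds() at dump time */
    } __packed;

A record handed to nvram_write_os_partition() is then sizeof(*oops_hdr) + oops_hdr->report_length bytes — the header immediately followed by the report text — which nvram_pstore_read() undoes by returning buff + sizeof(*oops_hdr).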
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index c91b22be9288..efe61374f6ea 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c | |||
@@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn) | |||
64 | } | 64 | } |
65 | EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); | 65 | EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); |
66 | 66 | ||
67 | /** | ||
68 | * __pcibios_remove_pci_devices - remove all devices under this bus | ||
69 | * @bus: the indicated PCI bus | ||
70 | * @purge_pe: destroy the PE on removal of PCI devices | ||
71 | * | ||
72 | * Remove all of the PCI devices under this bus both from the | ||
73 | * linux pci device tree, and from the powerpc EEH address cache. | ||
74 | * By default, the corresponding PE will be destroied during the | ||
75 | * By default, the corresponding PE will be destroyed during the | ||
76 | * normal PCI hotplug path. For PCI hotplug during EEH recovery, | ||
77 | * the corresponding PE won't be destroyed and deallocated. | ||
78 | void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) | ||
79 | { | ||
80 | struct pci_dev *dev, *tmp; | ||
81 | struct pci_bus *child_bus; | ||
82 | |||
83 | /* First go down child busses */ | ||
84 | list_for_each_entry(child_bus, &bus->children, node) | ||
85 | __pcibios_remove_pci_devices(child_bus, purge_pe); | ||
86 | |||
87 | pr_debug("PCI: Removing devices on bus %04x:%02x\n", | ||
88 | pci_domain_nr(bus), bus->number); | ||
89 | list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { | ||
90 | pr_debug(" * Removing %s...\n", pci_name(dev)); | ||
91 | eeh_remove_bus_device(dev, purge_pe); | ||
92 | pci_stop_and_remove_bus_device(dev); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * pcibios_remove_pci_devices - remove all devices under this bus | ||
98 | * | ||
99 | * Remove all of the PCI devices under this bus both from the | ||
100 | * linux pci device tree, and from the powerpc EEH address cache. | ||
101 | */ | ||
102 | void pcibios_remove_pci_devices(struct pci_bus *bus) | ||
103 | { | ||
104 | __pcibios_remove_pci_devices(bus, 1); | ||
105 | } | ||
106 | EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); | ||
107 | |||
108 | /** | ||
109 | * pcibios_add_pci_devices - adds new pci devices to bus | ||
110 | * | ||
111 | * This routine will find and fixup new pci devices under | ||
112 | * the indicated bus. This routine presumes that there | ||
113 | * might already be some devices under this bridge, so | ||
114 | * it carefully tries to add only new devices. (And that | ||
115 | * is how this routine differs from other, similar pcibios | ||
116 | * routines.) | ||
117 | */ | ||
118 | void pcibios_add_pci_devices(struct pci_bus * bus) | ||
119 | { | ||
120 | int slotno, num, mode, pass, max; | ||
121 | struct pci_dev *dev; | ||
122 | struct device_node *dn = pci_bus_to_OF_node(bus); | ||
123 | |||
124 | eeh_add_device_tree_early(dn); | ||
125 | |||
126 | mode = PCI_PROBE_NORMAL; | ||
127 | if (ppc_md.pci_probe_mode) | ||
128 | mode = ppc_md.pci_probe_mode(bus); | ||
129 | |||
130 | if (mode == PCI_PROBE_DEVTREE) { | ||
131 | /* use ofdt-based probe */ | ||
132 | of_rescan_bus(dn, bus); | ||
133 | } else if (mode == PCI_PROBE_NORMAL) { | ||
134 | /* use legacy probe */ | ||
135 | slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); | ||
136 | num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); | ||
137 | if (!num) | ||
138 | return; | ||
139 | pcibios_setup_bus_devices(bus); | ||
140 | max = bus->busn_res.start; | ||
141 | for (pass=0; pass < 2; pass++) | ||
142 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
143 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || | ||
144 | dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) | ||
145 | max = pci_scan_bridge(bus, dev, max, pass); | ||
146 | } | ||
147 | } | ||
148 | pcibios_finish_adding_to_bus(bus); | ||
149 | } | ||
150 | EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); | ||
151 | |||
152 | struct pci_controller *init_phb_dynamic(struct device_node *dn) | 67 | struct pci_controller *init_phb_dynamic(struct device_node *dn) |
153 | { | 68 | { |
154 | struct pci_controller *phb; | 69 | struct pci_controller *phb; |
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c4dfccd3a3d9..7b3cbde8c783 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c | |||
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier) | |||
83 | switch (event_modifier) { | 83 | switch (event_modifier) { |
84 | case EPOW_SHUTDOWN_NORMAL: | 84 | case EPOW_SHUTDOWN_NORMAL: |
85 | pr_emerg("Firmware initiated power off"); | 85 | pr_emerg("Firmware initiated power off"); |
86 | orderly_poweroff(1); | 86 | orderly_poweroff(true); |
87 | break; | 87 | break; |
88 | 88 | ||
89 | case EPOW_SHUTDOWN_ON_UPS: | 89 | case EPOW_SHUTDOWN_ON_UPS: |
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier) | |||
95 | pr_emerg("Loss of system critical functions reported by " | 95 | pr_emerg("Loss of system critical functions reported by " |
96 | "firmware"); | 96 | "firmware"); |
97 | pr_emerg("Check RTAS error log for details"); | 97 | pr_emerg("Check RTAS error log for details"); |
98 | orderly_poweroff(1); | 98 | orderly_poweroff(true); |
99 | break; | 99 | break; |
100 | 100 | ||
101 | case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: | 101 | case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: |
102 | pr_emerg("Ambient temperature too high reported by firmware"); | 102 | pr_emerg("Ambient temperature too high reported by firmware"); |
103 | pr_emerg("Check RTAS error log for details"); | 103 | pr_emerg("Check RTAS error log for details"); |
104 | orderly_poweroff(1); | 104 | orderly_poweroff(true); |
105 | break; | 105 | break; |
106 | 106 | ||
107 | default: | 107 | default: |
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log) | |||
162 | 162 | ||
163 | case EPOW_SYSTEM_HALT: | 163 | case EPOW_SYSTEM_HALT: |
164 | pr_emerg("Firmware initiated power off"); | 164 | pr_emerg("Firmware initiated power off"); |
165 | orderly_poweroff(1); | 165 | orderly_poweroff(true); |
166 | break; | 166 | break; |
167 | 167 | ||
168 | case EPOW_MAIN_ENCLOSURE: | 168 | case EPOW_MAIN_ENCLOSURE: |
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 12bc8c3663ad..306643cc9dbc 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c | |||
@@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) | |||
192 | /* Special case - we inhibit secondary thread startup | 192 | /* Special case - we inhibit secondary thread startup |
193 | * during boot if the user requests it. | 193 | * during boot if the user requests it. |
194 | */ | 194 | */ |
195 | if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { | 195 | if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { |
196 | if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) | 196 | if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) |
197 | return 0; | 197 | return 0; |
198 | if (smt_enabled_at_boot | 198 | if (smt_enabled_at_boot |
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 99464a7bdb3b..f67ac900d870 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile | |||
@@ -4,6 +4,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) | |||
4 | 4 | ||
5 | mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o | 5 | mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o |
6 | obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) | 6 | obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) |
7 | obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o | ||
8 | obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP) += fsl_mpic_timer_wakeup.o | ||
7 | mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o | 9 | mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o |
8 | obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y) | 10 | obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y) |
9 | obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o | 11 | obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o |
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index d4fa03f2b6ac..5e6ff38ea69f 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c | |||
@@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) | |||
120 | 120 | ||
121 | static struct irqaction cpm_error_irqaction = { | 121 | static struct irqaction cpm_error_irqaction = { |
122 | .handler = cpm_error_interrupt, | 122 | .handler = cpm_error_interrupt, |
123 | .flags = IRQF_NO_THREAD, | ||
123 | .name = "error", | 124 | .name = "error", |
124 | }; | 125 | }; |
125 | 126 | ||
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c new file mode 100644 index 000000000000..1707bf04dec6 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c | |||
@@ -0,0 +1,161 @@ | |||
1 | /* | ||
2 | * MPIC timer wakeup driver | ||
3 | * | ||
4 | * Copyright 2013 Freescale Semiconductor, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the | ||
8 | * Free Software Foundation; either version 2 of the License, or (at your | ||
9 | * option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/device.h> | ||
18 | |||
19 | #include <asm/mpic_timer.h> | ||
20 | #include <asm/mpic.h> | ||
21 | |||
22 | struct fsl_mpic_timer_wakeup { | ||
23 | struct mpic_timer *timer; | ||
24 | struct work_struct free_work; | ||
25 | }; | ||
26 | |||
27 | static struct fsl_mpic_timer_wakeup *fsl_wakeup; | ||
28 | static DEFINE_MUTEX(sysfs_lock); | ||
29 | |||
30 | static void fsl_free_resource(struct work_struct *ws) | ||
31 | { | ||
32 | struct fsl_mpic_timer_wakeup *wakeup = | ||
33 | container_of(ws, struct fsl_mpic_timer_wakeup, free_work); | ||
34 | |||
35 | mutex_lock(&sysfs_lock); | ||
36 | |||
37 | if (wakeup->timer) { | ||
38 | disable_irq_wake(wakeup->timer->irq); | ||
39 | mpic_free_timer(wakeup->timer); | ||
40 | } | ||
41 | |||
42 | wakeup->timer = NULL; | ||
43 | mutex_unlock(&sysfs_lock); | ||
44 | } | ||
45 | |||
46 | static irqreturn_t fsl_mpic_timer_irq(int irq, void *dev_id) | ||
47 | { | ||
48 | struct fsl_mpic_timer_wakeup *wakeup = dev_id; | ||
49 | |||
50 | schedule_work(&wakeup->free_work); | ||
51 | |||
52 | return wakeup->timer ? IRQ_HANDLED : IRQ_NONE; | ||
53 | } | ||
54 | |||
55 | static ssize_t fsl_timer_wakeup_show(struct device *dev, | ||
56 | struct device_attribute *attr, | ||
57 | char *buf) | ||
58 | { | ||
59 | struct timeval interval; | ||
60 | int val = 0; | ||
61 | |||
62 | mutex_lock(&sysfs_lock); | ||
63 | if (fsl_wakeup->timer) { | ||
64 | mpic_get_remain_time(fsl_wakeup->timer, &interval); | ||
65 | val = interval.tv_sec + 1; | ||
66 | } | ||
67 | mutex_unlock(&sysfs_lock); | ||
68 | |||
69 | return sprintf(buf, "%d\n", val); | ||
70 | } | ||
71 | |||
72 | static ssize_t fsl_timer_wakeup_store(struct device *dev, | ||
73 | struct device_attribute *attr, | ||
74 | const char *buf, | ||
75 | size_t count) | ||
76 | { | ||
77 | struct timeval interval; | ||
78 | int ret; | ||
79 | |||
80 | interval.tv_usec = 0; | ||
81 | if (kstrtol(buf, 0, &interval.tv_sec)) | ||
82 | return -EINVAL; | ||
83 | |||
84 | mutex_lock(&sysfs_lock); | ||
85 | |||
86 | if (fsl_wakeup->timer) { | ||
87 | disable_irq_wake(fsl_wakeup->timer->irq); | ||
88 | mpic_free_timer(fsl_wakeup->timer); | ||
89 | fsl_wakeup->timer = NULL; | ||
90 | } | ||
91 | |||
92 | if (!interval.tv_sec) { | ||
93 | mutex_unlock(&sysfs_lock); | ||
94 | return count; | ||
95 | } | ||
96 | |||
97 | fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq, | ||
98 | fsl_wakeup, &interval); | ||
99 | if (!fsl_wakeup->timer) { | ||
100 | mutex_unlock(&sysfs_lock); | ||
101 | return -EINVAL; | ||
102 | } | ||
103 | |||
104 | ret = enable_irq_wake(fsl_wakeup->timer->irq); | ||
105 | if (ret) { | ||
106 | mpic_free_timer(fsl_wakeup->timer); | ||
107 | fsl_wakeup->timer = NULL; | ||
108 | mutex_unlock(&sysfs_lock); | ||
109 | |||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | mpic_start_timer(fsl_wakeup->timer); | ||
114 | |||
115 | mutex_unlock(&sysfs_lock); | ||
116 | |||
117 | return count; | ||
118 | } | ||
119 | |||
120 | static struct device_attribute mpic_attributes = __ATTR(timer_wakeup, 0644, | ||
121 | fsl_timer_wakeup_show, fsl_timer_wakeup_store); | ||
122 | |||
123 | static int __init fsl_wakeup_sys_init(void) | ||
124 | { | ||
125 | int ret; | ||
126 | |||
127 | fsl_wakeup = kzalloc(sizeof(struct fsl_mpic_timer_wakeup), GFP_KERNEL); | ||
128 | if (!fsl_wakeup) | ||
129 | return -ENOMEM; | ||
130 | |||
131 | INIT_WORK(&fsl_wakeup->free_work, fsl_free_resource); | ||
132 | |||
133 | ret = device_create_file(mpic_subsys.dev_root, &mpic_attributes); | ||
134 | if (ret) | ||
135 | kfree(fsl_wakeup); | ||
136 | |||
137 | return ret; | ||
138 | } | ||
139 | |||
140 | static void __exit fsl_wakeup_sys_exit(void) | ||
141 | { | ||
142 | device_remove_file(mpic_subsys.dev_root, &mpic_attributes); | ||
143 | |||
144 | mutex_lock(&sysfs_lock); | ||
145 | |||
146 | if (fsl_wakeup->timer) { | ||
147 | disable_irq_wake(fsl_wakeup->timer->irq); | ||
148 | mpic_free_timer(fsl_wakeup->timer); | ||
149 | } | ||
150 | |||
151 | kfree(fsl_wakeup); | ||
152 | |||
153 | mutex_unlock(&sysfs_lock); | ||
154 | } | ||
155 | |||
156 | module_init(fsl_wakeup_sys_init); | ||
157 | module_exit(fsl_wakeup_sys_exit); | ||
158 | |||
159 | MODULE_DESCRIPTION("Freescale MPIC global timer wakeup driver"); | ||
160 | MODULE_LICENSE("GPL v2"); | ||
161 | MODULE_AUTHOR("Wang Dongsheng <dongsheng.wang@freescale.com>"); | ||
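fsl_timer_wakeup_store() above parses a number of seconds, arms a global timer and makes its interrupt wake-capable; writing 0 cancels a pending timer. Assuming the attribute appears under the mpic subsystem this series registers in mpic.c below (the exact sysfs path is an assumption, not confirmed by this diff), user space would drive it roughly like:

    /* hypothetical user-space sketch; path and value are illustrative */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/devices/system/mpic/timer_wakeup", "w");

            if (!f)
                    return 1;
            fprintf(f, "30\n");     /* request wakeup in ~30 seconds */
            fclose(f);
            return 0;
    }

Reading the same file back reports roughly the remaining seconds via mpic_get_remain_time(), or 0 when no timer is armed.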
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3cc2f9159ab1..1be54faf60dd 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c | |||
@@ -48,6 +48,12 @@ | |||
48 | #define DBG(fmt...) | 48 | #define DBG(fmt...) |
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | struct bus_type mpic_subsys = { | ||
52 | .name = "mpic", | ||
53 | .dev_name = "mpic", | ||
54 | }; | ||
55 | EXPORT_SYMBOL_GPL(mpic_subsys); | ||
56 | |||
51 | static struct mpic *mpics; | 57 | static struct mpic *mpics; |
52 | static struct mpic *mpic_primary; | 58 | static struct mpic *mpic_primary; |
53 | static DEFINE_RAW_SPINLOCK(mpic_lock); | 59 | static DEFINE_RAW_SPINLOCK(mpic_lock); |
@@ -920,6 +926,22 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) | |||
920 | return IRQ_SET_MASK_OK_NOCOPY; | 926 | return IRQ_SET_MASK_OK_NOCOPY; |
921 | } | 927 | } |
922 | 928 | ||
929 | static int mpic_irq_set_wake(struct irq_data *d, unsigned int on) | ||
930 | { | ||
931 | struct irq_desc *desc = container_of(d, struct irq_desc, irq_data); | ||
932 | struct mpic *mpic = mpic_from_irq_data(d); | ||
933 | |||
934 | if (!(mpic->flags & MPIC_FSL)) | ||
935 | return -ENXIO; | ||
936 | |||
937 | if (on) | ||
938 | desc->action->flags |= IRQF_NO_SUSPEND; | ||
939 | else | ||
940 | desc->action->flags &= ~IRQF_NO_SUSPEND; | ||
941 | |||
942 | return 0; | ||
943 | } | ||
944 | |||
923 | void mpic_set_vector(unsigned int virq, unsigned int vector) | 945 | void mpic_set_vector(unsigned int virq, unsigned int vector) |
924 | { | 946 | { |
925 | struct mpic *mpic = mpic_from_irq(virq); | 947 | struct mpic *mpic = mpic_from_irq(virq); |
@@ -957,6 +979,7 @@ static struct irq_chip mpic_irq_chip = { | |||
957 | .irq_unmask = mpic_unmask_irq, | 979 | .irq_unmask = mpic_unmask_irq, |
958 | .irq_eoi = mpic_end_irq, | 980 | .irq_eoi = mpic_end_irq, |
959 | .irq_set_type = mpic_set_irq_type, | 981 | .irq_set_type = mpic_set_irq_type, |
982 | .irq_set_wake = mpic_irq_set_wake, | ||
960 | }; | 983 | }; |
961 | 984 | ||
962 | #ifdef CONFIG_SMP | 985 | #ifdef CONFIG_SMP |
@@ -971,6 +994,7 @@ static struct irq_chip mpic_tm_chip = { | |||
971 | .irq_mask = mpic_mask_tm, | 994 | .irq_mask = mpic_mask_tm, |
972 | .irq_unmask = mpic_unmask_tm, | 995 | .irq_unmask = mpic_unmask_tm, |
973 | .irq_eoi = mpic_end_irq, | 996 | .irq_eoi = mpic_end_irq, |
997 | .irq_set_wake = mpic_irq_set_wake, | ||
974 | }; | 998 | }; |
975 | 999 | ||
976 | #ifdef CONFIG_MPIC_U3_HT_IRQS | 1000 | #ifdef CONFIG_MPIC_U3_HT_IRQS |
@@ -1173,10 +1197,33 @@ static struct irq_domain_ops mpic_host_ops = { | |||
1173 | .xlate = mpic_host_xlate, | 1197 | .xlate = mpic_host_xlate, |
1174 | }; | 1198 | }; |
1175 | 1199 | ||
1200 | static u32 fsl_mpic_get_version(struct mpic *mpic) | ||
1201 | { | ||
1202 | u32 brr1; | ||
1203 | |||
1204 | if (!(mpic->flags & MPIC_FSL)) | ||
1205 | return 0; | ||
1206 | |||
1207 | brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, | ||
1208 | MPIC_FSL_BRR1); | ||
1209 | |||
1210 | return brr1 & MPIC_FSL_BRR1_VER; | ||
1211 | } | ||
1212 | |||
1176 | /* | 1213 | /* |
1177 | * Exported functions | 1214 | * Exported functions |
1178 | */ | 1215 | */ |
1179 | 1216 | ||
1217 | u32 fsl_mpic_primary_get_version(void) | ||
1218 | { | ||
1219 | struct mpic *mpic = mpic_primary; | ||
1220 | |||
1221 | if (mpic) | ||
1222 | return fsl_mpic_get_version(mpic); | ||
1223 | |||
1224 | return 0; | ||
1225 | } | ||
1226 | |||
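fsl_mpic_primary_get_version() lets code outside mpic.c key off the FSL MPIC block revision without poking at BRR1 itself. A hedged usage sketch (the 0x0400 threshold is invented for illustration and not taken from this diff):

    u32 version = fsl_mpic_primary_get_version();

    /* zero means no primary FSL MPIC is registered */
    if (version >= 0x0400)
            pr_info("FSL MPIC block revision 0x%x\n", version);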
1180 | struct mpic * __init mpic_alloc(struct device_node *node, | 1227 | struct mpic * __init mpic_alloc(struct device_node *node, |
1181 | phys_addr_t phys_addr, | 1228 | phys_addr_t phys_addr, |
1182 | unsigned int flags, | 1229 | unsigned int flags, |
@@ -1323,7 +1370,6 @@ struct mpic * __init mpic_alloc(struct device_node *node, | |||
1323 | mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); | 1370 | mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); |
1324 | 1371 | ||
1325 | if (mpic->flags & MPIC_FSL) { | 1372 | if (mpic->flags & MPIC_FSL) { |
1326 | u32 brr1; | ||
1327 | int ret; | 1373 | int ret; |
1328 | 1374 | ||
1329 | /* | 1375 | /* |
@@ -1334,9 +1380,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, | |||
1334 | mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, | 1380 | mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, |
1335 | MPIC_CPU_THISBASE, 0x1000); | 1381 | MPIC_CPU_THISBASE, 0x1000); |
1336 | 1382 | ||
1337 | brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, | 1383 | fsl_version = fsl_mpic_get_version(mpic); |
1338 | MPIC_FSL_BRR1); | ||
1339 | fsl_version = brr1 & MPIC_FSL_BRR1_VER; | ||
1340 | 1384 | ||
1341 | /* Error interrupt mask register (EIMR) is required for | 1385 | /* Error interrupt mask register (EIMR) is required for |
1342 | * handling individual device error interrupts. EIMR | 1386 | * handling individual device error interrupts. EIMR |
@@ -1526,9 +1570,7 @@ void __init mpic_init(struct mpic *mpic) | |||
1526 | mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); | 1570 | mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); |
1527 | 1571 | ||
1528 | if (mpic->flags & MPIC_FSL) { | 1572 | if (mpic->flags & MPIC_FSL) { |
1529 | u32 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, | 1573 | u32 version = fsl_mpic_get_version(mpic); |
1530 | MPIC_FSL_BRR1); | ||
1531 | u32 version = brr1 & MPIC_FSL_BRR1_VER; | ||
1532 | 1574 | ||
1533 | /* | 1575 | /* |
1534 | * Timer group B is present at the latest in MPIC 3.1 (e.g. | 1576 | * Timer group B is present at the latest in MPIC 3.1 (e.g. |
@@ -1999,6 +2041,8 @@ static struct syscore_ops mpic_syscore_ops = { | |||
1999 | static int mpic_init_sys(void) | 2041 | static int mpic_init_sys(void) |
2000 | { | 2042 | { |
2001 | register_syscore_ops(&mpic_syscore_ops); | 2043 | register_syscore_ops(&mpic_syscore_ops); |
2044 | subsys_system_register(&mpic_subsys, NULL); | ||
2045 | |||
2002 | return 0; | 2046 | return 0; |
2003 | } | 2047 | } |
2004 | 2048 | ||
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c new file mode 100644 index 000000000000..c06db92a4fb1 --- /dev/null +++ b/arch/powerpc/sysdev/mpic_timer.c | |||
@@ -0,0 +1,593 @@ | |||
1 | /* | ||
2 | * MPIC timer driver | ||
3 | * | ||
4 | * Copyright 2013 Freescale Semiconductor, Inc. | ||
5 | * Author: Dongsheng Wang <Dongsheng.Wang@freescale.com> | ||
6 | * Li Yang <leoli@freescale.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the | ||
10 | * Free Software Foundation; either version 2 of the License, or (at your | ||
11 | * option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/of.h> | ||
22 | #include <linux/of_device.h> | ||
23 | #include <linux/syscore_ops.h> | ||
24 | #include <sysdev/fsl_soc.h> | ||
25 | #include <asm/io.h> | ||
26 | |||
27 | #include <asm/mpic_timer.h> | ||
28 | |||
29 | #define FSL_GLOBAL_TIMER 0x1 | ||
30 | |||
31 | /* Clock Ratio | ||
32 | * Divide by 64 0x00000300 | ||
33 | * Divide by 32 0x00000200 | ||
34 | * Divide by 16 0x00000100 | ||
35 | * Divide by 8 0x00000000 (Hardware default div) | ||
36 | */ | ||
37 | #define MPIC_TIMER_TCR_CLKDIV 0x00000300 | ||
38 | |||
39 | #define MPIC_TIMER_TCR_ROVR_OFFSET 24 | ||
40 | |||
41 | #define TIMER_STOP 0x80000000 | ||
42 | #define TIMERS_PER_GROUP 4 | ||
43 | #define MAX_TICKS (~0U >> 1) | ||
44 | #define MAX_TICKS_CASCADE (~0U) | ||
45 | #define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num)) | ||
46 | |||
47 | /* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */ | ||
48 | #define ONE_SECOND 1000000 | ||
49 | |||
50 | struct timer_regs { | ||
51 | u32 gtccr; | ||
52 | u32 res0[3]; | ||
53 | u32 gtbcr; | ||
54 | u32 res1[3]; | ||
55 | u32 gtvpr; | ||
56 | u32 res2[3]; | ||
57 | u32 gtdr; | ||
58 | u32 res3[3]; | ||
59 | }; | ||
60 | |||
61 | struct cascade_priv { | ||
62 | u32 tcr_value; /* TCR register: CASC & ROVR value */ | ||
63 | unsigned int cascade_map; /* cascade map */ | ||
64 | unsigned int timer_num; /* cascade control timer */ | ||
65 | }; | ||
66 | |||
67 | struct timer_group_priv { | ||
68 | struct timer_regs __iomem *regs; | ||
69 | struct mpic_timer timer[TIMERS_PER_GROUP]; | ||
70 | struct list_head node; | ||
71 | unsigned int timerfreq; | ||
72 | unsigned int idle; | ||
73 | unsigned int flags; | ||
74 | spinlock_t lock; | ||
75 | void __iomem *group_tcr; | ||
76 | }; | ||
77 | |||
78 | static struct cascade_priv cascade_timer[] = { | ||
79 | /* cascade timer 0 and 1 */ | ||
80 | {0x1, 0xc, 0x1}, | ||
81 | /* cascade timer 1 and 2 */ | ||
82 | {0x2, 0x6, 0x2}, | ||
83 | /* cascade timer 2 and 3 */ | ||
84 | {0x4, 0x3, 0x3} | ||
85 | }; | ||
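The cascade_map values follow TIMER_OFFSET() above, where idle bits run from timer 3 in bit 0 up to timer 0 in bit 3. A small sanity sketch of that correspondence (the helper is illustrative; BUILD_BUG_ON must sit inside a function):

    static inline void cascade_map_sanity(void)
    {
            /* TIMER_OFFSET(num) == 1 << (TIMERS_PER_GROUP - 1 - num) */
            BUILD_BUG_ON((TIMER_OFFSET(0) | TIMER_OFFSET(1)) != 0xc); /* timers 0+1 */
            BUILD_BUG_ON((TIMER_OFFSET(1) | TIMER_OFFSET(2)) != 0x6); /* timers 1+2 */
            BUILD_BUG_ON((TIMER_OFFSET(2) | TIMER_OFFSET(3)) != 0x3); /* timers 2+3 */
    }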
86 | |||
87 | static LIST_HEAD(timer_group_list); | ||
88 | |||
89 | static void convert_ticks_to_time(struct timer_group_priv *priv, | ||
90 | const u64 ticks, struct timeval *time) | ||
91 | { | ||
92 | u64 tmp_sec; | ||
93 | |||
94 | time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq); | ||
95 | tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; | ||
96 | |||
97 | time->tv_usec = (__kernel_suseconds_t) | ||
98 | div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq); | ||
99 | |||
100 | return; | ||
101 | } | ||
102 | |||
103 | /* the time set by the user is converted to "ticks" */ | ||
104 | static int convert_time_to_ticks(struct timer_group_priv *priv, | ||
105 | const struct timeval *time, u64 *ticks) | ||
106 | { | ||
107 | u64 max_value; /* prevent u64 overflow */ | ||
108 | u64 tmp = 0; | ||
109 | |||
110 | u64 tmp_sec; | ||
111 | u64 tmp_ms; | ||
112 | u64 tmp_us; | ||
113 | |||
114 | max_value = div_u64(ULLONG_MAX, priv->timerfreq); | ||
115 | |||
116 | if (time->tv_sec > max_value || | ||
117 | (time->tv_sec == max_value && time->tv_usec > 0)) | ||
118 | return -EINVAL; | ||
119 | |||
120 | tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; | ||
121 | tmp += tmp_sec; | ||
122 | |||
123 | tmp_ms = time->tv_usec / 1000; | ||
124 | tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000); | ||
125 | tmp += tmp_ms; | ||
126 | |||
127 | tmp_us = time->tv_usec % 1000; | ||
128 | tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000); | ||
129 | tmp += tmp_us; | ||
130 | |||
131 | *ticks = tmp; | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
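convert_time_to_ticks() splits tv_usec into whole milliseconds plus the sub-millisecond remainder so each intermediate product stays far from u64 overflow. A worked sketch of the same arithmetic with an invented 41.25 MHz timer clock (plain C division stands in for div_u64 here):

    u64 freq = 41250000;                        /* illustrative, not from the diff */
    u64 sec = 2, usec = 1500;

    u64 ticks = sec * freq                      /* 82500000          */
              + (usec / 1000) * freq / 1000     /* 1 ms   ->  41250  */
              + (usec % 1000) * freq / 1000000; /* 500 us ->  20625  */
    /* ticks == 82561875 */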
136 | /* detect whether there is a cascade timer available */ | ||
137 | static struct mpic_timer *detect_idle_cascade_timer( | ||
138 | struct timer_group_priv *priv) | ||
139 | { | ||
140 | struct cascade_priv *casc_priv; | ||
141 | unsigned int map; | ||
142 | unsigned int array_size = ARRAY_SIZE(cascade_timer); | ||
143 | unsigned int num; | ||
144 | unsigned int i; | ||
145 | unsigned long flags; | ||
146 | |||
147 | casc_priv = cascade_timer; | ||
148 | for (i = 0; i < array_size; i++) { | ||
149 | spin_lock_irqsave(&priv->lock, flags); | ||
150 | map = casc_priv->cascade_map & priv->idle; | ||
151 | if (map == casc_priv->cascade_map) { | ||
152 | num = casc_priv->timer_num; | ||
153 | priv->timer[num].cascade_handle = casc_priv; | ||
154 | |||
155 | /* set timer busy */ | ||
156 | priv->idle &= ~casc_priv->cascade_map; | ||
157 | spin_unlock_irqrestore(&priv->lock, flags); | ||
158 | return &priv->timer[num]; | ||
159 | } | ||
160 | spin_unlock_irqrestore(&priv->lock, flags); | ||
161 | casc_priv++; | ||
162 | } | ||
163 | |||
164 | return NULL; | ||
165 | } | ||
166 | |||
167 | static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks, | ||
168 | unsigned int num) | ||
169 | { | ||
170 | struct cascade_priv *casc_priv; | ||
171 | u32 tcr; | ||
172 | u32 tmp_ticks; | ||
173 | u32 rem_ticks; | ||
174 | |||
175 | /* set group tcr reg for cascade */ | ||
176 | casc_priv = priv->timer[num].cascade_handle; | ||
177 | if (!casc_priv) | ||
178 | return -EINVAL; | ||
179 | |||
180 | tcr = casc_priv->tcr_value | | ||
181 | (casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET); | ||
182 | setbits32(priv->group_tcr, tcr); | ||
183 | |||
184 | tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks); | ||
185 | |||
186 | out_be32(&priv->regs[num].gtccr, 0); | ||
187 | out_be32(&priv->regs[num].gtbcr, tmp_ticks | TIMER_STOP); | ||
188 | |||
189 | out_be32(&priv->regs[num - 1].gtccr, 0); | ||
190 | out_be32(&priv->regs[num - 1].gtbcr, rem_ticks); | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv, | ||
196 | u64 ticks) | ||
197 | { | ||
198 | struct mpic_timer *allocated_timer; | ||
199 | |||
200 | /* Two cascade timers: Support the maximum time */ | ||
201 | const u64 max_ticks = (u64)MAX_TICKS * (u64)MAX_TICKS_CASCADE; | ||
202 | int ret; | ||
203 | |||
204 | if (ticks > max_ticks) | ||
205 | return NULL; | ||
206 | |||
207 | /* detect idle timer */ | ||
208 | allocated_timer = detect_idle_cascade_timer(priv); | ||
209 | if (!allocated_timer) | ||
210 | return NULL; | ||
211 | |||
212 | /* set ticks to timer */ | ||
213 | ret = set_cascade_timer(priv, ticks, allocated_timer->num); | ||
214 | if (ret < 0) | ||
215 | return NULL; | ||
216 | |||
217 | return allocated_timer; | ||
218 | } | ||
219 | |||
220 | static struct mpic_timer *get_timer(const struct timeval *time) | ||
221 | { | ||
222 | struct timer_group_priv *priv; | ||
223 | struct mpic_timer *timer; | ||
224 | |||
225 | u64 ticks; | ||
226 | unsigned int num; | ||
227 | unsigned int i; | ||
228 | unsigned long flags; | ||
229 | int ret; | ||
230 | |||
231 | list_for_each_entry(priv, &timer_group_list, node) { | ||
232 | ret = convert_time_to_ticks(priv, time, &ticks); | ||
233 | if (ret < 0) | ||
234 | return NULL; | ||
235 | |||
236 | if (ticks > MAX_TICKS) { | ||
237 | if (!(priv->flags & FSL_GLOBAL_TIMER)) | ||
238 | return NULL; | ||
239 | |||
240 | timer = get_cascade_timer(priv, ticks); | ||
241 | if (!timer) | ||
242 | continue; | ||
243 | |||
244 | return timer; | ||
245 | } | ||
246 | |||
247 | for (i = 0; i < TIMERS_PER_GROUP; i++) { | ||
248 | /* one timer: Reverse allocation */ | ||
249 | num = TIMERS_PER_GROUP - 1 - i; | ||
250 | spin_lock_irqsave(&priv->lock, flags); | ||
251 | if (priv->idle & (1 << i)) { | ||
252 | /* set timer busy */ | ||
253 | priv->idle &= ~(1 << i); | ||
254 | /* set ticks & stop timer */ | ||
255 | out_be32(&priv->regs[num].gtbcr, | ||
256 | ticks | TIMER_STOP); | ||
257 | out_be32(&priv->regs[num].gtccr, 0); | ||
258 | priv->timer[num].cascade_handle = NULL; | ||
259 | spin_unlock_irqrestore(&priv->lock, flags); | ||
260 | return &priv->timer[num]; | ||
261 | } | ||
262 | spin_unlock_irqrestore(&priv->lock, flags); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | return NULL; | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * mpic_start_timer - start hardware timer | ||
271 | * @handle: the timer to be started. | ||
272 | * | ||
273 | * It will invoke the ->fn(->dev) callback from the hardware interrupt | ||
274 | * once the requested ->timeval interval has elapsed. | ||
275 | */ | ||
276 | void mpic_start_timer(struct mpic_timer *handle) | ||
277 | { | ||
278 | struct timer_group_priv *priv = container_of(handle, | ||
279 | struct timer_group_priv, timer[handle->num]); | ||
280 | |||
281 | clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP); | ||
282 | } | ||
283 | EXPORT_SYMBOL(mpic_start_timer); | ||
284 | |||
285 | /** | ||
286 | * mpic_stop_timer - stop hardware timer | ||
287 | * @handle: the timer to be stopped | ||
288 | * | ||
289 | * The timer generates interrupts periodically until the user stops it. | ||
290 | */ | ||
291 | void mpic_stop_timer(struct mpic_timer *handle) | ||
292 | { | ||
293 | struct timer_group_priv *priv = container_of(handle, | ||
294 | struct timer_group_priv, timer[handle->num]); | ||
295 | struct cascade_priv *casc_priv; | ||
296 | |||
297 | setbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP); | ||
298 | |||
299 | casc_priv = priv->timer[handle->num].cascade_handle; | ||
300 | if (casc_priv) { | ||
301 | out_be32(&priv->regs[handle->num].gtccr, 0); | ||
302 | out_be32(&priv->regs[handle->num - 1].gtccr, 0); | ||
303 | } else { | ||
304 | out_be32(&priv->regs[handle->num].gtccr, 0); | ||
305 | } | ||
306 | } | ||
307 | EXPORT_SYMBOL(mpic_stop_timer); | ||
308 | |||
309 | /** | ||
310 | * mpic_get_remain_time - get the remaining time of a timer | ||
311 | * @handle: the timer to be queried. | ||
312 | * @time: returns the remaining time | ||
313 | * | ||
314 | * Query the time remaining until the timer expires. | ||
315 | */ | ||
316 | void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) | ||
317 | { | ||
318 | struct timer_group_priv *priv = container_of(handle, | ||
319 | struct timer_group_priv, timer[handle->num]); | ||
320 | struct cascade_priv *casc_priv; | ||
321 | |||
322 | u64 ticks; | ||
323 | u32 tmp_ticks; | ||
324 | |||
325 | casc_priv = priv->timer[handle->num].cascade_handle; | ||
326 | if (casc_priv) { | ||
327 | tmp_ticks = in_be32(&priv->regs[handle->num].gtccr); | ||
328 | ticks = ((u64)tmp_ticks & UINT_MAX) * (u64)MAX_TICKS_CASCADE; | ||
329 | tmp_ticks = in_be32(&priv->regs[handle->num - 1].gtccr); | ||
330 | ticks += tmp_ticks; | ||
331 | } else { | ||
332 | ticks = in_be32(&priv->regs[handle->num].gtccr); | ||
333 | } | ||
334 | |||
335 | convert_ticks_to_time(priv, ticks, time); | ||
336 | } | ||
337 | EXPORT_SYMBOL(mpic_get_remain_time); | ||
338 | |||
339 | /** | ||
340 | * mpic_free_timer - free hardware timer | ||
341 | * @handle: the timer to be removed. | ||
342 | * | ||
343 | * Free the timer. | ||
344 | * | ||
345 | * Note: cannot be used in interrupt context. | ||
346 | */ | ||
347 | void mpic_free_timer(struct mpic_timer *handle) | ||
348 | { | ||
349 | struct timer_group_priv *priv = container_of(handle, | ||
350 | struct timer_group_priv, timer[handle->num]); | ||
351 | |||
352 | struct cascade_priv *casc_priv; | ||
353 | unsigned long flags; | ||
354 | |||
355 | mpic_stop_timer(handle); | ||
356 | |||
357 | casc_priv = priv->timer[handle->num].cascade_handle; | ||
358 | |||
359 | free_irq(priv->timer[handle->num].irq, priv->timer[handle->num].dev); | ||
360 | |||
361 | spin_lock_irqsave(&priv->lock, flags); | ||
362 | if (casc_priv) { | ||
363 | u32 tcr; | ||
364 | tcr = casc_priv->tcr_value | (casc_priv->tcr_value << | ||
365 | MPIC_TIMER_TCR_ROVR_OFFSET); | ||
366 | clrbits32(priv->group_tcr, tcr); | ||
367 | priv->idle |= casc_priv->cascade_map; | ||
368 | priv->timer[handle->num].cascade_handle = NULL; | ||
369 | } else { | ||
370 | priv->idle |= TIMER_OFFSET(handle->num); | ||
371 | } | ||
372 | spin_unlock_irqrestore(&priv->lock, flags); | ||
373 | } | ||
374 | EXPORT_SYMBOL(mpic_free_timer); | ||
375 | |||
376 | /** | ||
377 | * mpic_request_timer - get a hardware timer | ||
378 | * @fn: interrupt handler function | ||
379 | * @dev: callback data passed to the interrupt handler | ||
380 | * @time: time until the timer expires | ||
381 | * | ||
382 | * This allocates a timer and sets up its interrupt via request_irq(), | ||
383 | * returning NULL on failure or the timer handle on success. | ||
384 | */ | ||
385 | struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, | ||
386 | const struct timeval *time) | ||
387 | { | ||
388 | struct mpic_timer *allocated_timer; | ||
389 | int ret; | ||
390 | |||
391 | if (list_empty(&timer_group_list)) | ||
392 | return NULL; | ||
393 | |||
394 | if (!(time->tv_sec + time->tv_usec) || | ||
395 | time->tv_sec < 0 || time->tv_usec < 0) | ||
396 | return NULL; | ||
397 | |||
398 | if (time->tv_usec > ONE_SECOND) | ||
399 | return NULL; | ||
400 | |||
401 | allocated_timer = get_timer(time); | ||
402 | if (!allocated_timer) | ||
403 | return NULL; | ||
404 | |||
405 | ret = request_irq(allocated_timer->irq, fn, | ||
406 | IRQF_TRIGGER_LOW, "global-timer", dev); | ||
407 | if (ret) { | ||
408 | mpic_free_timer(allocated_timer); | ||
409 | return NULL; | ||
410 | } | ||
411 | |||
412 | allocated_timer->dev = dev; | ||
413 | |||
414 | return allocated_timer; | ||
415 | } | ||
416 | EXPORT_SYMBOL(mpic_request_timer); | ||
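The exported calls above form a small request/start/free API. A hedged end-to-end sketch (handler name and interval are invented for illustration):

    static irqreturn_t demo_timer_irq(int irq, void *dev_id)
    {
            pr_info("mpic timer fired\n");
            return IRQ_HANDLED;
    }

    static int demo_use_mpic_timer(void)
    {
            struct timeval interval = { .tv_sec = 5, .tv_usec = 0 };
            struct mpic_timer *handle;

            handle = mpic_request_timer(demo_timer_irq, NULL, &interval);
            if (!handle)
                    return -EINVAL;

            mpic_start_timer(handle);

            /* ... later, from process context: */
            mpic_free_timer(handle);        /* stops the timer and frees its irq */
            return 0;
    }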
417 | |||
418 | static int timer_group_get_freq(struct device_node *np, | ||
419 | struct timer_group_priv *priv) | ||
420 | { | ||
421 | u32 div; | ||
422 | |||
423 | if (priv->flags & FSL_GLOBAL_TIMER) { | ||
424 | struct device_node *dn; | ||
425 | |||
426 | dn = of_find_compatible_node(NULL, NULL, "fsl,mpic"); | ||
427 | if (dn) { | ||
428 | of_property_read_u32(dn, "clock-frequency", | ||
429 | &priv->timerfreq); | ||
430 | of_node_put(dn); | ||
431 | } | ||
432 | } | ||
433 | |||
434 | if (priv->timerfreq <= 0) | ||
435 | return -EINVAL; | ||
436 | |||
437 | if (priv->flags & FSL_GLOBAL_TIMER) { | ||
438 | div = (1 << (MPIC_TIMER_TCR_CLKDIV >> 8)) * 8; | ||
439 | priv->timerfreq /= div; | ||
440 | } | ||
441 | |||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | static int timer_group_get_irq(struct device_node *np, | ||
446 | struct timer_group_priv *priv) | ||
447 | { | ||
448 | const u32 all_timer[] = { 0, TIMERS_PER_GROUP }; | ||
449 | const u32 *p; | ||
450 | u32 offset; | ||
451 | u32 count; | ||
452 | |||
453 | unsigned int i; | ||
454 | unsigned int j; | ||
455 | unsigned int irq_index = 0; | ||
456 | unsigned int irq; | ||
457 | int len; | ||
458 | |||
459 | p = of_get_property(np, "fsl,available-ranges", &len); | ||
460 | if (p && len % (2 * sizeof(u32)) != 0) { | ||
461 | pr_err("%s: malformed available-ranges property.\n", | ||
462 | np->full_name); | ||
463 | return -EINVAL; | ||
464 | } | ||
465 | |||
466 | if (!p) { | ||
467 | p = all_timer; | ||
468 | len = sizeof(all_timer); | ||
469 | } | ||
470 | |||
471 | len /= 2 * sizeof(u32); | ||
472 | |||
473 | for (i = 0; i < len; i++) { | ||
474 | offset = p[i * 2]; | ||
475 | count = p[i * 2 + 1]; | ||
476 | for (j = 0; j < count; j++) { | ||
477 | irq = irq_of_parse_and_map(np, irq_index); | ||
478 | if (!irq) { | ||
479 | pr_err("%s: irq parse and map failed.\n", | ||
480 | np->full_name); | ||
481 | return -EINVAL; | ||
482 | } | ||
483 | |||
484 | /* Set timer idle */ | ||
485 | priv->idle |= TIMER_OFFSET((offset + j)); | ||
486 | priv->timer[offset + j].irq = irq; | ||
487 | priv->timer[offset + j].num = offset + j; | ||
488 | irq_index++; | ||
489 | } | ||
490 | } | ||
491 | |||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | static void timer_group_init(struct device_node *np) | ||
496 | { | ||
497 | struct timer_group_priv *priv; | ||
498 | unsigned int i = 0; | ||
499 | int ret; | ||
500 | |||
501 | priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL); | ||
502 | if (!priv) { | ||
503 | pr_err("%s: cannot allocate memory for group.\n", | ||
504 | np->full_name); | ||
505 | return; | ||
506 | } | ||
507 | |||
508 | if (of_device_is_compatible(np, "fsl,mpic-global-timer")) | ||
509 | priv->flags |= FSL_GLOBAL_TIMER; | ||
510 | |||
511 | priv->regs = of_iomap(np, i++); | ||
512 | if (!priv->regs) { | ||
513 | pr_err("%s: cannot ioremap timer register address.\n", | ||
514 | np->full_name); | ||
515 | goto out; | ||
516 | } | ||
517 | |||
518 | if (priv->flags & FSL_GLOBAL_TIMER) { | ||
519 | priv->group_tcr = of_iomap(np, i++); | ||
520 | if (!priv->group_tcr) { | ||
521 | pr_err("%s: cannot ioremap tcr address.\n", | ||
522 | np->full_name); | ||
523 | goto out; | ||
524 | } | ||
525 | } | ||
526 | |||
527 | ret = timer_group_get_freq(np, priv); | ||
528 | if (ret < 0) { | ||
529 | pr_err("%s: cannot get timer frequency.\n", np->full_name); | ||
530 | goto out; | ||
531 | } | ||
532 | |||
533 | ret = timer_group_get_irq(np, priv); | ||
534 | if (ret < 0) { | ||
535 | pr_err("%s: cannot get timer irqs.\n", np->full_name); | ||
536 | goto out; | ||
537 | } | ||
538 | |||
539 | spin_lock_init(&priv->lock); | ||
540 | |||
541 | /* Init FSL timer hardware */ | ||
542 | if (priv->flags & FSL_GLOBAL_TIMER) | ||
543 | setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV); | ||
544 | |||
545 | list_add_tail(&priv->node, &timer_group_list); | ||
546 | |||
547 | return; | ||
548 | |||
549 | out: | ||
550 | if (priv->regs) | ||
551 | iounmap(priv->regs); | ||
552 | |||
553 | if (priv->group_tcr) | ||
554 | iounmap(priv->group_tcr); | ||
555 | |||
556 | kfree(priv); | ||
557 | } | ||
558 | |||
559 | static void mpic_timer_resume(void) | ||
560 | { | ||
561 | struct timer_group_priv *priv; | ||
562 | |||
563 | list_for_each_entry(priv, &timer_group_list, node) { | ||
564 | /* Init FSL timer hardware */ | ||
565 | if (priv->flags & FSL_GLOBAL_TIMER) | ||
566 | setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV); | ||
567 | } | ||
568 | } | ||
569 | |||
570 | static const struct of_device_id mpic_timer_ids[] = { | ||
571 | { .compatible = "fsl,mpic-global-timer", }, | ||
572 | {}, | ||
573 | }; | ||
574 | |||
575 | static struct syscore_ops mpic_timer_syscore_ops = { | ||
576 | .resume = mpic_timer_resume, | ||
577 | }; | ||
578 | |||
579 | static int __init mpic_timer_init(void) | ||
580 | { | ||
581 | struct device_node *np = NULL; | ||
582 | |||
583 | for_each_matching_node(np, mpic_timer_ids) | ||
584 | timer_group_init(np); | ||
585 | |||
586 | register_syscore_ops(&mpic_timer_syscore_ops); | ||
587 | |||
588 | if (list_empty(&timer_group_list)) | ||
589 | return -ENODEV; | ||
590 | |||
591 | return 0; | ||
592 | } | ||
593 | subsys_initcall(mpic_timer_init); | ||
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0ea4e591fa78..75fb726de91f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -1364,10 +1364,11 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) | |||
1364 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 1364 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1365 | 1365 | ||
1366 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | 1366 | #define __HAVE_ARCH_PGTABLE_DEPOSIT |
1367 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); | 1367 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
1368 | pgtable_t pgtable); | ||
1368 | 1369 | ||
1369 | #define __HAVE_ARCH_PGTABLE_WITHDRAW | 1370 | #define __HAVE_ARCH_PGTABLE_WITHDRAW |
1370 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); | 1371 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); |
1371 | 1372 | ||
1372 | static inline int pmd_trans_splitting(pmd_t pmd) | 1373 | static inline int pmd_trans_splitting(pmd_t pmd) |
1373 | { | 1374 | { |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 17bf4d3d303a..a8154a1a2c94 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -1165,7 +1165,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, | |||
1165 | } | 1165 | } |
1166 | } | 1166 | } |
1167 | 1167 | ||
1168 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | 1168 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
1169 | pgtable_t pgtable) | ||
1169 | { | 1170 | { |
1170 | struct list_head *lh = (struct list_head *) pgtable; | 1171 | struct list_head *lh = (struct list_head *) pgtable; |
1171 | 1172 | ||
@@ -1179,7 +1180,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | |||
1179 | mm->pmd_huge_pte = pgtable; | 1180 | mm->pmd_huge_pte = pgtable; |
1180 | } | 1181 | } |
1181 | 1182 | ||
1182 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | 1183 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) |
1183 | { | 1184 | { |
1184 | struct list_head *lh; | 1185 | struct list_head *lh; |
1185 | pgtable_t pgtable; | 1186 | pgtable_t pgtable; |
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 79c214efa3fe..36760317814f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h | |||
@@ -853,10 +853,11 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | |||
853 | pmd_t *pmd); | 853 | pmd_t *pmd); |
854 | 854 | ||
855 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | 855 | #define __HAVE_ARCH_PGTABLE_DEPOSIT |
856 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); | 856 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
857 | pgtable_t pgtable); | ||
857 | 858 | ||
858 | #define __HAVE_ARCH_PGTABLE_WITHDRAW | 859 | #define __HAVE_ARCH_PGTABLE_WITHDRAW |
859 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); | 860 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); |
860 | #endif | 861 | #endif |
861 | 862 | ||
862 | /* Encode and de-code a swap entry */ | 863 | /* Encode and de-code a swap entry */ |
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 37e7bc4c95b3..7a91f288c708 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c | |||
@@ -188,7 +188,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, | |||
188 | } | 188 | } |
189 | } | 189 | } |
190 | 190 | ||
191 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | 191 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
192 | pgtable_t pgtable) | ||
192 | { | 193 | { |
193 | struct list_head *lh = (struct list_head *) pgtable; | 194 | struct list_head *lh = (struct list_head *) pgtable; |
194 | 195 | ||
@@ -202,7 +203,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | |||
202 | mm->pmd_huge_pte = pgtable; | 203 | mm->pmd_huge_pte = pgtable; |
203 | } | 204 | } |
204 | 205 | ||
205 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | 206 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) |
206 | { | 207 | { |
207 | struct list_head *lh; | 208 | struct list_head *lh; |
208 | pgtable_t pgtable; | 209 | pgtable_t pgtable; |
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index f7b3b39e94fc..88d0b0f9f92b 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c | |||
@@ -935,7 +935,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, | |||
935 | struct timespec *time, char **buf, | 935 | struct timespec *time, char **buf, |
936 | struct pstore_info *psi); | 936 | struct pstore_info *psi); |
937 | static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, | 937 | static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, |
938 | u64 *id, unsigned int part, int count, | 938 | u64 *id, unsigned int part, int count, size_t hsize, |
939 | size_t size, struct pstore_info *psi); | 939 | size_t size, struct pstore_info *psi); |
940 | static int erst_clearer(enum pstore_type_id type, u64 id, int count, | 940 | static int erst_clearer(enum pstore_type_id type, u64 id, int count, |
941 | struct timespec time, struct pstore_info *psi); | 941 | struct timespec time, struct pstore_info *psi); |
@@ -1055,7 +1055,7 @@ out: | |||
1055 | } | 1055 | } |
1056 | 1056 | ||
1057 | static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, | 1057 | static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, |
1058 | u64 *id, unsigned int part, int count, | 1058 | u64 *id, unsigned int part, int count, size_t hsize, |
1059 | size_t size, struct pstore_info *psi) | 1059 | size_t size, struct pstore_info *psi) |
1060 | { | 1060 | { |
1061 | struct cper_pstore_record *rcd = (struct cper_pstore_record *) | 1061 | struct cper_pstore_record *rcd = (struct cper_pstore_record *) |
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 91864ad200ff..73de5a9c2247 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c | |||
@@ -103,7 +103,7 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, | |||
103 | 103 | ||
104 | static int efi_pstore_write(enum pstore_type_id type, | 104 | static int efi_pstore_write(enum pstore_type_id type, |
105 | enum kmsg_dump_reason reason, u64 *id, | 105 | enum kmsg_dump_reason reason, u64 *id, |
106 | unsigned int part, int count, size_t size, | 106 | unsigned int part, int count, size_t hsize, size_t size, |
107 | struct pstore_info *psi) | 107 | struct pstore_info *psi) |
108 | { | 108 | { |
109 | char name[DUMP_NAME_LEN]; | 109 | char name[DUMP_NAME_LEN]; |
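Both writers above gain a size_t hsize argument between count and size, so the pstore write callback type presumably changes in step in the core (outside this diff). The implied callback signature, with hsize carrying the length of any header the platform driver writes ahead of the record:

    int (*write)(enum pstore_type_id type, enum kmsg_dump_reason reason,
                 u64 *id, unsigned int part, int count, size_t hsize,
                 size_t size, struct pstore_info *psi);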
diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 3823623baa48..9e6002108720 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c | |||
@@ -338,6 +338,14 @@ static int cpm_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) | |||
338 | tptr = 0; | 338 | tptr = 0; |
339 | rptr = 0; | 339 | rptr = 0; |
340 | 340 | ||
341 | /* | ||
342 | * If there was a collision in the last i2c transaction, | ||
343 | * Set I2COM_MASTER as it was cleared during collision. | ||
344 | */ | ||
345 | if (in_be16(&tbdf->cbd_sc) & BD_SC_CL) { | ||
346 | out_8(&cpm->i2c_reg->i2com, I2COM_MASTER); | ||
347 | } | ||
348 | |||
341 | while (tptr < num) { | 349 | while (tptr < num) { |
342 | pmsg = &msgs[tptr]; | 350 | pmsg = &msgs[tptr]; |
343 | dev_dbg(&adap->dev, "R: %d T: %d\n", rptr, tptr); | 351 | dev_dbg(&adap->dev, "R: %d T: %d\n", rptr, tptr); |
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c332fb98480d..01730b2b9954 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig | |||
@@ -261,4 +261,12 @@ config SHMOBILE_IOMMU_L1SIZE | |||
261 | default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB | 261 | default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB |
262 | default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB | 262 | default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB |
263 | 263 | ||
264 | config SPAPR_TCE_IOMMU | ||
265 | bool "sPAPR TCE IOMMU Support" | ||
266 | depends on PPC_POWERNV || PPC_PSERIES | ||
267 | select IOMMU_API | ||
268 | help | ||
269 | Enables the parts of the IOMMU API required by VFIO. The | ||
270 | iommu_ops callbacks are not implemented, as VFIO does not need them. | ||
271 | |||
264 | endif # IOMMU_SUPPORT | 272 | endif # IOMMU_SUPPORT |
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index b026896206ca..04a50498f257 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c | |||
@@ -697,7 +697,7 @@ static ssize_t adb_read(struct file *file, char __user *buf, | |||
697 | int ret = 0; | 697 | int ret = 0; |
698 | struct adbdev_state *state = file->private_data; | 698 | struct adbdev_state *state = file->private_data; |
699 | struct adb_request *req; | 699 | struct adb_request *req; |
700 | wait_queue_t wait = __WAITQUEUE_INITIALIZER(wait,current); | 700 | DECLARE_WAITQUEUE(wait,current); |
701 | unsigned long flags; | 701 | unsigned long flags; |
702 | 702 | ||
703 | if (count < 2) | 703 | if (count < 2) |
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 6a82388505f0..80d30e8e3389 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c | |||
@@ -181,7 +181,7 @@ static void mac_hid_stop_emulation(void) | |||
181 | mac_hid_destroy_emumouse(); | 181 | mac_hid_destroy_emumouse(); |
182 | } | 182 | } |
183 | 183 | ||
184 | static int mac_hid_toggle_emumouse(ctl_table *table, int write, | 184 | static int mac_hid_toggle_emumouse(struct ctl_table *table, int write, |
185 | void __user *buffer, size_t *lenp, | 185 | void __user *buffer, size_t *lenp, |
186 | loff_t *ppos) | 186 | loff_t *ppos) |
187 | { | 187 | { |
@@ -214,7 +214,7 @@ static int mac_hid_toggle_emumouse(ctl_table *table, int write, | |||
214 | } | 214 | } |
215 | 215 | ||
216 | /* file(s) in /proc/sys/dev/mac_hid */ | 216 | /* file(s) in /proc/sys/dev/mac_hid */ |
217 | static ctl_table mac_hid_files[] = { | 217 | static struct ctl_table mac_hid_files[] = { |
218 | { | 218 | { |
219 | .procname = "mouse_button_emulation", | 219 | .procname = "mouse_button_emulation", |
220 | .data = &mouse_emulate_buttons, | 220 | .data = &mouse_emulate_buttons, |
@@ -240,7 +240,7 @@ static ctl_table mac_hid_files[] = { | |||
240 | }; | 240 | }; |
241 | 241 | ||
242 | /* dir in /proc/sys/dev */ | 242 | /* dir in /proc/sys/dev */ |
243 | static ctl_table mac_hid_dir[] = { | 243 | static struct ctl_table mac_hid_dir[] = { |
244 | { | 244 | { |
245 | .procname = "mac_hid", | 245 | .procname = "mac_hid", |
246 | .maxlen = 0, | 246 | .maxlen = 0, |
@@ -251,7 +251,7 @@ static ctl_table mac_hid_dir[] = { | |||
251 | }; | 251 | }; |
252 | 252 | ||
253 | /* /proc/sys/dev itself, in case that is not there yet */ | 253 | /* /proc/sys/dev itself, in case that is not there yet */ |
254 | static ctl_table mac_hid_root_dir[] = { | 254 | static struct ctl_table mac_hid_root_dir[] = { |
255 | { | 255 | { |
256 | .procname = "dev", | 256 | .procname = "dev", |
257 | .maxlen = 0, | 257 | .maxlen = 0, |
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index 86511c570dd8..d61f271d2207 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c | |||
@@ -259,7 +259,7 @@ cuda_probe(void) | |||
259 | } while (0) | 259 | } while (0) |
260 | 260 | ||
261 | static int | 261 | static int |
262 | cuda_init_via(void) | 262 | __init cuda_init_via(void) |
263 | { | 263 | { |
264 | out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ); /* TACK & TIP out */ | 264 | out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ); /* TACK & TIP out */ |
265 | out_8(&via[B], in_8(&via[B]) | TACK | TIP); /* negate them */ | 265 | out_8(&via[B], in_8(&via[B]) | TACK | TIP); /* negate them */ |
diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index af605e915d41..7fe58b0ae8b4 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c | |||
@@ -276,6 +276,7 @@ static const char *loop_names[N_LOOPS] = { | |||
276 | 276 | ||
277 | static unsigned int pm121_failure_state; | 277 | static unsigned int pm121_failure_state; |
278 | static int pm121_readjust, pm121_skipping; | 278 | static int pm121_readjust, pm121_skipping; |
279 | static bool pm121_overtemp; | ||
279 | static s32 average_power; | 280 | static s32 average_power; |
280 | 281 | ||
281 | struct pm121_correction { | 282 | struct pm121_correction { |
@@ -847,6 +848,7 @@ static void pm121_tick(void) | |||
847 | if (new_failure & FAILURE_OVERTEMP) { | 848 | if (new_failure & FAILURE_OVERTEMP) { |
848 | wf_set_overtemp(); | 849 | wf_set_overtemp(); |
849 | pm121_skipping = 2; | 850 | pm121_skipping = 2; |
851 | pm121_overtemp = true; | ||
850 | } | 852 | } |
851 | 853 | ||
852 | /* We only clear the overtemp condition if overtemp is cleared | 854 | /* We only clear the overtemp condition if overtemp is cleared |
@@ -855,8 +857,10 @@ static void pm121_tick(void) | |||
855 | * the control loop levels, but we don't want to keep it clear | 857 | * the control loop levels, but we don't want to keep it clear |
856 | * here in this case | 858 | * here in this case |
857 | */ | 859 | */ |
858 | if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) | 860 | if (!pm121_failure_state && pm121_overtemp) { |
859 | wf_clear_overtemp(); | 861 | wf_clear_overtemp(); |
862 | pm121_overtemp = false; | ||
863 | } | ||
860 | } | 864 | } |
861 | 865 | ||
862 | 866 | ||
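
The pm121 change above (repeated in windfarm_pm81.c and windfarm_pm91.c below) fixes the overtemp bookkeeping: the old test cleared the condition only when FAILURE_OVERTEMP had been set on the immediately preceding tick, so if another failure bit lingered while overtemp went away, wf_clear_overtemp() was never called to balance wf_set_overtemp(). A bool now latches that this driver raised the condition, and the clear happens once the failure state is fully clear. Distilled, with illustrative names:

    static bool overtemp_latched;

    static void handle_failures(unsigned int failure_state,
                                unsigned int new_failure)
    {
            if (new_failure & FAILURE_OVERTEMP) {
                    wf_set_overtemp();
                    overtemp_latched = true;
            }

            /* Clear only when *all* failures are gone and we know we
             * raised the condition in the first place. */
            if (!failure_state && overtemp_latched) {
                    wf_clear_overtemp();
                    overtemp_latched = false;
            }
    }
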
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c index f84933ff3298..2a5e1b15b1d2 100644 --- a/drivers/macintosh/windfarm_pm81.c +++ b/drivers/macintosh/windfarm_pm81.c | |||
@@ -149,6 +149,7 @@ static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started; | |||
149 | 149 | ||
150 | static unsigned int wf_smu_failure_state; | 150 | static unsigned int wf_smu_failure_state; |
151 | static int wf_smu_readjust, wf_smu_skipping; | 151 | static int wf_smu_readjust, wf_smu_skipping; |
152 | static bool wf_smu_overtemp; | ||
152 | 153 | ||
153 | /* | 154 | /* |
154 | * ****** System Fans Control Loop ****** | 155 | * ****** System Fans Control Loop ****** |
@@ -593,6 +594,7 @@ static void wf_smu_tick(void) | |||
593 | if (new_failure & FAILURE_OVERTEMP) { | 594 | if (new_failure & FAILURE_OVERTEMP) { |
594 | wf_set_overtemp(); | 595 | wf_set_overtemp(); |
595 | wf_smu_skipping = 2; | 596 | wf_smu_skipping = 2; |
597 | wf_smu_overtemp = true; | ||
596 | } | 598 | } |
597 | 599 | ||
598 | /* We only clear the overtemp condition if overtemp is cleared | 600 | /* We only clear the overtemp condition if overtemp is cleared |
@@ -601,8 +603,10 @@ static void wf_smu_tick(void) | |||
601 | * the control loop levels, but we don't want to keep it clear | 603 | * the control loop levels, but we don't want to keep it clear |
602 | * here in this case | 604 | * here in this case |
603 | */ | 605 | */ |
604 | if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) | 606 | if (!wf_smu_failure_state && wf_smu_overtemp) { |
605 | wf_clear_overtemp(); | 607 | wf_clear_overtemp(); |
608 | wf_smu_overtemp = false; | ||
609 | } | ||
606 | } | 610 | } |
607 | 611 | ||
608 | static void wf_smu_new_control(struct wf_control *ct) | 612 | static void wf_smu_new_control(struct wf_control *ct) |
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c index 2eb484f213c8..a8ac66cd3b13 100644 --- a/drivers/macintosh/windfarm_pm91.c +++ b/drivers/macintosh/windfarm_pm91.c | |||
@@ -76,6 +76,7 @@ static struct wf_control *cpufreq_clamp; | |||
76 | 76 | ||
77 | /* Set to kick the control loop into life */ | 77 | /* Set to kick the control loop into life */ |
78 | static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started; | 78 | static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started; |
79 | static bool wf_smu_overtemp; | ||
79 | 80 | ||
80 | /* Failure handling.. could be nicer */ | 81 | /* Failure handling.. could be nicer */ |
81 | #define FAILURE_FAN 0x01 | 82 | #define FAILURE_FAN 0x01 |
@@ -517,6 +518,7 @@ static void wf_smu_tick(void) | |||
517 | if (new_failure & FAILURE_OVERTEMP) { | 518 | if (new_failure & FAILURE_OVERTEMP) { |
518 | wf_set_overtemp(); | 519 | wf_set_overtemp(); |
519 | wf_smu_skipping = 2; | 520 | wf_smu_skipping = 2; |
521 | wf_smu_overtemp = true; | ||
520 | } | 522 | } |
521 | 523 | ||
522 | /* We only clear the overtemp condition if overtemp is cleared | 524 | /* We only clear the overtemp condition if overtemp is cleared |
@@ -525,8 +527,10 @@ static void wf_smu_tick(void) | |||
525 | * the control loop levels, but we don't want to keep it clear | 527 | * the control loop levels, but we don't want to keep it clear |
526 | * here in this case | 528 | * here in this case |
527 | */ | 529 | */ |
528 | if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) | 530 | if (!wf_smu_failure_state && wf_smu_overtemp) { |
529 | wf_clear_overtemp(); | 531 | wf_clear_overtemp(); |
532 | wf_smu_overtemp = false; | ||
533 | } | ||
530 | } | 534 | } |
531 | 535 | ||
532 | 536 | ||
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index d87f5ee04ca9..ad6223e88340 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c | |||
@@ -343,7 +343,6 @@ static int wf_sat_remove(struct i2c_client *client) | |||
343 | wf_unregister_sensor(&sens->sens); | 343 | wf_unregister_sensor(&sens->sens); |
344 | } | 344 | } |
345 | sat->i2c = NULL; | 345 | sat->i2c = NULL; |
346 | i2c_set_clientdata(client, NULL); | ||
347 | kref_put(&sat->ref, wf_sat_release); | 346 | kref_put(&sat->ref, wf_sat_release); |
348 | 347 | ||
349 | return 0; | 348 | return 0; |
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 7cd5dec0abd1..26b3d9d1409f 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig | |||
@@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1 | |||
3 | depends on VFIO | 3 | depends on VFIO |
4 | default n | 4 | default n |
5 | 5 | ||
6 | config VFIO_IOMMU_SPAPR_TCE | ||
7 | tristate | ||
8 | depends on VFIO && SPAPR_TCE_IOMMU | ||
9 | default n | ||
10 | |||
6 | menuconfig VFIO | 11 | menuconfig VFIO |
7 | tristate "VFIO Non-Privileged userspace driver framework" | 12 | tristate "VFIO Non-Privileged userspace driver framework" |
8 | depends on IOMMU_API | 13 | depends on IOMMU_API |
9 | select VFIO_IOMMU_TYPE1 if X86 | 14 | select VFIO_IOMMU_TYPE1 if X86 |
15 | select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) | ||
10 | help | 16 | help |
11 | VFIO provides a framework for secure userspace device drivers. | 17 | VFIO provides a framework for secure userspace device drivers. |
12 | See Documentation/vfio.txt for more details. | 18 | See Documentation/vfio.txt for more details. |
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 2398d4a0e38b..72bfabc8629e 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | obj-$(CONFIG_VFIO) += vfio.o | 1 | obj-$(CONFIG_VFIO) += vfio.o |
2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | 2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o |
3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o | ||
3 | obj-$(CONFIG_VFIO_PCI) += pci/ | 4 | obj-$(CONFIG_VFIO_PCI) += pci/ |
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6d78736563de..259ad282ae5d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c | |||
@@ -1415,6 +1415,7 @@ static int __init vfio_init(void) | |||
1415 | * drivers. | 1415 | * drivers. |
1416 | */ | 1416 | */ |
1417 | request_module_nowait("vfio_iommu_type1"); | 1417 | request_module_nowait("vfio_iommu_type1"); |
1418 | request_module_nowait("vfio_iommu_spapr_tce"); | ||
1418 | 1419 | ||
1419 | return 0; | 1420 | return 0; |
1420 | 1421 | ||
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c new file mode 100644 index 000000000000..bdae7a04af75 --- /dev/null +++ b/drivers/vfio/vfio_iommu_spapr_tce.c | |||
@@ -0,0 +1,377 @@ | |||
1 | /* | ||
2 | * VFIO: IOMMU DMA mapping support for TCE on POWER | ||
3 | * | ||
4 | * Copyright (C) 2013 IBM Corp. All rights reserved. | ||
5 | * Author: Alexey Kardashevskiy <aik@ozlabs.ru> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Derived from original vfio_iommu_type1.c: | ||
12 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | ||
13 | * Author: Alex Williamson <alex.williamson@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | #include <linux/err.h> | ||
21 | #include <linux/vfio.h> | ||
22 | #include <asm/iommu.h> | ||
23 | #include <asm/tce.h> | ||
24 | |||
25 | #define DRIVER_VERSION "0.1" | ||
26 | #define DRIVER_AUTHOR "aik@ozlabs.ru" | ||
27 | #define DRIVER_DESC "VFIO IOMMU SPAPR TCE" | ||
28 | |||
29 | static void tce_iommu_detach_group(void *iommu_data, | ||
30 | struct iommu_group *iommu_group); | ||
31 | |||
32 | /* | ||
33 | * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation | ||
34 | * | ||
35 | * This code handles mapping and unmapping of user data buffers | ||
36 | * into DMA'ble space using the IOMMU. | ||
37 | */ | ||
38 | |||
39 | /* | ||
40 | * The container descriptor supports only a single group per container. | ||
41 | * This is required by the API, as the container is not supplied with the | ||
42 | * IOMMU group at the moment of initialization. | ||
43 | */ | ||
44 | struct tce_container { | ||
45 | struct mutex lock; | ||
46 | struct iommu_table *tbl; | ||
47 | bool enabled; | ||
48 | }; | ||
49 | |||
50 | static int tce_iommu_enable(struct tce_container *container) | ||
51 | { | ||
52 | int ret = 0; | ||
53 | unsigned long locked, lock_limit, npages; | ||
54 | struct iommu_table *tbl = container->tbl; | ||
55 | |||
56 | if (!container->tbl) | ||
57 | return -ENXIO; | ||
58 | |||
59 | if (!current->mm) | ||
60 | return -ESRCH; /* process exited */ | ||
61 | |||
62 | if (container->enabled) | ||
63 | return -EBUSY; | ||
64 | |||
65 | /* | ||
66 | * When userspace pages are mapped into the IOMMU, they are effectively | ||
67 | * locked memory, so, theoretically, we need to update the accounting | ||
68 | * of locked pages on each map and unmap. For powerpc, the map unmap | ||
69 | * paths can be very hot, though, and the accounting would kill | ||
70 | * performance, especially since it would be difficult, if not | ||
71 | * impossible, to handle the accounting in real mode only. | ||
72 | * | ||
73 | * To address that, rather than precisely accounting every page, we | ||
74 | * instead account for a worst case on locked memory when the iommu is | ||
75 | * enabled and disabled. The worst case upper bound on locked memory | ||
76 | * is the size of the whole iommu window, which is usually relatively | ||
77 | * small (compared to total memory sizes) on POWER hardware. | ||
78 | * | ||
79 | * Also, we don't have a nice way to fail on H_PUT_TCE due to ulimits; | ||
80 | * that would effectively kill the guest at random points, so it is | ||
81 | * much better to enforce the limit based on the max the guest can map. | ||
82 | */ | ||
83 | down_write(¤t->mm->mmap_sem); | ||
84 | npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||
85 | locked = current->mm->locked_vm + npages; | ||
86 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
87 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { | ||
88 | pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", | ||
89 | rlimit(RLIMIT_MEMLOCK)); | ||
90 | ret = -ENOMEM; | ||
91 | } else { | ||
92 | |||
93 | current->mm->locked_vm += npages; | ||
94 | container->enabled = true; | ||
95 | } | ||
96 | up_write(¤t->mm->mmap_sem); | ||
97 | |||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | static void tce_iommu_disable(struct tce_container *container) | ||
102 | { | ||
103 | if (!container->enabled) | ||
104 | return; | ||
105 | |||
106 | container->enabled = false; | ||
107 | |||
108 | if (!container->tbl || !current->mm) | ||
109 | return; | ||
110 | |||
111 | down_write(¤t->mm->mmap_sem); | ||
112 | current->mm->locked_vm -= (container->tbl->it_size << | ||
113 | IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||
114 | up_write(¤t->mm->mmap_sem); | ||
115 | } | ||
116 | |||
117 | static void *tce_iommu_open(unsigned long arg) | ||
118 | { | ||
119 | struct tce_container *container; | ||
120 | |||
121 | if (arg != VFIO_SPAPR_TCE_IOMMU) { | ||
122 | pr_err("tce_vfio: Wrong IOMMU type\n"); | ||
123 | return ERR_PTR(-EINVAL); | ||
124 | } | ||
125 | |||
126 | container = kzalloc(sizeof(*container), GFP_KERNEL); | ||
127 | if (!container) | ||
128 | return ERR_PTR(-ENOMEM); | ||
129 | |||
130 | mutex_init(&container->lock); | ||
131 | |||
132 | return container; | ||
133 | } | ||
134 | |||
135 | static void tce_iommu_release(void *iommu_data) | ||
136 | { | ||
137 | struct tce_container *container = iommu_data; | ||
138 | |||
139 | WARN_ON(container->tbl && !container->tbl->it_group); | ||
140 | tce_iommu_disable(container); | ||
141 | |||
142 | if (container->tbl && container->tbl->it_group) | ||
143 | tce_iommu_detach_group(iommu_data, container->tbl->it_group); | ||
144 | |||
145 | mutex_destroy(&container->lock); | ||
146 | |||
147 | kfree(container); | ||
148 | } | ||
149 | |||
150 | static long tce_iommu_ioctl(void *iommu_data, | ||
151 | unsigned int cmd, unsigned long arg) | ||
152 | { | ||
153 | struct tce_container *container = iommu_data; | ||
154 | unsigned long minsz; | ||
155 | long ret; | ||
156 | |||
157 | switch (cmd) { | ||
158 | case VFIO_CHECK_EXTENSION: | ||
159 | return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; | ||
160 | |||
161 | case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { | ||
162 | struct vfio_iommu_spapr_tce_info info; | ||
163 | struct iommu_table *tbl = container->tbl; | ||
164 | |||
165 | if (WARN_ON(!tbl)) | ||
166 | return -ENXIO; | ||
167 | |||
168 | minsz = offsetofend(struct vfio_iommu_spapr_tce_info, | ||
169 | dma32_window_size); | ||
170 | |||
171 | if (copy_from_user(&info, (void __user *)arg, minsz)) | ||
172 | return -EFAULT; | ||
173 | |||
174 | if (info.argsz < minsz) | ||
175 | return -EINVAL; | ||
176 | |||
177 | info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; | ||
178 | info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; | ||
179 | info.flags = 0; | ||
180 | |||
181 | if (copy_to_user((void __user *)arg, &info, minsz)) | ||
182 | return -EFAULT; | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | case VFIO_IOMMU_MAP_DMA: { | ||
187 | struct vfio_iommu_type1_dma_map param; | ||
188 | struct iommu_table *tbl = container->tbl; | ||
189 | unsigned long tce, i; | ||
190 | |||
191 | if (!tbl) | ||
192 | return -ENXIO; | ||
193 | |||
194 | BUG_ON(!tbl->it_group); | ||
195 | |||
196 | minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); | ||
197 | |||
198 | if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||
199 | return -EFAULT; | ||
200 | |||
201 | if (param.argsz < minsz) | ||
202 | return -EINVAL; | ||
203 | |||
204 | if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | | ||
205 | VFIO_DMA_MAP_FLAG_WRITE)) | ||
206 | return -EINVAL; | ||
207 | |||
208 | if ((param.size & ~IOMMU_PAGE_MASK) || | ||
209 | (param.vaddr & ~IOMMU_PAGE_MASK)) | ||
210 | return -EINVAL; | ||
211 | |||
212 | /* iova is checked by the IOMMU API */ | ||
213 | tce = param.vaddr; | ||
214 | if (param.flags & VFIO_DMA_MAP_FLAG_READ) | ||
215 | tce |= TCE_PCI_READ; | ||
216 | if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) | ||
217 | tce |= TCE_PCI_WRITE; | ||
218 | |||
219 | ret = iommu_tce_put_param_check(tbl, param.iova, tce); | ||
220 | if (ret) | ||
221 | return ret; | ||
222 | |||
223 | for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { | ||
224 | ret = iommu_put_tce_user_mode(tbl, | ||
225 | (param.iova >> IOMMU_PAGE_SHIFT) + i, | ||
226 | tce); | ||
227 | if (ret) | ||
228 | break; | ||
229 | tce += IOMMU_PAGE_SIZE; | ||
230 | } | ||
231 | if (ret) | ||
232 | iommu_clear_tces_and_put_pages(tbl, | ||
233 | param.iova >> IOMMU_PAGE_SHIFT, i); | ||
234 | |||
235 | iommu_flush_tce(tbl); | ||
236 | |||
237 | return ret; | ||
238 | } | ||
239 | case VFIO_IOMMU_UNMAP_DMA: { | ||
240 | struct vfio_iommu_type1_dma_unmap param; | ||
241 | struct iommu_table *tbl = container->tbl; | ||
242 | |||
243 | if (WARN_ON(!tbl)) | ||
244 | return -ENXIO; | ||
245 | |||
246 | minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, | ||
247 | size); | ||
248 | |||
249 | if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||
250 | return -EFAULT; | ||
251 | |||
252 | if (param.argsz < minsz) | ||
253 | return -EINVAL; | ||
254 | |||
255 | /* No flag is supported now */ | ||
256 | if (param.flags) | ||
257 | return -EINVAL; | ||
258 | |||
259 | if (param.size & ~IOMMU_PAGE_MASK) | ||
260 | return -EINVAL; | ||
261 | |||
262 | ret = iommu_tce_clear_param_check(tbl, param.iova, 0, | ||
263 | param.size >> IOMMU_PAGE_SHIFT); | ||
264 | if (ret) | ||
265 | return ret; | ||
266 | |||
267 | ret = iommu_clear_tces_and_put_pages(tbl, | ||
268 | param.iova >> IOMMU_PAGE_SHIFT, | ||
269 | param.size >> IOMMU_PAGE_SHIFT); | ||
270 | iommu_flush_tce(tbl); | ||
271 | |||
272 | return ret; | ||
273 | } | ||
274 | case VFIO_IOMMU_ENABLE: | ||
275 | mutex_lock(&container->lock); | ||
276 | ret = tce_iommu_enable(container); | ||
277 | mutex_unlock(&container->lock); | ||
278 | return ret; | ||
279 | |||
280 | |||
281 | case VFIO_IOMMU_DISABLE: | ||
282 | mutex_lock(&container->lock); | ||
283 | tce_iommu_disable(container); | ||
284 | mutex_unlock(&container->lock); | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | return -ENOTTY; | ||
289 | } | ||
290 | |||
291 | static int tce_iommu_attach_group(void *iommu_data, | ||
292 | struct iommu_group *iommu_group) | ||
293 | { | ||
294 | int ret; | ||
295 | struct tce_container *container = iommu_data; | ||
296 | struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||
297 | |||
298 | BUG_ON(!tbl); | ||
299 | mutex_lock(&container->lock); | ||
300 | |||
301 | /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", | ||
302 | iommu_group_id(iommu_group), iommu_group); */ | ||
303 | if (container->tbl) { | ||
304 | pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", | ||
305 | iommu_group_id(container->tbl->it_group), | ||
306 | iommu_group_id(iommu_group)); | ||
307 | ret = -EBUSY; | ||
308 | } else if (container->enabled) { | ||
309 | pr_err("tce_vfio: attaching group #%u to enabled container\n", | ||
310 | iommu_group_id(iommu_group)); | ||
311 | ret = -EBUSY; | ||
312 | } else { | ||
313 | ret = iommu_take_ownership(tbl); | ||
314 | if (!ret) | ||
315 | container->tbl = tbl; | ||
316 | } | ||
317 | |||
318 | mutex_unlock(&container->lock); | ||
319 | |||
320 | return ret; | ||
321 | } | ||
322 | |||
323 | static void tce_iommu_detach_group(void *iommu_data, | ||
324 | struct iommu_group *iommu_group) | ||
325 | { | ||
326 | struct tce_container *container = iommu_data; | ||
327 | struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||
328 | |||
329 | BUG_ON(!tbl); | ||
330 | mutex_lock(&container->lock); | ||
331 | if (tbl != container->tbl) { | ||
332 | pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", | ||
333 | iommu_group_id(iommu_group), | ||
334 | iommu_group_id(tbl->it_group)); | ||
335 | } else { | ||
336 | if (container->enabled) { | ||
337 | pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", | ||
338 | iommu_group_id(tbl->it_group)); | ||
339 | tce_iommu_disable(container); | ||
340 | } | ||
341 | |||
342 | /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", | ||
343 | iommu_group_id(iommu_group), iommu_group); */ | ||
344 | container->tbl = NULL; | ||
345 | iommu_release_ownership(tbl); | ||
346 | } | ||
347 | mutex_unlock(&container->lock); | ||
348 | } | ||
349 | |||
350 | const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { | ||
351 | .name = "iommu-vfio-powerpc", | ||
352 | .owner = THIS_MODULE, | ||
353 | .open = tce_iommu_open, | ||
354 | .release = tce_iommu_release, | ||
355 | .ioctl = tce_iommu_ioctl, | ||
356 | .attach_group = tce_iommu_attach_group, | ||
357 | .detach_group = tce_iommu_detach_group, | ||
358 | }; | ||
359 | |||
360 | static int __init tce_iommu_init(void) | ||
361 | { | ||
362 | return vfio_register_iommu_driver(&tce_iommu_driver_ops); | ||
363 | } | ||
364 | |||
365 | static void __exit tce_iommu_cleanup(void) | ||
366 | { | ||
367 | vfio_unregister_iommu_driver(&tce_iommu_driver_ops); | ||
368 | } | ||
369 | |||
370 | module_init(tce_iommu_init); | ||
371 | module_exit(tce_iommu_cleanup); | ||
372 | |||
373 | MODULE_VERSION(DRIVER_VERSION); | ||
374 | MODULE_LICENSE("GPL v2"); | ||
375 | MODULE_AUTHOR(DRIVER_AUTHOR); | ||
376 | MODULE_DESCRIPTION(DRIVER_DESC); | ||
377 | |||
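
End to end, the new driver gives userspace this flow: create a container, attach the group, set the SPAPR TCE IOMMU model, query the 32-bit DMA window, enable the container (which charges the whole window against RLIMIT_MEMLOCK), then map and unmap. A minimal userspace sketch with error handling omitted; the group path /dev/vfio/26 is hypothetical:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/vfio.h>

    int main(void)
    {
            int container = open("/dev/vfio/vfio", O_RDWR);
            int group = open("/dev/vfio/26", O_RDWR);  /* hypothetical group */
            struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
            struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map) };
            void *mem;

            ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
            ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);

            ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
            printf("DMA32 window: start 0x%x, size 0x%x\n",
                   info.dma32_window_start, info.dma32_window_size);

            /* Accounts the whole window as locked memory. */
            ioctl(container, VFIO_IOMMU_ENABLE);

            mem = mmap(NULL, 0x10000, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            map.vaddr = (uintptr_t)mem;
            map.iova = info.dma32_window_start;
            map.size = 0x10000;
            map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
            ioctl(container, VFIO_IOMMU_MAP_DMA, &map);

            ioctl(container, VFIO_IOMMU_DISABLE);
            return 0;
    }
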
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index a8dbceb32914..f1b8d555080e 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c | |||
@@ -138,6 +138,14 @@ static void __booke_wdt_enable(void *data) | |||
138 | val &= ~WDTP_MASK; | 138 | val &= ~WDTP_MASK; |
139 | val |= (TCR_WIE|TCR_WRC(WRC_CHIP)|WDTP(booke_wdt_period)); | 139 | val |= (TCR_WIE|TCR_WRC(WRC_CHIP)|WDTP(booke_wdt_period)); |
140 | 140 | ||
141 | #ifdef CONFIG_PPC_BOOK3E_64 | ||
142 | /* | ||
143 | * Crit ints are currently broken on PPC64 Book-E, so | ||
144 | * just disable them for now. | ||
145 | */ | ||
146 | val &= ~TCR_WIE; | ||
147 | #endif | ||
148 | |||
141 | mtspr(SPRN_TCR, val); | 149 | mtspr(SPRN_TCR, val); |
142 | } | 150 | } |
143 | 151 | ||
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 43b12807a51d..76a4eeb92982 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c | |||
@@ -44,7 +44,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, | |||
44 | rec.parent_ip = parent_ip; | 44 | rec.parent_ip = parent_ip; |
45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); | 45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); |
46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, | 46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, |
47 | sizeof(rec), psinfo); | 47 | 0, sizeof(rec), psinfo); |
48 | 48 | ||
49 | local_irq_restore(flags); | 49 | local_irq_restore(flags); |
50 | } | 50 | } |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index bfd95bf38005..71bf5f4ae84c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -326,6 +326,15 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, | |||
326 | case PSTORE_TYPE_MCE: | 326 | case PSTORE_TYPE_MCE: |
327 | sprintf(name, "mce-%s-%lld", psname, id); | 327 | sprintf(name, "mce-%s-%lld", psname, id); |
328 | break; | 328 | break; |
329 | case PSTORE_TYPE_PPC_RTAS: | ||
330 | sprintf(name, "rtas-%s-%lld", psname, id); | ||
331 | break; | ||
332 | case PSTORE_TYPE_PPC_OF: | ||
333 | sprintf(name, "powerpc-ofw-%s-%lld", psname, id); | ||
334 | break; | ||
335 | case PSTORE_TYPE_PPC_COMMON: | ||
336 | sprintf(name, "powerpc-common-%s-%lld", psname, id); | ||
337 | break; | ||
329 | case PSTORE_TYPE_UNKNOWN: | 338 | case PSTORE_TYPE_UNKNOWN: |
330 | sprintf(name, "unknown-%s-%lld", psname, id); | 339 | sprintf(name, "unknown-%s-%lld", psname, id); |
331 | break; | 340 | break; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b7ffe2bcd9c4..422962ae9fc2 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -159,7 +159,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
159 | break; | 159 | break; |
160 | 160 | ||
161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, | 161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, |
162 | oopscount, hsize + len, psinfo); | 162 | oopscount, hsize, hsize + len, psinfo); |
163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) | 163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
164 | pstore_new_entry = 1; | 164 | pstore_new_entry = 1; |
165 | 165 | ||
@@ -196,7 +196,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) | |||
196 | spin_lock_irqsave(&psinfo->buf_lock, flags); | 196 | spin_lock_irqsave(&psinfo->buf_lock, flags); |
197 | } | 197 | } |
198 | memcpy(psinfo->buf, s, c); | 198 | memcpy(psinfo->buf, s, c); |
199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); | 199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); |
200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); | 200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); |
201 | s += c; | 201 | s += c; |
202 | c = e - s; | 202 | c = e - s; |
@@ -221,9 +221,11 @@ static void pstore_register_console(void) {} | |||
221 | static int pstore_write_compat(enum pstore_type_id type, | 221 | static int pstore_write_compat(enum pstore_type_id type, |
222 | enum kmsg_dump_reason reason, | 222 | enum kmsg_dump_reason reason, |
223 | u64 *id, unsigned int part, int count, | 223 | u64 *id, unsigned int part, int count, |
224 | size_t size, struct pstore_info *psi) | 224 | size_t hsize, size_t size, |
225 | struct pstore_info *psi) | ||
225 | { | 226 | { |
226 | return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); | 227 | return psi->write_buf(type, reason, id, part, psinfo->buf, hsize, |
228 | size, psi); | ||
227 | } | 229 | } |
228 | 230 | ||
229 | /* | 231 | /* |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 43abee2c6cb9..a6119f9469e2 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -195,7 +195,8 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) | |||
195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | 195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, |
196 | enum kmsg_dump_reason reason, | 196 | enum kmsg_dump_reason reason, |
197 | u64 *id, unsigned int part, | 197 | u64 *id, unsigned int part, |
198 | const char *buf, size_t size, | 198 | const char *buf, |
199 | size_t hsize, size_t size, | ||
199 | struct pstore_info *psi) | 200 | struct pstore_info *psi) |
200 | { | 201 | { |
201 | struct ramoops_context *cxt = psi->data; | 202 | struct ramoops_context *cxt = psi->data; |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a7126d28f4cf..2f47ade1b567 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -173,11 +173,12 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma, | |||
173 | #endif | 173 | #endif |
174 | 174 | ||
175 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT | 175 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT |
176 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); | 176 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
177 | pgtable_t pgtable); | ||
177 | #endif | 178 | #endif |
178 | 179 | ||
179 | #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW | 180 | #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW |
180 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); | 181 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); |
181 | #endif | 182 | #endif |
182 | 183 | ||
183 | #ifndef __HAVE_ARCH_PMDP_INVALIDATE | 184 | #ifndef __HAVE_ARCH_PMDP_INVALIDATE |
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 26ee56c80dc7..b60de92e2edc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
@@ -60,9 +60,9 @@ extern pmd_t *page_check_address_pmd(struct page *page, | |||
60 | #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) | 60 | #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) |
61 | 61 | ||
62 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 62 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
63 | #define HPAGE_PMD_SHIFT HPAGE_SHIFT | 63 | #define HPAGE_PMD_SHIFT PMD_SHIFT |
64 | #define HPAGE_PMD_MASK HPAGE_MASK | 64 | #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) |
65 | #define HPAGE_PMD_SIZE HPAGE_SIZE | 65 | #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) |
66 | 66 | ||
67 | extern bool is_vma_temporary_stack(struct vm_area_struct *vma); | 67 | extern bool is_vma_temporary_stack(struct vm_area_struct *vma); |
68 | 68 | ||
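
Deriving the HPAGE_PMD_* constants from PMD_SHIFT rather than HPAGE_SHIFT matters on architectures where the hugetlbfs page size differs from the PMD-mapped size: transparent huge pages always live at the PMD level. Worked through for an assumed PMD_SHIFT of 21 and PAGE_SHIFT of 12 (the x86-64 values):

    HPAGE_PMD_SHIFT = 21
    HPAGE_PMD_SIZE  = 1UL << 21 = 0x200000 (2 MiB)
    HPAGE_PMD_MASK  = ~(0x200000 - 1) = ~0x1fffff
    HPAGE_PMD_NR    = 1 << (21 - 12) = 512 base pages per huge page
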
diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 75d01760c911..4aa80ba830a2 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h | |||
@@ -35,6 +35,10 @@ enum pstore_type_id { | |||
35 | PSTORE_TYPE_MCE = 1, | 35 | PSTORE_TYPE_MCE = 1, |
36 | PSTORE_TYPE_CONSOLE = 2, | 36 | PSTORE_TYPE_CONSOLE = 2, |
37 | PSTORE_TYPE_FTRACE = 3, | 37 | PSTORE_TYPE_FTRACE = 3, |
38 | /* PPC64 partition types */ | ||
39 | PSTORE_TYPE_PPC_RTAS = 4, | ||
40 | PSTORE_TYPE_PPC_OF = 5, | ||
41 | PSTORE_TYPE_PPC_COMMON = 6, | ||
38 | PSTORE_TYPE_UNKNOWN = 255 | 42 | PSTORE_TYPE_UNKNOWN = 255 |
39 | }; | 43 | }; |
40 | 44 | ||
@@ -54,12 +58,12 @@ struct pstore_info { | |||
54 | struct pstore_info *psi); | 58 | struct pstore_info *psi); |
55 | int (*write)(enum pstore_type_id type, | 59 | int (*write)(enum pstore_type_id type, |
56 | enum kmsg_dump_reason reason, u64 *id, | 60 | enum kmsg_dump_reason reason, u64 *id, |
57 | unsigned int part, int count, size_t size, | 61 | unsigned int part, int count, size_t hsize, |
58 | struct pstore_info *psi); | 62 | size_t size, struct pstore_info *psi); |
59 | int (*write_buf)(enum pstore_type_id type, | 63 | int (*write_buf)(enum pstore_type_id type, |
60 | enum kmsg_dump_reason reason, u64 *id, | 64 | enum kmsg_dump_reason reason, u64 *id, |
61 | unsigned int part, const char *buf, size_t size, | 65 | unsigned int part, const char *buf, size_t hsize, |
62 | struct pstore_info *psi); | 66 | size_t size, struct pstore_info *psi); |
63 | int (*erase)(enum pstore_type_id type, u64 id, | 67 | int (*erase)(enum pstore_type_id type, u64 id, |
64 | int count, struct timespec time, | 68 | int count, struct timespec time, |
65 | struct pstore_info *psi); | 69 | struct pstore_info *psi); |
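
Both pstore callbacks now take an extra hsize argument: the number of bytes at the front of the record that are a generated header (as in pstore_dump() above, where the total length passed is hsize + len) rather than captured log text, presumably so a backend can treat the two parts differently. A hypothetical write_buf illustrating the split; the example_store_* helpers are invented:

    static int example_write_buf(enum pstore_type_id type,
                                 enum kmsg_dump_reason reason, u64 *id,
                                 unsigned int part, const char *buf,
                                 size_t hsize, size_t size,
                                 struct pstore_info *psi)
    {
            /* buf[0 .. hsize) is the generated header,
             * buf[hsize .. size) is the captured payload. */
            example_store_header(buf, hsize);                 /* hypothetical */
            example_store_payload(buf + hsize, size - hsize); /* hypothetical */
            return 0;
    }
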
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 284ff2436829..87ee4f4cff25 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h | |||
@@ -22,6 +22,7 @@ | |||
22 | /* Extensions */ | 22 | /* Extensions */ |
23 | 23 | ||
24 | #define VFIO_TYPE1_IOMMU 1 | 24 | #define VFIO_TYPE1_IOMMU 1 |
25 | #define VFIO_SPAPR_TCE_IOMMU 2 | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * The IOCTL interface is designed for extensibility by embedding the | 28 | * The IOCTL interface is designed for extensibility by embedding the |
@@ -375,4 +376,37 @@ struct vfio_iommu_type1_dma_unmap { | |||
375 | 376 | ||
376 | #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) | 377 | #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) |
377 | 378 | ||
379 | /* | ||
380 | * IOCTLs to enable/disable IOMMU container usage. | ||
381 | * No parameters are supported. | ||
382 | */ | ||
383 | #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) | ||
384 | #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) | ||
385 | |||
386 | /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ | ||
387 | |||
388 | /* | ||
389 | * The SPAPR TCE info struct provides the information about the PCI bus | ||
390 | * address ranges available for DMA; these values are programmed into | ||
391 | * the hardware, so the guest has to know that information. | ||
392 | * | ||
393 | * The DMA 32 bit window start is an absolute PCI bus address. | ||
394 | * The IOVA addresses passed via map/unmap ioctls are absolute PCI bus | ||
395 | * addresses too, so the window works as a filter rather than an offset | ||
396 | * for IOVA addresses. | ||
397 | * | ||
398 | * A flag will need to be added if other page sizes are supported, | ||
399 | * so as defined here, it is always 4k. | ||
400 | */ | ||
401 | struct vfio_iommu_spapr_tce_info { | ||
402 | __u32 argsz; | ||
403 | __u32 flags; /* reserved for future use */ | ||
404 | __u32 dma32_window_start; /* 32 bit window start (bytes) */ | ||
405 | __u32 dma32_window_size; /* 32 bit window size (bytes) */ | ||
406 | }; | ||
407 | |||
408 | #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) | ||
409 | |||
410 | /* ***************************************************************** */ | ||
411 | |||
378 | #endif /* _UAPIVFIO_H */ | 412 | #endif /* _UAPIVFIO_H */ |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d8b3b850150c..243e710c6039 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -729,8 +729,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, | |||
729 | pmd_t entry; | 729 | pmd_t entry; |
730 | entry = mk_huge_pmd(page, vma); | 730 | entry = mk_huge_pmd(page, vma); |
731 | page_add_new_anon_rmap(page, vma, haddr); | 731 | page_add_new_anon_rmap(page, vma, haddr); |
732 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | ||
732 | set_pmd_at(mm, haddr, pmd, entry); | 733 | set_pmd_at(mm, haddr, pmd, entry); |
733 | pgtable_trans_huge_deposit(mm, pgtable); | ||
734 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); | 734 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); |
735 | mm->nr_ptes++; | 735 | mm->nr_ptes++; |
736 | spin_unlock(&mm->page_table_lock); | 736 | spin_unlock(&mm->page_table_lock); |
@@ -771,8 +771,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, | |||
771 | entry = mk_pmd(zero_page, vma->vm_page_prot); | 771 | entry = mk_pmd(zero_page, vma->vm_page_prot); |
772 | entry = pmd_wrprotect(entry); | 772 | entry = pmd_wrprotect(entry); |
773 | entry = pmd_mkhuge(entry); | 773 | entry = pmd_mkhuge(entry); |
774 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | ||
774 | set_pmd_at(mm, haddr, pmd, entry); | 775 | set_pmd_at(mm, haddr, pmd, entry); |
775 | pgtable_trans_huge_deposit(mm, pgtable); | ||
776 | mm->nr_ptes++; | 776 | mm->nr_ptes++; |
777 | return true; | 777 | return true; |
778 | } | 778 | } |
@@ -916,8 +916,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
916 | 916 | ||
917 | pmdp_set_wrprotect(src_mm, addr, src_pmd); | 917 | pmdp_set_wrprotect(src_mm, addr, src_pmd); |
918 | pmd = pmd_mkold(pmd_wrprotect(pmd)); | 918 | pmd = pmd_mkold(pmd_wrprotect(pmd)); |
919 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); | ||
919 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); | 920 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); |
920 | pgtable_trans_huge_deposit(dst_mm, pgtable); | ||
921 | dst_mm->nr_ptes++; | 921 | dst_mm->nr_ptes++; |
922 | 922 | ||
923 | ret = 0; | 923 | ret = 0; |
@@ -987,7 +987,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, | |||
987 | pmdp_clear_flush(vma, haddr, pmd); | 987 | pmdp_clear_flush(vma, haddr, pmd); |
988 | /* leave pmd empty until pte is filled */ | 988 | /* leave pmd empty until pte is filled */ |
989 | 989 | ||
990 | pgtable = pgtable_trans_huge_withdraw(mm); | 990 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
991 | pmd_populate(mm, &_pmd, pgtable); | 991 | pmd_populate(mm, &_pmd, pgtable); |
992 | 992 | ||
993 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { | 993 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
@@ -1085,7 +1085,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
1085 | pmdp_clear_flush(vma, haddr, pmd); | 1085 | pmdp_clear_flush(vma, haddr, pmd); |
1086 | /* leave pmd empty until pte is filled */ | 1086 | /* leave pmd empty until pte is filled */ |
1087 | 1087 | ||
1088 | pgtable = pgtable_trans_huge_withdraw(mm); | 1088 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
1089 | pmd_populate(mm, &_pmd, pgtable); | 1089 | pmd_populate(mm, &_pmd, pgtable); |
1090 | 1090 | ||
1091 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { | 1091 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
@@ -1265,7 +1265,9 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
1265 | * young bit, instead of the current set_pmd_at. | 1265 | * young bit, instead of the current set_pmd_at. |
1266 | */ | 1266 | */ |
1267 | _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); | 1267 | _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); |
1268 | set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd); | 1268 | if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, |
1269 | pmd, _pmd, 1)) | ||
1270 | update_mmu_cache_pmd(vma, addr, pmd); | ||
1269 | } | 1271 | } |
1270 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | 1272 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { |
1271 | if (page->mapping && trylock_page(page)) { | 1273 | if (page->mapping && trylock_page(page)) { |
@@ -1358,9 +1360,15 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1358 | struct page *page; | 1360 | struct page *page; |
1359 | pgtable_t pgtable; | 1361 | pgtable_t pgtable; |
1360 | pmd_t orig_pmd; | 1362 | pmd_t orig_pmd; |
1361 | pgtable = pgtable_trans_huge_withdraw(tlb->mm); | 1363 | /* |
1364 | * For architectures like ppc64, we look at the deposited pgtable | ||
1365 | * when calling pmdp_get_and_clear, so do the | ||
1366 | * pgtable_trans_huge_withdraw only after finishing the pmdp-related | ||
1367 | * operations. | ||
1368 | */ | ||
1362 | orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); | 1369 | orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); |
1363 | tlb_remove_pmd_tlb_entry(tlb, pmd, addr); | 1370 | tlb_remove_pmd_tlb_entry(tlb, pmd, addr); |
1371 | pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); | ||
1364 | if (is_huge_zero_pmd(orig_pmd)) { | 1372 | if (is_huge_zero_pmd(orig_pmd)) { |
1365 | tlb->mm->nr_ptes--; | 1373 | tlb->mm->nr_ptes--; |
1366 | spin_unlock(&tlb->mm->page_table_lock); | 1374 | spin_unlock(&tlb->mm->page_table_lock); |
@@ -1691,7 +1699,7 @@ static int __split_huge_page_map(struct page *page, | |||
1691 | pmd = page_check_address_pmd(page, mm, address, | 1699 | pmd = page_check_address_pmd(page, mm, address, |
1692 | PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); | 1700 | PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); |
1693 | if (pmd) { | 1701 | if (pmd) { |
1694 | pgtable = pgtable_trans_huge_withdraw(mm); | 1702 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
1695 | pmd_populate(mm, &_pmd, pgtable); | 1703 | pmd_populate(mm, &_pmd, pgtable); |
1696 | 1704 | ||
1697 | haddr = address; | 1705 | haddr = address; |
@@ -2359,9 +2367,9 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2359 | spin_lock(&mm->page_table_lock); | 2367 | spin_lock(&mm->page_table_lock); |
2360 | BUG_ON(!pmd_none(*pmd)); | 2368 | BUG_ON(!pmd_none(*pmd)); |
2361 | page_add_new_anon_rmap(new_page, vma, address); | 2369 | page_add_new_anon_rmap(new_page, vma, address); |
2370 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | ||
2362 | set_pmd_at(mm, address, pmd, _pmd); | 2371 | set_pmd_at(mm, address, pmd, _pmd); |
2363 | update_mmu_cache_pmd(vma, address, pmd); | 2372 | update_mmu_cache_pmd(vma, address, pmd); |
2364 | pgtable_trans_huge_deposit(mm, pgtable); | ||
2365 | spin_unlock(&mm->page_table_lock); | 2373 | spin_unlock(&mm->page_table_lock); |
2366 | 2374 | ||
2367 | *hpage = NULL; | 2375 | *hpage = NULL; |
@@ -2667,7 +2675,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, | |||
2667 | pmdp_clear_flush(vma, haddr, pmd); | 2675 | pmdp_clear_flush(vma, haddr, pmd); |
2668 | /* leave pmd empty until pte is filled */ | 2676 | /* leave pmd empty until pte is filled */ |
2669 | 2677 | ||
2670 | pgtable = pgtable_trans_huge_withdraw(mm); | 2678 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
2671 | pmd_populate(mm, &_pmd, pgtable); | 2679 | pmd_populate(mm, &_pmd, pgtable); |
2672 | 2680 | ||
2673 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { | 2681 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 0c8323fe6c8f..e1a6e4fab016 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -124,7 +124,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, | |||
124 | 124 | ||
125 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT | 125 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT |
126 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 126 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
127 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | 127 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
128 | pgtable_t pgtable) | ||
128 | { | 129 | { |
129 | assert_spin_locked(&mm->page_table_lock); | 130 | assert_spin_locked(&mm->page_table_lock); |
130 | 131 | ||
@@ -141,7 +142,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | |||
141 | #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW | 142 | #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW |
142 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 143 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
143 | /* no "address" argument so destroys page coloring of some arch */ | 144 | /* no "address" argument so destroys page coloring of some arch */ |
144 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | 145 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) |
145 | { | 146 | { |
146 | pgtable_t pgtable; | 147 | pgtable_t pgtable; |
147 | 148 | ||
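
pgtable_trans_huge_deposit()/pgtable_trans_huge_withdraw() now receive the pmd entry as well, so an architecture override can key the preallocated page table off the specific pmd instead of a single per-mm list; the huge_memory.c hunks above reorder the callers accordingly, and the zap_huge_pmd() comment notes that ppc64 consults the deposited table from its pmdp operations. A toy userspace model of the ppc64-style keyed contract, with the kernel types replaced by stand-ins:

    #include <assert.h>
    #include <stddef.h>

    /* Stand-ins for mm_struct / pmd slot / pgtable_t. */
    struct toy_mm { void *deposited[16]; };

    /* Deposit: stash a preallocated page table, keyed by the pmd slot. */
    static void toy_deposit(struct toy_mm *mm, size_t pmd_slot, void *pgtable)
    {
            assert(mm->deposited[pmd_slot] == NULL);
            mm->deposited[pmd_slot] = pgtable;
    }

    /* Withdraw: take it back when the huge pmd is split or zapped. */
    static void *toy_withdraw(struct toy_mm *mm, size_t pmd_slot)
    {
            void *pgtable = mm->deposited[pmd_slot];

            mm->deposited[pmd_slot] = NULL;
            return pgtable;
    }

    int main(void)
    {
            struct toy_mm mm = { { NULL } };
            int table;      /* pretend page table */

            toy_deposit(&mm, 3, &table);
            assert(toy_withdraw(&mm, 3) == &table);
            return 0;
    }
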