aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/sparc/adi.rst (renamed from Documentation/sparc/adi.txt)188
-rw-r--r--Documentation/sparc/console.rst (renamed from Documentation/sparc/console.txt)4
-rw-r--r--Documentation/sparc/index.rst13
-rw-r--r--Documentation/sparc/oradax/oracle-dax.rst (renamed from Documentation/sparc/oradax/oracle-dax.txt)58
-rw-r--r--arch/sparc/kernel/cpumap.c3
-rw-r--r--arch/sparc/kernel/uprobes.c1
-rw-r--r--arch/sparc/mm/init_64.c42
-rw-r--r--arch/sparc/mm/iommu.c142
-rw-r--r--arch/sparc/vdso/Makefile2
-rw-r--r--drivers/sbus/char/oradax.c2
10 files changed, 213 insertions, 242 deletions
diff --git a/Documentation/sparc/adi.txt b/Documentation/sparc/adi.rst
index e1aed155fb89..857ad30f9569 100644
--- a/Documentation/sparc/adi.txt
+++ b/Documentation/sparc/adi.rst
@@ -1,3 +1,4 @@
1================================
1Application Data Integrity (ADI) 2Application Data Integrity (ADI)
2================================ 3================================
3 4
@@ -44,12 +45,15 @@ provided by the hypervisor to the kernel. Kernel returns the value of
44ADI block size to userspace using auxiliary vector along with other ADI 45ADI block size to userspace using auxiliary vector along with other ADI
45info. Following auxiliary vectors are provided by the kernel: 46info. Following auxiliary vectors are provided by the kernel:
46 47
48 ============ ===========================================
47 AT_ADI_BLKSZ ADI block size. This is the granularity and 49 AT_ADI_BLKSZ ADI block size. This is the granularity and
48 alignment, in bytes, of ADI versioning. 50 alignment, in bytes, of ADI versioning.
49 AT_ADI_NBITS Number of ADI version bits in the VA 51 AT_ADI_NBITS Number of ADI version bits in the VA
52 ============ ===========================================
50 53
51 54
52IMPORTANT NOTES: 55IMPORTANT NOTES
56===============
53 57
54- Version tag values of 0x0 and 0xf are reserved. These values match any 58- Version tag values of 0x0 and 0xf are reserved. These values match any
55 tag in virtual address and never generate a mismatch exception. 59 tag in virtual address and never generate a mismatch exception.
@@ -86,11 +90,12 @@ IMPORTANT NOTES:
86 90
87 91
88ADI related traps 92ADI related traps
89----------------- 93=================
90 94
91With ADI enabled, following new traps may occur: 95With ADI enabled, following new traps may occur:
92 96
93Disrupting memory corruption 97Disrupting memory corruption
98----------------------------
94 99
95 When a store accesses a memory location that has TTE.mcd=1, 100 When a store accesses a memory location that has TTE.mcd=1,
96 the task is running with ADI enabled (PSTATE.mcde=1), and the ADI 101 the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
@@ -100,7 +105,7 @@ Disrupting memory corruption
100 first. Hypervisor creates a sun4v error report and sends a 105 first. Hypervisor creates a sun4v error report and sends a
101 resumable error (TT=0x7e) trap to the kernel. The kernel sends 106 resumable error (TT=0x7e) trap to the kernel. The kernel sends
102 a SIGSEGV to the task that resulted in this trap with the following 107 a SIGSEGV to the task that resulted in this trap with the following
103 info: 108 info::
104 109
105 siginfo.si_signo = SIGSEGV; 110 siginfo.si_signo = SIGSEGV;
106 siginfo.errno = 0; 111 siginfo.errno = 0;
@@ -110,6 +115,7 @@ Disrupting memory corruption
110 115
111 116
112Precise memory corruption 117Precise memory corruption
118-------------------------
113 119
114 When a store accesses a memory location that has TTE.mcd=1, 120 When a store accesses a memory location that has TTE.mcd=1,
115 the task is running with ADI enabled (PSTATE.mcde=1), and the ADI 121 the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
@@ -118,7 +124,7 @@ Precise memory corruption
118 MCD precise exception is enabled (MCDPERR=1), a precise 124 MCD precise exception is enabled (MCDPERR=1), a precise
119 exception is sent to the kernel with TT=0x1a. The kernel sends 125 exception is sent to the kernel with TT=0x1a. The kernel sends
120 a SIGSEGV to the task that resulted in this trap with the following 126 a SIGSEGV to the task that resulted in this trap with the following
121 info: 127 info::
122 128
123 siginfo.si_signo = SIGSEGV; 129 siginfo.si_signo = SIGSEGV;
124 siginfo.errno = 0; 130 siginfo.errno = 0;
@@ -126,17 +132,19 @@ Precise memory corruption
126 siginfo.si_addr = addr; /* address that caused trap */ 132 siginfo.si_addr = addr; /* address that caused trap */
127 siginfo.si_trapno = 0; 133 siginfo.si_trapno = 0;
128 134
129 NOTE: ADI tag mismatch on a load always results in precise trap. 135 NOTE:
136 ADI tag mismatch on a load always results in precise trap.
130 137
131 138
132MCD disabled 139MCD disabled
140------------
133 141
134 When a task has not enabled ADI and attempts to set ADI version 142 When a task has not enabled ADI and attempts to set ADI version
135 on a memory address, processor sends an MCD disabled trap. This 143 on a memory address, processor sends an MCD disabled trap. This
136 trap is handled by hypervisor first and the hypervisor vectors this 144 trap is handled by hypervisor first and the hypervisor vectors this
137 trap through to the kernel as Data Access Exception trap with 145 trap through to the kernel as Data Access Exception trap with
138 fault type set to 0xa (invalid ASI). When this occurs, the kernel 146 fault type set to 0xa (invalid ASI). When this occurs, the kernel
139 sends the task SIGSEGV signal with following info: 147 sends the task SIGSEGV signal with following info::
140 148
141 siginfo.si_signo = SIGSEGV; 149 siginfo.si_signo = SIGSEGV;
142 siginfo.errno = 0; 150 siginfo.errno = 0;
@@ -149,35 +157,35 @@ Sample program to use ADI
149------------------------- 157-------------------------
150 158
151Following sample program is meant to illustrate how to use the ADI 159Following sample program is meant to illustrate how to use the ADI
152functionality. 160functionality::
153 161
154#include <unistd.h> 162 #include <unistd.h>
155#include <stdio.h> 163 #include <stdio.h>
156#include <stdlib.h> 164 #include <stdlib.h>
157#include <elf.h> 165 #include <elf.h>
158#include <sys/ipc.h> 166 #include <sys/ipc.h>
159#include <sys/shm.h> 167 #include <sys/shm.h>
160#include <sys/mman.h> 168 #include <sys/mman.h>
161#include <asm/asi.h> 169 #include <asm/asi.h>
162 170
163#ifndef AT_ADI_BLKSZ 171 #ifndef AT_ADI_BLKSZ
164#define AT_ADI_BLKSZ 48 172 #define AT_ADI_BLKSZ 48
165#endif 173 #endif
166#ifndef AT_ADI_NBITS 174 #ifndef AT_ADI_NBITS
167#define AT_ADI_NBITS 49 175 #define AT_ADI_NBITS 49
168#endif 176 #endif
169 177
170#ifndef PROT_ADI 178 #ifndef PROT_ADI
171#define PROT_ADI 0x10 179 #define PROT_ADI 0x10
172#endif 180 #endif
173 181
174#define BUFFER_SIZE 32*1024*1024UL 182 #define BUFFER_SIZE 32*1024*1024UL
175 183
176main(int argc, char* argv[], char* envp[]) 184 main(int argc, char* argv[], char* envp[])
177{ 185 {
178 unsigned long i, mcde, adi_blksz, adi_nbits; 186 unsigned long i, mcde, adi_blksz, adi_nbits;
179 char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr; 187 char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr;
180 int shmid, version; 188 int shmid, version;
181 Elf64_auxv_t *auxv; 189 Elf64_auxv_t *auxv;
182 190
183 adi_blksz = 0; 191 adi_blksz = 0;
@@ -202,77 +210,77 @@ main(int argc, char* argv[], char* envp[])
202 printf("\tBlock size = %ld\n", adi_blksz); 210 printf("\tBlock size = %ld\n", adi_blksz);
203 printf("\tNumber of bits = %ld\n", adi_nbits); 211 printf("\tNumber of bits = %ld\n", adi_nbits);
204 212
205 if ((shmid = shmget(2, BUFFER_SIZE, 213 if ((shmid = shmget(2, BUFFER_SIZE,
206 IPC_CREAT | SHM_R | SHM_W)) < 0) { 214 IPC_CREAT | SHM_R | SHM_W)) < 0) {
207 perror("shmget failed"); 215 perror("shmget failed");
208 exit(1); 216 exit(1);
209 } 217 }
210 218
211 shmaddr = shmat(shmid, NULL, 0); 219 shmaddr = shmat(shmid, NULL, 0);
212 if (shmaddr == (char *)-1) { 220 if (shmaddr == (char *)-1) {
213 perror("shm attach failed"); 221 perror("shm attach failed");
214 shmctl(shmid, IPC_RMID, NULL); 222 shmctl(shmid, IPC_RMID, NULL);
215 exit(1); 223 exit(1);
216 } 224 }
217 225
218 if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) { 226 if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) {
219 perror("mprotect failed"); 227 perror("mprotect failed");
220 goto err_out; 228 goto err_out;
221 } 229 }
222 230
223 /* Set the ADI version tag on the shm segment 231 /* Set the ADI version tag on the shm segment
224 */ 232 */
225 version = 10; 233 version = 10;
226 tmp_addr = shmaddr; 234 tmp_addr = shmaddr;
227 end = shmaddr + BUFFER_SIZE; 235 end = shmaddr + BUFFER_SIZE;
228 while (tmp_addr < end) { 236 while (tmp_addr < end) {
229 asm volatile( 237 asm volatile(
230 "stxa %1, [%0]0x90\n\t" 238 "stxa %1, [%0]0x90\n\t"
231 : 239 :
232 : "r" (tmp_addr), "r" (version)); 240 : "r" (tmp_addr), "r" (version));
233 tmp_addr += adi_blksz; 241 tmp_addr += adi_blksz;
234 } 242 }
235 asm volatile("membar #Sync\n\t"); 243 asm volatile("membar #Sync\n\t");
236 244
237 /* Create a versioned address from the normal address by placing 245 /* Create a versioned address from the normal address by placing
238 * version tag in the upper adi_nbits bits 246 * version tag in the upper adi_nbits bits
239 */ 247 */
240 tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits); 248 tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits);
241 tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits); 249 tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits);
242 veraddr = (void *) (((unsigned long)version << (64-adi_nbits)) 250 veraddr = (void *) (((unsigned long)version << (64-adi_nbits))
243 | (unsigned long)tmp_addr); 251 | (unsigned long)tmp_addr);
244 252
245 printf("Starting the writes:\n"); 253 printf("Starting the writes:\n");
246 for (i = 0; i < BUFFER_SIZE; i++) { 254 for (i = 0; i < BUFFER_SIZE; i++) {
247 veraddr[i] = (char)(i); 255 veraddr[i] = (char)(i);
248 if (!(i % (1024 * 1024))) 256 if (!(i % (1024 * 1024)))
249 printf("."); 257 printf(".");
250 } 258 }
251 printf("\n"); 259 printf("\n");
252 260
253 printf("Verifying data..."); 261 printf("Verifying data...");
254 fflush(stdout); 262 fflush(stdout);
255 for (i = 0; i < BUFFER_SIZE; i++) 263 for (i = 0; i < BUFFER_SIZE; i++)
256 if (veraddr[i] != (char)i) 264 if (veraddr[i] != (char)i)
257 printf("\nIndex %lu mismatched\n", i); 265 printf("\nIndex %lu mismatched\n", i);
258 printf("Done.\n"); 266 printf("Done.\n");
259 267
260 /* Disable ADI and clean up 268 /* Disable ADI and clean up
261 */ 269 */
262 if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) { 270 if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) {
263 perror("mprotect failed"); 271 perror("mprotect failed");
264 goto err_out; 272 goto err_out;
265 } 273 }
266 274
267 if (shmdt((const void *)shmaddr) != 0) 275 if (shmdt((const void *)shmaddr) != 0)
268 perror("Detach failure"); 276 perror("Detach failure");
269 shmctl(shmid, IPC_RMID, NULL); 277 shmctl(shmid, IPC_RMID, NULL);
270 278
271 exit(0); 279 exit(0);
272 280
273err_out: 281 err_out:
274 if (shmdt((const void *)shmaddr) != 0) 282 if (shmdt((const void *)shmaddr) != 0)
275 perror("Detach failure"); 283 perror("Detach failure");
276 shmctl(shmid, IPC_RMID, NULL); 284 shmctl(shmid, IPC_RMID, NULL);
277 exit(1); 285 exit(1);
278} 286 }
diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.rst
index 5aa735a44e02..73132db83ece 100644
--- a/Documentation/sparc/console.txt
+++ b/Documentation/sparc/console.rst
@@ -1,5 +1,5 @@
1Steps for sending 'break' on sunhv console: 1Steps for sending 'break' on sunhv console
2=========================================== 2==========================================
3 3
4On Baremetal: 4On Baremetal:
5 1. press Esc + 'B' 5 1. press Esc + 'B'
diff --git a/Documentation/sparc/index.rst b/Documentation/sparc/index.rst
new file mode 100644
index 000000000000..91f7d6643dd5
--- /dev/null
+++ b/Documentation/sparc/index.rst
@@ -0,0 +1,13 @@
1:orphan:
2
3==================
4Sparc Architecture
5==================
6
7.. toctree::
8 :maxdepth: 1
9
10 console
11 adi
12
13 oradax/oracle-dax
diff --git a/Documentation/sparc/oradax/oracle-dax.txt b/Documentation/sparc/oradax/oracle-dax.rst
index 9d53ac93286f..d1e14d572918 100644
--- a/Documentation/sparc/oradax/oracle-dax.txt
+++ b/Documentation/sparc/oradax/oracle-dax.rst
@@ -1,5 +1,6 @@
1=======================================
1Oracle Data Analytics Accelerator (DAX) 2Oracle Data Analytics Accelerator (DAX)
2--------------------------------------- 3=======================================
3 4
4DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8 5DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
5(DAX2) processor chips, and has direct access to the CPU's L3 caches 6(DAX2) processor chips, and has direct access to the CPU's L3 caches
@@ -17,6 +18,7 @@ code sufficient to write user or kernel applications that use DAX
17functionality. 18functionality.
18 19
19The user library is open source and available at: 20The user library is open source and available at:
21
20 https://oss.oracle.com/git/gitweb.cgi?p=libdax.git 22 https://oss.oracle.com/git/gitweb.cgi?p=libdax.git
21 23
22The Hypervisor interface to the coprocessor is described in detail in 24The Hypervisor interface to the coprocessor is described in detail in
@@ -26,7 +28,7 @@ Specification" version 3.0.20+15, dated 2017-09-25.
26 28
27 29
28High Level Overview 30High Level Overview
29------------------- 31===================
30 32
31A coprocessor request is described by a Command Control Block 33A coprocessor request is described by a Command Control Block
32(CCB). The CCB contains an opcode and various parameters. The opcode 34(CCB). The CCB contains an opcode and various parameters. The opcode
@@ -52,7 +54,7 @@ thread.
52 54
53 55
54Addressing Memory 56Addressing Memory
55----------------- 57=================
56 58
57The kernel does not have access to physical memory in the Sun4v 59The kernel does not have access to physical memory in the Sun4v
58architecture, as there is an additional level of memory virtualization 60architecture, as there is an additional level of memory virtualization
@@ -77,7 +79,7 @@ the request.
77 79
78 80
79The Driver API 81The Driver API
80-------------- 82==============
81 83
82An application makes requests to the driver via the write() system 84An application makes requests to the driver via the write() system
83call, and gets results (if any) via read(). The completion areas are 85call, and gets results (if any) via read(). The completion areas are
@@ -108,6 +110,7 @@ equal to the number of bytes given in the call. Otherwise -1 is
108returned and errno is set. 110returned and errno is set.
109 111
110CCB_DEQUEUE 112CCB_DEQUEUE
113-----------
111 114
112Tells the driver to clean up resources associated with past 115Tells the driver to clean up resources associated with past
113requests. Since no interrupt is generated upon the completion of a 116requests. Since no interrupt is generated upon the completion of a
@@ -116,12 +119,14 @@ further status information is returned, so the user should not
116subsequently call read(). 119subsequently call read().
117 120
118CCB_KILL 121CCB_KILL
122--------
119 123
120Kills a CCB during execution. The CCB is guaranteed to not continue 124Kills a CCB during execution. The CCB is guaranteed to not continue
121executing once this call returns successfully. On success, read() must 125executing once this call returns successfully. On success, read() must
122be called to retrieve the result of the action. 126be called to retrieve the result of the action.
123 127
124CCB_INFO 128CCB_INFO
129--------
125 130
126Retrieves information about a currently executing CCB. Note that some 131Retrieves information about a currently executing CCB. Note that some
127Hypervisors might return 'notfound' when the CCB is in 'inprogress' 132Hypervisors might return 'notfound' when the CCB is in 'inprogress'
@@ -130,6 +135,7 @@ CCB_KILL must be invoked on that CCB. Upon success, read() must be
130called to retrieve the details of the action. 135called to retrieve the details of the action.
131 136
132Submission of an array of CCBs for execution 137Submission of an array of CCBs for execution
138---------------------------------------------
133 139
134A write() whose length is a multiple of the CCB size is treated as a 140A write() whose length is a multiple of the CCB size is treated as a
135submit operation. The file offset is treated as the index of the 141submit operation. The file offset is treated as the index of the
@@ -146,6 +152,7 @@ status will reflect the error caused by the first CCB that was not
146accepted, and status_data will provide additional data in some cases. 152accepted, and status_data will provide additional data in some cases.
147 153
148MMAP 154MMAP
155----
149 156
150The mmap() function provides access to the completion area allocated 157The mmap() function provides access to the completion area allocated
151in the driver. Note that the completion area is not writeable by the 158in the driver. Note that the completion area is not writeable by the
@@ -153,7 +160,7 @@ user process, and the mmap call must not specify PROT_WRITE.
153 160
154 161
155Completion of a Request 162Completion of a Request
156----------------------- 163=======================
157 164
158The first byte in each completion area is the command status which is 165The first byte in each completion area is the command status which is
159updated by the coprocessor hardware. Software may take advantage of 166updated by the coprocessor hardware. Software may take advantage of
@@ -172,7 +179,7 @@ and resumption of execution may be just a few nanoseconds.
172 179
173 180
174Application Life Cycle of a DAX Submission 181Application Life Cycle of a DAX Submission
175------------------------------------------ 182==========================================
176 183
177 - open dax device 184 - open dax device
178 - call mmap() to get the completion area address 185 - call mmap() to get the completion area address
@@ -187,7 +194,7 @@ Application Life Cycle of a DAX Submission
187 194
188 195
189Memory Constraints 196Memory Constraints
190------------------ 197==================
191 198
192The DAX hardware operates only on physical addresses. Therefore, it is 199The DAX hardware operates only on physical addresses. Therefore, it is
193not aware of virtual memory mappings and the discontiguities that may 200not aware of virtual memory mappings and the discontiguities that may
@@ -226,7 +233,7 @@ CCB Structure
226------------- 233-------------
227A CCB is an array of 8 64-bit words. Several of these words provide 234A CCB is an array of 8 64-bit words. Several of these words provide
228command opcodes, parameters, flags, etc., and the rest are addresses 235command opcodes, parameters, flags, etc., and the rest are addresses
229for the completion area, output buffer, and various inputs: 236for the completion area, output buffer, and various inputs::
230 237
231 struct ccb { 238 struct ccb {
232 u64 control; 239 u64 control;
@@ -252,7 +259,7 @@ The first word (control) is examined by the driver for the following:
252 259
253 260
254Example Code 261Example Code
255------------ 262============
256 263
257The DAX is accessible to both user and kernel code. The kernel code 264The DAX is accessible to both user and kernel code. The kernel code
258can make hypercalls directly while the user code must use wrappers 265can make hypercalls directly while the user code must use wrappers
@@ -265,7 +272,7 @@ arch/sparc/include/uapi/asm/oradax.h must be included.
265 272
266First, the proper device must be opened. For M7 it will be 273First, the proper device must be opened. For M7 it will be
267/dev/oradax1 and for M8 it will be /dev/oradax2. The simplest 274/dev/oradax1 and for M8 it will be /dev/oradax2. The simplest
268procedure is to attempt to open both, as only one will succeed: 275procedure is to attempt to open both, as only one will succeed::
269 276
270 fd = open("/dev/oradax1", O_RDWR); 277 fd = open("/dev/oradax1", O_RDWR);
271 if (fd < 0) 278 if (fd < 0)
@@ -273,7 +280,7 @@ procedure is to attempt to open both, as only one will succeed:
273 if (fd < 0) 280 if (fd < 0)
274 /* No DAX found */ 281 /* No DAX found */
275 282
276Next, the completion area must be mapped: 283Next, the completion area must be mapped::
277 284
278 completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0); 285 completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0);
279 286
@@ -295,7 +302,7 @@ is the input bitmap inverted.
295 302
296For details of all the parameters and bits used in this CCB, please 303For details of all the parameters and bits used in this CCB, please
297refer to section 36.2.1.3 of the DAX Hypervisor API document, which 304refer to section 36.2.1.3 of the DAX Hypervisor API document, which
298describes the Scan command in detail. 305describes the Scan command in detail::
299 306
300 ccb->control = /* Table 36.1, CCB Header Format */ 307 ccb->control = /* Table 36.1, CCB Header Format */
301 (2L << 48) /* command = Scan Value */ 308 (2L << 48) /* command = Scan Value */
@@ -326,7 +333,7 @@ describes the Scan command in detail.
326 333
327The CCB submission is a write() or pwrite() system call to the 334The CCB submission is a write() or pwrite() system call to the
328driver. If the call fails, then a read() must be used to retrieve the 335driver. If the call fails, then a read() must be used to retrieve the
329status: 336status::
330 337
331 if (pwrite(fd, ccb, 64, 0) != 64) { 338 if (pwrite(fd, ccb, 64, 0) != 64) {
332 struct ccb_exec_result status; 339 struct ccb_exec_result status;
@@ -337,7 +344,7 @@ status:
337After a successful submission of the CCB, the completion area may be 344After a successful submission of the CCB, the completion area may be
338polled to determine when the DAX is finished. Detailed information on 345polled to determine when the DAX is finished. Detailed information on
339the contents of the completion area can be found in section 36.2.2 of 346the contents of the completion area can be found in section 36.2.2 of
340the DAX HV API document. 347the DAX HV API document::
341 348
342 while (1) { 349 while (1) {
343 /* Monitored Load */ 350 /* Monitored Load */
@@ -355,7 +362,7 @@ the DAX HV API document.
355A completion area status of 1 indicates successful completion of the 362A completion area status of 1 indicates successful completion of the
356CCB and validity of the output bitmap, which may be used immediately. 363CCB and validity of the output bitmap, which may be used immediately.
357All other non-zero values indicate error conditions which are 364All other non-zero values indicate error conditions which are
358described in section 36.2.2. 365described in section 36.2.2::
359 366
360 if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */ 367 if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
361 /* completion_area[0] contains the completion status */ 368 /* completion_area[0] contains the completion status */
@@ -364,7 +371,7 @@ described in section 36.2.2.
364 371
365After the completion area has been processed, the driver must be 372After the completion area has been processed, the driver must be
366notified that it can release any resources associated with the 373notified that it can release any resources associated with the
367request. This is done via the dequeue operation: 374request. This is done via the dequeue operation::
368 375
369 struct dax_command cmd; 376 struct dax_command cmd;
370 cmd.command = CCB_DEQUEUE; 377 cmd.command = CCB_DEQUEUE;
@@ -375,13 +382,14 @@ request. This is done via the dequeue operation:
375Finally, normal program cleanup should be done, i.e., unmapping 382Finally, normal program cleanup should be done, i.e., unmapping
376completion area, closing the dax device, freeing memory etc. 383completion area, closing the dax device, freeing memory etc.
377 384
378[Kernel example] 385Kernel example
386--------------
379 387
380The only difference in using the DAX in kernel code is the treatment 388The only difference in using the DAX in kernel code is the treatment
381of the completion area. Unlike user applications which mmap the 389of the completion area. Unlike user applications which mmap the
382completion area allocated by the driver, kernel code must allocate its 390completion area allocated by the driver, kernel code must allocate its
383own memory to use for the completion area, and this address and its 391own memory to use for the completion area, and this address and its
384type must be given in the CCB: 392type must be given in the CCB::
385 393
386 ccb->control |= /* Table 36.1, CCB Header Format */ 394 ccb->control |= /* Table 36.1, CCB Header Format */
387 (3L << 32); /* completion area address type = primary virtual */ 395 (3L << 32); /* completion area address type = primary virtual */
@@ -389,9 +397,11 @@ type must be given in the CCB:
389 ccb->completion = (unsigned long) completion_area; /* Completion area address */ 397 ccb->completion = (unsigned long) completion_area; /* Completion area address */
390 398
391The dax submit hypercall is made directly. The flags used in the 399The dax submit hypercall is made directly. The flags used in the
392ccb_submit call are documented in the DAX HV API in section 36.3.1. 400ccb_submit call are documented in the DAX HV API in section 36.3.1.
393 401
394#include <asm/hypervisor.h> 402::
403
404 #include <asm/hypervisor.h>
395 405
396 hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64, 406 hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64,
397 HV_CCB_QUERY_CMD | 407 HV_CCB_QUERY_CMD |
@@ -405,7 +415,7 @@ ccb_submit call are documented in the DAX HV API in section 36.3.1.
405 } 415 }
406 416
407After the submission, the completion area polling code is identical to 417After the submission, the completion area polling code is identical to
408that in user land: 418that in user land::
409 419
410 while (1) { 420 while (1) {
411 /* Monitored Load */ 421 /* Monitored Load */
@@ -427,3 +437,9 @@ that in user land:
427 437
428The output bitmap is ready for consumption immediately after the 438The output bitmap is ready for consumption immediately after the
429completion status indicates success. 439completion status indicates success.
440
441Excerpt from UltraSPARC Virtual Machine Specification
442=====================================================
443
444 .. include:: dax-hv-api.txt
445 :literal:
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index d1d52822603d..1cb62bfeaa1f 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -194,8 +194,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
194 194
195 n = enumerate_cpuinfo_nodes(tmp_level); 195 n = enumerate_cpuinfo_nodes(tmp_level);
196 196
197 new_tree = kzalloc(sizeof(struct cpuinfo_tree) + 197 new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC);
198 (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
199 if (!new_tree) 198 if (!new_tree)
200 return NULL; 199 return NULL;
201 200
diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c
index d852ae56ddc1..c44bf5b85de8 100644
--- a/arch/sparc/kernel/uprobes.c
+++ b/arch/sparc/kernel/uprobes.c
@@ -29,7 +29,6 @@
29#include <linux/kdebug.h> 29#include <linux/kdebug.h>
30 30
31#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
32#include <linux/uaccess.h>
33 32
34/* Compute the address of the breakpoint instruction and return it. 33/* Compute the address of the breakpoint instruction and return it.
35 * 34 *
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f2d70ff7a284..bc2aaa47bc8a 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2269,19 +2269,6 @@ static unsigned long last_valid_pfn;
2269static void sun4u_pgprot_init(void); 2269static void sun4u_pgprot_init(void);
2270static void sun4v_pgprot_init(void); 2270static void sun4v_pgprot_init(void);
2271 2271
2272static phys_addr_t __init available_memory(void)
2273{
2274 phys_addr_t available = 0ULL;
2275 phys_addr_t pa_start, pa_end;
2276 u64 i;
2277
2278 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
2279 &pa_end, NULL)
2280 available = available + (pa_end - pa_start);
2281
2282 return available;
2283}
2284
2285#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) 2272#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
2286#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) 2273#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
2287#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) 2274#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
@@ -2295,33 +2282,8 @@ static phys_addr_t __init available_memory(void)
2295 */ 2282 */
2296static void __init reduce_memory(phys_addr_t limit_ram) 2283static void __init reduce_memory(phys_addr_t limit_ram)
2297{ 2284{
2298 phys_addr_t avail_ram = available_memory(); 2285 limit_ram += memblock_reserved_size();
2299 phys_addr_t pa_start, pa_end; 2286 memblock_enforce_memory_limit(limit_ram);
2300 u64 i;
2301
2302 if (limit_ram >= avail_ram)
2303 return;
2304
2305 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
2306 &pa_end, NULL) {
2307 phys_addr_t region_size = pa_end - pa_start;
2308 phys_addr_t clip_start = pa_start;
2309
2310 avail_ram = avail_ram - region_size;
2311 /* Are we consuming too much? */
2312 if (avail_ram < limit_ram) {
2313 phys_addr_t give_back = limit_ram - avail_ram;
2314
2315 region_size = region_size - give_back;
2316 clip_start = clip_start + give_back;
2317 }
2318
2319 memblock_remove(clip_start, region_size);
2320
2321 if (avail_ram <= limit_ram)
2322 break;
2323 i = 0UL;
2324 }
2325} 2287}
2326 2288
2327void __init paging_init(void) 2289void __init paging_init(void)
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index e8d5d73ca40d..71ac353032b6 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -175,16 +175,37 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
175 } 175 }
176} 176}
177 177
178static u32 iommu_get_one(struct device *dev, struct page *page, int npages) 178static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
179 unsigned long offset, size_t len, bool per_page_flush)
179{ 180{
180 struct iommu_struct *iommu = dev->archdata.iommu; 181 struct iommu_struct *iommu = dev->archdata.iommu;
181 int ioptex; 182 phys_addr_t paddr = page_to_phys(page) + offset;
182 iopte_t *iopte, *iopte0; 183 unsigned long off = paddr & ~PAGE_MASK;
184 unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
185 unsigned long pfn = __phys_to_pfn(paddr);
183 unsigned int busa, busa0; 186 unsigned int busa, busa0;
184 int i; 187 iopte_t *iopte, *iopte0;
188 int ioptex, i;
189
190 /* XXX So what is maxphys for us and how do drivers know it? */
191 if (!len || len > 256 * 1024)
192 return DMA_MAPPING_ERROR;
193
194 /*
195 * We expect unmapped highmem pages to be not in the cache.
196 * XXX Is this a good assumption?
197 * XXX What if someone else unmaps it here and races us?
198 */
199 if (per_page_flush && !PageHighMem(page)) {
200 unsigned long vaddr, p;
201
202 vaddr = (unsigned long)page_address(page) + offset;
203 for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE)
204 flush_page_for_dma(p);
205 }
185 206
186 /* page color = pfn of page */ 207 /* page color = pfn of page */
187 ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page)); 208 ioptex = bit_map_string_get(&iommu->usemap, npages, pfn);
188 if (ioptex < 0) 209 if (ioptex < 0)
189 panic("iommu out"); 210 panic("iommu out");
190 busa0 = iommu->start + (ioptex << PAGE_SHIFT); 211 busa0 = iommu->start + (ioptex << PAGE_SHIFT);
@@ -193,29 +214,15 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
193 busa = busa0; 214 busa = busa0;
194 iopte = iopte0; 215 iopte = iopte0;
195 for (i = 0; i < npages; i++) { 216 for (i = 0; i < npages; i++) {
196 iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM); 217 iopte_val(*iopte) = MKIOPTE(pfn, IOPERM);
197 iommu_invalidate_page(iommu->regs, busa); 218 iommu_invalidate_page(iommu->regs, busa);
198 busa += PAGE_SIZE; 219 busa += PAGE_SIZE;
199 iopte++; 220 iopte++;
200 page++; 221 pfn++;
201 } 222 }
202 223
203 iommu_flush_iotlb(iopte0, npages); 224 iommu_flush_iotlb(iopte0, npages);
204 225 return busa0 + off;
205 return busa0;
206}
207
208static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
209 unsigned long offset, size_t len)
210{
211 void *vaddr = page_address(page) + offset;
212 unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
213 unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
214
215 /* XXX So what is maxphys for us and how do drivers know it? */
216 if (!len || len > 256 * 1024)
217 return DMA_MAPPING_ERROR;
218 return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
219} 226}
220 227
221static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, 228static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
@@ -223,81 +230,58 @@ static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
223 enum dma_data_direction dir, unsigned long attrs) 230 enum dma_data_direction dir, unsigned long attrs)
224{ 231{
225 flush_page_for_dma(0); 232 flush_page_for_dma(0);
226 return __sbus_iommu_map_page(dev, page, offset, len); 233 return __sbus_iommu_map_page(dev, page, offset, len, false);
227} 234}
228 235
229static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, 236static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
230 struct page *page, unsigned long offset, size_t len, 237 struct page *page, unsigned long offset, size_t len,
231 enum dma_data_direction dir, unsigned long attrs) 238 enum dma_data_direction dir, unsigned long attrs)
232{ 239{
233 void *vaddr = page_address(page) + offset; 240 return __sbus_iommu_map_page(dev, page, offset, len, true);
234 unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
235
236 while (p < (unsigned long)vaddr + len) {
237 flush_page_for_dma(p);
238 p += PAGE_SIZE;
239 }
240
241 return __sbus_iommu_map_page(dev, page, offset, len);
242} 241}
243 242
244static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, 243static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
245 int nents, enum dma_data_direction dir, unsigned long attrs) 244 int nents, enum dma_data_direction dir, unsigned long attrs,
245 bool per_page_flush)
246{ 246{
247 struct scatterlist *sg; 247 struct scatterlist *sg;
248 int i, n; 248 int j;
249
250 flush_page_for_dma(0);
251 249
252 for_each_sg(sgl, sg, nents, i) { 250 for_each_sg(sgl, sg, nents, j) {
253 n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; 251 sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg),
254 sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; 252 sg->offset, sg->length, per_page_flush);
253 if (sg->dma_address == DMA_MAPPING_ERROR)
254 return 0;
255 sg->dma_length = sg->length; 255 sg->dma_length = sg->length;
256 } 256 }
257 257
258 return nents; 258 return nents;
259} 259}
260 260
261static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, 261static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
262 int nents, enum dma_data_direction dir, unsigned long attrs) 262 int nents, enum dma_data_direction dir, unsigned long attrs)
263{ 263{
264 unsigned long page, oldpage = 0; 264 flush_page_for_dma(0);
265 struct scatterlist *sg; 265 return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false);
266 int i, j, n; 266}
267
268 for_each_sg(sgl, sg, nents, j) {
269 n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
270
271 /*
272 * We expect unmapped highmem pages to be not in the cache.
273 * XXX Is this a good assumption?
274 * XXX What if someone else unmaps it here and races us?
275 */
276 if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
277 for (i = 0; i < n; i++) {
278 if (page != oldpage) { /* Already flushed? */
279 flush_page_for_dma(page);
280 oldpage = page;
281 }
282 page += PAGE_SIZE;
283 }
284 }
285
286 sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
287 sg->dma_length = sg->length;
288 }
289 267
290 return nents; 268static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
269 int nents, enum dma_data_direction dir, unsigned long attrs)
270{
271 return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
291} 272}
292 273
293static void iommu_release_one(struct device *dev, u32 busa, int npages) 274static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
275 size_t len, enum dma_data_direction dir, unsigned long attrs)
294{ 276{
295 struct iommu_struct *iommu = dev->archdata.iommu; 277 struct iommu_struct *iommu = dev->archdata.iommu;
296 int ioptex; 278 unsigned int busa = dma_addr & PAGE_MASK;
297 int i; 279 unsigned long off = dma_addr & ~PAGE_MASK;
280 unsigned int npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
281 unsigned int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
282 unsigned int i;
298 283
299 BUG_ON(busa < iommu->start); 284 BUG_ON(busa < iommu->start);
300 ioptex = (busa - iommu->start) >> PAGE_SHIFT;
301 for (i = 0; i < npages; i++) { 285 for (i = 0; i < npages; i++) {
302 iopte_val(iommu->page_table[ioptex + i]) = 0; 286 iopte_val(iommu->page_table[ioptex + i]) = 0;
303 iommu_invalidate_page(iommu->regs, busa); 287 iommu_invalidate_page(iommu->regs, busa);
@@ -306,25 +290,15 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
306 bit_map_clear(&iommu->usemap, ioptex, npages); 290 bit_map_clear(&iommu->usemap, ioptex, npages);
307} 291}
308 292
309static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
310 size_t len, enum dma_data_direction dir, unsigned long attrs)
311{
312 unsigned long off = dma_addr & ~PAGE_MASK;
313 int npages;
314
315 npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
316 iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
317}
318
319static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, 293static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
320 int nents, enum dma_data_direction dir, unsigned long attrs) 294 int nents, enum dma_data_direction dir, unsigned long attrs)
321{ 295{
322 struct scatterlist *sg; 296 struct scatterlist *sg;
323 int i, n; 297 int i;
324 298
325 for_each_sg(sgl, sg, nents, i) { 299 for_each_sg(sgl, sg, nents, i) {
326 n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; 300 sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir,
327 iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); 301 attrs);
328 sg->dma_address = 0x21212121; 302 sg->dma_address = 0x21212121;
329 } 303 }
330} 304}
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 74e97f77e23b..83c4b463cb3d 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -68,7 +68,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg
68CFLAGS_REMOVE_vclock_gettime.o = -pg 68CFLAGS_REMOVE_vclock_gettime.o = -pg
69 69
70$(obj)/%.so: OBJCOPYFLAGS := -S 70$(obj)/%.so: OBJCOPYFLAGS := -S
71$(obj)/%.so: $(obj)/%.so.dbg 71$(obj)/%.so: $(obj)/%.so.dbg FORCE
72 $(call if_changed,objcopy) 72 $(call if_changed,objcopy)
73 73
74CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 74CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c
index 6516bc3cb58b..acd9ba40eabe 100644
--- a/drivers/sbus/char/oradax.c
+++ b/drivers/sbus/char/oradax.c
@@ -30,7 +30,7 @@
30 * the recommended way for applications to use the coprocessor, and 30 * the recommended way for applications to use the coprocessor, and
31 * the driver interface is not intended for general use. 31 * the driver interface is not intended for general use.
32 * 32 *
33 * See Documentation/sparc/oradax/oracle-dax.txt for more details. 33 * See Documentation/sparc/oradax/oracle-dax.rst for more details.
34 */ 34 */
35 35
36#include <linux/uaccess.h> 36#include <linux/uaccess.h>