aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/sve.txt16
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/include/asm/tlbflush.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h7
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h4
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h14
-rw-r--r--arch/arm64/kernel/fpsimd.c42
7 files changed, 78 insertions, 10 deletions
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
index 9940e924a47e..5689fc9a976a 100644
--- a/Documentation/arm64/sve.txt
+++ b/Documentation/arm64/sve.txt
@@ -56,6 +56,18 @@ model features for SVE is included in Appendix A.
56 is to connect to a target process first and then attempt a 56 is to connect to a target process first and then attempt a
57 ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). 57 ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
58 58
59* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
60 between userspace and the kernel, the register value is encoded in memory in
61 an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
62 byte offset i from the start of the memory representation. This affects, for
63 example, the signal frame (struct sve_context) and the ptrace interface
64 (struct user_sve_header), together with their associated data.
65
66 Beware that on big-endian systems this results in a different byte order than
67 for the FPSIMD V-registers, which are stored as single host-endian 128-bit
68 values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
69 byte offset i (see struct fpsimd_context and struct user_fpsimd_state).
70
59 71
602. Vector length terminology 722. Vector length terminology
61----------------------------- 73-----------------------------
@@ -124,6 +136,10 @@ the SVE instruction set architecture.
124 size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to 136 size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to
125 the members. 137 the members.
126 138
139* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
140 layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
141 start of the register's representation in memory.
142
127* If the SVE context is too big to fit in sigcontext.__reserved[], then extra 143* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
128 space is allocated on the stack, an extra_context record is written in 144 space is allocated on the stack, an extra_context record is written in
129 __reserved[] referencing this space. sve_context is then written in the 145 __reserved[] referencing this space. sve_context is then written in the
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 8fbd583b18e1..e9d2e578cbe6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,7 @@ endif
51 51
52KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) 52KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
53KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 53KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
54KBUILD_CFLAGS += -Wno-psabi 54KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
55KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) 55KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
56 56
57KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) 57KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3a1870228946..dff8f9ea5754 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -195,6 +195,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
195 unsigned long asid = ASID(vma->vm_mm); 195 unsigned long asid = ASID(vma->vm_mm);
196 unsigned long addr; 196 unsigned long addr;
197 197
198 start = round_down(start, stride);
199 end = round_up(end, stride);
200
198 if ((end - start) >= (MAX_TLBI_OPS * stride)) { 201 if ((end - start) >= (MAX_TLBI_OPS * stride)) {
199 flush_tlb_mm(vma->vm_mm); 202 flush_tlb_mm(vma->vm_mm);
200 return; 203 return;
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 7b7ac0f6cec9..d819a3e8b552 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -260,6 +260,13 @@ struct kvm_vcpu_events {
260 KVM_REG_SIZE_U256 | \ 260 KVM_REG_SIZE_U256 | \
261 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) 261 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
262 262
263/*
264 * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and
265 * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness-
266 * invariant layout which differs from the layout used for the FPSIMD
267 * V-registers on big-endian systems: see sigcontext.h for more explanation.
268 */
269
263#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN 270#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
264#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX 271#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
265 272
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index d78623acb649..97c53203150b 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -176,6 +176,10 @@ struct user_sve_header {
176 * FPCR uint32_t FPCR 176 * FPCR uint32_t FPCR
177 * 177 *
178 * Additional data might be appended in the future. 178 * Additional data might be appended in the future.
179 *
180 * The Z-, P- and FFR registers are represented in memory in an endianness-
181 * invariant layout which differs from the layout used for the FPSIMD
182 * V-registers on big-endian systems: see sigcontext.h for more explanation.
179 */ 183 */
180 184
181#define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) 185#define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 5f3c0cec5af9..3d448a0bb225 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -77,6 +77,15 @@ struct fpsimd_context {
77 __uint128_t vregs[32]; 77 __uint128_t vregs[32];
78}; 78};
79 79
80/*
81 * Note: similarly to all other integer fields, each V-register is stored in an
82 * endianness-dependent format, with the byte at offset i from the start of the
83 * in-memory representation of the register value containing
84 *
85 * bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or
86 * bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts.
87 */
88
80/* ESR_EL1 context */ 89/* ESR_EL1 context */
81#define ESR_MAGIC 0x45535201 90#define ESR_MAGIC 0x45535201
82 91
@@ -204,6 +213,11 @@ struct sve_context {
204 * FFR uint16_t[vq] first-fault status register 213 * FFR uint16_t[vq] first-fault status register
205 * 214 *
206 * Additional data might be appended in the future. 215 * Additional data might be appended in the future.
216 *
217 * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR)
218 * is encoded in memory in an endianness-invariant format, with the byte at
219 * offset i from the start of the in-memory representation containing bits
220 * [(7 + 8 * i) : (8 * i)] of the register value.
207 */ 221 */
208 222
209#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) 223#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index a38bf74bcca8..bb42cd04baec 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -39,6 +39,7 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/stddef.h> 40#include <linux/stddef.h>
41#include <linux/sysctl.h> 41#include <linux/sysctl.h>
42#include <linux/swab.h>
42 43
43#include <asm/esr.h> 44#include <asm/esr.h>
44#include <asm/fpsimd.h> 45#include <asm/fpsimd.h>
@@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; }
352#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ 353#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
353 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) 354 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
354 355
/*
 * arm64_cpu_to_le128() - convert a 128-bit value between the CPU's native
 * byte order and little-endian byte order.
 *
 * When CONFIG_CPU_BIG_ENDIAN is set, all 16 bytes of x are reversed: each
 * 64-bit half is byte-swapped with swab64() and the two halves are then
 * exchanged. Otherwise x is returned unchanged.
 *
 * Reversing the bytes twice yields the original value, so the same routine
 * also serves for the opposite direction; arm64_le128_to_cpu() below is
 * simply an alias for it.
 */
356#ifdef CONFIG_CPU_BIG_ENDIAN
357static __uint128_t arm64_cpu_to_le128(__uint128_t x)
358{
359 u64 a = swab64(x); /* byte-swapped low 64 bits of x */
360 u64 b = swab64(x >> 64); /* byte-swapped high 64 bits of x */
361
/* The swapped low half becomes the new high half, and vice versa. */
362 return ((__uint128_t)a << 64) | b;
363}
364#else
/* Builds without CONFIG_CPU_BIG_ENDIAN: no conversion is performed. */
365static __uint128_t arm64_cpu_to_le128(__uint128_t x)
366{
367 return x;
368}
369#endif
370
/* The conversion is its own inverse, so reuse it for the reverse direction. */
371#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
372
355/* 373/*
356 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to 374 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
357 * task->thread.sve_state. 375 * task->thread.sve_state.
@@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task)
369 void *sst = task->thread.sve_state; 387 void *sst = task->thread.sve_state;
370 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; 388 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
371 unsigned int i; 389 unsigned int i;
390 __uint128_t *p;
372 391
373 if (!system_supports_sve()) 392 if (!system_supports_sve())
374 return; 393 return;
375 394
376 vq = sve_vq_from_vl(task->thread.sve_vl); 395 vq = sve_vq_from_vl(task->thread.sve_vl);
377 for (i = 0; i < 32; ++i) 396 for (i = 0; i < 32; ++i) {
378 memcpy(ZREG(sst, vq, i), &fst->vregs[i], 397 p = (__uint128_t *)ZREG(sst, vq, i);
379 sizeof(fst->vregs[i])); 398 *p = arm64_cpu_to_le128(fst->vregs[i]);
399 }
380} 400}
381 401
382/* 402/*
@@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task)
395 void const *sst = task->thread.sve_state; 415 void const *sst = task->thread.sve_state;
396 struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; 416 struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
397 unsigned int i; 417 unsigned int i;
418 __uint128_t const *p;
398 419
399 if (!system_supports_sve()) 420 if (!system_supports_sve())
400 return; 421 return;
401 422
402 vq = sve_vq_from_vl(task->thread.sve_vl); 423 vq = sve_vq_from_vl(task->thread.sve_vl);
403 for (i = 0; i < 32; ++i) 424 for (i = 0; i < 32; ++i) {
404 memcpy(&fst->vregs[i], ZREG(sst, vq, i), 425 p = (__uint128_t const *)ZREG(sst, vq, i);
405 sizeof(fst->vregs[i])); 426 fst->vregs[i] = arm64_le128_to_cpu(*p);
427 }
406} 428}
407 429
408#ifdef CONFIG_ARM64_SVE 430#ifdef CONFIG_ARM64_SVE
@@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
491 void *sst = task->thread.sve_state; 513 void *sst = task->thread.sve_state;
492 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; 514 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
493 unsigned int i; 515 unsigned int i;
516 __uint128_t *p;
494 517
495 if (!test_tsk_thread_flag(task, TIF_SVE)) 518 if (!test_tsk_thread_flag(task, TIF_SVE))
496 return; 519 return;
@@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
499 522
500 memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); 523 memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
501 524
502 for (i = 0; i < 32; ++i) 525 for (i = 0; i < 32; ++i) {
503 memcpy(ZREG(sst, vq, i), &fst->vregs[i], 526 p = (__uint128_t *)ZREG(sst, vq, i);
504 sizeof(fst->vregs[i])); 527 *p = arm64_cpu_to_le128(fst->vregs[i]);
528 }
505} 529}
506 530
507int sve_set_vector_length(struct task_struct *task, 531int sve_set_vector_length(struct task_struct *task,