diff options
| -rw-r--r-- | Documentation/arm64/sve.txt | 16 | ||||
| -rw-r--r-- | arch/arm64/Makefile | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 3 | ||||
| -rw-r--r-- | arch/arm64/include/uapi/asm/kvm.h | 7 | ||||
| -rw-r--r-- | arch/arm64/include/uapi/asm/ptrace.h | 4 | ||||
| -rw-r--r-- | arch/arm64/include/uapi/asm/sigcontext.h | 14 | ||||
| -rw-r--r-- | arch/arm64/kernel/fpsimd.c | 42 |
7 files changed, 78 insertions, 10 deletions
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt index 9940e924a47e..5689fc9a976a 100644 --- a/Documentation/arm64/sve.txt +++ b/Documentation/arm64/sve.txt | |||
| @@ -56,6 +56,18 @@ model features for SVE is included in Appendix A. | |||
| 56 | is to connect to a target process first and then attempt a | 56 | is to connect to a target process first and then attempt a |
| 57 | ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). | 57 | ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). |
| 58 | 58 | ||
| 59 | * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory | ||
| 60 | between userspace and the kernel, the register value is encoded in memory in | ||
| 61 | an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at | ||
| 62 | byte offset i from the start of the memory representation. This affects, for | ||
| 63 | example, the signal frame (struct sve_context) and the ptrace interface | ||
| 64 | (struct user_sve_header), along with their associated data. | ||
| 65 | |||
| 66 | Beware that on big-endian systems this results in a different byte order than | ||
| 67 | for the FPSIMD V-registers, which are stored as single host-endian 128-bit | ||
| 68 | values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at | ||
| 69 | byte offset i (see struct fpsimd_context and struct user_fpsimd_state). | ||
| 70 | |||
| 59 | 71 | ||
| 60 | 2. Vector length terminology | 72 | 2. Vector length terminology |
| 61 | ----------------------------- | 73 | ----------------------------- |
| @@ -124,6 +136,10 @@ the SVE instruction set architecture. | |||
| 124 | size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to | 136 | size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to |
| 125 | the members. | 137 | the members. |
| 126 | 138 | ||
| 139 | * Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant | ||
| 140 | layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the | ||
| 141 | start of the register's representation in memory. | ||
| 142 | |||
| 127 | * If the SVE context is too big to fit in sigcontext.__reserved[], then extra | 143 | * If the SVE context is too big to fit in sigcontext.__reserved[], then extra |
| 128 | space is allocated on the stack, an extra_context record is written in | 144 | space is allocated on the stack, an extra_context record is written in |
| 129 | __reserved[] referencing this space. sve_context is then written in the | 145 | __reserved[] referencing this space. sve_context is then written in the |
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8fbd583b18e1..e9d2e578cbe6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile | |||
| @@ -51,7 +51,7 @@ endif | |||
| 51 | 51 | ||
| 52 | KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) | 52 | KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) |
| 53 | KBUILD_CFLAGS += -fno-asynchronous-unwind-tables | 53 | KBUILD_CFLAGS += -fno-asynchronous-unwind-tables |
| 54 | KBUILD_CFLAGS += -Wno-psabi | 54 | KBUILD_CFLAGS += $(call cc-disable-warning, psabi) |
| 55 | KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) | 55 | KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) |
| 56 | 56 | ||
| 57 | KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) | 57 | KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) |
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3a1870228946..dff8f9ea5754 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h | |||
| @@ -195,6 +195,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, | |||
| 195 | unsigned long asid = ASID(vma->vm_mm); | 195 | unsigned long asid = ASID(vma->vm_mm); |
| 196 | unsigned long addr; | 196 | unsigned long addr; |
| 197 | 197 | ||
| 198 | start = round_down(start, stride); | ||
| 199 | end = round_up(end, stride); | ||
| 200 | |||
| 198 | if ((end - start) >= (MAX_TLBI_OPS * stride)) { | 201 | if ((end - start) >= (MAX_TLBI_OPS * stride)) { |
| 199 | flush_tlb_mm(vma->vm_mm); | 202 | flush_tlb_mm(vma->vm_mm); |
| 200 | return; | 203 | return; |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7b7ac0f6cec9..d819a3e8b552 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h | |||
| @@ -260,6 +260,13 @@ struct kvm_vcpu_events { | |||
| 260 | KVM_REG_SIZE_U256 | \ | 260 | KVM_REG_SIZE_U256 | \ |
| 261 | ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) | 261 | ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) |
| 262 | 262 | ||
| 263 | /* | ||
| 264 | * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and | ||
| 265 | * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness- | ||
| 266 | * invariant layout which differs from the layout used for the FPSIMD | ||
| 267 | * V-registers on big-endian systems: see sigcontext.h for more explanation. | ||
| 268 | */ | ||
| 269 | |||
| 263 | #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN | 270 | #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN |
| 264 | #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX | 271 | #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX |
| 265 | 272 | ||
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d78623acb649..97c53203150b 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h | |||
| @@ -176,6 +176,10 @@ struct user_sve_header { | |||
| 176 | * FPCR uint32_t FPCR | 176 | * FPCR uint32_t FPCR |
| 177 | * | 177 | * |
| 178 | * Additional data might be appended in the future. | 178 | * Additional data might be appended in the future. |
| 179 | * | ||
| 180 | * The Z-, P- and FFR registers are represented in memory in an endianness- | ||
| 181 | * invariant layout which differs from the layout used for the FPSIMD | ||
| 182 | * V-registers on big-endian systems: see sigcontext.h for more explanation. | ||
| 179 | */ | 183 | */ |
| 180 | 184 | ||
| 181 | #define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) | 185 | #define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) |
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 5f3c0cec5af9..3d448a0bb225 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h | |||
| @@ -77,6 +77,15 @@ struct fpsimd_context { | |||
| 77 | __uint128_t vregs[32]; | 77 | __uint128_t vregs[32]; |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| 80 | /* | ||
| 81 | * Note: similarly to all other integer fields, each V-register is stored in an | ||
| 82 | * endianness-dependent format, with the byte at offset i from the start of the | ||
| 83 | * in-memory representation of the register value containing | ||
| 84 | * | ||
| 85 | * bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or | ||
| 86 | * bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts. | ||
| 87 | */ | ||
| 88 | |||
| 80 | /* ESR_EL1 context */ | 89 | /* ESR_EL1 context */ |
| 81 | #define ESR_MAGIC 0x45535201 | 90 | #define ESR_MAGIC 0x45535201 |
| 82 | 91 | ||
| @@ -204,6 +213,11 @@ struct sve_context { | |||
| 204 | * FFR uint16_t[vq] first-fault status register | 213 | * FFR uint16_t[vq] first-fault status register |
| 205 | * | 214 | * |
| 206 | * Additional data might be appended in the future. | 215 | * Additional data might be appended in the future. |
| 216 | * | ||
| 217 | * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR) | ||
| 218 | * is encoded in memory in an endianness-invariant format, with the byte at | ||
| 219 | * offset i from the start of the in-memory representation containing bits | ||
| 220 | * [(7 + 8 * i) : (8 * i)] of the register value. | ||
| 207 | */ | 221 | */ |
| 208 | 222 | ||
| 209 | #define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) | 223 | #define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) |
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a38bf74bcca8..bb42cd04baec 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
| 40 | #include <linux/stddef.h> | 40 | #include <linux/stddef.h> |
| 41 | #include <linux/sysctl.h> | 41 | #include <linux/sysctl.h> |
| 42 | #include <linux/swab.h> | ||
| 42 | 43 | ||
| 43 | #include <asm/esr.h> | 44 | #include <asm/esr.h> |
| 44 | #include <asm/fpsimd.h> | 45 | #include <asm/fpsimd.h> |
| @@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; } | |||
| 352 | #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ | 353 | #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ |
| 353 | (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) | 354 | (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) |
| 354 | 355 | ||
| 356 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
| 357 | static __uint128_t arm64_cpu_to_le128(__uint128_t x) | ||
| 358 | { | ||
| 359 | u64 a = swab64(x); | ||
| 360 | u64 b = swab64(x >> 64); | ||
| 361 | |||
| 362 | return ((__uint128_t)a << 64) | b; | ||
| 363 | } | ||
| 364 | #else | ||
| 365 | static __uint128_t arm64_cpu_to_le128(__uint128_t x) | ||
| 366 | { | ||
| 367 | return x; | ||
| 368 | } | ||
| 369 | #endif | ||
| 370 | |||
| 371 | #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) | ||
| 372 | |||
| 355 | /* | 373 | /* |
| 356 | * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to | 374 | * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to |
| 357 | * task->thread.sve_state. | 375 | * task->thread.sve_state. |
| @@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task) | |||
| 369 | void *sst = task->thread.sve_state; | 387 | void *sst = task->thread.sve_state; |
| 370 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; | 388 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; |
| 371 | unsigned int i; | 389 | unsigned int i; |
| 390 | __uint128_t *p; | ||
| 372 | 391 | ||
| 373 | if (!system_supports_sve()) | 392 | if (!system_supports_sve()) |
| 374 | return; | 393 | return; |
| 375 | 394 | ||
| 376 | vq = sve_vq_from_vl(task->thread.sve_vl); | 395 | vq = sve_vq_from_vl(task->thread.sve_vl); |
| 377 | for (i = 0; i < 32; ++i) | 396 | for (i = 0; i < 32; ++i) { |
| 378 | memcpy(ZREG(sst, vq, i), &fst->vregs[i], | 397 | p = (__uint128_t *)ZREG(sst, vq, i); |
| 379 | sizeof(fst->vregs[i])); | 398 | *p = arm64_cpu_to_le128(fst->vregs[i]); |
| 399 | } | ||
| 380 | } | 400 | } |
| 381 | 401 | ||
| 382 | /* | 402 | /* |
| @@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task) | |||
| 395 | void const *sst = task->thread.sve_state; | 415 | void const *sst = task->thread.sve_state; |
| 396 | struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; | 416 | struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; |
| 397 | unsigned int i; | 417 | unsigned int i; |
| 418 | __uint128_t const *p; | ||
| 398 | 419 | ||
| 399 | if (!system_supports_sve()) | 420 | if (!system_supports_sve()) |
| 400 | return; | 421 | return; |
| 401 | 422 | ||
| 402 | vq = sve_vq_from_vl(task->thread.sve_vl); | 423 | vq = sve_vq_from_vl(task->thread.sve_vl); |
| 403 | for (i = 0; i < 32; ++i) | 424 | for (i = 0; i < 32; ++i) { |
| 404 | memcpy(&fst->vregs[i], ZREG(sst, vq, i), | 425 | p = (__uint128_t const *)ZREG(sst, vq, i); |
| 405 | sizeof(fst->vregs[i])); | 426 | fst->vregs[i] = arm64_le128_to_cpu(*p); |
| 427 | } | ||
| 406 | } | 428 | } |
| 407 | 429 | ||
| 408 | #ifdef CONFIG_ARM64_SVE | 430 | #ifdef CONFIG_ARM64_SVE |
| @@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) | |||
| 491 | void *sst = task->thread.sve_state; | 513 | void *sst = task->thread.sve_state; |
| 492 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; | 514 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; |
| 493 | unsigned int i; | 515 | unsigned int i; |
| 516 | __uint128_t *p; | ||
| 494 | 517 | ||
| 495 | if (!test_tsk_thread_flag(task, TIF_SVE)) | 518 | if (!test_tsk_thread_flag(task, TIF_SVE)) |
| 496 | return; | 519 | return; |
| @@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) | |||
| 499 | 522 | ||
| 500 | memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); | 523 | memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); |
| 501 | 524 | ||
| 502 | for (i = 0; i < 32; ++i) | 525 | for (i = 0; i < 32; ++i) { |
| 503 | memcpy(ZREG(sst, vq, i), &fst->vregs[i], | 526 | p = (__uint128_t *)ZREG(sst, vq, i); |
| 504 | sizeof(fst->vregs[i])); | 527 | *p = arm64_cpu_to_le128(fst->vregs[i]); |
| 528 | } | ||
| 505 | } | 529 | } |
| 506 | 530 | ||
| 507 | int sve_set_vector_length(struct task_struct *task, | 531 | int sve_set_vector_length(struct task_struct *task, |
