121 files changed, 8137 insertions(+), 628 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index acb80708accd..603d661b445d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
+	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT if MMU
 	select CLONE_BACKWARDS
@@ -51,6 +52,8 @@ config ARM
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16
@@ -481,6 +484,7 @@ config ARCH_IXP4XX
 	bool "IXP4xx-based"
 	depends on MMU
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
+	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARCH_REQUIRE_GPIOLIB
 	select CLKSRC_MMIO
 	select CPU_XSCALE
@@ -688,7 +692,6 @@ config ARCH_SA1100
 	select GENERIC_CLOCKEVENTS
 	select HAVE_IDE
 	select ISA
-	select NEED_MACH_GPIO_H
 	select NEED_MACH_MEMORY_H
 	select SPARSE_IRQ
 	help
@@ -1064,11 +1067,6 @@ config IWMMXT
 	  Enable support for iWMMXt context switching at run time if
 	  running on a CPU that supports it.
 
-config XSCALE_PMU
-	bool
-	depends on CPU_XSCALE
-	default y
-
 config MULTI_IRQ_HANDLER
 	bool
 	help
@@ -1516,6 +1514,32 @@ config MCPM
 	  for (multi-)cluster based systems, such as big.LITTLE based
 	  systems.
 
+config BIG_LITTLE
+	bool "big.LITTLE support (Experimental)"
+	depends on CPU_V7 && SMP
+	select MCPM
+	help
+	  This option enables support selections for the big.LITTLE
+	  system architecture.
+
+config BL_SWITCHER
+	bool "big.LITTLE switcher support"
+	depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+	select CPU_PM
+	select ARM_CPU_SUSPEND
+	help
+	  The big.LITTLE "switcher" provides the core functionality to
+	  transparently handle transition between a cluster of A15's
+	  and a cluster of A7's in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+	tristate "Simple big.LITTLE switcher user interface"
+	depends on BL_SWITCHER && DEBUG_KERNEL
+	help
+	  This is a simple and dummy char dev interface to control
+	  the big.LITTLE switcher core code. It is meant for
+	  debugging purposes only.
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
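A usage note on the new switcher options: with CONFIG_BL_SWITCHER enabled, the core driver added later in this series can be turned on and off at run time through sysfs. A minimal sketch of a user-space toggle, assuming the /sys/kernel/bL_switcher/active path implied by the kobject and attribute names registered in bL_switcher.c below:

    /* Sketch only: the path is inferred from
     * kobject_create_and_add("bL_switcher", kernel_kobj) plus the "active"
     * attribute; bL_switcher_active_store() accepts '0' or '1'. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/bL_switcher/active", O_WRONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* '0' disables the switcher and brings the removed logical
             * CPUs back online; '1' re-enables it. */
            if (write(fd, "0", 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }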
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index d597c6b8488b..5765abf5ce84 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -318,6 +318,7 @@ choice
 	config DEBUG_MSM_UART1
 		bool "Kernel low-level debugging messages via MSM UART1"
 		depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50
+		select DEBUG_MSM_UART
 		help
 		  Say Y here if you want the debug print routines to direct
 		  their output to the first serial port on MSM devices.
@@ -325,6 +326,7 @@ choice
 	config DEBUG_MSM_UART2
 		bool "Kernel low-level debugging messages via MSM UART2"
 		depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50
+		select DEBUG_MSM_UART
 		help
 		  Say Y here if you want the debug print routines to direct
 		  their output to the second serial port on MSM devices.
@@ -332,6 +334,7 @@ choice
 	config DEBUG_MSM_UART3
 		bool "Kernel low-level debugging messages via MSM UART3"
 		depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50
+		select DEBUG_MSM_UART
 		help
 		  Say Y here if you want the debug print routines to direct
 		  their output to the third serial port on MSM devices.
@@ -340,6 +343,7 @@ choice
 		bool "Kernel low-level debugging messages via MSM 8660 UART"
 		depends on ARCH_MSM8X60
 		select MSM_HAS_DEBUG_UART_HS
+		select DEBUG_MSM_UART
 		help
 		  Say Y here if you want the debug print routines to direct
 		  their output to the serial port on MSM 8660 devices.
@@ -348,10 +352,20 @@ choice
 		bool "Kernel low-level debugging messages via MSM 8960 UART"
 		depends on ARCH_MSM8960
 		select MSM_HAS_DEBUG_UART_HS
+		select DEBUG_MSM_UART
 		help
 		  Say Y here if you want the debug print routines to direct
 		  their output to the serial port on MSM 8960 devices.
 
+	config DEBUG_MSM8974_UART
+		bool "Kernel low-level debugging messages via MSM 8974 UART"
+		depends on ARCH_MSM8974
+		select MSM_HAS_DEBUG_UART_HS
+		select DEBUG_MSM_UART
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the serial port on MSM 8974 devices.
+
 	config DEBUG_MVEBU_UART
 		bool "Kernel low-level debugging messages via MVEBU UART (old bootloaders)"
 		depends on ARCH_MVEBU
@@ -841,6 +855,20 @@ choice
 		  options; the platform specific options are deprecated
 		  and will be soon removed.
 
+	config DEBUG_LL_UART_EFM32
+		bool "Kernel low-level debugging via efm32 UART"
+		depends on ARCH_EFM32
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to an UART or USART port on efm32 based
+		  machines. Use the following addresses for DEBUG_UART_PHYS:
+
+		  0x4000c000 | USART0
+		  0x4000c400 | USART1
+		  0x4000c800 | USART2
+		  0x4000e000 | UART0
+		  0x4000e400 | UART1
+
 	config DEBUG_LL_UART_PL01X
 		bool "Kernel low-level debugging via ARM Ltd PL01x Primecell UART"
 		help
@@ -887,11 +915,16 @@ config DEBUG_STI_UART
 	bool
 	depends on ARCH_STI
 
+config DEBUG_MSM_UART
+	bool
+	depends on ARCH_MSM
+
 config DEBUG_LL_INCLUDE
 	string
 	default "debug/8250.S" if DEBUG_LL_UART_8250 || DEBUG_UART_8250
 	default "debug/pl01x.S" if DEBUG_LL_UART_PL01X || DEBUG_UART_PL01X
 	default "debug/exynos.S" if DEBUG_EXYNOS_UART
+	default "debug/efm32.S" if DEBUG_LL_UART_EFM32
 	default "debug/icedcc.S" if DEBUG_ICEDCC
 	default "debug/imx.S" if DEBUG_IMX1_UART || \
 				 DEBUG_IMX25_UART || \
@@ -902,11 +935,7 @@ config DEBUG_LL_INCLUDE
 				 DEBUG_IMX53_UART ||\
 				 DEBUG_IMX6Q_UART || \
 				 DEBUG_IMX6SL_UART
-	default "debug/msm.S" if DEBUG_MSM_UART1 || \
-				 DEBUG_MSM_UART2 || \
-				 DEBUG_MSM_UART3 || \
-				 DEBUG_MSM8660_UART || \
-				 DEBUG_MSM8960_UART
+	default "debug/msm.S" if DEBUG_MSM_UART
 	default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART
 	default "debug/sirf.S" if DEBUG_SIRFPRIMA2_UART1 || DEBUG_SIRFMARCO_UART1
 	default "debug/sti.S" if DEBUG_STI_UART
@@ -959,6 +988,7 @@ config DEBUG_UART_PHYS
 	default 0x20064000 if DEBUG_RK29_UART1 || DEBUG_RK3X_UART2
 	default 0x20068000 if DEBUG_RK29_UART2 || DEBUG_RK3X_UART3
 	default 0x20201000 if DEBUG_BCM2835
+	default 0x4000e400 if DEBUG_LL_UART_EFM32
 	default 0x40090000 if ARCH_LPC32XX
 	default 0x40100000 if DEBUG_PXA_UART1
 	default 0x42000000 if ARCH_GEMINI
@@ -989,6 +1019,7 @@ config DEBUG_UART_PHYS
 	default 0xfff36000 if DEBUG_HIGHBANK_UART
 	default 0xfffff700 if ARCH_IOP33X
 	depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
+		DEBUG_LL_UART_EFM32 || \
 		DEBUG_UART_8250 || DEBUG_UART_PL01X
 
 config DEBUG_UART_VIRT
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 8b667132d7b4..c99b1086d83d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -16,6 +16,7 @@ LDFLAGS		:=
 LDFLAGS_vmlinux	:= -p --no-undefined -X
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
+LDFLAGS_MODULE	+= --be8
 endif
 
 OBJCOPYFLAGS	:= -O binary -R .comment -S
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 75189f13cf54..066b03480b63 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -135,6 +135,7 @@ start:
 		.word	_edata			@ zImage end address
  THUMB(		.thumb			)
 1:
+ ARM_BE8(	setend	be )			@ go BE8 if compiled for BE8
 		mrs	r9, cpsr
 #ifdef CONFIG_ARM_VIRT_EXT
 		bl	__hyp_stub_install	@ get into SVC mode, reversibly
@@ -699,9 +700,7 @@ __armv4_mmu_cache_on:
 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 		orr	r0, r0, #0x0030
-#ifdef CONFIG_CPU_ENDIAN_BE8
-		orr	r0, r0, #1 << 25	@ big-endian page tables
-#endif
+ ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 		bl	__common_mmu_cache_on
 		mov	r0, #0
 		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
@@ -728,9 +727,7 @@ __armv7_mmu_cache_on:
 		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 						@ (needed for ARM1176)
 #ifdef CONFIG_MMU
-#ifdef CONFIG_CPU_ENDIAN_BE8
-		orr	r0, r0, #1 << 25	@ big-endian page tables
-#endif
+ ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 		mrcne	p15, 0, r6, c2, c0, 2	@ read ttb control reg
 		orrne	r0, r0, #1		@ MMU enabled
 		movne	r1, #0xfffffffd		@ domain 0 = client
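The ARM_BE8() wrapper used in these hunks keeps BE8-only instructions out of little-endian builds. Conceptually it is a one-liner from asm/assembler.h, along these lines (a sketch of the idea, not a verbatim copy of the header):

    /* Sketch: emit the wrapped code only on CONFIG_CPU_ENDIAN_BE8 builds,
     * so e.g. ARM_BE8(setend be) assembles to nothing on LE kernels. */
    #ifdef CONFIG_CPU_ENDIAN_BE8
    #define ARM_BE8(code...)	code
    #else
    #define ARM_BE8(code...)
    #endif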
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index eaa9cf4705a7..4bdc41622c36 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -16,3 +16,5 @@ obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
 AFLAGS_mcpm_head.o	:= -march=armv7-a
 AFLAGS_vlock.o		:= -march=armv7-a
 obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
+obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 000000000000..5774b6ea7ad5
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,822 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by:	Nicolas Pitre, March 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here as the value _does_ change along some code paths.
+ */
+
+static int read_mpidr(void)
+{
+	unsigned int id;
+	asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id));
+	return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+	struct timespec ts;
+	getnstimeofday(&ts);
+	return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+	unsigned ib_mpidr, ib_cpu, ib_cluster;
+	long volatile handshake, **handshake_ptr = _arg;
+
+	pr_debug("%s\n", __func__);
+
+	ib_mpidr = cpu_logical_map(smp_processor_id());
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+	/* Advertise our handshake location */
+	if (handshake_ptr) {
+		handshake = 0;
+		*handshake_ptr = &handshake;
+	} else
+		handshake = -1;
+
+	/*
+	 * Our state has been saved at this point.  Let's release our
+	 * inbound CPU.
+	 */
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
+	sev();
+
+	/*
+	 * From this point, we must assume that our counterpart CPU might
+	 * have taken over in its parallel world already, as if execution
+	 * just returned from cpu_suspend().  It is therefore important to
+	 * be very careful not to make any change the other guy is not
+	 * expecting.  This is why we need stack isolation.
+	 *
+	 * Fancy under cover tasks could be performed here.  For now
+	 * we have none.
+	 */
+
+	/*
+	 * Let's wait until our inbound is alive.
+	 */
+	while (!handshake) {
+		wfe();
+		smp_mb();
+	}
+
+	/* Let's put ourself down. */
+	mcpm_cpu_power_down();
+
+	/* should never get here */
+	BUG();
+}
+
+/*
+ * Stack isolation.  To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+	unsigned int mpidr = read_mpidr();
+	unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	void *stack = current_thread_info() + 1;
+	stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+	stack += clusterid * STACK_SIZE + STACK_SIZE;
+	call_with_stack(bL_do_switch, (void *)_arg, stack);
+	BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+	unsigned int mpidr, this_cpu, that_cpu;
+	unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+	struct completion inbound_alive;
+	struct tick_device *tdev;
+	enum clock_event_mode tdev_mode;
+	long volatile *handshake_ptr;
+	int ipi_nr, ret;
+
+	this_cpu = smp_processor_id();
+	ob_mpidr = read_mpidr();
+	ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+	ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+	BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
+
+	if (new_cluster_id == ob_cluster)
+		return 0;
+
+	that_cpu = bL_switcher_cpu_pairing[this_cpu];
+	ib_mpidr = cpu_logical_map(that_cpu);
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+	pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+		 this_cpu, ob_mpidr, ib_mpidr);
+
+	this_cpu = smp_processor_id();
+
+	/* Close the gate for our entry vectors */
+	mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+
+	/* Install our "inbound alive" notifier. */
+	init_completion(&inbound_alive);
+	ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+	ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+	mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+	/*
+	 * Let's wake up the inbound CPU now in case it requires some delay
+	 * to come online, but leave it gated in our entry vector code.
+	 */
+	ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
+	if (ret) {
+		pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+		return ret;
+	}
+
+	/*
+	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
+	 * in a possible WFI, such as in bL_power_down().
+	 */
+	gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+	/*
+	 * Wait for the inbound to come up.  This allows for other
+	 * tasks to be scheduled in the mean time.
+	 */
+	wait_for_completion(&inbound_alive);
+	mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+	/*
+	 * From this point we are entering the switch critical zone
+	 * and can't take any interrupts anymore.
+	 */
+	local_irq_disable();
+	local_fiq_disable();
+	trace_cpu_migrate_begin(get_ns(), ob_mpidr);
+
+	/* redirect GIC's SGIs to our counterpart */
+	gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
+
+	tdev = tick_get_device(this_cpu);
+	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+		tdev = NULL;
+	if (tdev) {
+		tdev_mode = tdev->evtdev->mode;
+		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
+	ret = cpu_pm_enter();
+
+	/* we can not tolerate errors at this point */
+	if (ret)
+		panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+	/* Swap the physical CPUs in the logical map for this logical CPU. */
+	cpu_logical_map(this_cpu) = ib_mpidr;
+	cpu_logical_map(that_cpu) = ob_mpidr;
+
+	/* Let's do the actual CPU switch. */
+	ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+	if (ret > 0)
+		panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+	/* We are executing on the inbound CPU at this point */
+	mpidr = read_mpidr();
+	pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+	BUG_ON(mpidr != ib_mpidr);
+
+	mcpm_cpu_powered_up();
+
+	ret = cpu_pm_exit();
+
+	if (tdev) {
+		clockevents_set_mode(tdev->evtdev, tdev_mode);
+		clockevents_program_event(tdev->evtdev,
+					  tdev->evtdev->next_event, 1);
+	}
+
+	trace_cpu_migrate_finish(get_ns(), ib_mpidr);
+	local_fiq_enable();
+	local_irq_enable();
+
+	*handshake_ptr = 1;
+	dsb_sev();
+
+	if (ret)
+		pr_err("%s exiting with error %d\n", __func__, ret);
+	return ret;
+}
+
+struct bL_thread {
+	spinlock_t lock;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	int wanted_cluster;
+	struct completion started;
+	bL_switch_completion_handler completer;
+	void *completer_cookie;
+};
+
+static struct bL_thread bL_threads[NR_CPUS];
+
+static int bL_switcher_thread(void *arg)
+{
+	struct bL_thread *t = arg;
+	struct sched_param param = { .sched_priority = 1 };
+	int cluster;
+	bL_switch_completion_handler completer;
+	void *completer_cookie;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+	complete(&t->started);
+
+	do {
+		if (signal_pending(current))
+			flush_signals(current);
+		wait_event_interruptible(t->wq,
+				t->wanted_cluster != -1 ||
+				kthread_should_stop());
+
+		spin_lock(&t->lock);
+		cluster = t->wanted_cluster;
+		completer = t->completer;
+		completer_cookie = t->completer_cookie;
+		t->wanted_cluster = -1;
+		t->completer = NULL;
+		spin_unlock(&t->lock);
+
+		if (cluster != -1) {
+			bL_switch_to(cluster);
+
+			if (completer)
+				completer(completer_cookie);
+		}
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
+{
+	struct task_struct *task;
+
+	task = kthread_create_on_node(bL_switcher_thread, arg,
+				      cpu_to_node(cpu), "kswitcher_%d", cpu);
+	if (!IS_ERR(task)) {
+		kthread_bind(task, cpu);
+		wake_up_process(task);
+	} else
+		pr_err("%s failed for CPU %d\n", __func__, cpu);
+	return task;
+}
+
+/*
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ *      with completion notification via a callback
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback.  if non-NULL,
+ *	@completer(@completer_cookie) will be called on completion of
+ *	the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread.  This function may or may not return
+ * before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete.  This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns.  When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
+ */
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+			 bL_switch_completion_handler completer,
+			 void *completer_cookie)
+{
+	struct bL_thread *t;
+
+	if (cpu >= ARRAY_SIZE(bL_threads)) {
+		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+		return -EINVAL;
+	}
+
+	t = &bL_threads[cpu];
+
+	if (IS_ERR(t->task))
+		return PTR_ERR(t->task);
+	if (!t->task)
+		return -ESRCH;
+
+	spin_lock(&t->lock);
+	if (t->completer) {
+		spin_unlock(&t->lock);
+		return -EBUSY;
+	}
+	t->completer = completer;
+	t->completer_cookie = completer_cookie;
+	t->wanted_cluster = new_cluster_id;
+	spin_unlock(&t->lock);
+	wake_up(&t->wq);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+	int ret;
+
+	ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+	if (ret & NOTIFY_STOP_MASK)
+		pr_err("%s: notifier chain failed with status 0x%x\n",
+			__func__, ret);
+	return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+	int i;
+
+	for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+		cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+	int i, j, cluster_0, gic_id, ret;
+	unsigned int cpu, cluster, mask;
+	cpumask_t available_cpus;
+
+	/* First pass to validate what we have */
+	mask = 0;
+	for_each_online_cpu(i) {
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+		if (cluster >= 2) {
+			pr_err("%s: only dual cluster systems are supported\n", __func__);
+			return -EINVAL;
+		}
+		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+			return -EINVAL;
+		mask |= (1 << cluster);
+	}
+	if (mask != 3) {
+		pr_err("%s: no CPU pairing possible\n", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * Now let's do the pairing.  We match each CPU with another CPU
+	 * from a different cluster.  To get a uniform scheduling behavior
+	 * without fiddling with CPU topology and compute capacity data,
+	 * we'll use logical CPUs initially belonging to the same cluster.
+	 */
+	memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+	cpumask_copy(&available_cpus, cpu_online_mask);
+	cluster_0 = -1;
+	for_each_cpu(i, &available_cpus) {
+		int match = -1;
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+		if (cluster_0 == -1)
+			cluster_0 = cluster;
+		if (cluster != cluster_0)
+			continue;
+		cpumask_clear_cpu(i, &available_cpus);
+		for_each_cpu(j, &available_cpus) {
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
+			/*
+			 * Let's remember the last match to create "odd"
+			 * pairings on purpose in order for other code not
+			 * to assume any relation between physical and
+			 * logical CPU numbers.
+			 */
+			if (cluster != cluster_0)
+				match = j;
+		}
+		if (match != -1) {
+			bL_switcher_cpu_pairing[i] = match;
+			cpumask_clear_cpu(match, &available_cpus);
+			pr_info("CPU%d paired with CPU%d\n", i, match);
+		}
+	}
+
+	/*
+	 * Now we disable the unwanted CPUs i.e. everything that has no
+	 * pairing information (that includes the pairing counterparts).
+	 */
+	cpumask_clear(&bL_switcher_removed_logical_cpus);
+	for_each_online_cpu(i) {
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+		/* Let's take note of the GIC ID for this CPU */
+		gic_id = gic_get_cpu_id(i);
+		if (gic_id < 0) {
+			pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+			bL_switcher_restore_cpus();
+			return -EINVAL;
+		}
+		bL_gic_id[cpu][cluster] = gic_id;
+		pr_info("GIC ID for CPU %u cluster %u is %u\n",
+			cpu, cluster, gic_id);
+
+		if (bL_switcher_cpu_pairing[i] != -1) {
+			bL_switcher_cpu_original_cluster[i] = cluster;
+			continue;
+		}
+
+		ret = cpu_down(i);
+		if (ret) {
+			bL_switcher_restore_cpus();
+			return ret;
+		}
+		cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+	}
+
+	return 0;
+}
+
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+	int cpu;
+
+	if (!bL_switcher_active)
+		return -EUNATCH;
+
+	mpidr &= MPIDR_HWID_BITMASK;
+	for_each_online_cpu(cpu) {
+		int pairing = bL_switcher_cpu_pairing[cpu];
+		if (pairing == -1)
+			continue;
+		if ((mpidr == cpu_logical_map(cpu)) ||
+		    (mpidr == cpu_logical_map(pairing)))
+			return cpu;
+	}
+	return -EINVAL;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+	trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+int bL_switcher_trace_trigger(void)
+{
+	int ret;
+
+	preempt_disable();
+
+	bL_switcher_trace_trigger_cpu(NULL);
+	ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+	int cpu, ret;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	lock_device_hotplug();
+	if (bL_switcher_active) {
+		unlock_device_hotplug();
+		mutex_unlock(&bL_switcher_activation_lock);
+		return 0;
+	}
+
+	pr_info("big.LITTLE switcher initializing\n");
+
+	ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+	if (ret)
+		goto error;
+
+	ret = bL_switcher_halve_cpus();
+	if (ret)
+		goto error;
+
+	bL_switcher_trace_trigger();
+
+	for_each_online_cpu(cpu) {
+		struct bL_thread *t = &bL_threads[cpu];
+		spin_lock_init(&t->lock);
+		init_waitqueue_head(&t->wq);
+		init_completion(&t->started);
+		t->wanted_cluster = -1;
+		t->task = bL_switcher_thread_create(cpu, t);
+	}
+
+	bL_switcher_active = 1;
+	bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+	pr_info("big.LITTLE switcher initialized\n");
+	goto out;
+
+error:
+	pr_warn("big.LITTLE switcher initialization failed\n");
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	unlock_device_hotplug();
+	mutex_unlock(&bL_switcher_activation_lock);
+	return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+	unsigned int cpu, cluster;
+	struct bL_thread *t;
+	struct task_struct *task;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	lock_device_hotplug();
+
+	if (!bL_switcher_active)
+		goto out;
+
+	if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+		bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+		goto out;
+	}
+
+	bL_switcher_active = 0;
+
+	/*
+	 * To deactivate the switcher, we must shut down the switcher
+	 * threads to prevent any other requests from being accepted.
+	 * Then, if the final cluster for given logical CPU is not the
+	 * same as the original one, we'll recreate a switcher thread
+	 * just for the purpose of switching the CPU back without any
+	 * possibility for interference from external requests.
+	 */
+	for_each_online_cpu(cpu) {
+		t = &bL_threads[cpu];
+		task = t->task;
+		t->task = NULL;
+		if (!task || IS_ERR(task))
+			continue;
+		kthread_stop(task);
+		/* no more switch may happen on this CPU at this point */
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+		if (cluster == bL_switcher_cpu_original_cluster[cpu])
+			continue;
+		init_completion(&t->started);
+		t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+		task = bL_switcher_thread_create(cpu, t);
+		if (!IS_ERR(task)) {
+			wait_for_completion(&t->started);
+			kthread_stop(task);
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+			if (cluster == bL_switcher_cpu_original_cluster[cpu])
+				continue;
+		}
+		/* If execution gets here, we're in trouble. */
+		pr_crit("%s: unable to restore original cluster for CPU %d\n",
+			__func__, cpu);
+		pr_crit("%s: CPU %d can't be restored\n",
+			__func__, bL_switcher_cpu_pairing[cpu]);
+		cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+				  &bL_switcher_removed_logical_cpus);
+	}
+
+	bL_switcher_restore_cpus();
+	bL_switcher_trace_trigger();
+
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	unlock_device_hotplug();
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	switch (buf[0]) {
+	case '0':
+		bL_switcher_disable();
+		ret = 0;
+		break;
+	case '1':
+		ret = bL_switcher_enable();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret = bL_switcher_trace_trigger();
+
+	return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+	__ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+	__ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+	&bL_switcher_active_attr.attr,
+	&bL_switcher_trace_trigger_attr.attr,
+	NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+	.attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+	int ret;
+
+	bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+	if (!bL_switcher_kobj)
+		return -ENOMEM;
+	ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+	if (ret)
+		kobject_put(bL_switcher_kobj);
+	return ret;
+}
+
+#endif	/* CONFIG_SYSFS */
+
+bool bL_switcher_get_enabled(void)
+{
+	mutex_lock(&bL_switcher_activation_lock);
+
+	return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation on those CPUs we've removed
+ * while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	if (bL_switcher_active) {
+		int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu];
+		switch (action & 0xf) {
+		case CPU_UP_PREPARE:
+		case CPU_DOWN_PREPARE:
+			if (pairing == -1)
+				return NOTIFY_BAD;
+		}
+	}
+	return NOTIFY_DONE;
+}
+
+static bool no_bL_switcher;
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+	int ret;
+
+	if (MAX_NR_CLUSTERS != 2) {
+		pr_err("%s: only dual cluster systems are supported\n", __func__);
+		return -EINVAL;
+	}
+
+	cpu_notifier(bL_switcher_hotplug_callback, 0);
+
+	if (!no_bL_switcher) {
+		ret = bL_switcher_enable();
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_SYSFS
+	ret = bL_switcher_sysfs_init();
+	if (ret)
+		pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+	return 0;
+}
+
+late_initcall(bL_switcher_init);
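For in-kernel callers, the exported entry point above is bL_switch_request_cb(); since it may return before the switch actually happens, a caller that needs synchronous behaviour can block on the completer callback. A sketch under that assumption (the wrapper itself is illustrative, not part of this patch):

    #include <linux/completion.h>
    #include <asm/bL_switcher.h>

    /* Runs in non-atomic context once the switch has finished, per the
     * bL_switch_request_cb() documentation above. */
    static void switch_done(void *cookie)
    {
            complete(cookie);
    }

    static int switch_cpu_and_wait(unsigned int cpu, unsigned int cluster)
    {
            DECLARE_COMPLETION_ONSTACK(done);
            int ret;

            ret = bL_switch_request_cb(cpu, cluster, switch_done, &done);
            if (ret)
                    return ret;     /* e.g. -EBUSY while a previous request is pending */
            wait_for_completion(&done);
            return 0;
    }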
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 000000000000..3f47f1203c6b
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by:	Nicolas Pitre, November 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * Dummy interface to user space for debugging purpose only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	unsigned char val[3];
+	unsigned int cpu, cluster;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+
+	if (len < 3)
+		return -EINVAL;
+
+	if (copy_from_user(val, buf, 3))
+		return -EFAULT;
+
+	/* format: <cpu#>,<cluster#> */
+	if (val[0] < '0' || val[0] > '9' ||
+	    val[1] != ',' ||
+	    val[2] < '0' || val[2] > '1')
+		return -EINVAL;
+
+	cpu = val[0] - '0';
+	cluster = val[2] - '0';
+	ret = bL_switch_request(cpu, cluster);
+
+	return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+	.write		= bL_switcher_write,
+	.owner		= THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+	MISC_DYNAMIC_MINOR,
+	"b.L_switcher",
+	&bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+	return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+	misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
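From user space the device above takes a fixed three-byte request, "<cpu#>,<cluster#>", exactly as parsed by bL_switcher_write(). A sketch of a test program (the /dev/b.L_switcher path assumes a conventional udev setup naming the node after the misc device):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/b.L_switcher", O_WRONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Ask for CPU 0 to be switched to cluster 1; the cluster
             * digit must be '0' or '1', per the checks above. */
            if (write(fd, "0,1", 3) < 0)
                    perror("write");
            close(fd);
            return 0;
    }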
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 990250965f2c..26020a03f659 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
 	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
 }
 
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+			 unsigned long poke_phys_addr, unsigned long poke_val)
+{
+	unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+	poke[0] = poke_phys_addr;
+	poke[1] = poke_val;
+	__cpuc_flush_dcache_area((void *)poke, 8);
+	outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
 static const struct mcpm_platform_ops *platform_ops;
 
 int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
@@ -90,6 +102,21 @@ void mcpm_cpu_power_down(void)
 	BUG();
 }
 
+int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster)
+{
+	int ret;
+
+	if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down_finish))
+		return -EUNATCH;
+
+	ret = platform_ops->power_down_finish(cpu, cluster);
+	if (ret)
+		pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n",
+			__func__, cpu, cluster, ret);
+
+	return ret;
+}
+
 void mcpm_cpu_suspend(u64 expected_residency)
 {
 	phys_reset_t phys_reset;
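mcpm_cpu_power_down_finish() only works once a platform backend supplies the corresponding method in its mcpm_platform_ops. An illustrative sketch of such a registration (the my_soc_* helpers are hypothetical; the field names follow asm/mcpm.h as extended by this series):

    static int my_soc_power_down_finish(unsigned int cpu, unsigned int cluster)
    {
            /* Hypothetical helper: poll the SoC power controller until
             * the given CPU is confirmed powered off; 0 on success. */
            return my_soc_wait_for_cpu_off(cpu, cluster);
    }

    static const struct mcpm_platform_ops my_soc_mcpm_ops = {
            .power_up               = my_soc_power_up,
            .power_down             = my_soc_power_down,
            .power_down_finish      = my_soc_power_down_finish,
    };

    static int __init my_soc_mcpm_init(void)
    {
            return mcpm_platform_register(&my_soc_mcpm_ops);
    }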
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 39c96df3477a..e02db4b81a66 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -15,6 +15,7 @@
 
 #include <linux/linkage.h>
 #include <asm/mcpm.h>
+#include <asm/assembler.h>
 
 #include "vlock.h"
 
@@ -47,6 +48,7 @@
 
 ENTRY(mcpm_entry_point)
 
+ ARM_BE8(setend	be)
  THUMB(	adr	r12, BSYM(1f)	)
  THUMB(	bx	r12		)
  THUMB(	.thumb			)
@@ -71,12 +73,19 @@ ENTRY(mcpm_entry_point)
	 * position independent way.
	 */
	adr	r5, 3f
-	ldmia	r5, {r6, r7, r8, r11}
+	ldmia	r5, {r0, r6, r7, r8, r11}
+	add	r0, r5, r0			@ r0 = mcpm_entry_early_pokes
	add	r6, r5, r6			@ r6 = mcpm_entry_vectors
	ldr	r7, [r5, r7]			@ r7 = mcpm_power_up_setup_phys
	add	r8, r5, r8			@ r8 = mcpm_sync
	add	r11, r5, r11			@ r11 = first_man_locks
 
+	@ Perform an early poke, if any
+	add	r0, r0, r4, lsl #3
+	ldmia	r0, {r0, r1}
+	teq	r0, #0
+	strne	r1, [r0]
+
	mov	r0, #MCPM_SYNC_CLUSTER_SIZE
	mla	r8, r0, r10, r8			@ r8 = sync cluster base
 
@@ -195,7 +204,8 @@ mcpm_entry_gated:
 
	.align	2
 
-3:	.word	mcpm_entry_vectors - .
+3:	.word	mcpm_entry_early_pokes - .
+	.word	mcpm_entry_vectors - 3b
	.word	mcpm_power_up_setup_phys - 3b
	.word	mcpm_sync - 3b
	.word	first_man_locks - 3b
@@ -214,6 +224,10 @@ first_man_locks:
 ENTRY(mcpm_entry_vectors)
	.space	4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
 
+.type	mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+	.space	8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
 .type	mcpm_power_up_setup_phys, #object
 ENTRY(mcpm_power_up_setup_phys)
	.space	4		@ set by mcpm_sync_init()
diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c
index 1bc34c7567fd..177251a4dd9a 100644
--- a/arch/arm/common/mcpm_platsmp.c
+++ b/arch/arm/common/mcpm_platsmp.c
@@ -19,14 +19,23 @@
 #include <asm/smp.h>
 #include <asm/smp_plat.h>
 
+static void cpu_to_pcpu(unsigned int cpu,
+			unsigned int *pcpu, unsigned int *pcluster)
+{
+	unsigned int mpidr;
+
+	mpidr = cpu_logical_map(cpu);
+	*pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	*pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+}
+
 static int mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	unsigned int mpidr, pcpu, pcluster, ret;
+	unsigned int pcpu, pcluster, ret;
 	extern void secondary_startup(void);
 
-	mpidr = cpu_logical_map(cpu);
-	pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-	pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	cpu_to_pcpu(cpu, &pcpu, &pcluster);
+
 	pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n",
 		 __func__, cpu, pcpu, pcluster);
 
@@ -47,6 +56,15 @@ static void mcpm_secondary_init(unsigned int cpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
+static int mcpm_cpu_kill(unsigned int cpu)
+{
+	unsigned int pcpu, pcluster;
+
+	cpu_to_pcpu(cpu, &pcpu, &pcluster);
+
+	return !mcpm_cpu_power_down_finish(pcpu, pcluster);
+}
+
 static int mcpm_cpu_disable(unsigned int cpu)
 {
 	/*
@@ -73,6 +91,7 @@ static struct smp_operations __initdata mcpm_smp_ops = {
 	.smp_boot_secondary	= mcpm_boot_secondary,
 	.smp_secondary_init	= mcpm_secondary_init,
 #ifdef CONFIG_HOTPLUG_CPU
+	.cpu_kill		= mcpm_cpu_kill,
 	.cpu_disable		= mcpm_cpu_disable,
 	.cpu_die		= mcpm_cpu_die,
 #endif
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index e901d0f3e0bb..ce922d0ea7aa 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -175,7 +175,7 @@ static struct clock_event_device sp804_clockevent = {
 
 static struct irqaction sp804_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= sp804_timer_interrupt,
 	.dev_id		= &sp804_clockevent,
 };
diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig
index 317960f12488..0142ec37e0be 100644
--- a/arch/arm/configs/h3600_defconfig
+++ b/arch/arm/configs/h3600_defconfig
@@ -1,5 +1,6 @@
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_MODULES=y
@@ -11,11 +12,11 @@ CONFIG_ARCH_SA1100=y
 CONFIG_SA1100_H3600=y
 CONFIG_PCCARD=y
 CONFIG_PCMCIA_SA1100=y
+CONFIG_PREEMPT=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 # CONFIG_CPU_FREQ_STAT is not set
 CONFIG_FPE_NWFPE=y
-CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
@@ -24,13 +25,10 @@ CONFIG_IRDA=m
 CONFIG_IRLAN=m
 CONFIG_IRNET=m
 CONFIG_IRCOMM=m
-CONFIG_SA1100_FIR=m
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -41,19 +39,15 @@ CONFIG_MTD_SA1100=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
-# CONFIG_MISC_DEVICES is not set
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECS=y
 CONFIG_NETDEVICES=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_WLAN is not set
-CONFIG_NET_PCMCIA=y
 CONFIG_PCMCIA_PCNET=y
 CONFIG_PPP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_DEFLATE=m
 CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_ASYNC=m
+# CONFIG_WLAN is not set
 # CONFIG_KEYBOARD_ATKBD is not set
 CONFIG_KEYBOARD_GPIO=y
 # CONFIG_INPUT_MOUSE is not set
@@ -64,8 +58,6 @@ CONFIG_SERIAL_SA1100_CONSOLE=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_SA1100=y
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
 CONFIG_MSDOS_FS=m
@@ -74,6 +66,4 @@ CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=m
 CONFIG_NFS_FS=y
 CONFIG_NFSD=m
-CONFIG_SMB_FS=m
 CONFIG_NLS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore new file mode 100644 index 000000000000..6231d36b3635 --- /dev/null +++ b/arch/arm/crypto/.gitignore | |||
@@ -0,0 +1 @@ | |||
aesbs-core.S | |||
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index a2c83851bc90..81cda39860c5 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile | |||
@@ -3,7 +3,17 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o | 5 | obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o |
6 | obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o | ||
6 | obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o | 7 | obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o |
7 | 8 | ||
8 | aes-arm-y := aes-armv4.o aes_glue.o | 9 | aes-arm-y := aes-armv4.o aes_glue.o |
9 | sha1-arm-y := sha1-armv4-large.o sha1_glue.o | 10 | aes-arm-bs-y := aesbs-core.o aesbs-glue.o |
11 | sha1-arm-y := sha1-armv4-large.o sha1_glue.o | ||
12 | |||
13 | quiet_cmd_perl = PERL $@ | ||
14 | cmd_perl = $(PERL) $(<) > $(@) | ||
15 | |||
16 | $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl | ||
17 | $(call cmd,perl) | ||
18 | |||
19 | .PRECIOUS: $(obj)/aesbs-core.S | ||
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877ead6a..3003fa1f6fb4 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c | |||
@@ -6,22 +6,12 @@ | |||
6 | #include <linux/crypto.h> | 6 | #include <linux/crypto.h> |
7 | #include <crypto/aes.h> | 7 | #include <crypto/aes.h> |
8 | 8 | ||
9 | #define AES_MAXNR 14 | 9 | #include "aes_glue.h" |
10 | 10 | ||
11 | typedef struct { | 11 | EXPORT_SYMBOL(AES_encrypt); |
12 | unsigned int rd_key[4 *(AES_MAXNR + 1)]; | 12 | EXPORT_SYMBOL(AES_decrypt); |
13 | int rounds; | 13 | EXPORT_SYMBOL(private_AES_set_encrypt_key); |
14 | } AES_KEY; | 14 | EXPORT_SYMBOL(private_AES_set_decrypt_key); |
15 | |||
16 | struct AES_CTX { | ||
17 | AES_KEY enc_key; | ||
18 | AES_KEY dec_key; | ||
19 | }; | ||
20 | |||
21 | asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); | ||
22 | asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); | ||
23 | asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); | ||
24 | asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); | ||
25 | 15 | ||
26 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 16 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
27 | { | 17 | { |
@@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = { | |||
81 | .cipher = { | 71 | .cipher = { |
82 | .cia_min_keysize = AES_MIN_KEY_SIZE, | 72 | .cia_min_keysize = AES_MIN_KEY_SIZE, |
83 | .cia_max_keysize = AES_MAX_KEY_SIZE, | 73 | .cia_max_keysize = AES_MAX_KEY_SIZE, |
84 | .cia_setkey = aes_set_key, | 74 | .cia_setkey = aes_set_key, |
85 | .cia_encrypt = aes_encrypt, | 75 | .cia_encrypt = aes_encrypt, |
86 | .cia_decrypt = aes_decrypt | 76 | .cia_decrypt = aes_decrypt |
87 | } | 77 | } |
diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h new file mode 100644 index 000000000000..cca3e51eb606 --- /dev/null +++ b/arch/arm/crypto/aes_glue.h | |||
@@ -0,0 +1,19 @@ | |||
1 | |||
2 | #define AES_MAXNR 14 | ||
3 | |||
4 | struct AES_KEY { | ||
5 | unsigned int rd_key[4 * (AES_MAXNR + 1)]; | ||
6 | int rounds; | ||
7 | }; | ||
8 | |||
9 | struct AES_CTX { | ||
10 | struct AES_KEY enc_key; | ||
11 | struct AES_KEY dec_key; | ||
12 | }; | ||
13 | |||
14 | asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); | ||
15 | asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); | ||
16 | asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, | ||
17 | const int bits, struct AES_KEY *key); | ||
18 | asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, | ||
19 | const int bits, struct AES_KEY *key); | ||
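As a hedged usage sketch (not part of the patch): the glue layer consumes these declarations by expanding the key once per direction and then feeding 16-byte blocks to the assembler routines, mirroring what aes_glue.c does. example_expand() and example_encrypt_block() below are illustrative names only, not kernel API:

/* Illustrative sketch; key-length validation and error codes omitted. */
static int example_expand(struct AES_CTX *ctx, const u8 *key, int key_bits)
{
	if (private_AES_set_encrypt_key(key, key_bits, &ctx->enc_key))
		return -1;				/* unsupported key size */
	return private_AES_set_decrypt_key(key, key_bits, &ctx->dec_key) ? -1 : 0;
}

static void example_encrypt_block(struct AES_CTX *ctx, u8 *dst, const u8 *src)
{
	AES_encrypt(src, dst, &ctx->enc_key);		/* one 16-byte block */
}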
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped new file mode 100644 index 000000000000..64205d453260 --- /dev/null +++ b/arch/arm/crypto/aesbs-core.S_shipped | |||
@@ -0,0 +1,2544 @@ | |||
1 | |||
2 | @ ==================================================================== | ||
3 | @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
4 | @ project. The module is, however, dual licensed under OpenSSL and | ||
5 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | ||
6 | @ details see http://www.openssl.org/~appro/cryptogams/. | ||
7 | @ | ||
8 | @ Specific modes and adaptation for Linux kernel by Ard Biesheuvel | ||
9 | @ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is | ||
10 | @ granted. | ||
11 | @ ==================================================================== | ||
12 | |||
13 | @ Bit-sliced AES for ARM NEON | ||
14 | @ | ||
15 | @ February 2012. | ||
16 | @ | ||
17 | @ This implementation is a direct adaptation of the bsaes-x86_64 module | ||
18 | @ for ARM NEON, except that this module is endian-neutral [in the sense | ||
19 | @ that it can be compiled for either endianness] courtesy of vld1.8's | ||
20 | @ neutrality. The initial version doesn't implement an interface to | ||
21 | @ OpenSSL, only low-level primitives and unsupported entry points, just | ||
22 | @ enough to collect performance results, which for a Cortex-A8 core are: | ||
23 | @ | ||
24 | @ encrypt 19.5 cycles per byte processed with 128-bit key | ||
25 | @ decrypt 22.1 cycles per byte processed with 128-bit key | ||
26 | @ key conv. 440 cycles per 128-bit key/0.18 of 8x block | ||
27 | @ | ||
28 | @ Snapdragon S4 encrypts a byte in 17.6 cycles and decrypts one in 19.7, | ||
29 | @ which is [much] worse than anticipated (for further details see | ||
30 | @ http://www.openssl.org/~appro/Snapdragon-S4.html). | ||
31 | @ | ||
32 | @ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code | ||
33 | @ manages in 20.0 cycles]. | ||
34 | @ | ||
35 | @ When comparing to x86_64 results keep in mind that NEON unit is | ||
36 | @ [mostly] single-issue and thus can't [fully] benefit from | ||
37 | @ instruction-level parallelism. And when comparing to aes-armv4 | ||
38 | @ results keep in mind key schedule conversion overhead (see | ||
39 | @ bsaes-x86_64.pl for further details)... | ||
40 | @ | ||
41 | @ <appro@openssl.org> | ||
42 | |||
43 | @ April-August 2013 | ||
44 | @ | ||
45 | @ Add CBC, CTR and XTS subroutines, adapt for kernel use. | ||
46 | @ | ||
47 | @ <ard.biesheuvel@linaro.org> | ||
48 | |||
49 | #ifndef __KERNEL__ | ||
50 | # include "arm_arch.h" | ||
51 | |||
52 | # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} | ||
53 | # define VFP_ABI_POP vldmia sp!,{d8-d15} | ||
54 | # define VFP_ABI_FRAME 0x40 | ||
55 | #else | ||
56 | # define VFP_ABI_PUSH | ||
57 | # define VFP_ABI_POP | ||
58 | # define VFP_ABI_FRAME 0 | ||
59 | # define BSAES_ASM_EXTENDED_KEY | ||
60 | # define XTS_CHAIN_TWEAK | ||
61 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
62 | #endif | ||
63 | |||
64 | #ifdef __thumb__ | ||
65 | # define adrl adr | ||
66 | #endif | ||
67 | |||
68 | #if __ARM_ARCH__>=7 | ||
69 | .text | ||
70 | .syntax unified @ ARMv7-capable assembler is expected to handle this | ||
71 | #ifdef __thumb2__ | ||
72 | .thumb | ||
73 | #else | ||
74 | .code 32 | ||
75 | #endif | ||
76 | |||
77 | .fpu neon | ||
78 | |||
79 | .type _bsaes_decrypt8,%function | ||
80 | .align 4 | ||
81 | _bsaes_decrypt8: | ||
82 | adr r6,_bsaes_decrypt8 | ||
83 | vldmia r4!, {q9} @ round 0 key | ||
84 | add r6,r6,#.LM0ISR-_bsaes_decrypt8 | ||
85 | |||
86 | vldmia r6!, {q8} @ .LM0ISR | ||
87 | veor q10, q0, q9 @ xor with round0 key | ||
88 | veor q11, q1, q9 | ||
89 | vtbl.8 d0, {q10}, d16 | ||
90 | vtbl.8 d1, {q10}, d17 | ||
91 | veor q12, q2, q9 | ||
92 | vtbl.8 d2, {q11}, d16 | ||
93 | vtbl.8 d3, {q11}, d17 | ||
94 | veor q13, q3, q9 | ||
95 | vtbl.8 d4, {q12}, d16 | ||
96 | vtbl.8 d5, {q12}, d17 | ||
97 | veor q14, q4, q9 | ||
98 | vtbl.8 d6, {q13}, d16 | ||
99 | vtbl.8 d7, {q13}, d17 | ||
100 | veor q15, q5, q9 | ||
101 | vtbl.8 d8, {q14}, d16 | ||
102 | vtbl.8 d9, {q14}, d17 | ||
103 | veor q10, q6, q9 | ||
104 | vtbl.8 d10, {q15}, d16 | ||
105 | vtbl.8 d11, {q15}, d17 | ||
106 | veor q11, q7, q9 | ||
107 | vtbl.8 d12, {q10}, d16 | ||
108 | vtbl.8 d13, {q10}, d17 | ||
109 | vtbl.8 d14, {q11}, d16 | ||
110 | vtbl.8 d15, {q11}, d17 | ||
111 | vmov.i8 q8,#0x55 @ compose .LBS0 | ||
112 | vmov.i8 q9,#0x33 @ compose .LBS1 | ||
113 | vshr.u64 q10, q6, #1 | ||
114 | vshr.u64 q11, q4, #1 | ||
115 | veor q10, q10, q7 | ||
116 | veor q11, q11, q5 | ||
117 | vand q10, q10, q8 | ||
118 | vand q11, q11, q8 | ||
119 | veor q7, q7, q10 | ||
120 | vshl.u64 q10, q10, #1 | ||
121 | veor q5, q5, q11 | ||
122 | vshl.u64 q11, q11, #1 | ||
123 | veor q6, q6, q10 | ||
124 | veor q4, q4, q11 | ||
125 | vshr.u64 q10, q2, #1 | ||
126 | vshr.u64 q11, q0, #1 | ||
127 | veor q10, q10, q3 | ||
128 | veor q11, q11, q1 | ||
129 | vand q10, q10, q8 | ||
130 | vand q11, q11, q8 | ||
131 | veor q3, q3, q10 | ||
132 | vshl.u64 q10, q10, #1 | ||
133 | veor q1, q1, q11 | ||
134 | vshl.u64 q11, q11, #1 | ||
135 | veor q2, q2, q10 | ||
136 | veor q0, q0, q11 | ||
137 | vmov.i8 q8,#0x0f @ compose .LBS2 | ||
138 | vshr.u64 q10, q5, #2 | ||
139 | vshr.u64 q11, q4, #2 | ||
140 | veor q10, q10, q7 | ||
141 | veor q11, q11, q6 | ||
142 | vand q10, q10, q9 | ||
143 | vand q11, q11, q9 | ||
144 | veor q7, q7, q10 | ||
145 | vshl.u64 q10, q10, #2 | ||
146 | veor q6, q6, q11 | ||
147 | vshl.u64 q11, q11, #2 | ||
148 | veor q5, q5, q10 | ||
149 | veor q4, q4, q11 | ||
150 | vshr.u64 q10, q1, #2 | ||
151 | vshr.u64 q11, q0, #2 | ||
152 | veor q10, q10, q3 | ||
153 | veor q11, q11, q2 | ||
154 | vand q10, q10, q9 | ||
155 | vand q11, q11, q9 | ||
156 | veor q3, q3, q10 | ||
157 | vshl.u64 q10, q10, #2 | ||
158 | veor q2, q2, q11 | ||
159 | vshl.u64 q11, q11, #2 | ||
160 | veor q1, q1, q10 | ||
161 | veor q0, q0, q11 | ||
162 | vshr.u64 q10, q3, #4 | ||
163 | vshr.u64 q11, q2, #4 | ||
164 | veor q10, q10, q7 | ||
165 | veor q11, q11, q6 | ||
166 | vand q10, q10, q8 | ||
167 | vand q11, q11, q8 | ||
168 | veor q7, q7, q10 | ||
169 | vshl.u64 q10, q10, #4 | ||
170 | veor q6, q6, q11 | ||
171 | vshl.u64 q11, q11, #4 | ||
172 | veor q3, q3, q10 | ||
173 | veor q2, q2, q11 | ||
174 | vshr.u64 q10, q1, #4 | ||
175 | vshr.u64 q11, q0, #4 | ||
176 | veor q10, q10, q5 | ||
177 | veor q11, q11, q4 | ||
178 | vand q10, q10, q8 | ||
179 | vand q11, q11, q8 | ||
180 | veor q5, q5, q10 | ||
181 | vshl.u64 q10, q10, #4 | ||
182 | veor q4, q4, q11 | ||
183 | vshl.u64 q11, q11, #4 | ||
184 | veor q1, q1, q10 | ||
185 | veor q0, q0, q11 | ||
186 | sub r5,r5,#1 | ||
187 | b .Ldec_sbox | ||
188 | .align 4 | ||
189 | .Ldec_loop: | ||
190 | vldmia r4!, {q8-q11} | ||
191 | veor q8, q8, q0 | ||
192 | veor q9, q9, q1 | ||
193 | vtbl.8 d0, {q8}, d24 | ||
194 | vtbl.8 d1, {q8}, d25 | ||
195 | vldmia r4!, {q8} | ||
196 | veor q10, q10, q2 | ||
197 | vtbl.8 d2, {q9}, d24 | ||
198 | vtbl.8 d3, {q9}, d25 | ||
199 | vldmia r4!, {q9} | ||
200 | veor q11, q11, q3 | ||
201 | vtbl.8 d4, {q10}, d24 | ||
202 | vtbl.8 d5, {q10}, d25 | ||
203 | vldmia r4!, {q10} | ||
204 | vtbl.8 d6, {q11}, d24 | ||
205 | vtbl.8 d7, {q11}, d25 | ||
206 | vldmia r4!, {q11} | ||
207 | veor q8, q8, q4 | ||
208 | veor q9, q9, q5 | ||
209 | vtbl.8 d8, {q8}, d24 | ||
210 | vtbl.8 d9, {q8}, d25 | ||
211 | veor q10, q10, q6 | ||
212 | vtbl.8 d10, {q9}, d24 | ||
213 | vtbl.8 d11, {q9}, d25 | ||
214 | veor q11, q11, q7 | ||
215 | vtbl.8 d12, {q10}, d24 | ||
216 | vtbl.8 d13, {q10}, d25 | ||
217 | vtbl.8 d14, {q11}, d24 | ||
218 | vtbl.8 d15, {q11}, d25 | ||
219 | .Ldec_sbox: | ||
220 | veor q1, q1, q4 | ||
221 | veor q3, q3, q4 | ||
222 | |||
223 | veor q4, q4, q7 | ||
224 | veor q1, q1, q6 | ||
225 | veor q2, q2, q7 | ||
226 | veor q6, q6, q4 | ||
227 | |||
228 | veor q0, q0, q1 | ||
229 | veor q2, q2, q5 | ||
230 | veor q7, q7, q6 | ||
231 | veor q3, q3, q0 | ||
232 | veor q5, q5, q0 | ||
233 | veor q1, q1, q3 | ||
234 | veor q11, q3, q0 | ||
235 | veor q10, q7, q4 | ||
236 | veor q9, q1, q6 | ||
237 | veor q13, q4, q0 | ||
238 | vmov q8, q10 | ||
239 | veor q12, q5, q2 | ||
240 | |||
241 | vorr q10, q10, q9 | ||
242 | veor q15, q11, q8 | ||
243 | vand q14, q11, q12 | ||
244 | vorr q11, q11, q12 | ||
245 | veor q12, q12, q9 | ||
246 | vand q8, q8, q9 | ||
247 | veor q9, q6, q2 | ||
248 | vand q15, q15, q12 | ||
249 | vand q13, q13, q9 | ||
250 | veor q9, q3, q7 | ||
251 | veor q12, q1, q5 | ||
252 | veor q11, q11, q13 | ||
253 | veor q10, q10, q13 | ||
254 | vand q13, q9, q12 | ||
255 | vorr q9, q9, q12 | ||
256 | veor q11, q11, q15 | ||
257 | veor q8, q8, q13 | ||
258 | veor q10, q10, q14 | ||
259 | veor q9, q9, q15 | ||
260 | veor q8, q8, q14 | ||
261 | vand q12, q4, q6 | ||
262 | veor q9, q9, q14 | ||
263 | vand q13, q0, q2 | ||
264 | vand q14, q7, q1 | ||
265 | vorr q15, q3, q5 | ||
266 | veor q11, q11, q12 | ||
267 | veor q9, q9, q14 | ||
268 | veor q8, q8, q15 | ||
269 | veor q10, q10, q13 | ||
270 | |||
271 | @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 | ||
272 | |||
273 | @ new smaller inversion | ||
274 | |||
275 | vand q14, q11, q9 | ||
276 | vmov q12, q8 | ||
277 | |||
278 | veor q13, q10, q14 | ||
279 | veor q15, q8, q14 | ||
280 | veor q14, q8, q14 @ q14=q15 | ||
281 | |||
282 | vbsl q13, q9, q8 | ||
283 | vbsl q15, q11, q10 | ||
284 | veor q11, q11, q10 | ||
285 | |||
286 | vbsl q12, q13, q14 | ||
287 | vbsl q8, q14, q13 | ||
288 | |||
289 | vand q14, q12, q15 | ||
290 | veor q9, q9, q8 | ||
291 | |||
292 | veor q14, q14, q11 | ||
293 | veor q12, q5, q2 | ||
294 | veor q8, q1, q6 | ||
295 | veor q10, q15, q14 | ||
296 | vand q10, q10, q5 | ||
297 | veor q5, q5, q1 | ||
298 | vand q11, q1, q15 | ||
299 | vand q5, q5, q14 | ||
300 | veor q1, q11, q10 | ||
301 | veor q5, q5, q11 | ||
302 | veor q15, q15, q13 | ||
303 | veor q14, q14, q9 | ||
304 | veor q11, q15, q14 | ||
305 | veor q10, q13, q9 | ||
306 | vand q11, q11, q12 | ||
307 | vand q10, q10, q2 | ||
308 | veor q12, q12, q8 | ||
309 | veor q2, q2, q6 | ||
310 | vand q8, q8, q15 | ||
311 | vand q6, q6, q13 | ||
312 | vand q12, q12, q14 | ||
313 | vand q2, q2, q9 | ||
314 | veor q8, q8, q12 | ||
315 | veor q2, q2, q6 | ||
316 | veor q12, q12, q11 | ||
317 | veor q6, q6, q10 | ||
318 | veor q5, q5, q12 | ||
319 | veor q2, q2, q12 | ||
320 | veor q1, q1, q8 | ||
321 | veor q6, q6, q8 | ||
322 | |||
323 | veor q12, q3, q0 | ||
324 | veor q8, q7, q4 | ||
325 | veor q11, q15, q14 | ||
326 | veor q10, q13, q9 | ||
327 | vand q11, q11, q12 | ||
328 | vand q10, q10, q0 | ||
329 | veor q12, q12, q8 | ||
330 | veor q0, q0, q4 | ||
331 | vand q8, q8, q15 | ||
332 | vand q4, q4, q13 | ||
333 | vand q12, q12, q14 | ||
334 | vand q0, q0, q9 | ||
335 | veor q8, q8, q12 | ||
336 | veor q0, q0, q4 | ||
337 | veor q12, q12, q11 | ||
338 | veor q4, q4, q10 | ||
339 | veor q15, q15, q13 | ||
340 | veor q14, q14, q9 | ||
341 | veor q10, q15, q14 | ||
342 | vand q10, q10, q3 | ||
343 | veor q3, q3, q7 | ||
344 | vand q11, q7, q15 | ||
345 | vand q3, q3, q14 | ||
346 | veor q7, q11, q10 | ||
347 | veor q3, q3, q11 | ||
348 | veor q3, q3, q12 | ||
349 | veor q0, q0, q12 | ||
350 | veor q7, q7, q8 | ||
351 | veor q4, q4, q8 | ||
352 | veor q1, q1, q7 | ||
353 | veor q6, q6, q5 | ||
354 | |||
355 | veor q4, q4, q1 | ||
356 | veor q2, q2, q7 | ||
357 | veor q5, q5, q7 | ||
358 | veor q4, q4, q2 | ||
359 | veor q7, q7, q0 | ||
360 | veor q4, q4, q5 | ||
361 | veor q3, q3, q6 | ||
362 | veor q6, q6, q1 | ||
363 | veor q3, q3, q4 | ||
364 | |||
365 | veor q4, q4, q0 | ||
366 | veor q7, q7, q3 | ||
367 | subs r5,r5,#1 | ||
368 | bcc .Ldec_done | ||
369 | @ multiplication by 0x05-0x00-0x04-0x00 | ||
370 | vext.8 q8, q0, q0, #8 | ||
371 | vext.8 q14, q3, q3, #8 | ||
372 | vext.8 q15, q5, q5, #8 | ||
373 | veor q8, q8, q0 | ||
374 | vext.8 q9, q1, q1, #8 | ||
375 | veor q14, q14, q3 | ||
376 | vext.8 q10, q6, q6, #8 | ||
377 | veor q15, q15, q5 | ||
378 | vext.8 q11, q4, q4, #8 | ||
379 | veor q9, q9, q1 | ||
380 | vext.8 q12, q2, q2, #8 | ||
381 | veor q10, q10, q6 | ||
382 | vext.8 q13, q7, q7, #8 | ||
383 | veor q11, q11, q4 | ||
384 | veor q12, q12, q2 | ||
385 | veor q13, q13, q7 | ||
386 | |||
387 | veor q0, q0, q14 | ||
388 | veor q1, q1, q14 | ||
389 | veor q6, q6, q8 | ||
390 | veor q2, q2, q10 | ||
391 | veor q4, q4, q9 | ||
392 | veor q1, q1, q15 | ||
393 | veor q6, q6, q15 | ||
394 | veor q2, q2, q14 | ||
395 | veor q7, q7, q11 | ||
396 | veor q4, q4, q14 | ||
397 | veor q3, q3, q12 | ||
398 | veor q2, q2, q15 | ||
399 | veor q7, q7, q15 | ||
400 | veor q5, q5, q13 | ||
401 | vext.8 q8, q0, q0, #12 @ x0 <<< 32 | ||
402 | vext.8 q9, q1, q1, #12 | ||
403 | veor q0, q0, q8 @ x0 ^ (x0 <<< 32) | ||
404 | vext.8 q10, q6, q6, #12 | ||
405 | veor q1, q1, q9 | ||
406 | vext.8 q11, q4, q4, #12 | ||
407 | veor q6, q6, q10 | ||
408 | vext.8 q12, q2, q2, #12 | ||
409 | veor q4, q4, q11 | ||
410 | vext.8 q13, q7, q7, #12 | ||
411 | veor q2, q2, q12 | ||
412 | vext.8 q14, q3, q3, #12 | ||
413 | veor q7, q7, q13 | ||
414 | vext.8 q15, q5, q5, #12 | ||
415 | veor q3, q3, q14 | ||
416 | |||
417 | veor q9, q9, q0 | ||
418 | veor q5, q5, q15 | ||
419 | vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) | ||
420 | veor q10, q10, q1 | ||
421 | veor q8, q8, q5 | ||
422 | veor q9, q9, q5 | ||
423 | vext.8 q1, q1, q1, #8 | ||
424 | veor q13, q13, q2 | ||
425 | veor q0, q0, q8 | ||
426 | veor q14, q14, q7 | ||
427 | veor q1, q1, q9 | ||
428 | vext.8 q8, q2, q2, #8 | ||
429 | veor q12, q12, q4 | ||
430 | vext.8 q9, q7, q7, #8 | ||
431 | veor q15, q15, q3 | ||
432 | vext.8 q2, q4, q4, #8 | ||
433 | veor q11, q11, q6 | ||
434 | vext.8 q7, q5, q5, #8 | ||
435 | veor q12, q12, q5 | ||
436 | vext.8 q4, q3, q3, #8 | ||
437 | veor q11, q11, q5 | ||
438 | vext.8 q3, q6, q6, #8 | ||
439 | veor q5, q9, q13 | ||
440 | veor q11, q11, q2 | ||
441 | veor q7, q7, q15 | ||
442 | veor q6, q4, q14 | ||
443 | veor q4, q8, q12 | ||
444 | veor q2, q3, q10 | ||
445 | vmov q3, q11 | ||
446 | @ vmov q5, q9 | ||
447 | vldmia r6, {q12} @ .LISR | ||
448 | ite eq @ Thumb2 thing, sanity check in ARM | ||
449 | addeq r6,r6,#0x10 | ||
450 | bne .Ldec_loop | ||
451 | vldmia r6, {q12} @ .LISRM0 | ||
452 | b .Ldec_loop | ||
453 | .align 4 | ||
454 | .Ldec_done: | ||
455 | vmov.i8 q8,#0x55 @ compose .LBS0 | ||
456 | vmov.i8 q9,#0x33 @ compose .LBS1 | ||
457 | vshr.u64 q10, q3, #1 | ||
458 | vshr.u64 q11, q2, #1 | ||
459 | veor q10, q10, q5 | ||
460 | veor q11, q11, q7 | ||
461 | vand q10, q10, q8 | ||
462 | vand q11, q11, q8 | ||
463 | veor q5, q5, q10 | ||
464 | vshl.u64 q10, q10, #1 | ||
465 | veor q7, q7, q11 | ||
466 | vshl.u64 q11, q11, #1 | ||
467 | veor q3, q3, q10 | ||
468 | veor q2, q2, q11 | ||
469 | vshr.u64 q10, q6, #1 | ||
470 | vshr.u64 q11, q0, #1 | ||
471 | veor q10, q10, q4 | ||
472 | veor q11, q11, q1 | ||
473 | vand q10, q10, q8 | ||
474 | vand q11, q11, q8 | ||
475 | veor q4, q4, q10 | ||
476 | vshl.u64 q10, q10, #1 | ||
477 | veor q1, q1, q11 | ||
478 | vshl.u64 q11, q11, #1 | ||
479 | veor q6, q6, q10 | ||
480 | veor q0, q0, q11 | ||
481 | vmov.i8 q8,#0x0f @ compose .LBS2 | ||
482 | vshr.u64 q10, q7, #2 | ||
483 | vshr.u64 q11, q2, #2 | ||
484 | veor q10, q10, q5 | ||
485 | veor q11, q11, q3 | ||
486 | vand q10, q10, q9 | ||
487 | vand q11, q11, q9 | ||
488 | veor q5, q5, q10 | ||
489 | vshl.u64 q10, q10, #2 | ||
490 | veor q3, q3, q11 | ||
491 | vshl.u64 q11, q11, #2 | ||
492 | veor q7, q7, q10 | ||
493 | veor q2, q2, q11 | ||
494 | vshr.u64 q10, q1, #2 | ||
495 | vshr.u64 q11, q0, #2 | ||
496 | veor q10, q10, q4 | ||
497 | veor q11, q11, q6 | ||
498 | vand q10, q10, q9 | ||
499 | vand q11, q11, q9 | ||
500 | veor q4, q4, q10 | ||
501 | vshl.u64 q10, q10, #2 | ||
502 | veor q6, q6, q11 | ||
503 | vshl.u64 q11, q11, #2 | ||
504 | veor q1, q1, q10 | ||
505 | veor q0, q0, q11 | ||
506 | vshr.u64 q10, q4, #4 | ||
507 | vshr.u64 q11, q6, #4 | ||
508 | veor q10, q10, q5 | ||
509 | veor q11, q11, q3 | ||
510 | vand q10, q10, q8 | ||
511 | vand q11, q11, q8 | ||
512 | veor q5, q5, q10 | ||
513 | vshl.u64 q10, q10, #4 | ||
514 | veor q3, q3, q11 | ||
515 | vshl.u64 q11, q11, #4 | ||
516 | veor q4, q4, q10 | ||
517 | veor q6, q6, q11 | ||
518 | vshr.u64 q10, q1, #4 | ||
519 | vshr.u64 q11, q0, #4 | ||
520 | veor q10, q10, q7 | ||
521 | veor q11, q11, q2 | ||
522 | vand q10, q10, q8 | ||
523 | vand q11, q11, q8 | ||
524 | veor q7, q7, q10 | ||
525 | vshl.u64 q10, q10, #4 | ||
526 | veor q2, q2, q11 | ||
527 | vshl.u64 q11, q11, #4 | ||
528 | veor q1, q1, q10 | ||
529 | veor q0, q0, q11 | ||
530 | vldmia r4, {q8} @ last round key | ||
531 | veor q6, q6, q8 | ||
532 | veor q4, q4, q8 | ||
533 | veor q2, q2, q8 | ||
534 | veor q7, q7, q8 | ||
535 | veor q3, q3, q8 | ||
536 | veor q5, q5, q8 | ||
537 | veor q0, q0, q8 | ||
538 | veor q1, q1, q8 | ||
539 | bx lr | ||
540 | .size _bsaes_decrypt8,.-_bsaes_decrypt8 | ||
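A note on the prologue and epilogue of _bsaes_decrypt8 above: the 0x55/0x33/0x0f masks (.LBS0/.LBS1/.LBS2) build a bit-matrix transpose out of delta-swaps, moving the eight q registers from byte order into one-bit planes and back. A minimal C model of one swap step, assuming 64-bit lanes as in the d registers:

#include <stdint.h>

/* Exchange the bits selected by mask between a (shifted down by shift)
 * and b.  The vshr/veor/vand then veor/vshl/veor sequences above are
 * exactly this, with (shift, mask) pairs (1, 0x55...), (2, 0x33...) and
 * (4, 0x0f...) applied across pairs of state registers. */
static void delta_swap(uint64_t *a, uint64_t *b, uint64_t mask, int shift)
{
	uint64_t t = ((*a >> shift) ^ *b) & mask;

	*b ^= t;
	*a ^= t << shift;
}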
541 | |||
542 | .type _bsaes_const,%object | ||
543 | .align 6 | ||
544 | _bsaes_const: | ||
545 | .LM0ISR: @ InvShiftRows constants | ||
546 | .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 | ||
547 | .LISR: | ||
548 | .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 | ||
549 | .LISRM0: | ||
550 | .quad 0x01040b0e0205080f, 0x0306090c00070a0d | ||
551 | .LM0SR: @ ShiftRows constants | ||
552 | .quad 0x0a0e02060f03070b, 0x0004080c05090d01 | ||
553 | .LSR: | ||
554 | .quad 0x0504070600030201, 0x0f0e0d0c0a09080b | ||
555 | .LSRM0: | ||
556 | .quad 0x0304090e00050a0f, 0x01060b0c0207080d | ||
557 | .LM0: | ||
558 | .quad 0x02060a0e03070b0f, 0x0004080c0105090d | ||
559 | .LREVM0SR: | ||
560 | .quad 0x090d01050c000408, 0x03070b0f060a0e02 | ||
561 | .asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>" | ||
562 | .align 6 | ||
563 | .size _bsaes_const,.-_bsaes_const | ||
564 | |||
565 | .type _bsaes_encrypt8,%function | ||
566 | .align 4 | ||
567 | _bsaes_encrypt8: | ||
568 | adr r6,_bsaes_encrypt8 | ||
569 | vldmia r4!, {q9} @ round 0 key | ||
570 | sub r6,r6,#_bsaes_encrypt8-.LM0SR | ||
571 | |||
572 | vldmia r6!, {q8} @ .LM0SR | ||
573 | _bsaes_encrypt8_alt: | ||
574 | veor q10, q0, q9 @ xor with round0 key | ||
575 | veor q11, q1, q9 | ||
576 | vtbl.8 d0, {q10}, d16 | ||
577 | vtbl.8 d1, {q10}, d17 | ||
578 | veor q12, q2, q9 | ||
579 | vtbl.8 d2, {q11}, d16 | ||
580 | vtbl.8 d3, {q11}, d17 | ||
581 | veor q13, q3, q9 | ||
582 | vtbl.8 d4, {q12}, d16 | ||
583 | vtbl.8 d5, {q12}, d17 | ||
584 | veor q14, q4, q9 | ||
585 | vtbl.8 d6, {q13}, d16 | ||
586 | vtbl.8 d7, {q13}, d17 | ||
587 | veor q15, q5, q9 | ||
588 | vtbl.8 d8, {q14}, d16 | ||
589 | vtbl.8 d9, {q14}, d17 | ||
590 | veor q10, q6, q9 | ||
591 | vtbl.8 d10, {q15}, d16 | ||
592 | vtbl.8 d11, {q15}, d17 | ||
593 | veor q11, q7, q9 | ||
594 | vtbl.8 d12, {q10}, d16 | ||
595 | vtbl.8 d13, {q10}, d17 | ||
596 | vtbl.8 d14, {q11}, d16 | ||
597 | vtbl.8 d15, {q11}, d17 | ||
598 | _bsaes_encrypt8_bitslice: | ||
599 | vmov.i8 q8,#0x55 @ compose .LBS0 | ||
600 | vmov.i8 q9,#0x33 @ compose .LBS1 | ||
601 | vshr.u64 q10, q6, #1 | ||
602 | vshr.u64 q11, q4, #1 | ||
603 | veor q10, q10, q7 | ||
604 | veor q11, q11, q5 | ||
605 | vand q10, q10, q8 | ||
606 | vand q11, q11, q8 | ||
607 | veor q7, q7, q10 | ||
608 | vshl.u64 q10, q10, #1 | ||
609 | veor q5, q5, q11 | ||
610 | vshl.u64 q11, q11, #1 | ||
611 | veor q6, q6, q10 | ||
612 | veor q4, q4, q11 | ||
613 | vshr.u64 q10, q2, #1 | ||
614 | vshr.u64 q11, q0, #1 | ||
615 | veor q10, q10, q3 | ||
616 | veor q11, q11, q1 | ||
617 | vand q10, q10, q8 | ||
618 | vand q11, q11, q8 | ||
619 | veor q3, q3, q10 | ||
620 | vshl.u64 q10, q10, #1 | ||
621 | veor q1, q1, q11 | ||
622 | vshl.u64 q11, q11, #1 | ||
623 | veor q2, q2, q10 | ||
624 | veor q0, q0, q11 | ||
625 | vmov.i8 q8,#0x0f @ compose .LBS2 | ||
626 | vshr.u64 q10, q5, #2 | ||
627 | vshr.u64 q11, q4, #2 | ||
628 | veor q10, q10, q7 | ||
629 | veor q11, q11, q6 | ||
630 | vand q10, q10, q9 | ||
631 | vand q11, q11, q9 | ||
632 | veor q7, q7, q10 | ||
633 | vshl.u64 q10, q10, #2 | ||
634 | veor q6, q6, q11 | ||
635 | vshl.u64 q11, q11, #2 | ||
636 | veor q5, q5, q10 | ||
637 | veor q4, q4, q11 | ||
638 | vshr.u64 q10, q1, #2 | ||
639 | vshr.u64 q11, q0, #2 | ||
640 | veor q10, q10, q3 | ||
641 | veor q11, q11, q2 | ||
642 | vand q10, q10, q9 | ||
643 | vand q11, q11, q9 | ||
644 | veor q3, q3, q10 | ||
645 | vshl.u64 q10, q10, #2 | ||
646 | veor q2, q2, q11 | ||
647 | vshl.u64 q11, q11, #2 | ||
648 | veor q1, q1, q10 | ||
649 | veor q0, q0, q11 | ||
650 | vshr.u64 q10, q3, #4 | ||
651 | vshr.u64 q11, q2, #4 | ||
652 | veor q10, q10, q7 | ||
653 | veor q11, q11, q6 | ||
654 | vand q10, q10, q8 | ||
655 | vand q11, q11, q8 | ||
656 | veor q7, q7, q10 | ||
657 | vshl.u64 q10, q10, #4 | ||
658 | veor q6, q6, q11 | ||
659 | vshl.u64 q11, q11, #4 | ||
660 | veor q3, q3, q10 | ||
661 | veor q2, q2, q11 | ||
662 | vshr.u64 q10, q1, #4 | ||
663 | vshr.u64 q11, q0, #4 | ||
664 | veor q10, q10, q5 | ||
665 | veor q11, q11, q4 | ||
666 | vand q10, q10, q8 | ||
667 | vand q11, q11, q8 | ||
668 | veor q5, q5, q10 | ||
669 | vshl.u64 q10, q10, #4 | ||
670 | veor q4, q4, q11 | ||
671 | vshl.u64 q11, q11, #4 | ||
672 | veor q1, q1, q10 | ||
673 | veor q0, q0, q11 | ||
674 | sub r5,r5,#1 | ||
675 | b .Lenc_sbox | ||
676 | .align 4 | ||
677 | .Lenc_loop: | ||
678 | vldmia r4!, {q8-q11} | ||
679 | veor q8, q8, q0 | ||
680 | veor q9, q9, q1 | ||
681 | vtbl.8 d0, {q8}, d24 | ||
682 | vtbl.8 d1, {q8}, d25 | ||
683 | vldmia r4!, {q8} | ||
684 | veor q10, q10, q2 | ||
685 | vtbl.8 d2, {q9}, d24 | ||
686 | vtbl.8 d3, {q9}, d25 | ||
687 | vldmia r4!, {q9} | ||
688 | veor q11, q11, q3 | ||
689 | vtbl.8 d4, {q10}, d24 | ||
690 | vtbl.8 d5, {q10}, d25 | ||
691 | vldmia r4!, {q10} | ||
692 | vtbl.8 d6, {q11}, d24 | ||
693 | vtbl.8 d7, {q11}, d25 | ||
694 | vldmia r4!, {q11} | ||
695 | veor q8, q8, q4 | ||
696 | veor q9, q9, q5 | ||
697 | vtbl.8 d8, {q8}, d24 | ||
698 | vtbl.8 d9, {q8}, d25 | ||
699 | veor q10, q10, q6 | ||
700 | vtbl.8 d10, {q9}, d24 | ||
701 | vtbl.8 d11, {q9}, d25 | ||
702 | veor q11, q11, q7 | ||
703 | vtbl.8 d12, {q10}, d24 | ||
704 | vtbl.8 d13, {q10}, d25 | ||
705 | vtbl.8 d14, {q11}, d24 | ||
706 | vtbl.8 d15, {q11}, d25 | ||
707 | .Lenc_sbox: | ||
708 | veor q2, q2, q1 | ||
709 | veor q5, q5, q6 | ||
710 | veor q3, q3, q0 | ||
711 | veor q6, q6, q2 | ||
712 | veor q5, q5, q0 | ||
713 | |||
714 | veor q6, q6, q3 | ||
715 | veor q3, q3, q7 | ||
716 | veor q7, q7, q5 | ||
717 | veor q3, q3, q4 | ||
718 | veor q4, q4, q5 | ||
719 | |||
720 | veor q2, q2, q7 | ||
721 | veor q3, q3, q1 | ||
722 | veor q1, q1, q5 | ||
723 | veor q11, q7, q4 | ||
724 | veor q10, q1, q2 | ||
725 | veor q9, q5, q3 | ||
726 | veor q13, q2, q4 | ||
727 | vmov q8, q10 | ||
728 | veor q12, q6, q0 | ||
729 | |||
730 | vorr q10, q10, q9 | ||
731 | veor q15, q11, q8 | ||
732 | vand q14, q11, q12 | ||
733 | vorr q11, q11, q12 | ||
734 | veor q12, q12, q9 | ||
735 | vand q8, q8, q9 | ||
736 | veor q9, q3, q0 | ||
737 | vand q15, q15, q12 | ||
738 | vand q13, q13, q9 | ||
739 | veor q9, q7, q1 | ||
740 | veor q12, q5, q6 | ||
741 | veor q11, q11, q13 | ||
742 | veor q10, q10, q13 | ||
743 | vand q13, q9, q12 | ||
744 | vorr q9, q9, q12 | ||
745 | veor q11, q11, q15 | ||
746 | veor q8, q8, q13 | ||
747 | veor q10, q10, q14 | ||
748 | veor q9, q9, q15 | ||
749 | veor q8, q8, q14 | ||
750 | vand q12, q2, q3 | ||
751 | veor q9, q9, q14 | ||
752 | vand q13, q4, q0 | ||
753 | vand q14, q1, q5 | ||
754 | vorr q15, q7, q6 | ||
755 | veor q11, q11, q12 | ||
756 | veor q9, q9, q14 | ||
757 | veor q8, q8, q15 | ||
758 | veor q10, q10, q13 | ||
759 | |||
760 | @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 | ||
761 | |||
762 | @ new smaller inversion | ||
763 | |||
764 | vand q14, q11, q9 | ||
765 | vmov q12, q8 | ||
766 | |||
767 | veor q13, q10, q14 | ||
768 | veor q15, q8, q14 | ||
769 | veor q14, q8, q14 @ q14=q15 | ||
770 | |||
771 | vbsl q13, q9, q8 | ||
772 | vbsl q15, q11, q10 | ||
773 | veor q11, q11, q10 | ||
774 | |||
775 | vbsl q12, q13, q14 | ||
776 | vbsl q8, q14, q13 | ||
777 | |||
778 | vand q14, q12, q15 | ||
779 | veor q9, q9, q8 | ||
780 | |||
781 | veor q14, q14, q11 | ||
782 | veor q12, q6, q0 | ||
783 | veor q8, q5, q3 | ||
784 | veor q10, q15, q14 | ||
785 | vand q10, q10, q6 | ||
786 | veor q6, q6, q5 | ||
787 | vand q11, q5, q15 | ||
788 | vand q6, q6, q14 | ||
789 | veor q5, q11, q10 | ||
790 | veor q6, q6, q11 | ||
791 | veor q15, q15, q13 | ||
792 | veor q14, q14, q9 | ||
793 | veor q11, q15, q14 | ||
794 | veor q10, q13, q9 | ||
795 | vand q11, q11, q12 | ||
796 | vand q10, q10, q0 | ||
797 | veor q12, q12, q8 | ||
798 | veor q0, q0, q3 | ||
799 | vand q8, q8, q15 | ||
800 | vand q3, q3, q13 | ||
801 | vand q12, q12, q14 | ||
802 | vand q0, q0, q9 | ||
803 | veor q8, q8, q12 | ||
804 | veor q0, q0, q3 | ||
805 | veor q12, q12, q11 | ||
806 | veor q3, q3, q10 | ||
807 | veor q6, q6, q12 | ||
808 | veor q0, q0, q12 | ||
809 | veor q5, q5, q8 | ||
810 | veor q3, q3, q8 | ||
811 | |||
812 | veor q12, q7, q4 | ||
813 | veor q8, q1, q2 | ||
814 | veor q11, q15, q14 | ||
815 | veor q10, q13, q9 | ||
816 | vand q11, q11, q12 | ||
817 | vand q10, q10, q4 | ||
818 | veor q12, q12, q8 | ||
819 | veor q4, q4, q2 | ||
820 | vand q8, q8, q15 | ||
821 | vand q2, q2, q13 | ||
822 | vand q12, q12, q14 | ||
823 | vand q4, q4, q9 | ||
824 | veor q8, q8, q12 | ||
825 | veor q4, q4, q2 | ||
826 | veor q12, q12, q11 | ||
827 | veor q2, q2, q10 | ||
828 | veor q15, q15, q13 | ||
829 | veor q14, q14, q9 | ||
830 | veor q10, q15, q14 | ||
831 | vand q10, q10, q7 | ||
832 | veor q7, q7, q1 | ||
833 | vand q11, q1, q15 | ||
834 | vand q7, q7, q14 | ||
835 | veor q1, q11, q10 | ||
836 | veor q7, q7, q11 | ||
837 | veor q7, q7, q12 | ||
838 | veor q4, q4, q12 | ||
839 | veor q1, q1, q8 | ||
840 | veor q2, q2, q8 | ||
841 | veor q7, q7, q0 | ||
842 | veor q1, q1, q6 | ||
843 | veor q6, q6, q0 | ||
844 | veor q4, q4, q7 | ||
845 | veor q0, q0, q1 | ||
846 | |||
847 | veor q1, q1, q5 | ||
848 | veor q5, q5, q2 | ||
849 | veor q2, q2, q3 | ||
850 | veor q3, q3, q5 | ||
851 | veor q4, q4, q5 | ||
852 | |||
853 | veor q6, q6, q3 | ||
854 | subs r5,r5,#1 | ||
855 | bcc .Lenc_done | ||
856 | vext.8 q8, q0, q0, #12 @ x0 <<< 32 | ||
857 | vext.8 q9, q1, q1, #12 | ||
858 | veor q0, q0, q8 @ x0 ^ (x0 <<< 32) | ||
859 | vext.8 q10, q4, q4, #12 | ||
860 | veor q1, q1, q9 | ||
861 | vext.8 q11, q6, q6, #12 | ||
862 | veor q4, q4, q10 | ||
863 | vext.8 q12, q3, q3, #12 | ||
864 | veor q6, q6, q11 | ||
865 | vext.8 q13, q7, q7, #12 | ||
866 | veor q3, q3, q12 | ||
867 | vext.8 q14, q2, q2, #12 | ||
868 | veor q7, q7, q13 | ||
869 | vext.8 q15, q5, q5, #12 | ||
870 | veor q2, q2, q14 | ||
871 | |||
872 | veor q9, q9, q0 | ||
873 | veor q5, q5, q15 | ||
874 | vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) | ||
875 | veor q10, q10, q1 | ||
876 | veor q8, q8, q5 | ||
877 | veor q9, q9, q5 | ||
878 | vext.8 q1, q1, q1, #8 | ||
879 | veor q13, q13, q3 | ||
880 | veor q0, q0, q8 | ||
881 | veor q14, q14, q7 | ||
882 | veor q1, q1, q9 | ||
883 | vext.8 q8, q3, q3, #8 | ||
884 | veor q12, q12, q6 | ||
885 | vext.8 q9, q7, q7, #8 | ||
886 | veor q15, q15, q2 | ||
887 | vext.8 q3, q6, q6, #8 | ||
888 | veor q11, q11, q4 | ||
889 | vext.8 q7, q5, q5, #8 | ||
890 | veor q12, q12, q5 | ||
891 | vext.8 q6, q2, q2, #8 | ||
892 | veor q11, q11, q5 | ||
893 | vext.8 q2, q4, q4, #8 | ||
894 | veor q5, q9, q13 | ||
895 | veor q4, q8, q12 | ||
896 | veor q3, q3, q11 | ||
897 | veor q7, q7, q15 | ||
898 | veor q6, q6, q14 | ||
899 | @ vmov q4, q8 | ||
900 | veor q2, q2, q10 | ||
901 | @ vmov q5, q9 | ||
902 | vldmia r6, {q12} @ .LSR | ||
903 | ite eq @ Thumb2 thing, sanity check in ARM | ||
904 | addeq r6,r6,#0x10 | ||
905 | bne .Lenc_loop | ||
906 | vldmia r6, {q12} @ .LSRM0 | ||
907 | b .Lenc_loop | ||
908 | .align 4 | ||
909 | .Lenc_done: | ||
910 | vmov.i8 q8,#0x55 @ compose .LBS0 | ||
911 | vmov.i8 q9,#0x33 @ compose .LBS1 | ||
912 | vshr.u64 q10, q2, #1 | ||
913 | vshr.u64 q11, q3, #1 | ||
914 | veor q10, q10, q5 | ||
915 | veor q11, q11, q7 | ||
916 | vand q10, q10, q8 | ||
917 | vand q11, q11, q8 | ||
918 | veor q5, q5, q10 | ||
919 | vshl.u64 q10, q10, #1 | ||
920 | veor q7, q7, q11 | ||
921 | vshl.u64 q11, q11, #1 | ||
922 | veor q2, q2, q10 | ||
923 | veor q3, q3, q11 | ||
924 | vshr.u64 q10, q4, #1 | ||
925 | vshr.u64 q11, q0, #1 | ||
926 | veor q10, q10, q6 | ||
927 | veor q11, q11, q1 | ||
928 | vand q10, q10, q8 | ||
929 | vand q11, q11, q8 | ||
930 | veor q6, q6, q10 | ||
931 | vshl.u64 q10, q10, #1 | ||
932 | veor q1, q1, q11 | ||
933 | vshl.u64 q11, q11, #1 | ||
934 | veor q4, q4, q10 | ||
935 | veor q0, q0, q11 | ||
936 | vmov.i8 q8,#0x0f @ compose .LBS2 | ||
937 | vshr.u64 q10, q7, #2 | ||
938 | vshr.u64 q11, q3, #2 | ||
939 | veor q10, q10, q5 | ||
940 | veor q11, q11, q2 | ||
941 | vand q10, q10, q9 | ||
942 | vand q11, q11, q9 | ||
943 | veor q5, q5, q10 | ||
944 | vshl.u64 q10, q10, #2 | ||
945 | veor q2, q2, q11 | ||
946 | vshl.u64 q11, q11, #2 | ||
947 | veor q7, q7, q10 | ||
948 | veor q3, q3, q11 | ||
949 | vshr.u64 q10, q1, #2 | ||
950 | vshr.u64 q11, q0, #2 | ||
951 | veor q10, q10, q6 | ||
952 | veor q11, q11, q4 | ||
953 | vand q10, q10, q9 | ||
954 | vand q11, q11, q9 | ||
955 | veor q6, q6, q10 | ||
956 | vshl.u64 q10, q10, #2 | ||
957 | veor q4, q4, q11 | ||
958 | vshl.u64 q11, q11, #2 | ||
959 | veor q1, q1, q10 | ||
960 | veor q0, q0, q11 | ||
961 | vshr.u64 q10, q6, #4 | ||
962 | vshr.u64 q11, q4, #4 | ||
963 | veor q10, q10, q5 | ||
964 | veor q11, q11, q2 | ||
965 | vand q10, q10, q8 | ||
966 | vand q11, q11, q8 | ||
967 | veor q5, q5, q10 | ||
968 | vshl.u64 q10, q10, #4 | ||
969 | veor q2, q2, q11 | ||
970 | vshl.u64 q11, q11, #4 | ||
971 | veor q6, q6, q10 | ||
972 | veor q4, q4, q11 | ||
973 | vshr.u64 q10, q1, #4 | ||
974 | vshr.u64 q11, q0, #4 | ||
975 | veor q10, q10, q7 | ||
976 | veor q11, q11, q3 | ||
977 | vand q10, q10, q8 | ||
978 | vand q11, q11, q8 | ||
979 | veor q7, q7, q10 | ||
980 | vshl.u64 q10, q10, #4 | ||
981 | veor q3, q3, q11 | ||
982 | vshl.u64 q11, q11, #4 | ||
983 | veor q1, q1, q10 | ||
984 | veor q0, q0, q11 | ||
985 | vldmia r4, {q8} @ last round key | ||
986 | veor q4, q4, q8 | ||
987 | veor q6, q6, q8 | ||
988 | veor q3, q3, q8 | ||
989 | veor q7, q7, q8 | ||
990 | veor q2, q2, q8 | ||
991 | veor q5, q5, q8 | ||
992 | veor q0, q0, q8 | ||
993 | veor q1, q1, q8 | ||
994 | bx lr | ||
995 | .size _bsaes_encrypt8,.-_bsaes_encrypt8 | ||
996 | .type _bsaes_key_convert,%function | ||
997 | .align 4 | ||
998 | _bsaes_key_convert: | ||
999 | adr r6,_bsaes_key_convert | ||
1000 | vld1.8 {q7}, [r4]! @ load round 0 key | ||
1001 | sub r6,r6,#_bsaes_key_convert-.LM0 | ||
1002 | vld1.8 {q15}, [r4]! @ load round 1 key | ||
1003 | |||
1004 | vmov.i8 q8, #0x01 @ bit masks | ||
1005 | vmov.i8 q9, #0x02 | ||
1006 | vmov.i8 q10, #0x04 | ||
1007 | vmov.i8 q11, #0x08 | ||
1008 | vmov.i8 q12, #0x10 | ||
1009 | vmov.i8 q13, #0x20 | ||
1010 | vldmia r6, {q14} @ .LM0 | ||
1011 | |||
1012 | #ifdef __ARMEL__ | ||
1013 | vrev32.8 q7, q7 | ||
1014 | vrev32.8 q15, q15 | ||
1015 | #endif | ||
1016 | sub r5,r5,#1 | ||
1017 | vstmia r12!, {q7} @ save round 0 key | ||
1018 | b .Lkey_loop | ||
1019 | |||
1020 | .align 4 | ||
1021 | .Lkey_loop: | ||
1022 | vtbl.8 d14,{q15},d28 | ||
1023 | vtbl.8 d15,{q15},d29 | ||
1024 | vmov.i8 q6, #0x40 | ||
1025 | vmov.i8 q15, #0x80 | ||
1026 | |||
1027 | vtst.8 q0, q7, q8 | ||
1028 | vtst.8 q1, q7, q9 | ||
1029 | vtst.8 q2, q7, q10 | ||
1030 | vtst.8 q3, q7, q11 | ||
1031 | vtst.8 q4, q7, q12 | ||
1032 | vtst.8 q5, q7, q13 | ||
1033 | vtst.8 q6, q7, q6 | ||
1034 | vtst.8 q7, q7, q15 | ||
1035 | vld1.8 {q15}, [r4]! @ load next round key | ||
1036 | vmvn q0, q0 @ "pnot" | ||
1037 | vmvn q1, q1 | ||
1038 | vmvn q5, q5 | ||
1039 | vmvn q6, q6 | ||
1040 | #ifdef __ARMEL__ | ||
1041 | vrev32.8 q15, q15 | ||
1042 | #endif | ||
1043 | subs r5,r5,#1 | ||
1044 | vstmia r12!,{q0-q7} @ write bit-sliced round key | ||
1045 | bne .Lkey_loop | ||
1046 | |||
1047 | vmov.i8 q7,#0x63 @ compose .L63 | ||
1048 | @ don't save last round key | ||
1049 | bx lr | ||
1050 | .size _bsaes_key_convert,.-_bsaes_key_convert | ||
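What the vtst/vmvn sequence in _bsaes_key_convert computes, modeled in C as a sketch (the .LM0 byte permutation applied by vtbl is omitted, and the 0x63 interpretation is an inference from the "pnot" planes and the .L63 constant composed at the end): each key byte is spread across eight 0x00/0xff bit-plane bytes, and planes 0, 1, 5 and 6 are complemented, folding the S-box affine constant 0x63 into the key schedule.

#include <stdint.h>

static void bitslice_round_key(uint8_t plane[8][16], const uint8_t key[16])
{
	for (int i = 0; i < 8; i++)
		for (int j = 0; j < 16; j++) {
			uint8_t bit = (key[j] >> i) & 1;	/* vtst.8 against 1 << i */

			if (0x63 & (1 << i))	/* vmvn on planes 0, 1, 5, 6 */
				bit ^= 1;
			plane[i][j] = bit ? 0xff : 0x00;
		}
}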
1051 | .extern AES_cbc_encrypt | ||
1052 | .extern AES_decrypt | ||
1053 | |||
1054 | .global bsaes_cbc_encrypt | ||
1055 | .type bsaes_cbc_encrypt,%function | ||
1056 | .align 5 | ||
1057 | bsaes_cbc_encrypt: | ||
1058 | #ifndef __KERNEL__ | ||
1059 | cmp r2, #128 | ||
1060 | #ifndef __thumb__ | ||
1061 | blo AES_cbc_encrypt | ||
1062 | #else | ||
1063 | bhs 1f | ||
1064 | b AES_cbc_encrypt | ||
1065 | 1: | ||
1066 | #endif | ||
1067 | #endif | ||
1068 | |||
1069 | @ it is up to the caller to make sure we are called with enc == 0 | ||
1070 | |||
1071 | mov ip, sp | ||
1072 | stmdb sp!, {r4-r10, lr} | ||
1073 | VFP_ABI_PUSH | ||
1074 | ldr r8, [ip] @ IV is 1st arg on the stack | ||
1075 | mov r2, r2, lsr#4 @ len in 16 byte blocks | ||
1076 | sub sp, #0x10 @ scratch space to carry over the IV | ||
1077 | mov r9, sp @ save sp | ||
1078 | |||
1079 | ldr r10, [r3, #240] @ get # of rounds | ||
1080 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1081 | @ allocate the key schedule on the stack | ||
1082 | sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key | ||
1083 | add r12, #96 @ size of bit-sliced key schedule | ||
1084 | |||
1085 | @ populate the key schedule | ||
1086 | mov r4, r3 @ pass key | ||
1087 | mov r5, r10 @ pass # of rounds | ||
1088 | mov sp, r12 @ sp is sp | ||
1089 | bl _bsaes_key_convert | ||
1090 | vldmia sp, {q6} | ||
1091 | vstmia r12, {q15} @ save last round key | ||
1092 | veor q7, q7, q6 @ fix up round 0 key | ||
1093 | vstmia sp, {q7} | ||
1094 | #else | ||
1095 | ldr r12, [r3, #244] | ||
1096 | eors r12, #1 | ||
1097 | beq 0f | ||
1098 | |||
1099 | @ populate the key schedule | ||
1100 | str r12, [r3, #244] | ||
1101 | mov r4, r3 @ pass key | ||
1102 | mov r5, r10 @ pass # of rounds | ||
1103 | add r12, r3, #248 @ pass key schedule | ||
1104 | bl _bsaes_key_convert | ||
1105 | add r4, r3, #248 | ||
1106 | vldmia r4, {q6} | ||
1107 | vstmia r12, {q15} @ save last round key | ||
1108 | veor q7, q7, q6 @ fix up round 0 key | ||
1109 | vstmia r4, {q7} | ||
1110 | |||
1111 | .align 2 | ||
1112 | 0: | ||
1113 | #endif | ||
1114 | |||
1115 | vld1.8 {q15}, [r8] @ load IV | ||
1116 | b .Lcbc_dec_loop | ||
1117 | |||
1118 | .align 4 | ||
1119 | .Lcbc_dec_loop: | ||
1120 | subs r2, r2, #0x8 | ||
1121 | bmi .Lcbc_dec_loop_finish | ||
1122 | |||
1123 | vld1.8 {q0-q1}, [r0]! @ load input | ||
1124 | vld1.8 {q2-q3}, [r0]! | ||
1125 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1126 | mov r4, sp @ pass the key | ||
1127 | #else | ||
1128 | add r4, r3, #248 | ||
1129 | #endif | ||
1130 | vld1.8 {q4-q5}, [r0]! | ||
1131 | mov r5, r10 | ||
1132 | vld1.8 {q6-q7}, [r0] | ||
1133 | sub r0, r0, #0x60 | ||
1134 | vstmia r9, {q15} @ put aside IV | ||
1135 | |||
1136 | bl _bsaes_decrypt8 | ||
1137 | |||
1138 | vldmia r9, {q14} @ reload IV | ||
1139 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1140 | veor q0, q0, q14 @ ^= IV | ||
1141 | vld1.8 {q10-q11}, [r0]! | ||
1142 | veor q1, q1, q8 | ||
1143 | veor q6, q6, q9 | ||
1144 | vld1.8 {q12-q13}, [r0]! | ||
1145 | veor q4, q4, q10 | ||
1146 | veor q2, q2, q11 | ||
1147 | vld1.8 {q14-q15}, [r0]! | ||
1148 | veor q7, q7, q12 | ||
1149 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1150 | veor q3, q3, q13 | ||
1151 | vst1.8 {q6}, [r1]! | ||
1152 | veor q5, q5, q14 | ||
1153 | vst1.8 {q4}, [r1]! | ||
1154 | vst1.8 {q2}, [r1]! | ||
1155 | vst1.8 {q7}, [r1]! | ||
1156 | vst1.8 {q3}, [r1]! | ||
1157 | vst1.8 {q5}, [r1]! | ||
1158 | |||
1159 | b .Lcbc_dec_loop | ||
1160 | |||
1161 | .Lcbc_dec_loop_finish: | ||
1162 | adds r2, r2, #8 | ||
1163 | beq .Lcbc_dec_done | ||
1164 | |||
1165 | vld1.8 {q0}, [r0]! @ load input | ||
1166 | cmp r2, #2 | ||
1167 | blo .Lcbc_dec_one | ||
1168 | vld1.8 {q1}, [r0]! | ||
1169 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1170 | mov r4, sp @ pass the key | ||
1171 | #else | ||
1172 | add r4, r3, #248 | ||
1173 | #endif | ||
1174 | mov r5, r10 | ||
1175 | vstmia r9, {q15} @ put aside IV | ||
1176 | beq .Lcbc_dec_two | ||
1177 | vld1.8 {q2}, [r0]! | ||
1178 | cmp r2, #4 | ||
1179 | blo .Lcbc_dec_three | ||
1180 | vld1.8 {q3}, [r0]! | ||
1181 | beq .Lcbc_dec_four | ||
1182 | vld1.8 {q4}, [r0]! | ||
1183 | cmp r2, #6 | ||
1184 | blo .Lcbc_dec_five | ||
1185 | vld1.8 {q5}, [r0]! | ||
1186 | beq .Lcbc_dec_six | ||
1187 | vld1.8 {q6}, [r0]! | ||
1188 | sub r0, r0, #0x70 | ||
1189 | |||
1190 | bl _bsaes_decrypt8 | ||
1191 | |||
1192 | vldmia r9, {q14} @ reload IV | ||
1193 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1194 | veor q0, q0, q14 @ ^= IV | ||
1195 | vld1.8 {q10-q11}, [r0]! | ||
1196 | veor q1, q1, q8 | ||
1197 | veor q6, q6, q9 | ||
1198 | vld1.8 {q12-q13}, [r0]! | ||
1199 | veor q4, q4, q10 | ||
1200 | veor q2, q2, q11 | ||
1201 | vld1.8 {q15}, [r0]! | ||
1202 | veor q7, q7, q12 | ||
1203 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1204 | veor q3, q3, q13 | ||
1205 | vst1.8 {q6}, [r1]! | ||
1206 | vst1.8 {q4}, [r1]! | ||
1207 | vst1.8 {q2}, [r1]! | ||
1208 | vst1.8 {q7}, [r1]! | ||
1209 | vst1.8 {q3}, [r1]! | ||
1210 | b .Lcbc_dec_done | ||
1211 | .align 4 | ||
1212 | .Lcbc_dec_six: | ||
1213 | sub r0, r0, #0x60 | ||
1214 | bl _bsaes_decrypt8 | ||
1215 | vldmia r9,{q14} @ reload IV | ||
1216 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1217 | veor q0, q0, q14 @ ^= IV | ||
1218 | vld1.8 {q10-q11}, [r0]! | ||
1219 | veor q1, q1, q8 | ||
1220 | veor q6, q6, q9 | ||
1221 | vld1.8 {q12}, [r0]! | ||
1222 | veor q4, q4, q10 | ||
1223 | veor q2, q2, q11 | ||
1224 | vld1.8 {q15}, [r0]! | ||
1225 | veor q7, q7, q12 | ||
1226 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1227 | vst1.8 {q6}, [r1]! | ||
1228 | vst1.8 {q4}, [r1]! | ||
1229 | vst1.8 {q2}, [r1]! | ||
1230 | vst1.8 {q7}, [r1]! | ||
1231 | b .Lcbc_dec_done | ||
1232 | .align 4 | ||
1233 | .Lcbc_dec_five: | ||
1234 | sub r0, r0, #0x50 | ||
1235 | bl _bsaes_decrypt8 | ||
1236 | vldmia r9, {q14} @ reload IV | ||
1237 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1238 | veor q0, q0, q14 @ ^= IV | ||
1239 | vld1.8 {q10-q11}, [r0]! | ||
1240 | veor q1, q1, q8 | ||
1241 | veor q6, q6, q9 | ||
1242 | vld1.8 {q15}, [r0]! | ||
1243 | veor q4, q4, q10 | ||
1244 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1245 | veor q2, q2, q11 | ||
1246 | vst1.8 {q6}, [r1]! | ||
1247 | vst1.8 {q4}, [r1]! | ||
1248 | vst1.8 {q2}, [r1]! | ||
1249 | b .Lcbc_dec_done | ||
1250 | .align 4 | ||
1251 | .Lcbc_dec_four: | ||
1252 | sub r0, r0, #0x40 | ||
1253 | bl _bsaes_decrypt8 | ||
1254 | vldmia r9, {q14} @ reload IV | ||
1255 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1256 | veor q0, q0, q14 @ ^= IV | ||
1257 | vld1.8 {q10}, [r0]! | ||
1258 | veor q1, q1, q8 | ||
1259 | veor q6, q6, q9 | ||
1260 | vld1.8 {q15}, [r0]! | ||
1261 | veor q4, q4, q10 | ||
1262 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1263 | vst1.8 {q6}, [r1]! | ||
1264 | vst1.8 {q4}, [r1]! | ||
1265 | b .Lcbc_dec_done | ||
1266 | .align 4 | ||
1267 | .Lcbc_dec_three: | ||
1268 | sub r0, r0, #0x30 | ||
1269 | bl _bsaes_decrypt8 | ||
1270 | vldmia r9, {q14} @ reload IV | ||
1271 | vld1.8 {q8-q9}, [r0]! @ reload input | ||
1272 | veor q0, q0, q14 @ ^= IV | ||
1273 | vld1.8 {q15}, [r0]! | ||
1274 | veor q1, q1, q8 | ||
1275 | veor q6, q6, q9 | ||
1276 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1277 | vst1.8 {q6}, [r1]! | ||
1278 | b .Lcbc_dec_done | ||
1279 | .align 4 | ||
1280 | .Lcbc_dec_two: | ||
1281 | sub r0, r0, #0x20 | ||
1282 | bl _bsaes_decrypt8 | ||
1283 | vldmia r9, {q14} @ reload IV | ||
1284 | vld1.8 {q8}, [r0]! @ reload input | ||
1285 | veor q0, q0, q14 @ ^= IV | ||
1286 | vld1.8 {q15}, [r0]! @ reload input | ||
1287 | veor q1, q1, q8 | ||
1288 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1289 | b .Lcbc_dec_done | ||
1290 | .align 4 | ||
1291 | .Lcbc_dec_one: | ||
1292 | sub r0, r0, #0x10 | ||
1293 | mov r10, r1 @ save original out pointer | ||
1294 | mov r1, r9 @ use the iv scratch space as out buffer | ||
1295 | mov r2, r3 | ||
1296 | vmov q4,q15 @ just in case ensure that IV | ||
1297 | vmov q5,q0 @ and input are preserved | ||
1298 | bl AES_decrypt | ||
1299 | vld1.8 {q0}, [r9,:64] @ load result | ||
1300 | veor q0, q0, q4 @ ^= IV | ||
1301 | vmov q15, q5 @ q5 holds input | ||
1302 | vst1.8 {q0}, [r10] @ write output | ||
1303 | |||
1304 | .Lcbc_dec_done: | ||
1305 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1306 | vmov.i32 q0, #0 | ||
1307 | vmov.i32 q1, #0 | ||
1308 | .Lcbc_dec_bzero: @ wipe key schedule [if any] | ||
1309 | vstmia sp!, {q0-q1} | ||
1310 | cmp sp, r9 | ||
1311 | bne .Lcbc_dec_bzero | ||
1312 | #endif | ||
1313 | |||
1314 | mov sp, r9 | ||
1315 | add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb | ||
1316 | vst1.8 {q15}, [r8] @ return IV | ||
1317 | VFP_ABI_POP | ||
1318 | ldmia sp!, {r4-r10, pc} | ||
1319 | .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt | ||
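The block structure of bsaes_cbc_encrypt above (decrypt up to eight blocks, reload the ciphertext, then veor each result with the IV or the preceding ciphertext block) is plain CBC decryption. A hedged one-block-at-a-time C model, with decrypt_block() standing in for the eight-way _bsaes_decrypt8:

#include <stdint.h>
#include <string.h>

static void cbc_decrypt(uint8_t *out, const uint8_t *in, size_t nblocks,
			uint8_t iv[16],
			void (*decrypt_block)(uint8_t *dst, const uint8_t *src))
{
	uint8_t prev[16], tmp[16];

	memcpy(prev, iv, 16);
	for (size_t n = 0; n < nblocks; n++, in += 16, out += 16) {
		memcpy(tmp, in, 16);		/* keep C[i] for the chain */
		decrypt_block(out, in);
		for (int i = 0; i < 16; i++)
			out[i] ^= prev[i];	/* P[i] = D(C[i]) ^ C[i-1] */
		memcpy(prev, tmp, 16);
	}
	memcpy(iv, prev, 16);	/* hand back the last C[i], as vst1.8 {q15}, [r8] does */
}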
1320 | .extern AES_encrypt | ||
1321 | .global bsaes_ctr32_encrypt_blocks | ||
1322 | .type bsaes_ctr32_encrypt_blocks,%function | ||
1323 | .align 5 | ||
1324 | bsaes_ctr32_encrypt_blocks: | ||
1325 | cmp r2, #8 @ use plain AES for | ||
1326 | blo .Lctr_enc_short @ small sizes | ||
1327 | |||
1328 | mov ip, sp | ||
1329 | stmdb sp!, {r4-r10, lr} | ||
1330 | VFP_ABI_PUSH | ||
1331 | ldr r8, [ip] @ ctr is 1st arg on the stack | ||
1332 | sub sp, sp, #0x10 @ scratch space to carry over the ctr | ||
1333 | mov r9, sp @ save sp | ||
1334 | |||
1335 | ldr r10, [r3, #240] @ get # of rounds | ||
1336 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1337 | @ allocate the key schedule on the stack | ||
1338 | sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key | ||
1339 | add r12, #96 @ size of bit-sliced key schedule | ||
1340 | |||
1341 | @ populate the key schedule | ||
1342 | mov r4, r3 @ pass key | ||
1343 | mov r5, r10 @ pass # of rounds | ||
1344 | mov sp, r12 @ sp is sp | ||
1345 | bl _bsaes_key_convert | ||
1346 | veor q7,q7,q15 @ fix up last round key | ||
1347 | vstmia r12, {q7} @ save last round key | ||
1348 | |||
1349 | vld1.8 {q0}, [r8] @ load counter | ||
1350 | add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 | ||
1351 | vldmia sp, {q4} @ load round0 key | ||
1352 | #else | ||
1353 | ldr r12, [r3, #244] | ||
1354 | eors r12, #1 | ||
1355 | beq 0f | ||
1356 | |||
1357 | @ populate the key schedule | ||
1358 | str r12, [r3, #244] | ||
1359 | mov r4, r3 @ pass key | ||
1360 | mov r5, r10 @ pass # of rounds | ||
1361 | add r12, r3, #248 @ pass key schedule | ||
1362 | bl _bsaes_key_convert | ||
1363 | veor q7,q7,q15 @ fix up last round key | ||
1364 | vstmia r12, {q7} @ save last round key | ||
1365 | |||
1366 | .align 2 | ||
1367 | 0: add r12, r3, #248 | ||
1368 | vld1.8 {q0}, [r8] @ load counter | ||
1369 | adrl r8, .LREVM0SR @ borrow r8 | ||
1370 | vldmia r12, {q4} @ load round0 key | ||
1371 | sub sp, #0x10 @ place for adjusted round0 key | ||
1372 | #endif | ||
1373 | |||
1374 | vmov.i32 q8,#1 @ compose 1<<96 | ||
1375 | veor q9,q9,q9 | ||
1376 | vrev32.8 q0,q0 | ||
1377 | vext.8 q8,q9,q8,#4 | ||
1378 | vrev32.8 q4,q4 | ||
1379 | vadd.u32 q9,q8,q8 @ compose 2<<96 | ||
1380 | vstmia sp, {q4} @ save adjusted round0 key | ||
1381 | b .Lctr_enc_loop | ||
1382 | |||
1383 | .align 4 | ||
1384 | .Lctr_enc_loop: | ||
1385 | vadd.u32 q10, q8, q9 @ compose 3<<96 | ||
1386 | vadd.u32 q1, q0, q8 @ +1 | ||
1387 | vadd.u32 q2, q0, q9 @ +2 | ||
1388 | vadd.u32 q3, q0, q10 @ +3 | ||
1389 | vadd.u32 q4, q1, q10 | ||
1390 | vadd.u32 q5, q2, q10 | ||
1391 | vadd.u32 q6, q3, q10 | ||
1392 | vadd.u32 q7, q4, q10 | ||
1393 | vadd.u32 q10, q5, q10 @ next counter | ||
1394 | |||
1395 | @ Borrow the prologue from _bsaes_encrypt8 to take the opportunity | ||
1396 | @ to flip the byte order in the 32-bit counter | ||
1397 | |||
1398 | vldmia sp, {q9} @ load round0 key | ||
1399 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1400 | add r4, sp, #0x10 @ pass next round key | ||
1401 | #else | ||
1402 | add r4, r3, #264 | ||
1403 | #endif | ||
1404 | vldmia r8, {q8} @ .LREVM0SR | ||
1405 | mov r5, r10 @ pass rounds | ||
1406 | vstmia r9, {q10} @ save next counter | ||
1407 | sub r6, r8, #.LREVM0SR-.LSR @ pass constants | ||
1408 | |||
1409 | bl _bsaes_encrypt8_alt | ||
1410 | |||
1411 | subs r2, r2, #8 | ||
1412 | blo .Lctr_enc_loop_done | ||
1413 | |||
1414 | vld1.8 {q8-q9}, [r0]! @ load input | ||
1415 | vld1.8 {q10-q11}, [r0]! | ||
1416 | veor q0, q8 | ||
1417 | veor q1, q9 | ||
1418 | vld1.8 {q12-q13}, [r0]! | ||
1419 | veor q4, q10 | ||
1420 | veor q6, q11 | ||
1421 | vld1.8 {q14-q15}, [r0]! | ||
1422 | veor q3, q12 | ||
1423 | vst1.8 {q0-q1}, [r1]! @ write output | ||
1424 | veor q7, q13 | ||
1425 | veor q2, q14 | ||
1426 | vst1.8 {q4}, [r1]! | ||
1427 | veor q5, q15 | ||
1428 | vst1.8 {q6}, [r1]! | ||
1429 | vmov.i32 q8, #1 @ compose 1<<96 | ||
1430 | vst1.8 {q3}, [r1]! | ||
1431 | veor q9, q9, q9 | ||
1432 | vst1.8 {q7}, [r1]! | ||
1433 | vext.8 q8, q9, q8, #4 | ||
1434 | vst1.8 {q2}, [r1]! | ||
1435 | vadd.u32 q9,q8,q8 @ compose 2<<96 | ||
1436 | vst1.8 {q5}, [r1]! | ||
1437 | vldmia r9, {q0} @ load counter | ||
1438 | |||
1439 | bne .Lctr_enc_loop | ||
1440 | b .Lctr_enc_done | ||
1441 | |||
1442 | .align 4 | ||
1443 | .Lctr_enc_loop_done: | ||
1444 | add r2, r2, #8 | ||
1445 | vld1.8 {q8}, [r0]! @ load input | ||
1446 | veor q0, q8 | ||
1447 | vst1.8 {q0}, [r1]! @ write output | ||
1448 | cmp r2, #2 | ||
1449 | blo .Lctr_enc_done | ||
1450 | vld1.8 {q9}, [r0]! | ||
1451 | veor q1, q9 | ||
1452 | vst1.8 {q1}, [r1]! | ||
1453 | beq .Lctr_enc_done | ||
1454 | vld1.8 {q10}, [r0]! | ||
1455 | veor q4, q10 | ||
1456 | vst1.8 {q4}, [r1]! | ||
1457 | cmp r2, #4 | ||
1458 | blo .Lctr_enc_done | ||
1459 | vld1.8 {q11}, [r0]! | ||
1460 | veor q6, q11 | ||
1461 | vst1.8 {q6}, [r1]! | ||
1462 | beq .Lctr_enc_done | ||
1463 | vld1.8 {q12}, [r0]! | ||
1464 | veor q3, q12 | ||
1465 | vst1.8 {q3}, [r1]! | ||
1466 | cmp r2, #6 | ||
1467 | blo .Lctr_enc_done | ||
1468 | vld1.8 {q13}, [r0]! | ||
1469 | veor q7, q13 | ||
1470 | vst1.8 {q7}, [r1]! | ||
1471 | beq .Lctr_enc_done | ||
1472 | vld1.8 {q14}, [r0] | ||
1473 | veor q2, q14 | ||
1474 | vst1.8 {q2}, [r1]! | ||
1475 | |||
1476 | .Lctr_enc_done: | ||
1477 | vmov.i32 q0, #0 | ||
1478 | vmov.i32 q1, #0 | ||
1479 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1480 | .Lctr_enc_bzero: @ wipe key schedule [if any] | ||
1481 | vstmia sp!, {q0-q1} | ||
1482 | cmp sp, r9 | ||
1483 | bne .Lctr_enc_bzero | ||
1484 | #else | ||
1485 | vstmia sp, {q0-q1} | ||
1486 | #endif | ||
1487 | |||
1488 | mov sp, r9 | ||
1489 | add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb | ||
1490 | VFP_ABI_POP | ||
1491 | ldmia sp!, {r4-r10, pc} @ return | ||
1492 | |||
1493 | .align 4 | ||
1494 | .Lctr_enc_short: | ||
1495 | ldr ip, [sp] @ ctr pointer is passed on stack | ||
1496 | stmdb sp!, {r4-r8, lr} | ||
1497 | |||
1498 | mov r4, r0 @ copy arguments | ||
1499 | mov r5, r1 | ||
1500 | mov r6, r2 | ||
1501 | mov r7, r3 | ||
1502 | ldr r8, [ip, #12] @ load counter LSW | ||
1503 | vld1.8 {q1}, [ip] @ load whole counter value | ||
1504 | #ifdef __ARMEL__ | ||
1505 | rev r8, r8 | ||
1506 | #endif | ||
1507 | sub sp, sp, #0x10 | ||
1508 | vst1.8 {q1}, [sp,:64] @ copy counter value | ||
1509 | sub sp, sp, #0x10 | ||
1510 | |||
1511 | .Lctr_enc_short_loop: | ||
1512 | add r0, sp, #0x10 @ input counter value | ||
1513 | mov r1, sp @ output on the stack | ||
1514 | mov r2, r7 @ key | ||
1515 | |||
1516 | bl AES_encrypt | ||
1517 | |||
1518 | vld1.8 {q0}, [r4]! @ load input | ||
1519 | vld1.8 {q1}, [sp,:64] @ load encrypted counter | ||
1520 | add r8, r8, #1 | ||
1521 | #ifdef __ARMEL__ | ||
1522 | rev r0, r8 | ||
1523 | str r0, [sp, #0x1c] @ next counter value | ||
1524 | #else | ||
1525 | str r8, [sp, #0x1c] @ next counter value | ||
1526 | #endif | ||
1527 | veor q0,q0,q1 | ||
1528 | vst1.8 {q0}, [r5]! @ store output | ||
1529 | subs r6, r6, #1 | ||
1530 | bne .Lctr_enc_short_loop | ||
1531 | |||
1532 | vmov.i32 q0, #0 | ||
1533 | vmov.i32 q1, #0 | ||
1534 | vstmia sp!, {q0-q1} | ||
1535 | |||
1536 | ldmia sp!, {r4-r8, pc} | ||
1537 | .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks | ||
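The .Lctr_enc_short path above is the whole mode in miniature: encrypt the counter block, XOR the keystream into the input, then bump only the low 32 bits of the counter, big-endian (the rev/add/str on the counter LSW). A hedged C model, with encrypt_block() standing in for AES_encrypt:

#include <stddef.h>
#include <stdint.h>

static void ctr32_encrypt(uint8_t *out, const uint8_t *in, size_t nblocks,
			  uint8_t ctr[16],
			  void (*encrypt_block)(uint8_t *dst, const uint8_t *src))
{
	uint8_t ks[16];

	for (size_t n = 0; n < nblocks; n++, in += 16, out += 16) {
		encrypt_block(ks, ctr);			/* keystream = E(counter) */
		for (int i = 0; i < 16; i++)
			out[i] = in[i] ^ ks[i];
		for (int i = 15; i >= 12; i--)		/* 32-bit big-endian ++ */
			if (++ctr[i])
				break;
	}
}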
1538 | .globl bsaes_xts_encrypt | ||
1539 | .type bsaes_xts_encrypt,%function | ||
1540 | .align 4 | ||
1541 | bsaes_xts_encrypt: | ||
1542 | mov ip, sp | ||
1543 | stmdb sp!, {r4-r10, lr} @ 0x20 | ||
1544 | VFP_ABI_PUSH | ||
1545 | mov r6, sp @ future r3 | ||
1546 | |||
1547 | mov r7, r0 | ||
1548 | mov r8, r1 | ||
1549 | mov r9, r2 | ||
1550 | mov r10, r3 | ||
1551 | |||
1552 | sub r0, sp, #0x10 @ 0x10 | ||
1553 | bic r0, #0xf @ align at 16 bytes | ||
1554 | mov sp, r0 | ||
1555 | |||
1556 | #ifdef XTS_CHAIN_TWEAK | ||
1557 | ldr r0, [ip] @ pointer to input tweak | ||
1558 | #else | ||
1559 | @ generate initial tweak | ||
1560 | ldr r0, [ip, #4] @ iv[] | ||
1561 | mov r1, sp | ||
1562 | ldr r2, [ip, #0] @ key2 | ||
1563 | bl AES_encrypt | ||
1564 | mov r0,sp @ pointer to initial tweak | ||
1565 | #endif | ||
1566 | |||
1567 | ldr r1, [r10, #240] @ get # of rounds | ||
1568 | mov r3, r6 | ||
1569 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1570 | @ allocate the key schedule on the stack | ||
1571 | sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key | ||
1572 | @ add r12, #96 @ size of bit-sliced key schedule | ||
1573 | sub r12, #48 @ place for tweak[9] | ||
1574 | |||
1575 | @ populate the key schedule | ||
1576 | mov r4, r10 @ pass key | ||
1577 | mov r5, r1 @ pass # of rounds | ||
1578 | mov sp, r12 | ||
1579 | add r12, #0x90 @ pass key schedule | ||
1580 | bl _bsaes_key_convert | ||
1581 | veor q7, q7, q15 @ fix up last round key | ||
1582 | vstmia r12, {q7} @ save last round key | ||
1583 | #else | ||
1584 | ldr r12, [r10, #244] | ||
1585 | eors r12, #1 | ||
1586 | beq 0f | ||
1587 | |||
1588 | str r12, [r10, #244] | ||
1589 | mov r4, r10 @ pass key | ||
1590 | mov r5, r1 @ pass # of rounds | ||
1591 | add r12, r10, #248 @ pass key schedule | ||
1592 | bl _bsaes_key_convert | ||
1593 | veor q7, q7, q15 @ fix up last round key | ||
1594 | vstmia r12, {q7} | ||
1595 | |||
1596 | .align 2 | ||
1597 | 0: sub sp, #0x90 @ place for tweak[9] | ||
1598 | #endif | ||
1599 | |||
1600 | vld1.8 {q8}, [r0] @ initial tweak | ||
1601 | adr r2, .Lxts_magic | ||
1602 | |||
1603 | subs r9, #0x80 | ||
1604 | blo .Lxts_enc_short | ||
1605 | b .Lxts_enc_loop | ||
1606 | |||
1607 | .align 4 | ||
1608 | .Lxts_enc_loop: | ||
1609 | vldmia r2, {q5} @ load XTS magic | ||
1610 | vshr.s64 q6, q8, #63 | ||
1611 | mov r0, sp | ||
1612 | vand q6, q6, q5 | ||
1613 | vadd.u64 q9, q8, q8 | ||
1614 | vst1.64 {q8}, [r0,:128]! | ||
1615 | vswp d13,d12 | ||
1616 | vshr.s64 q7, q9, #63 | ||
1617 | veor q9, q9, q6 | ||
1618 | vand q7, q7, q5 | ||
1619 | vadd.u64 q10, q9, q9 | ||
1620 | vst1.64 {q9}, [r0,:128]! | ||
1621 | vswp d15,d14 | ||
1622 | vshr.s64 q6, q10, #63 | ||
1623 | veor q10, q10, q7 | ||
1624 | vand q6, q6, q5 | ||
1625 | vld1.8 {q0}, [r7]! | ||
1626 | vadd.u64 q11, q10, q10 | ||
1627 | vst1.64 {q10}, [r0,:128]! | ||
1628 | vswp d13,d12 | ||
1629 | vshr.s64 q7, q11, #63 | ||
1630 | veor q11, q11, q6 | ||
1631 | vand q7, q7, q5 | ||
1632 | vld1.8 {q1}, [r7]! | ||
1633 | veor q0, q0, q8 | ||
1634 | vadd.u64 q12, q11, q11 | ||
1635 | vst1.64 {q11}, [r0,:128]! | ||
1636 | vswp d15,d14 | ||
1637 | vshr.s64 q6, q12, #63 | ||
1638 | veor q12, q12, q7 | ||
1639 | vand q6, q6, q5 | ||
1640 | vld1.8 {q2}, [r7]! | ||
1641 | veor q1, q1, q9 | ||
1642 | vadd.u64 q13, q12, q12 | ||
1643 | vst1.64 {q12}, [r0,:128]! | ||
1644 | vswp d13,d12 | ||
1645 | vshr.s64 q7, q13, #63 | ||
1646 | veor q13, q13, q6 | ||
1647 | vand q7, q7, q5 | ||
1648 | vld1.8 {q3}, [r7]! | ||
1649 | veor q2, q2, q10 | ||
1650 | vadd.u64 q14, q13, q13 | ||
1651 | vst1.64 {q13}, [r0,:128]! | ||
1652 | vswp d15,d14 | ||
1653 | vshr.s64 q6, q14, #63 | ||
1654 | veor q14, q14, q7 | ||
1655 | vand q6, q6, q5 | ||
1656 | vld1.8 {q4}, [r7]! | ||
1657 | veor q3, q3, q11 | ||
1658 | vadd.u64 q15, q14, q14 | ||
1659 | vst1.64 {q14}, [r0,:128]! | ||
1660 | vswp d13,d12 | ||
1661 | vshr.s64 q7, q15, #63 | ||
1662 | veor q15, q15, q6 | ||
1663 | vand q7, q7, q5 | ||
1664 | vld1.8 {q5}, [r7]! | ||
1665 | veor q4, q4, q12 | ||
1666 | vadd.u64 q8, q15, q15 | ||
1667 | vst1.64 {q15}, [r0,:128]! | ||
1668 | vswp d15,d14 | ||
1669 | veor q8, q8, q7 | ||
1670 | vst1.64 {q8}, [r0,:128] @ next round tweak | ||
1671 | |||
1672 | vld1.8 {q6-q7}, [r7]! | ||
1673 | veor q5, q5, q13 | ||
1674 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1675 | add r4, sp, #0x90 @ pass key schedule | ||
1676 | #else | ||
1677 | add r4, r10, #248 @ pass key schedule | ||
1678 | #endif | ||
1679 | veor q6, q6, q14 | ||
1680 | mov r5, r1 @ pass rounds | ||
1681 | veor q7, q7, q15 | ||
1682 | mov r0, sp | ||
1683 | |||
1684 | bl _bsaes_encrypt8 | ||
1685 | |||
1686 | vld1.64 {q8-q9}, [r0,:128]! | ||
1687 | vld1.64 {q10-q11}, [r0,:128]! | ||
1688 | veor q0, q0, q8 | ||
1689 | vld1.64 {q12-q13}, [r0,:128]! | ||
1690 | veor q1, q1, q9 | ||
1691 | veor q8, q4, q10 | ||
1692 | vst1.8 {q0-q1}, [r8]! | ||
1693 | veor q9, q6, q11 | ||
1694 | vld1.64 {q14-q15}, [r0,:128]! | ||
1695 | veor q10, q3, q12 | ||
1696 | vst1.8 {q8-q9}, [r8]! | ||
1697 | veor q11, q7, q13 | ||
1698 | veor q12, q2, q14 | ||
1699 | vst1.8 {q10-q11}, [r8]! | ||
1700 | veor q13, q5, q15 | ||
1701 | vst1.8 {q12-q13}, [r8]! | ||
1702 | |||
1703 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1704 | |||
1705 | subs r9, #0x80 | ||
1706 | bpl .Lxts_enc_loop | ||
1707 | |||
1708 | .Lxts_enc_short: | ||
1709 | adds r9, #0x70 | ||
1710 | bmi .Lxts_enc_done | ||
1711 | |||
1712 | vldmia r2, {q5} @ load XTS magic | ||
1713 | vshr.s64 q7, q8, #63 | ||
1714 | mov r0, sp | ||
1715 | vand q7, q7, q5 | ||
1716 | vadd.u64 q9, q8, q8 | ||
1717 | vst1.64 {q8}, [r0,:128]! | ||
1718 | vswp d15,d14 | ||
1719 | vshr.s64 q6, q9, #63 | ||
1720 | veor q9, q9, q7 | ||
1721 | vand q6, q6, q5 | ||
1722 | vadd.u64 q10, q9, q9 | ||
1723 | vst1.64 {q9}, [r0,:128]! | ||
1724 | vswp d13,d12 | ||
1725 | vshr.s64 q7, q10, #63 | ||
1726 | veor q10, q10, q6 | ||
1727 | vand q7, q7, q5 | ||
1728 | vld1.8 {q0}, [r7]! | ||
1729 | subs r9, #0x10 | ||
1730 | bmi .Lxts_enc_1 | ||
1731 | vadd.u64 q11, q10, q10 | ||
1732 | vst1.64 {q10}, [r0,:128]! | ||
1733 | vswp d15,d14 | ||
1734 | vshr.s64 q6, q11, #63 | ||
1735 | veor q11, q11, q7 | ||
1736 | vand q6, q6, q5 | ||
1737 | vld1.8 {q1}, [r7]! | ||
1738 | subs r9, #0x10 | ||
1739 | bmi .Lxts_enc_2 | ||
1740 | veor q0, q0, q8 | ||
1741 | vadd.u64 q12, q11, q11 | ||
1742 | vst1.64 {q11}, [r0,:128]! | ||
1743 | vswp d13,d12 | ||
1744 | vshr.s64 q7, q12, #63 | ||
1745 | veor q12, q12, q6 | ||
1746 | vand q7, q7, q5 | ||
1747 | vld1.8 {q2}, [r7]! | ||
1748 | subs r9, #0x10 | ||
1749 | bmi .Lxts_enc_3 | ||
1750 | veor q1, q1, q9 | ||
1751 | vadd.u64 q13, q12, q12 | ||
1752 | vst1.64 {q12}, [r0,:128]! | ||
1753 | vswp d15,d14 | ||
1754 | vshr.s64 q6, q13, #63 | ||
1755 | veor q13, q13, q7 | ||
1756 | vand q6, q6, q5 | ||
1757 | vld1.8 {q3}, [r7]! | ||
1758 | subs r9, #0x10 | ||
1759 | bmi .Lxts_enc_4 | ||
1760 | veor q2, q2, q10 | ||
1761 | vadd.u64 q14, q13, q13 | ||
1762 | vst1.64 {q13}, [r0,:128]! | ||
1763 | vswp d13,d12 | ||
1764 | vshr.s64 q7, q14, #63 | ||
1765 | veor q14, q14, q6 | ||
1766 | vand q7, q7, q5 | ||
1767 | vld1.8 {q4}, [r7]! | ||
1768 | subs r9, #0x10 | ||
1769 | bmi .Lxts_enc_5 | ||
1770 | veor q3, q3, q11 | ||
1771 | vadd.u64 q15, q14, q14 | ||
1772 | vst1.64 {q14}, [r0,:128]! | ||
1773 | vswp d15,d14 | ||
1774 | vshr.s64 q6, q15, #63 | ||
1775 | veor q15, q15, q7 | ||
1776 | vand q6, q6, q5 | ||
1777 | vld1.8 {q5}, [r7]! | ||
1778 | subs r9, #0x10 | ||
1779 | bmi .Lxts_enc_6 | ||
1780 | veor q4, q4, q12 | ||
1781 | sub r9, #0x10 | ||
1782 | vst1.64 {q15}, [r0,:128] @ next round tweak | ||
1783 | |||
1784 | vld1.8 {q6}, [r7]! | ||
1785 | veor q5, q5, q13 | ||
1786 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1787 | add r4, sp, #0x90 @ pass key schedule | ||
1788 | #else | ||
1789 | add r4, r10, #248 @ pass key schedule | ||
1790 | #endif | ||
1791 | veor q6, q6, q14 | ||
1792 | mov r5, r1 @ pass rounds | ||
1793 | mov r0, sp | ||
1794 | |||
1795 | bl _bsaes_encrypt8 | ||
1796 | |||
1797 | vld1.64 {q8-q9}, [r0,:128]! | ||
1798 | vld1.64 {q10-q11}, [r0,:128]! | ||
1799 | veor q0, q0, q8 | ||
1800 | vld1.64 {q12-q13}, [r0,:128]! | ||
1801 | veor q1, q1, q9 | ||
1802 | veor q8, q4, q10 | ||
1803 | vst1.8 {q0-q1}, [r8]! | ||
1804 | veor q9, q6, q11 | ||
1805 | vld1.64 {q14}, [r0,:128]! | ||
1806 | veor q10, q3, q12 | ||
1807 | vst1.8 {q8-q9}, [r8]! | ||
1808 | veor q11, q7, q13 | ||
1809 | veor q12, q2, q14 | ||
1810 | vst1.8 {q10-q11}, [r8]! | ||
1811 | vst1.8 {q12}, [r8]! | ||
1812 | |||
1813 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1814 | b .Lxts_enc_done | ||
1815 | .align 4 | ||
1816 | .Lxts_enc_6: | ||
1817 | vst1.64 {q14}, [r0,:128] @ next round tweak | ||
1818 | |||
1819 | veor q4, q4, q12 | ||
1820 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1821 | add r4, sp, #0x90 @ pass key schedule | ||
1822 | #else | ||
1823 | add r4, r10, #248 @ pass key schedule | ||
1824 | #endif | ||
1825 | veor q5, q5, q13 | ||
1826 | mov r5, r1 @ pass rounds | ||
1827 | mov r0, sp | ||
1828 | |||
1829 | bl _bsaes_encrypt8 | ||
1830 | |||
1831 | vld1.64 {q8-q9}, [r0,:128]! | ||
1832 | vld1.64 {q10-q11}, [r0,:128]! | ||
1833 | veor q0, q0, q8 | ||
1834 | vld1.64 {q12-q13}, [r0,:128]! | ||
1835 | veor q1, q1, q9 | ||
1836 | veor q8, q4, q10 | ||
1837 | vst1.8 {q0-q1}, [r8]! | ||
1838 | veor q9, q6, q11 | ||
1839 | veor q10, q3, q12 | ||
1840 | vst1.8 {q8-q9}, [r8]! | ||
1841 | veor q11, q7, q13 | ||
1842 | vst1.8 {q10-q11}, [r8]! | ||
1843 | |||
1844 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1845 | b .Lxts_enc_done | ||
1846 | |||
1847 | @ put this in range for both ARM and Thumb mode adr instructions | ||
1848 | .align 5 | ||
1849 | .Lxts_magic: | ||
1850 | .quad 1, 0x87 | ||
1851 | |||
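The `.quad 1, 0x87` pair is the whole trick behind the tweak schedule: advancing the XTS tweak means multiplying it by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, and the vshr.s64/vand/vadd.u64/vswp/veor stanzas above compute exactly that doubling, two 64-bit lanes at a time. A scalar C sketch (little-endian 64-bit halves, names hypothetical):

        #include <stdint.h>

        /* Multiply the 128-bit tweak by x in GF(2^128); 0x87 encodes the
         * reduction polynomial x^128 + x^7 + x^2 + x + 1. t[0] is the low half. */
        static void xts_mul_x(uint64_t t[2])
        {
                uint64_t carry = t[1] >> 63;          /* bit shifted off the top */

                t[1] = (t[1] << 1) | (t[0] >> 63);    /* cross-lane carry */
                t[0] = (t[0] << 1) ^ (carry * 0x87);  /* conditional reduction */
        }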
1852 | .align 5 | ||
1853 | .Lxts_enc_5: | ||
1854 | vst1.64 {q13}, [r0,:128] @ next round tweak | ||
1855 | |||
1856 | veor q3, q3, q11 | ||
1857 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1858 | add r4, sp, #0x90 @ pass key schedule | ||
1859 | #else | ||
1860 | add r4, r10, #248 @ pass key schedule | ||
1861 | #endif | ||
1862 | veor q4, q4, q12 | ||
1863 | mov r5, r1 @ pass rounds | ||
1864 | mov r0, sp | ||
1865 | |||
1866 | bl _bsaes_encrypt8 | ||
1867 | |||
1868 | vld1.64 {q8-q9}, [r0,:128]! | ||
1869 | vld1.64 {q10-q11}, [r0,:128]! | ||
1870 | veor q0, q0, q8 | ||
1871 | vld1.64 {q12}, [r0,:128]! | ||
1872 | veor q1, q1, q9 | ||
1873 | veor q8, q4, q10 | ||
1874 | vst1.8 {q0-q1}, [r8]! | ||
1875 | veor q9, q6, q11 | ||
1876 | veor q10, q3, q12 | ||
1877 | vst1.8 {q8-q9}, [r8]! | ||
1878 | vst1.8 {q10}, [r8]! | ||
1879 | |||
1880 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1881 | b .Lxts_enc_done | ||
1882 | .align 4 | ||
1883 | .Lxts_enc_4: | ||
1884 | vst1.64 {q12}, [r0,:128] @ next round tweak | ||
1885 | |||
1886 | veor q2, q2, q10 | ||
1887 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1888 | add r4, sp, #0x90 @ pass key schedule | ||
1889 | #else | ||
1890 | add r4, r10, #248 @ pass key schedule | ||
1891 | #endif | ||
1892 | veor q3, q3, q11 | ||
1893 | mov r5, r1 @ pass rounds | ||
1894 | mov r0, sp | ||
1895 | |||
1896 | bl _bsaes_encrypt8 | ||
1897 | |||
1898 | vld1.64 {q8-q9}, [r0,:128]! | ||
1899 | vld1.64 {q10-q11}, [r0,:128]! | ||
1900 | veor q0, q0, q8 | ||
1901 | veor q1, q1, q9 | ||
1902 | veor q8, q4, q10 | ||
1903 | vst1.8 {q0-q1}, [r8]! | ||
1904 | veor q9, q6, q11 | ||
1905 | vst1.8 {q8-q9}, [r8]! | ||
1906 | |||
1907 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1908 | b .Lxts_enc_done | ||
1909 | .align 4 | ||
1910 | .Lxts_enc_3: | ||
1911 | vst1.64 {q11}, [r0,:128] @ next round tweak | ||
1912 | |||
1913 | veor q1, q1, q9 | ||
1914 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1915 | add r4, sp, #0x90 @ pass key schedule | ||
1916 | #else | ||
1917 | add r4, r10, #248 @ pass key schedule | ||
1918 | #endif | ||
1919 | veor q2, q2, q10 | ||
1920 | mov r5, r1 @ pass rounds | ||
1921 | mov r0, sp | ||
1922 | |||
1923 | bl _bsaes_encrypt8 | ||
1924 | |||
1925 | vld1.64 {q8-q9}, [r0,:128]! | ||
1926 | vld1.64 {q10}, [r0,:128]! | ||
1927 | veor q0, q0, q8 | ||
1928 | veor q1, q1, q9 | ||
1929 | veor q8, q4, q10 | ||
1930 | vst1.8 {q0-q1}, [r8]! | ||
1931 | vst1.8 {q8}, [r8]! | ||
1932 | |||
1933 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1934 | b .Lxts_enc_done | ||
1935 | .align 4 | ||
1936 | .Lxts_enc_2: | ||
1937 | vst1.64 {q10}, [r0,:128] @ next round tweak | ||
1938 | |||
1939 | veor q0, q0, q8 | ||
1940 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1941 | add r4, sp, #0x90 @ pass key schedule | ||
1942 | #else | ||
1943 | add r4, r10, #248 @ pass key schedule | ||
1944 | #endif | ||
1945 | veor q1, q1, q9 | ||
1946 | mov r5, r1 @ pass rounds | ||
1947 | mov r0, sp | ||
1948 | |||
1949 | bl _bsaes_encrypt8 | ||
1950 | |||
1951 | vld1.64 {q8-q9}, [r0,:128]! | ||
1952 | veor q0, q0, q8 | ||
1953 | veor q1, q1, q9 | ||
1954 | vst1.8 {q0-q1}, [r8]! | ||
1955 | |||
1956 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
1957 | b .Lxts_enc_done | ||
1958 | .align 4 | ||
1959 | .Lxts_enc_1: | ||
1960 | mov r0, sp | ||
1961 | veor q0, q8 | ||
1962 | mov r1, sp | ||
1963 | vst1.8 {q0}, [sp,:128] | ||
1964 | mov r2, r10 | ||
1965 | mov r4, r3 @ preserve fp | ||
1966 | |||
1967 | bl AES_encrypt | ||
1968 | |||
1969 | vld1.8 {q0}, [sp,:128] | ||
1970 | veor q0, q0, q8 | ||
1971 | vst1.8 {q0}, [r8]! | ||
1972 | mov r3, r4 | ||
1973 | |||
1974 | vmov q8, q9 @ next round tweak | ||
1975 | |||
1976 | .Lxts_enc_done: | ||
1977 | #ifndef XTS_CHAIN_TWEAK | ||
1978 | adds r9, #0x10 | ||
1979 | beq .Lxts_enc_ret | ||
1980 | sub r6, r8, #0x10 | ||
1981 | |||
1982 | .Lxts_enc_steal: | ||
1983 | ldrb r0, [r7], #1 | ||
1984 | ldrb r1, [r8, #-0x10] | ||
1985 | strb r0, [r8, #-0x10] | ||
1986 | strb r1, [r8], #1 | ||
1987 | |||
1988 | subs r9, #1 | ||
1989 | bhi .Lxts_enc_steal | ||
1990 | |||
1991 | vld1.8 {q0}, [r6] | ||
1992 | mov r0, sp | ||
1993 | veor q0, q0, q8 | ||
1994 | mov r1, sp | ||
1995 | vst1.8 {q0}, [sp,:128] | ||
1996 | mov r2, r10 | ||
1997 | mov r4, r3 @ preserve fp | ||
1998 | |||
1999 | bl AES_encrypt | ||
2000 | |||
2001 | vld1.8 {q0}, [sp,:128] | ||
2002 | veor q0, q0, q8 | ||
2003 | vst1.8 {q0}, [r6] | ||
2004 | mov r3, r4 | ||
2005 | #endif | ||
2006 | |||
2007 | .Lxts_enc_ret: | ||
2008 | bic r0, r3, #0xf | ||
2009 | vmov.i32 q0, #0 | ||
2010 | vmov.i32 q1, #0 | ||
2011 | #ifdef XTS_CHAIN_TWEAK | ||
2012 | ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak | ||
2013 | #endif | ||
2014 | .Lxts_enc_bzero: @ wipe key schedule [if any] | ||
2015 | vstmia sp!, {q0-q1} | ||
2016 | cmp sp, r0 | ||
2017 | bne .Lxts_enc_bzero | ||
2018 | |||
2019 | mov sp, r3 | ||
2020 | #ifdef XTS_CHAIN_TWEAK | ||
2021 | vst1.8 {q8}, [r1] | ||
2022 | #endif | ||
2023 | VFP_ABI_POP | ||
2024 | ldmia sp!, {r4-r10, pc} @ return | ||
2025 | |||
2026 | .size bsaes_xts_encrypt,.-bsaes_xts_encrypt | ||
2027 | |||
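The `.Lxts_enc_steal` loop above implements ciphertext stealing for a trailing partial block: each leftover plaintext byte displaces a byte of the last full ciphertext block, the displaced byte becomes the short final output, and the merged block is then encrypted once more with the current tweak. The byte swap in isolation, as a hedged C sketch:

        #include <stddef.h>
        #include <stdint.h>

        /* out points at the last full ciphertext block already written; in is
         * the leftover plaintext. Afterwards out[0..15] holds plaintext-tail
         * plus the unstolen ciphertext and out[16..16+tail_len) the stolen
         * (final, short) ciphertext; one more tweaked encryption of out[0..15]
         * completes the job, as the code above does via AES_encrypt. */
        static void xts_steal_bytes(uint8_t *out, const uint8_t *in,
                                    size_t tail_len)
        {
                for (size_t i = 0; i < tail_len; i++) {
                        uint8_t c = out[i];     /* ciphertext byte being stolen */

                        out[i] = in[i];         /* plaintext byte takes its slot */
                        out[16 + i] = c;        /* stolen byte is the short output */
                }
        }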
2028 | .globl bsaes_xts_decrypt | ||
2029 | .type bsaes_xts_decrypt,%function | ||
2030 | .align 4 | ||
2031 | bsaes_xts_decrypt: | ||
2032 | mov ip, sp | ||
2033 | stmdb sp!, {r4-r10, lr} @ 0x20 | ||
2034 | VFP_ABI_PUSH | ||
2035 | mov r6, sp @ future r3 | ||
2036 | |||
2037 | mov r7, r0 | ||
2038 | mov r8, r1 | ||
2039 | mov r9, r2 | ||
2040 | mov r10, r3 | ||
2041 | |||
2042 | sub r0, sp, #0x10 @ 0x10 | ||
2043 | bic r0, #0xf @ align at 16 bytes | ||
2044 | mov sp, r0 | ||
2045 | |||
2046 | #ifdef XTS_CHAIN_TWEAK | ||
2047 | ldr r0, [ip] @ pointer to input tweak | ||
2048 | #else | ||
2049 | @ generate initial tweak | ||
2050 | ldr r0, [ip, #4] @ iv[] | ||
2051 | mov r1, sp | ||
2052 | ldr r2, [ip, #0] @ key2 | ||
2053 | bl AES_encrypt | ||
2054 | mov r0, sp @ pointer to initial tweak | ||
2055 | #endif | ||
2056 | |||
2057 | ldr r1, [r10, #240] @ get # of rounds | ||
2058 | mov r3, r6 | ||
2059 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2060 | @ allocate the key schedule on the stack | ||
2061 | sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key | ||
2062 | @ add r12, #96 @ size of bit-sliced key schedule | ||
2063 | sub r12, #48 @ place for tweak[9] | ||
2064 | |||
2065 | @ populate the key schedule | ||
2066 | mov r4, r10 @ pass key | ||
2067 | mov r5, r1 @ pass # of rounds | ||
2068 | mov sp, r12 | ||
2069 | add r12, #0x90 @ pass key schedule | ||
2070 | bl _bsaes_key_convert | ||
2071 | add r4, sp, #0x90 | ||
2072 | vldmia r4, {q6} | ||
2073 | vstmia r12, {q15} @ save last round key | ||
2074 | veor q7, q7, q6 @ fix up round 0 key | ||
2075 | vstmia r4, {q7} | ||
2076 | #else | ||
2077 | ldr r12, [r10, #244] | ||
2078 | eors r12, #1 | ||
2079 | beq 0f | ||
2080 | |||
2081 | str r12, [r10, #244] | ||
2082 | mov r4, r10 @ pass key | ||
2083 | mov r5, r1 @ pass # of rounds | ||
2084 | add r12, r10, #248 @ pass key schedule | ||
2085 | bl _bsaes_key_convert | ||
2086 | add r4, r10, #248 | ||
2087 | vldmia r4, {q6} | ||
2088 | vstmia r12, {q15} @ save last round key | ||
2089 | veor q7, q7, q6 @ fix up round 0 key | ||
2090 | vstmia r4, {q7} | ||
2091 | |||
2092 | .align 2 | ||
2093 | 0: sub sp, #0x90 @ place for tweak[9] | ||
2094 | #endif | ||
2095 | vld1.8 {q8}, [r0] @ initial tweak | ||
2096 | adr r2, .Lxts_magic | ||
2097 | |||
2098 | tst r9, #0xf @ if not multiple of 16 | ||
2099 | it ne @ Thumb2 thing, sanity check in ARM | ||
2100 | subne r9, #0x10 @ subtract another 16 bytes | ||
2101 | subs r9, #0x80 | ||
2102 | |||
2103 | blo .Lxts_dec_short | ||
2104 | b .Lxts_dec_loop | ||
2105 | |||
2106 | .align 4 | ||
2107 | .Lxts_dec_loop: | ||
2108 | vldmia r2, {q5} @ load XTS magic | ||
2109 | vshr.s64 q6, q8, #63 | ||
2110 | mov r0, sp | ||
2111 | vand q6, q6, q5 | ||
2112 | vadd.u64 q9, q8, q8 | ||
2113 | vst1.64 {q8}, [r0,:128]! | ||
2114 | vswp d13,d12 | ||
2115 | vshr.s64 q7, q9, #63 | ||
2116 | veor q9, q9, q6 | ||
2117 | vand q7, q7, q5 | ||
2118 | vadd.u64 q10, q9, q9 | ||
2119 | vst1.64 {q9}, [r0,:128]! | ||
2120 | vswp d15,d14 | ||
2121 | vshr.s64 q6, q10, #63 | ||
2122 | veor q10, q10, q7 | ||
2123 | vand q6, q6, q5 | ||
2124 | vld1.8 {q0}, [r7]! | ||
2125 | vadd.u64 q11, q10, q10 | ||
2126 | vst1.64 {q10}, [r0,:128]! | ||
2127 | vswp d13,d12 | ||
2128 | vshr.s64 q7, q11, #63 | ||
2129 | veor q11, q11, q6 | ||
2130 | vand q7, q7, q5 | ||
2131 | vld1.8 {q1}, [r7]! | ||
2132 | veor q0, q0, q8 | ||
2133 | vadd.u64 q12, q11, q11 | ||
2134 | vst1.64 {q11}, [r0,:128]! | ||
2135 | vswp d15,d14 | ||
2136 | vshr.s64 q6, q12, #63 | ||
2137 | veor q12, q12, q7 | ||
2138 | vand q6, q6, q5 | ||
2139 | vld1.8 {q2}, [r7]! | ||
2140 | veor q1, q1, q9 | ||
2141 | vadd.u64 q13, q12, q12 | ||
2142 | vst1.64 {q12}, [r0,:128]! | ||
2143 | vswp d13,d12 | ||
2144 | vshr.s64 q7, q13, #63 | ||
2145 | veor q13, q13, q6 | ||
2146 | vand q7, q7, q5 | ||
2147 | vld1.8 {q3}, [r7]! | ||
2148 | veor q2, q2, q10 | ||
2149 | vadd.u64 q14, q13, q13 | ||
2150 | vst1.64 {q13}, [r0,:128]! | ||
2151 | vswp d15,d14 | ||
2152 | vshr.s64 q6, q14, #63 | ||
2153 | veor q14, q14, q7 | ||
2154 | vand q6, q6, q5 | ||
2155 | vld1.8 {q4}, [r7]! | ||
2156 | veor q3, q3, q11 | ||
2157 | vadd.u64 q15, q14, q14 | ||
2158 | vst1.64 {q14}, [r0,:128]! | ||
2159 | vswp d13,d12 | ||
2160 | vshr.s64 q7, q15, #63 | ||
2161 | veor q15, q15, q6 | ||
2162 | vand q7, q7, q5 | ||
2163 | vld1.8 {q5}, [r7]! | ||
2164 | veor q4, q4, q12 | ||
2165 | vadd.u64 q8, q15, q15 | ||
2166 | vst1.64 {q15}, [r0,:128]! | ||
2167 | vswp d15,d14 | ||
2168 | veor q8, q8, q7 | ||
2169 | vst1.64 {q8}, [r0,:128] @ next round tweak | ||
2170 | |||
2171 | vld1.8 {q6-q7}, [r7]! | ||
2172 | veor q5, q5, q13 | ||
2173 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2174 | add r4, sp, #0x90 @ pass key schedule | ||
2175 | #else | ||
2176 | add r4, r10, #248 @ pass key schedule | ||
2177 | #endif | ||
2178 | veor q6, q6, q14 | ||
2179 | mov r5, r1 @ pass rounds | ||
2180 | veor q7, q7, q15 | ||
2181 | mov r0, sp | ||
2182 | |||
2183 | bl _bsaes_decrypt8 | ||
2184 | |||
2185 | vld1.64 {q8-q9}, [r0,:128]! | ||
2186 | vld1.64 {q10-q11}, [r0,:128]! | ||
2187 | veor q0, q0, q8 | ||
2188 | vld1.64 {q12-q13}, [r0,:128]! | ||
2189 | veor q1, q1, q9 | ||
2190 | veor q8, q6, q10 | ||
2191 | vst1.8 {q0-q1}, [r8]! | ||
2192 | veor q9, q4, q11 | ||
2193 | vld1.64 {q14-q15}, [r0,:128]! | ||
2194 | veor q10, q2, q12 | ||
2195 | vst1.8 {q8-q9}, [r8]! | ||
2196 | veor q11, q7, q13 | ||
2197 | veor q12, q3, q14 | ||
2198 | vst1.8 {q10-q11}, [r8]! | ||
2199 | veor q13, q5, q15 | ||
2200 | vst1.8 {q12-q13}, [r8]! | ||
2201 | |||
2202 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2203 | |||
2204 | subs r9, #0x80 | ||
2205 | bpl .Lxts_dec_loop | ||
2206 | |||
2207 | .Lxts_dec_short: | ||
2208 | adds r9, #0x70 | ||
2209 | bmi .Lxts_dec_done | ||
2210 | |||
2211 | vldmia r2, {q5} @ load XTS magic | ||
2212 | vshr.s64 q7, q8, #63 | ||
2213 | mov r0, sp | ||
2214 | vand q7, q7, q5 | ||
2215 | vadd.u64 q9, q8, q8 | ||
2216 | vst1.64 {q8}, [r0,:128]! | ||
2217 | vswp d15,d14 | ||
2218 | vshr.s64 q6, q9, #63 | ||
2219 | veor q9, q9, q7 | ||
2220 | vand q6, q6, q5 | ||
2221 | vadd.u64 q10, q9, q9 | ||
2222 | vst1.64 {q9}, [r0,:128]! | ||
2223 | vswp d13,d12 | ||
2224 | vshr.s64 q7, q10, #63 | ||
2225 | veor q10, q10, q6 | ||
2226 | vand q7, q7, q5 | ||
2227 | vld1.8 {q0}, [r7]! | ||
2228 | subs r9, #0x10 | ||
2229 | bmi .Lxts_dec_1 | ||
2230 | vadd.u64 q11, q10, q10 | ||
2231 | vst1.64 {q10}, [r0,:128]! | ||
2232 | vswp d15,d14 | ||
2233 | vshr.s64 q6, q11, #63 | ||
2234 | veor q11, q11, q7 | ||
2235 | vand q6, q6, q5 | ||
2236 | vld1.8 {q1}, [r7]! | ||
2237 | subs r9, #0x10 | ||
2238 | bmi .Lxts_dec_2 | ||
2239 | veor q0, q0, q8 | ||
2240 | vadd.u64 q12, q11, q11 | ||
2241 | vst1.64 {q11}, [r0,:128]! | ||
2242 | vswp d13,d12 | ||
2243 | vshr.s64 q7, q12, #63 | ||
2244 | veor q12, q12, q6 | ||
2245 | vand q7, q7, q5 | ||
2246 | vld1.8 {q2}, [r7]! | ||
2247 | subs r9, #0x10 | ||
2248 | bmi .Lxts_dec_3 | ||
2249 | veor q1, q1, q9 | ||
2250 | vadd.u64 q13, q12, q12 | ||
2251 | vst1.64 {q12}, [r0,:128]! | ||
2252 | vswp d15,d14 | ||
2253 | vshr.s64 q6, q13, #63 | ||
2254 | veor q13, q13, q7 | ||
2255 | vand q6, q6, q5 | ||
2256 | vld1.8 {q3}, [r7]! | ||
2257 | subs r9, #0x10 | ||
2258 | bmi .Lxts_dec_4 | ||
2259 | veor q2, q2, q10 | ||
2260 | vadd.u64 q14, q13, q13 | ||
2261 | vst1.64 {q13}, [r0,:128]! | ||
2262 | vswp d13,d12 | ||
2263 | vshr.s64 q7, q14, #63 | ||
2264 | veor q14, q14, q6 | ||
2265 | vand q7, q7, q5 | ||
2266 | vld1.8 {q4}, [r7]! | ||
2267 | subs r9, #0x10 | ||
2268 | bmi .Lxts_dec_5 | ||
2269 | veor q3, q3, q11 | ||
2270 | vadd.u64 q15, q14, q14 | ||
2271 | vst1.64 {q14}, [r0,:128]! | ||
2272 | vswp d15,d14 | ||
2273 | vshr.s64 q6, q15, #63 | ||
2274 | veor q15, q15, q7 | ||
2275 | vand q6, q6, q5 | ||
2276 | vld1.8 {q5}, [r7]! | ||
2277 | subs r9, #0x10 | ||
2278 | bmi .Lxts_dec_6 | ||
2279 | veor q4, q4, q12 | ||
2280 | sub r9, #0x10 | ||
2281 | vst1.64 {q15}, [r0,:128] @ next round tweak | ||
2282 | |||
2283 | vld1.8 {q6}, [r7]! | ||
2284 | veor q5, q5, q13 | ||
2285 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2286 | add r4, sp, #0x90 @ pass key schedule | ||
2287 | #else | ||
2288 | add r4, r10, #248 @ pass key schedule | ||
2289 | #endif | ||
2290 | veor q6, q6, q14 | ||
2291 | mov r5, r1 @ pass rounds | ||
2292 | mov r0, sp | ||
2293 | |||
2294 | bl _bsaes_decrypt8 | ||
2295 | |||
2296 | vld1.64 {q8-q9}, [r0,:128]! | ||
2297 | vld1.64 {q10-q11}, [r0,:128]! | ||
2298 | veor q0, q0, q8 | ||
2299 | vld1.64 {q12-q13}, [r0,:128]! | ||
2300 | veor q1, q1, q9 | ||
2301 | veor q8, q6, q10 | ||
2302 | vst1.8 {q0-q1}, [r8]! | ||
2303 | veor q9, q4, q11 | ||
2304 | vld1.64 {q14}, [r0,:128]! | ||
2305 | veor q10, q2, q12 | ||
2306 | vst1.8 {q8-q9}, [r8]! | ||
2307 | veor q11, q7, q13 | ||
2308 | veor q12, q3, q14 | ||
2309 | vst1.8 {q10-q11}, [r8]! | ||
2310 | vst1.8 {q12}, [r8]! | ||
2311 | |||
2312 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2313 | b .Lxts_dec_done | ||
2314 | .align 4 | ||
2315 | .Lxts_dec_6: | ||
2316 | vst1.64 {q14}, [r0,:128] @ next round tweak | ||
2317 | |||
2318 | veor q4, q4, q12 | ||
2319 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2320 | add r4, sp, #0x90 @ pass key schedule | ||
2321 | #else | ||
2322 | add r4, r10, #248 @ pass key schedule | ||
2323 | #endif | ||
2324 | veor q5, q5, q13 | ||
2325 | mov r5, r1 @ pass rounds | ||
2326 | mov r0, sp | ||
2327 | |||
2328 | bl _bsaes_decrypt8 | ||
2329 | |||
2330 | vld1.64 {q8-q9}, [r0,:128]! | ||
2331 | vld1.64 {q10-q11}, [r0,:128]! | ||
2332 | veor q0, q0, q8 | ||
2333 | vld1.64 {q12-q13}, [r0,:128]! | ||
2334 | veor q1, q1, q9 | ||
2335 | veor q8, q6, q10 | ||
2336 | vst1.8 {q0-q1}, [r8]! | ||
2337 | veor q9, q4, q11 | ||
2338 | veor q10, q2, q12 | ||
2339 | vst1.8 {q8-q9}, [r8]! | ||
2340 | veor q11, q7, q13 | ||
2341 | vst1.8 {q10-q11}, [r8]! | ||
2342 | |||
2343 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2344 | b .Lxts_dec_done | ||
2345 | .align 4 | ||
2346 | .Lxts_dec_5: | ||
2347 | vst1.64 {q13}, [r0,:128] @ next round tweak | ||
2348 | |||
2349 | veor q3, q3, q11 | ||
2350 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2351 | add r4, sp, #0x90 @ pass key schedule | ||
2352 | #else | ||
2353 | add r4, r10, #248 @ pass key schedule | ||
2354 | #endif | ||
2355 | veor q4, q4, q12 | ||
2356 | mov r5, r1 @ pass rounds | ||
2357 | mov r0, sp | ||
2358 | |||
2359 | bl _bsaes_decrypt8 | ||
2360 | |||
2361 | vld1.64 {q8-q9}, [r0,:128]! | ||
2362 | vld1.64 {q10-q11}, [r0,:128]! | ||
2363 | veor q0, q0, q8 | ||
2364 | vld1.64 {q12}, [r0,:128]! | ||
2365 | veor q1, q1, q9 | ||
2366 | veor q8, q6, q10 | ||
2367 | vst1.8 {q0-q1}, [r8]! | ||
2368 | veor q9, q4, q11 | ||
2369 | veor q10, q2, q12 | ||
2370 | vst1.8 {q8-q9}, [r8]! | ||
2371 | vst1.8 {q10}, [r8]! | ||
2372 | |||
2373 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2374 | b .Lxts_dec_done | ||
2375 | .align 4 | ||
2376 | .Lxts_dec_4: | ||
2377 | vst1.64 {q12}, [r0,:128] @ next round tweak | ||
2378 | |||
2379 | veor q2, q2, q10 | ||
2380 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2381 | add r4, sp, #0x90 @ pass key schedule | ||
2382 | #else | ||
2383 | add r4, r10, #248 @ pass key schedule | ||
2384 | #endif | ||
2385 | veor q3, q3, q11 | ||
2386 | mov r5, r1 @ pass rounds | ||
2387 | mov r0, sp | ||
2388 | |||
2389 | bl _bsaes_decrypt8 | ||
2390 | |||
2391 | vld1.64 {q8-q9}, [r0,:128]! | ||
2392 | vld1.64 {q10-q11}, [r0,:128]! | ||
2393 | veor q0, q0, q8 | ||
2394 | veor q1, q1, q9 | ||
2395 | veor q8, q6, q10 | ||
2396 | vst1.8 {q0-q1}, [r8]! | ||
2397 | veor q9, q4, q11 | ||
2398 | vst1.8 {q8-q9}, [r8]! | ||
2399 | |||
2400 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2401 | b .Lxts_dec_done | ||
2402 | .align 4 | ||
2403 | .Lxts_dec_3: | ||
2404 | vst1.64 {q11}, [r0,:128] @ next round tweak | ||
2405 | |||
2406 | veor q1, q1, q9 | ||
2407 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2408 | add r4, sp, #0x90 @ pass key schedule | ||
2409 | #else | ||
2410 | add r4, r10, #248 @ pass key schedule | ||
2411 | #endif | ||
2412 | veor q2, q2, q10 | ||
2413 | mov r5, r1 @ pass rounds | ||
2414 | mov r0, sp | ||
2415 | |||
2416 | bl _bsaes_decrypt8 | ||
2417 | |||
2418 | vld1.64 {q8-q9}, [r0,:128]! | ||
2419 | vld1.64 {q10}, [r0,:128]! | ||
2420 | veor q0, q0, q8 | ||
2421 | veor q1, q1, q9 | ||
2422 | veor q8, q6, q10 | ||
2423 | vst1.8 {q0-q1}, [r8]! | ||
2424 | vst1.8 {q8}, [r8]! | ||
2425 | |||
2426 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2427 | b .Lxts_dec_done | ||
2428 | .align 4 | ||
2429 | .Lxts_dec_2: | ||
2430 | vst1.64 {q10}, [r0,:128] @ next round tweak | ||
2431 | |||
2432 | veor q0, q0, q8 | ||
2433 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2434 | add r4, sp, #0x90 @ pass key schedule | ||
2435 | #else | ||
2436 | add r4, r10, #248 @ pass key schedule | ||
2437 | #endif | ||
2438 | veor q1, q1, q9 | ||
2439 | mov r5, r1 @ pass rounds | ||
2440 | mov r0, sp | ||
2441 | |||
2442 | bl _bsaes_decrypt8 | ||
2443 | |||
2444 | vld1.64 {q8-q9}, [r0,:128]! | ||
2445 | veor q0, q0, q8 | ||
2446 | veor q1, q1, q9 | ||
2447 | vst1.8 {q0-q1}, [r8]! | ||
2448 | |||
2449 | vld1.64 {q8}, [r0,:128] @ next round tweak | ||
2450 | b .Lxts_dec_done | ||
2451 | .align 4 | ||
2452 | .Lxts_dec_1: | ||
2453 | mov r0, sp | ||
2454 | veor q0, q8 | ||
2455 | mov r1, sp | ||
2456 | vst1.8 {q0}, [sp,:128] | ||
2457 | mov r2, r10 | ||
2458 | mov r4, r3 @ preserve fp | ||
2459 | mov r5, r2 @ preserve magic | ||
2460 | |||
2461 | bl AES_decrypt | ||
2462 | |||
2463 | vld1.8 {q0}, [sp,:128] | ||
2464 | veor q0, q0, q8 | ||
2465 | vst1.8 {q0}, [r8]! | ||
2466 | mov r3, r4 | ||
2467 | mov r2, r5 | ||
2468 | |||
2469 | vmov q8, q9 @ next round tweak | ||
2470 | |||
2471 | .Lxts_dec_done: | ||
2472 | #ifndef XTS_CHAIN_TWEAK | ||
2473 | adds r9, #0x10 | ||
2474 | beq .Lxts_dec_ret | ||
2475 | |||
2476 | @ calculate one round of extra tweak for the stolen ciphertext | ||
2477 | vldmia r2, {q5} | ||
2478 | vshr.s64 q6, q8, #63 | ||
2479 | vand q6, q6, q5 | ||
2480 | vadd.u64 q9, q8, q8 | ||
2481 | vswp d13,d12 | ||
2482 | veor q9, q9, q6 | ||
2483 | |||
2484 | @ perform the final decryption with the last tweak value | ||
2485 | vld1.8 {q0}, [r7]! | ||
2486 | mov r0, sp | ||
2487 | veor q0, q0, q9 | ||
2488 | mov r1, sp | ||
2489 | vst1.8 {q0}, [sp,:128] | ||
2490 | mov r2, r10 | ||
2491 | mov r4, r3 @ preserve fp | ||
2492 | |||
2493 | bl AES_decrypt | ||
2494 | |||
2495 | vld1.8 {q0}, [sp,:128] | ||
2496 | veor q0, q0, q9 | ||
2497 | vst1.8 {q0}, [r8] | ||
2498 | |||
2499 | mov r6, r8 | ||
2500 | .Lxts_dec_steal: | ||
2501 | ldrb r1, [r8] | ||
2502 | ldrb r0, [r7], #1 | ||
2503 | strb r1, [r8, #0x10] | ||
2504 | strb r0, [r8], #1 | ||
2505 | |||
2506 | subs r9, #1 | ||
2507 | bhi .Lxts_dec_steal | ||
2508 | |||
2509 | vld1.8 {q0}, [r6] | ||
2510 | mov r0, sp | ||
2511 | veor q0, q8 | ||
2512 | mov r1, sp | ||
2513 | vst1.8 {q0}, [sp,:128] | ||
2514 | mov r2, r10 | ||
2515 | |||
2516 | bl AES_decrypt | ||
2517 | |||
2518 | vld1.8 {q0}, [sp,:128] | ||
2519 | veor q0, q0, q8 | ||
2520 | vst1.8 {q0}, [r6] | ||
2521 | mov r3, r4 | ||
2522 | #endif | ||
2523 | |||
2524 | .Lxts_dec_ret: | ||
2525 | bic r0, r3, #0xf | ||
2526 | vmov.i32 q0, #0 | ||
2527 | vmov.i32 q1, #0 | ||
2528 | #ifdef XTS_CHAIN_TWEAK | ||
2529 | ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak | ||
2530 | #endif | ||
2531 | .Lxts_dec_bzero: @ wipe key schedule [if any] | ||
2532 | vstmia sp!, {q0-q1} | ||
2533 | cmp sp, r0 | ||
2534 | bne .Lxts_dec_bzero | ||
2535 | |||
2536 | mov sp, r3 | ||
2537 | #ifdef XTS_CHAIN_TWEAK | ||
2538 | vst1.8 {q8}, [r1] | ||
2539 | #endif | ||
2540 | VFP_ABI_POP | ||
2541 | ldmia sp!, {r4-r10, pc} @ return | ||
2542 | |||
2543 | .size bsaes_xts_decrypt,.-bsaes_xts_decrypt | ||
2544 | #endif | ||
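One subtlety on the decrypt side, visible in the .Lxts_dec_done path above: with ciphertext stealing, the tweaks are consumed out of order. The code first computes one extra tweak round (T multiplied by x), decrypts the last full ciphertext block with it, performs the byte swap, and only then decrypts the merged block with the current tweak. A hedged sketch of that ordering, with `dec` standing in for the tweaked single-block decryption the asm performs via AES_decrypt:

        #include <stddef.h>
        #include <stdint.h>
        #include <string.h>

        /* src: last full ciphertext block followed by tail_len short bytes;
         * dst: receives 16 bytes of plaintext followed by tail_len short bytes.
         * t is the current tweak, t2 = t * x (the "extra round" in the asm). */
        static void xts_dec_tail(uint8_t *dst, const uint8_t *src,
                                 size_t tail_len,
                                 const uint8_t t[16], const uint8_t t2[16],
                                 void (*dec)(uint8_t blk[16],
                                             const uint8_t twk[16]))
        {
                uint8_t buf[16];

                memcpy(buf, src, 16);
                dec(buf, t2);                    /* 1: use the *next* tweak */
                memcpy(dst + 16, buf, tail_len); /* 2: its head is the short tail */
                memcpy(buf, src + 16, tail_len); /* 3: steal the short ciphertext */
                dec(buf, t);                     /* 4: merged block, current tweak */
                memcpy(dst, buf, 16);
        }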
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c new file mode 100644 index 000000000000..4522366da759 --- /dev/null +++ b/arch/arm/crypto/aesbs-glue.c | |||
@@ -0,0 +1,434 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES | ||
3 | * | ||
4 | * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <asm/neon.h> | ||
12 | #include <crypto/aes.h> | ||
13 | #include <crypto/ablk_helper.h> | ||
14 | #include <crypto/algapi.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include "aes_glue.h" | ||
18 | |||
19 | #define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) | ||
20 | |||
21 | struct BS_KEY { | ||
22 | struct AES_KEY rk; | ||
23 | int converted; | ||
24 | u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; | ||
25 | } __aligned(8); | ||
26 | |||
27 | asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); | ||
28 | asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); | ||
29 | |||
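The `converted` flag in struct BS_KEY is what keeps key setup cheap: the setkey handlers below only fill in the scalar AES_KEY and clear the flag, and the NEON entry points bit-slice the round keys into `bs` on first use. The `ldr r12, [r10, #244]` checks in the BSAES_ASM_EXTENDED_KEY paths of the assembly read exactly this flag (struct AES_KEY occupies the first 244 bytes, with `rounds` at offset 240, which the asm reads as `[r10, #240]`). A hedged sketch of the contract, using the converter declared above (in practice this runs inside the asm, within a kernel_neon_begin/end section):

        /* Sketch only: the real check happens inside the asm entry points. */
        static void bs_key_lazy_convert(struct BS_KEY *key)
        {
                if (!key->converted) {
                        bsaes_enc_key_convert(key->bs, &key->rk);
                        key->converted = 1;
                }
        }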
30 | asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, | ||
31 | struct BS_KEY *key, u8 iv[]); | ||
32 | |||
33 | asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, | ||
34 | struct BS_KEY *key, u8 const iv[]); | ||
35 | |||
36 | asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, | ||
37 | struct BS_KEY *key, u8 tweak[]); | ||
38 | |||
39 | asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, | ||
40 | struct BS_KEY *key, u8 tweak[]); | ||
41 | |||
42 | struct aesbs_cbc_ctx { | ||
43 | struct AES_KEY enc; | ||
44 | struct BS_KEY dec; | ||
45 | }; | ||
46 | |||
47 | struct aesbs_ctr_ctx { | ||
48 | struct BS_KEY enc; | ||
49 | }; | ||
50 | |||
51 | struct aesbs_xts_ctx { | ||
52 | struct BS_KEY enc; | ||
53 | struct BS_KEY dec; | ||
54 | struct AES_KEY twkey; | ||
55 | }; | ||
56 | |||
57 | static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
58 | unsigned int key_len) | ||
59 | { | ||
60 | struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm); | ||
61 | int bits = key_len * 8; | ||
62 | |||
63 | if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { | ||
64 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
65 | return -EINVAL; | ||
66 | } | ||
67 | ctx->dec.rk = ctx->enc; | ||
68 | private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); | ||
69 | ctx->dec.converted = 0; | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
74 | unsigned int key_len) | ||
75 | { | ||
76 | struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm); | ||
77 | int bits = key_len * 8; | ||
78 | |||
79 | if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { | ||
80 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
81 | return -EINVAL; | ||
82 | } | ||
83 | ctx->enc.converted = 0; | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
88 | unsigned int key_len) | ||
89 | { | ||
90 | struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
91 | int bits = key_len * 4; | ||
92 | |||
93 | if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { | ||
94 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | ctx->dec.rk = ctx->enc.rk; | ||
98 | private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); | ||
99 | private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); | ||
100 | ctx->enc.converted = ctx->dec.converted = 0; | ||
101 | return 0; | ||
102 | } | ||
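The `key_len * 4` is not a typo: XTS callers pass two AES keys back to back, so each half is `key_len / 2` bytes, i.e. `key_len * 4` bits, and the second half at `in_key + key_len / 2` keys the tweak cipher. For instance:

        /* AES-256-XTS: key_len = 64 bytes
         *   bits      = 64 * 4 = 256 (per half)
         *   data key  = in_key[0..31], tweak key = in_key[32..63] */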
103 | |||
104 | static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, | ||
105 | struct scatterlist *dst, | ||
106 | struct scatterlist *src, unsigned int nbytes) | ||
107 | { | ||
108 | struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
109 | struct blkcipher_walk walk; | ||
110 | int err; | ||
111 | |||
112 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
113 | err = blkcipher_walk_virt(desc, &walk); | ||
114 | |||
115 | while (walk.nbytes) { | ||
116 | u32 blocks = walk.nbytes / AES_BLOCK_SIZE; | ||
117 | u8 *src = walk.src.virt.addr; | ||
118 | |||
119 | if (walk.dst.virt.addr == walk.src.virt.addr) { | ||
120 | u8 *iv = walk.iv; | ||
121 | |||
122 | do { | ||
123 | crypto_xor(src, iv, AES_BLOCK_SIZE); | ||
124 | AES_encrypt(src, src, &ctx->enc); | ||
125 | iv = src; | ||
126 | src += AES_BLOCK_SIZE; | ||
127 | } while (--blocks); | ||
128 | memcpy(walk.iv, iv, AES_BLOCK_SIZE); | ||
129 | } else { | ||
130 | u8 *dst = walk.dst.virt.addr; | ||
131 | |||
132 | do { | ||
133 | crypto_xor(walk.iv, src, AES_BLOCK_SIZE); | ||
134 | AES_encrypt(walk.iv, dst, &ctx->enc); | ||
135 | memcpy(walk.iv, dst, AES_BLOCK_SIZE); | ||
136 | src += AES_BLOCK_SIZE; | ||
137 | dst += AES_BLOCK_SIZE; | ||
138 | } while (--blocks); | ||
139 | } | ||
140 | err = blkcipher_walk_done(desc, &walk, 0); | ||
141 | } | ||
142 | return err; | ||
143 | } | ||
144 | |||
145 | static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, | ||
146 | struct scatterlist *dst, | ||
147 | struct scatterlist *src, unsigned int nbytes) | ||
148 | { | ||
149 | struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
150 | struct blkcipher_walk walk; | ||
151 | int err; | ||
152 | |||
153 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
154 | err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); | ||
155 | |||
156 | while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) { | ||
157 | kernel_neon_begin(); | ||
158 | bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, | ||
159 | walk.nbytes, &ctx->dec, walk.iv); | ||
160 | kernel_neon_end(); | ||
161 | err = blkcipher_walk_done(desc, &walk, 0); | ||
162 | } | ||
163 | while (walk.nbytes) { | ||
164 | u32 blocks = walk.nbytes / AES_BLOCK_SIZE; | ||
165 | u8 *dst = walk.dst.virt.addr; | ||
166 | u8 *src = walk.src.virt.addr; | ||
167 | u8 bk[2][AES_BLOCK_SIZE]; | ||
168 | u8 *iv = walk.iv; | ||
169 | |||
170 | do { | ||
171 | if (walk.dst.virt.addr == walk.src.virt.addr) | ||
172 | memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE); | ||
173 | |||
174 | AES_decrypt(src, dst, &ctx->dec.rk); | ||
175 | crypto_xor(dst, iv, AES_BLOCK_SIZE); | ||
176 | |||
177 | if (walk.dst.virt.addr == walk.src.virt.addr) | ||
178 | iv = bk[blocks & 1]; | ||
179 | else | ||
180 | iv = src; | ||
181 | |||
182 | dst += AES_BLOCK_SIZE; | ||
183 | src += AES_BLOCK_SIZE; | ||
184 | } while (--blocks); | ||
185 | err = blkcipher_walk_done(desc, &walk, 0); | ||
186 | } | ||
187 | return err; | ||
188 | } | ||
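The pair of bounce buffers is what makes the in-place fallback correct: with a single buffer, the memcpy at the top of an iteration would clobber the saved ciphertext while `iv` still points into it, so `bk[blocks & 1]` alternates and each saved block survives exactly the one extra iteration in which it is needed as the IV. Schematically:

        /* in-place CBC decrypt with one buffer would fail:
         *   iteration i:  save C_i            (would overwrite C_{i-1}!)
         *                 P_i = D_K(C_i) ^ iv (iv still points at C_{i-1})
         *                 iv  = saved C_i
         * alternating bk[0]/bk[1] keeps C_{i-1} intact through the save step */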
189 | |||
190 | static void inc_be128_ctr(__be32 ctr[], u32 addend) | ||
191 | { | ||
192 | int i; | ||
193 | |||
194 | for (i = 3; i >= 0; i--, addend = 1) { | ||
195 | u32 n = be32_to_cpu(ctr[i]) + addend; | ||
196 | |||
197 | ctr[i] = cpu_to_be32(n); | ||
198 | if (n >= addend) | ||
199 | break; | ||
200 | } | ||
201 | } | ||
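Carry propagation in inc_be128_ctr runs from the least significant (big-endian) word upward and stops as soon as an addition does not wrap (`n >= addend`). For example:

        __be32 ctr[4] = { cpu_to_be32(0), cpu_to_be32(0),
                          cpu_to_be32(0), cpu_to_be32(0xfffffffe) };

        inc_be128_ctr(ctr, 3);
        /* words are now { 0, 0, 1, 1 }: word 3 wrapped, so a carry of 1
         * propagated into word 2, where the loop stopped */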
202 | |||
203 | static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, | ||
204 | struct scatterlist *dst, struct scatterlist *src, | ||
205 | unsigned int nbytes) | ||
206 | { | ||
207 | struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
208 | struct blkcipher_walk walk; | ||
209 | u32 blocks; | ||
210 | int err; | ||
211 | |||
212 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
213 | err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); | ||
214 | |||
215 | while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { | ||
216 | u32 tail = walk.nbytes % AES_BLOCK_SIZE; | ||
217 | __be32 *ctr = (__be32 *)walk.iv; | ||
218 | u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); | ||
219 | |||
220 | /* avoid a 32-bit counter overflow in the NEON code */ | ||
221 | if (unlikely(headroom < blocks)) { | ||
222 | blocks = headroom + 1; | ||
223 | tail = walk.nbytes - blocks * AES_BLOCK_SIZE; | ||
224 | } | ||
225 | kernel_neon_begin(); | ||
226 | bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, | ||
227 | walk.dst.virt.addr, blocks, | ||
228 | &ctx->enc, walk.iv); | ||
229 | kernel_neon_end(); | ||
230 | inc_be128_ctr(ctr, blocks); | ||
231 | |||
232 | nbytes -= blocks * AES_BLOCK_SIZE; | ||
233 | if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE) | ||
234 | break; | ||
235 | |||
236 | err = blkcipher_walk_done(desc, &walk, tail); | ||
237 | } | ||
238 | if (walk.nbytes) { | ||
239 | u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; | ||
240 | u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; | ||
241 | u8 ks[AES_BLOCK_SIZE]; | ||
242 | |||
243 | AES_encrypt(walk.iv, ks, &ctx->enc.rk); | ||
244 | if (tdst != tsrc) | ||
245 | memcpy(tdst, tsrc, nbytes); | ||
246 | crypto_xor(tdst, ks, nbytes); | ||
247 | err = blkcipher_walk_done(desc, &walk, 0); | ||
248 | } | ||
249 | return err; | ||
250 | } | ||
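The headroom clamp exists because the NEON routine only increments the low 32 counter bits (see bsaes_ctr32_encrypt_blocks above): a wrap of ctr[3] mid-call would roll over to zero without carrying into ctr[2], corrupting the keystream. A worked example of the clamping logic:

        /* Suppose ctr[3] holds 0xfffffffd and the walk contains 8 blocks:
         *   headroom = UINT_MAX - 0xfffffffd = 2, so blocks is clamped to 3
         *   (counter words ...fd, ...fe, ...ff are still safe) and the
         *   remaining 5 blocks are deferred to the next loop iteration,
         *   after inc_be128_ctr() has carried the wrap into ctr[2]. */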
251 | |||
252 | static int aesbs_xts_encrypt(struct blkcipher_desc *desc, | ||
253 | struct scatterlist *dst, | ||
254 | struct scatterlist *src, unsigned int nbytes) | ||
255 | { | ||
256 | struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
257 | struct blkcipher_walk walk; | ||
258 | int err; | ||
259 | |||
260 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
261 | err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); | ||
262 | |||
263 | /* generate the initial tweak */ | ||
264 | AES_encrypt(walk.iv, walk.iv, &ctx->twkey); | ||
265 | |||
266 | while (walk.nbytes) { | ||
267 | kernel_neon_begin(); | ||
268 | bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, | ||
269 | walk.nbytes, &ctx->enc, walk.iv); | ||
270 | kernel_neon_end(); | ||
271 | err = blkcipher_walk_done(desc, &walk, 0); | ||
272 | } | ||
273 | return err; | ||
274 | } | ||
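Per 16-byte block, the assembly computes the standard XTS sandwich, doubling the tweak between blocks; only the initial tweak (generated in place above) needs the second key. A hedged scalar sketch of one block, all names illustrative:

        #include <stdint.h>

        /* C = E_K1(P ^ T) ^ T, then T <- T * x in GF(2^128) (see the
         * xts_mul_x sketch next to .Lxts_magic). */
        static void xts_enc_block(uint8_t c[16], const uint8_t p[16],
                                  const uint8_t t[16],
                                  void (*enc)(const uint8_t in[16],
                                              uint8_t out[16], const void *key),
                                  const void *k1)
        {
                uint8_t buf[16];

                for (int i = 0; i < 16; i++)
                        buf[i] = p[i] ^ t[i];
                enc(buf, buf, k1);
                for (int i = 0; i < 16; i++)
                        c[i] = buf[i] ^ t[i];
                /* caller then advances the tweak: t = t * x */
        }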
275 | |||
276 | static int aesbs_xts_decrypt(struct blkcipher_desc *desc, | ||
277 | struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
281 | struct blkcipher_walk walk; | ||
282 | int err; | ||
283 | |||
284 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
285 | err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); | ||
286 | |||
287 | /* generate the initial tweak */ | ||
288 | AES_encrypt(walk.iv, walk.iv, &ctx->twkey); | ||
289 | |||
290 | while (walk.nbytes) { | ||
291 | kernel_neon_begin(); | ||
292 | bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, | ||
293 | walk.nbytes, &ctx->dec, walk.iv); | ||
294 | kernel_neon_end(); | ||
295 | err = blkcipher_walk_done(desc, &walk, 0); | ||
296 | } | ||
297 | return err; | ||
298 | } | ||
299 | |||
300 | static struct crypto_alg aesbs_algs[] = { { | ||
301 | .cra_name = "__cbc-aes-neonbs", | ||
302 | .cra_driver_name = "__driver-cbc-aes-neonbs", | ||
303 | .cra_priority = 0, | ||
304 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
305 | .cra_blocksize = AES_BLOCK_SIZE, | ||
306 | .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), | ||
307 | .cra_alignmask = 7, | ||
308 | .cra_type = &crypto_blkcipher_type, | ||
309 | .cra_module = THIS_MODULE, | ||
310 | .cra_blkcipher = { | ||
311 | .min_keysize = AES_MIN_KEY_SIZE, | ||
312 | .max_keysize = AES_MAX_KEY_SIZE, | ||
313 | .ivsize = AES_BLOCK_SIZE, | ||
314 | .setkey = aesbs_cbc_set_key, | ||
315 | .encrypt = aesbs_cbc_encrypt, | ||
316 | .decrypt = aesbs_cbc_decrypt, | ||
317 | }, | ||
318 | }, { | ||
319 | .cra_name = "__ctr-aes-neonbs", | ||
320 | .cra_driver_name = "__driver-ctr-aes-neonbs", | ||
321 | .cra_priority = 0, | ||
322 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
323 | .cra_blocksize = 1, | ||
324 | .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), | ||
325 | .cra_alignmask = 7, | ||
326 | .cra_type = &crypto_blkcipher_type, | ||
327 | .cra_module = THIS_MODULE, | ||
328 | .cra_blkcipher = { | ||
329 | .min_keysize = AES_MIN_KEY_SIZE, | ||
330 | .max_keysize = AES_MAX_KEY_SIZE, | ||
331 | .ivsize = AES_BLOCK_SIZE, | ||
332 | .setkey = aesbs_ctr_set_key, | ||
333 | .encrypt = aesbs_ctr_encrypt, | ||
334 | .decrypt = aesbs_ctr_encrypt, | ||
335 | }, | ||
336 | }, { | ||
337 | .cra_name = "__xts-aes-neonbs", | ||
338 | .cra_driver_name = "__driver-xts-aes-neonbs", | ||
339 | .cra_priority = 0, | ||
340 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
341 | .cra_blocksize = AES_BLOCK_SIZE, | ||
342 | .cra_ctxsize = sizeof(struct aesbs_xts_ctx), | ||
343 | .cra_alignmask = 7, | ||
344 | .cra_type = &crypto_blkcipher_type, | ||
345 | .cra_module = THIS_MODULE, | ||
346 | .cra_blkcipher = { | ||
347 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
348 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
349 | .ivsize = AES_BLOCK_SIZE, | ||
350 | .setkey = aesbs_xts_set_key, | ||
351 | .encrypt = aesbs_xts_encrypt, | ||
352 | .decrypt = aesbs_xts_decrypt, | ||
353 | }, | ||
354 | }, { | ||
355 | .cra_name = "cbc(aes)", | ||
356 | .cra_driver_name = "cbc-aes-neonbs", | ||
357 | .cra_priority = 300, | ||
358 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
359 | .cra_blocksize = AES_BLOCK_SIZE, | ||
360 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
361 | .cra_alignmask = 7, | ||
362 | .cra_type = &crypto_ablkcipher_type, | ||
363 | .cra_module = THIS_MODULE, | ||
364 | .cra_init = ablk_init, | ||
365 | .cra_exit = ablk_exit, | ||
366 | .cra_ablkcipher = { | ||
367 | .min_keysize = AES_MIN_KEY_SIZE, | ||
368 | .max_keysize = AES_MAX_KEY_SIZE, | ||
369 | .ivsize = AES_BLOCK_SIZE, | ||
370 | .setkey = ablk_set_key, | ||
371 | .encrypt = __ablk_encrypt, | ||
372 | .decrypt = ablk_decrypt, | ||
373 | } | ||
374 | }, { | ||
375 | .cra_name = "ctr(aes)", | ||
376 | .cra_driver_name = "ctr-aes-neonbs", | ||
377 | .cra_priority = 300, | ||
378 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
379 | .cra_blocksize = 1, | ||
380 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
381 | .cra_alignmask = 7, | ||
382 | .cra_type = &crypto_ablkcipher_type, | ||
383 | .cra_module = THIS_MODULE, | ||
384 | .cra_init = ablk_init, | ||
385 | .cra_exit = ablk_exit, | ||
386 | .cra_ablkcipher = { | ||
387 | .min_keysize = AES_MIN_KEY_SIZE, | ||
388 | .max_keysize = AES_MAX_KEY_SIZE, | ||
389 | .ivsize = AES_BLOCK_SIZE, | ||
390 | .setkey = ablk_set_key, | ||
391 | .encrypt = ablk_encrypt, | ||
392 | .decrypt = ablk_decrypt, | ||
393 | } | ||
394 | }, { | ||
395 | .cra_name = "xts(aes)", | ||
396 | .cra_driver_name = "xts-aes-neonbs", | ||
397 | .cra_priority = 300, | ||
398 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
399 | .cra_blocksize = AES_BLOCK_SIZE, | ||
400 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
401 | .cra_alignmask = 7, | ||
402 | .cra_type = &crypto_ablkcipher_type, | ||
403 | .cra_module = THIS_MODULE, | ||
404 | .cra_init = ablk_init, | ||
405 | .cra_exit = ablk_exit, | ||
406 | .cra_ablkcipher = { | ||
407 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
408 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
409 | .ivsize = AES_BLOCK_SIZE, | ||
410 | .setkey = ablk_set_key, | ||
411 | .encrypt = ablk_encrypt, | ||
412 | .decrypt = ablk_decrypt, | ||
413 | } | ||
414 | } }; | ||
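The unprefixed cbc(aes)/ctr(aes)/xts(aes) entries are what users actually allocate; through the ablk helper they dispatch to the __driver- internals when NEON is usable in the calling context and to cryptd otherwise. A hedged usage sketch against the ablkcipher API of this era (error handling and the completion callback omitted for brevity; `key`, `src_sg`, `dst_sg`, `nbytes` and `iv` are assumed to be set up by the caller):

        struct crypto_ablkcipher *tfm;
        struct ablkcipher_request *req;
        int err;

        tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_ablkcipher_setkey(tfm, key, AES_KEYSIZE_128);
        req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
        ablkcipher_request_set_crypt(req, src_sg, dst_sg, nbytes, iv);
        err = crypto_ablkcipher_encrypt(req);   /* may return -EINPROGRESS */

        ablkcipher_request_free(req);
        crypto_free_ablkcipher(tfm);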
415 | |||
416 | static int __init aesbs_mod_init(void) | ||
417 | { | ||
418 | if (!cpu_has_neon()) | ||
419 | return -ENODEV; | ||
420 | |||
421 | return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); | ||
422 | } | ||
423 | |||
424 | static void __exit aesbs_mod_exit(void) | ||
425 | { | ||
426 | crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); | ||
427 | } | ||
428 | |||
429 | module_init(aesbs_mod_init); | ||
430 | module_exit(aesbs_mod_exit); | ||
431 | |||
432 | MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); | ||
433 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
434 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl new file mode 100644 index 000000000000..f3d96d932573 --- /dev/null +++ b/arch/arm/crypto/bsaes-armv7.pl | |||
@@ -0,0 +1,2467 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # | ||
9 | # Specific modes and adaptation for Linux kernel by Ard Biesheuvel | ||
10 | # <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is | ||
11 | # granted. | ||
12 | # ==================================================================== | ||
13 | |||
14 | # Bit-sliced AES for ARM NEON | ||
15 | # | ||
16 | # February 2012. | ||
17 | # | ||
18 | # This implementation is a direct adaptation of the bsaes-x86_64 module | ||
19 | # for ARM NEON, except that this module is endian-neutral [in the sense | ||
20 | # that it can be compiled for either endianness] courtesy of vld1.8's | ||
21 | # neutrality. The initial version doesn't implement an interface to | ||
22 | # OpenSSL, only low-level primitives and unsupported entry points, just | ||
23 | # enough to collect performance results, which for the Cortex-A8 core are: | ||
24 | # | ||
25 | # encrypt 19.5 cycles per byte processed with 128-bit key | ||
26 | # decrypt 22.1 cycles per byte processed with 128-bit key | ||
27 | # key conv. 440 cycles per 128-bit key/0.18 of 8x block | ||
28 | # | ||
29 | # Snapdragon S4 encrypts a byte in 17.6 cycles and decrypts one in 19.7, | ||
30 | # which is [much] worse than anticipated (for further details see | ||
31 | # http://www.openssl.org/~appro/Snapdragon-S4.html). | ||
32 | # | ||
33 | # Cortex-A15 manages 14.2/16.1 cycles [where the integer-only code | ||
34 | # manages 20.0 cycles]. | ||
35 | # | ||
36 | # When comparing to x86_64 results, keep in mind that the NEON unit is | ||
37 | # [mostly] single-issue and thus can't [fully] benefit from | ||
38 | # instruction-level parallelism. And when comparing to aes-armv4 | ||
39 | # results, keep in mind the key schedule conversion overhead (see | ||
40 | # bsaes-x86_64.pl for further details)... | ||
41 | # | ||
42 | # <appro@openssl.org> | ||
43 | |||
44 | # April-August 2013 | ||
45 | # | ||
46 | # Add CBC, CTR and XTS subroutines, adapt for kernel use. | ||
47 | # | ||
48 | # <ard.biesheuvel@linaro.org> | ||
49 | |||
50 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
51 | open STDOUT,">$output"; | ||
52 | |||
53 | my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); | ||
54 | my @XMM=map("q$_",(0..15)); | ||
55 | |||
56 | { | ||
57 | my ($key,$rounds,$const)=("r4","r5","r6"); | ||
58 | |||
59 | sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } | ||
60 | sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } | ||
61 | |||
62 | sub Sbox { | ||
63 | # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb | ||
64 | # output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb | ||
65 | my @b=@_[0..7]; | ||
66 | my @t=@_[8..11]; | ||
67 | my @s=@_[12..15]; | ||
68 | &InBasisChange (@b); | ||
69 | &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); | ||
70 | &OutBasisChange (@b[7,1,4,2,6,5,0,3]); | ||
71 | } | ||
72 | |||
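The three-stage split in Sbox mirrors the algebra of the AES S-box: a GF(2)-linear change of basis, a multiplicative inversion, and a second linear map. In the usual notation, assuming the standard tower-field construction:

        S(x) = A * x^{-1} XOR b,   with   GF(2^8) = GF(((2^2)^2)^2)

so Inv_GF256 only ever performs GF(16) and GF(4) arithmetic (Mul_GF16_2 and Mul_GF4 below). The affine constant b does not appear here; in bit-sliced implementations it is typically folded into the converted round keys.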
73 | sub InBasisChange { | ||
74 | # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb | ||
75 | # output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb | ||
76 | my @b=@_[0..7]; | ||
77 | $code.=<<___; | ||
78 | veor @b[2], @b[2], @b[1] | ||
79 | veor @b[5], @b[5], @b[6] | ||
80 | veor @b[3], @b[3], @b[0] | ||
81 | veor @b[6], @b[6], @b[2] | ||
82 | veor @b[5], @b[5], @b[0] | ||
83 | |||
84 | veor @b[6], @b[6], @b[3] | ||
85 | veor @b[3], @b[3], @b[7] | ||
86 | veor @b[7], @b[7], @b[5] | ||
87 | veor @b[3], @b[3], @b[4] | ||
88 | veor @b[4], @b[4], @b[5] | ||
89 | |||
90 | veor @b[2], @b[2], @b[7] | ||
91 | veor @b[3], @b[3], @b[1] | ||
92 | veor @b[1], @b[1], @b[5] | ||
93 | ___ | ||
94 | } | ||
95 | |||
96 | sub OutBasisChange { | ||
97 | # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb | ||
98 | # output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb | ||
99 | my @b=@_[0..7]; | ||
100 | $code.=<<___; | ||
101 | veor @b[0], @b[0], @b[6] | ||
102 | veor @b[1], @b[1], @b[4] | ||
103 | veor @b[4], @b[4], @b[6] | ||
104 | veor @b[2], @b[2], @b[0] | ||
105 | veor @b[6], @b[6], @b[1] | ||
106 | |||
107 | veor @b[1], @b[1], @b[5] | ||
108 | veor @b[5], @b[5], @b[3] | ||
109 | veor @b[3], @b[3], @b[7] | ||
110 | veor @b[7], @b[7], @b[5] | ||
111 | veor @b[2], @b[2], @b[5] | ||
112 | |||
113 | veor @b[4], @b[4], @b[7] | ||
114 | ___ | ||
115 | } | ||
116 | |||
117 | sub InvSbox { | ||
118 | # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb | ||
119 | # output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb | ||
120 | my @b=@_[0..7]; | ||
121 | my @t=@_[8..11]; | ||
122 | my @s=@_[12..15]; | ||
123 | &InvInBasisChange (@b); | ||
124 | &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); | ||
125 | &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); | ||
126 | } | ||
127 | |||
128 | sub InvInBasisChange { # OutBasisChange in reverse (with twist) | ||
129 | my @b=@_[5,1,2,6,3,7,0,4]; | ||
130 | $code.=<<___ | ||
131 | veor @b[1], @b[1], @b[7] | ||
132 | veor @b[4], @b[4], @b[7] | ||
133 | |||
134 | veor @b[7], @b[7], @b[5] | ||
135 | veor @b[1], @b[1], @b[3] | ||
136 | veor @b[2], @b[2], @b[5] | ||
137 | veor @b[3], @b[3], @b[7] | ||
138 | |||
139 | veor @b[6], @b[6], @b[1] | ||
140 | veor @b[2], @b[2], @b[0] | ||
141 | veor @b[5], @b[5], @b[3] | ||
142 | veor @b[4], @b[4], @b[6] | ||
143 | veor @b[0], @b[0], @b[6] | ||
144 | veor @b[1], @b[1], @b[4] | ||
145 | ___ | ||
146 | } | ||
147 | |||
148 | sub InvOutBasisChange { # InBasisChange in reverse | ||
149 | my @b=@_[2,5,7,3,6,1,0,4]; | ||
150 | $code.=<<___; | ||
151 | veor @b[1], @b[1], @b[5] | ||
152 | veor @b[2], @b[2], @b[7] | ||
153 | |||
154 | veor @b[3], @b[3], @b[1] | ||
155 | veor @b[4], @b[4], @b[5] | ||
156 | veor @b[7], @b[7], @b[5] | ||
157 | veor @b[3], @b[3], @b[4] | ||
158 | veor @b[5], @b[5], @b[0] | ||
159 | veor @b[3], @b[3], @b[7] | ||
160 | veor @b[6], @b[6], @b[2] | ||
161 | veor @b[2], @b[2], @b[1] | ||
162 | veor @b[6], @b[6], @b[3] | ||
163 | |||
164 | veor @b[3], @b[3], @b[0] | ||
165 | veor @b[5], @b[5], @b[6] | ||
166 | ___ | ||
167 | } | ||
168 | |||
169 | sub Mul_GF4 { | ||
170 | #;************************************************************* | ||
171 | #;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * | ||
172 | #;************************************************************* | ||
173 | my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; | ||
174 | $code.=<<___; | ||
175 | veor $t0, $y0, $y1 | ||
176 | vand $t0, $t0, $x0 | ||
177 | veor $x0, $x0, $x1 | ||
178 | vand $t1, $x1, $y0 | ||
179 | vand $x0, $x0, $y1 | ||
180 | veor $x1, $t1, $t0 | ||
181 | veor $x0, $x0, $t1 | ||
182 | ___ | ||
183 | } | ||
184 | |||
185 | sub Mul_GF4_N { # not used, see next subroutine | ||
186 | # multiply and scale by N | ||
187 | my ($x0,$x1,$y0,$y1,$t0)=@_; | ||
188 | $code.=<<___; | ||
189 | veor $t0, $y0, $y1 | ||
190 | vand $t0, $t0, $x0 | ||
191 | veor $x0, $x0, $x1 | ||
192 | vand $x1, $x1, $y0 | ||
193 | vand $x0, $x0, $y1 | ||
194 | veor $x1, $x1, $x0 | ||
195 | veor $x0, $x0, $t0 | ||
196 | ___ | ||
197 | } | ||
198 | |||
199 | sub Mul_GF4_N_GF4 { | ||
200 | # interleaved Mul_GF4_N and Mul_GF4 | ||
201 | my ($x0,$x1,$y0,$y1,$t0, | ||
202 | $x2,$x3,$y2,$y3,$t1)=@_; | ||
203 | $code.=<<___; | ||
204 | veor $t0, $y0, $y1 | ||
205 | veor $t1, $y2, $y3 | ||
206 | vand $t0, $t0, $x0 | ||
207 | vand $t1, $t1, $x2 | ||
208 | veor $x0, $x0, $x1 | ||
209 | veor $x2, $x2, $x3 | ||
210 | vand $x1, $x1, $y0 | ||
211 | vand $x3, $x3, $y2 | ||
212 | vand $x0, $x0, $y1 | ||
213 | vand $x2, $x2, $y3 | ||
214 | veor $x1, $x1, $x0 | ||
215 | veor $x2, $x2, $x3 | ||
216 | veor $x0, $x0, $t0 | ||
217 | veor $x3, $x3, $t1 | ||
218 | ___ | ||
219 | } | ||
220 | sub Mul_GF16_2 { | ||
221 | my @x=@_[0..7]; | ||
222 | my @y=@_[8..11]; | ||
223 | my @t=@_[12..15]; | ||
224 | $code.=<<___; | ||
225 | veor @t[0], @x[0], @x[2] | ||
226 | veor @t[1], @x[1], @x[3] | ||
227 | ___ | ||
228 | &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); | ||
229 | $code.=<<___; | ||
230 | veor @y[0], @y[0], @y[2] | ||
231 | veor @y[1], @y[1], @y[3] | ||
232 | ___ | ||
233 | Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], | ||
234 | @x[2], @x[3], @y[2], @y[3], @t[2]); | ||
235 | $code.=<<___; | ||
236 | veor @x[0], @x[0], @t[0] | ||
237 | veor @x[2], @x[2], @t[0] | ||
238 | veor @x[1], @x[1], @t[1] | ||
239 | veor @x[3], @x[3], @t[1] | ||
240 | |||
241 | veor @t[0], @x[4], @x[6] | ||
242 | veor @t[1], @x[5], @x[7] | ||
243 | ___ | ||
244 | &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], | ||
245 | @x[6], @x[7], @y[2], @y[3], @t[2]); | ||
246 | $code.=<<___; | ||
247 | veor @y[0], @y[0], @y[2] | ||
248 | veor @y[1], @y[1], @y[3] | ||
249 | ___ | ||
250 | &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); | ||
251 | $code.=<<___; | ||
252 | veor @x[4], @x[4], @t[0] | ||
253 | veor @x[6], @x[6], @t[0] | ||
254 | veor @x[5], @x[5], @t[1] | ||
255 | veor @x[7], @x[7], @t[1] | ||
256 | ___ | ||
257 | } | ||
258 | sub Inv_GF256 { | ||
259 | #;******************************************************************** | ||
260 | #;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * | ||
261 | #;******************************************************************** | ||
262 | my @x=@_[0..7]; | ||
263 | my @t=@_[8..11]; | ||
264 | my @s=@_[12..15]; | ||
265 | # direct optimizations from hardware | ||
266 | $code.=<<___; | ||
267 | veor @t[3], @x[4], @x[6] | ||
268 | veor @t[2], @x[5], @x[7] | ||
269 | veor @t[1], @x[1], @x[3] | ||
270 | veor @s[1], @x[7], @x[6] | ||
271 | vmov @t[0], @t[2] | ||
272 | veor @s[0], @x[0], @x[2] | ||
273 | |||
274 | vorr @t[2], @t[2], @t[1] | ||
275 | veor @s[3], @t[3], @t[0] | ||
276 | vand @s[2], @t[3], @s[0] | ||
277 | vorr @t[3], @t[3], @s[0] | ||
278 | veor @s[0], @s[0], @t[1] | ||
279 | vand @t[0], @t[0], @t[1] | ||
280 | veor @t[1], @x[3], @x[2] | ||
281 | vand @s[3], @s[3], @s[0] | ||
282 | vand @s[1], @s[1], @t[1] | ||
283 | veor @t[1], @x[4], @x[5] | ||
284 | veor @s[0], @x[1], @x[0] | ||
285 | veor @t[3], @t[3], @s[1] | ||
286 | veor @t[2], @t[2], @s[1] | ||
287 | vand @s[1], @t[1], @s[0] | ||
288 | vorr @t[1], @t[1], @s[0] | ||
289 | veor @t[3], @t[3], @s[3] | ||
290 | veor @t[0], @t[0], @s[1] | ||
291 | veor @t[2], @t[2], @s[2] | ||
292 | veor @t[1], @t[1], @s[3] | ||
293 | veor @t[0], @t[0], @s[2] | ||
294 | vand @s[0], @x[7], @x[3] | ||
295 | veor @t[1], @t[1], @s[2] | ||
296 | vand @s[1], @x[6], @x[2] | ||
297 | vand @s[2], @x[5], @x[1] | ||
298 | vorr @s[3], @x[4], @x[0] | ||
299 | veor @t[3], @t[3], @s[0] | ||
300 | veor @t[1], @t[1], @s[2] | ||
301 | veor @t[0], @t[0], @s[3] | ||
302 | veor @t[2], @t[2], @s[1] | ||
303 | |||
304 | @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 | ||
305 | |||
306 | @ new smaller inversion | ||
307 | |||
308 | vand @s[2], @t[3], @t[1] | ||
309 | vmov @s[0], @t[0] | ||
310 | |||
311 | veor @s[1], @t[2], @s[2] | ||
312 | veor @s[3], @t[0], @s[2] | ||
313 | veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] | ||
314 | |||
315 | vbsl @s[1], @t[1], @t[0] | ||
316 | vbsl @s[3], @t[3], @t[2] | ||
317 | veor @t[3], @t[3], @t[2] | ||
318 | |||
319 | vbsl @s[0], @s[1], @s[2] | ||
320 | vbsl @t[0], @s[2], @s[1] | ||
321 | |||
322 | vand @s[2], @s[0], @s[3] | ||
323 | veor @t[1], @t[1], @t[0] | ||
324 | |||
325 | veor @s[2], @s[2], @t[3] | ||
326 | ___ | ||
327 | # output in s3, s2, s1, t1 | ||
328 | |||
329 | # Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 | ||
330 | |||
331 | # Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 | ||
332 | &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); | ||
333 | |||
334 | ### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb | ||
335 | } | ||
336 | |||
337 | # AES linear components | ||
338 | |||
339 | sub ShiftRows { | ||
340 | my @x=@_[0..7]; | ||
341 | my @t=@_[8..11]; | ||
342 | my $mask=pop; | ||
343 | $code.=<<___; | ||
344 | vldmia $key!, {@t[0]-@t[3]} | ||
345 | veor @t[0], @t[0], @x[0] | ||
346 | veor @t[1], @t[1], @x[1] | ||
347 | vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` | ||
348 | vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` | ||
349 | vldmia $key!, {@t[0]} | ||
350 | veor @t[2], @t[2], @x[2] | ||
351 | vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` | ||
352 | vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` | ||
353 | vldmia $key!, {@t[1]} | ||
354 | veor @t[3], @t[3], @x[3] | ||
355 | vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` | ||
356 | vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` | ||
357 | vldmia $key!, {@t[2]} | ||
358 | vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` | ||
359 | vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` | ||
360 | vldmia $key!, {@t[3]} | ||
361 | veor @t[0], @t[0], @x[4] | ||
362 | veor @t[1], @t[1], @x[5] | ||
363 | vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` | ||
364 | vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` | ||
365 | veor @t[2], @t[2], @x[6] | ||
366 | vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` | ||
367 | vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` | ||
368 | veor @t[3], @t[3], @x[7] | ||
369 | vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` | ||
370 | vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` | ||
371 | vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` | ||
372 | vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` | ||
373 | ___ | ||
374 | } | ||
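Each vtbl.8 Dlo/Dhi pair above is a 16-byte table lookup: once the round key is XORed in, output byte i is fetched from position mask[i], so AddRoundKey plus ShiftRows costs one shuffle per quadword. A scalar model (the index vector in the demo is illustrative plain ShiftRows; the real .LM0SR/.LSR/.LISR constants further down compose this rotation with the bit-sliced byte ordering):

#include <stdint.h>
#include <stdio.h>

/* one vtbl.8 Dlo/Dhi pair: out[i] = in[idx[i]] over 16 bytes */
static void vtbl16(uint8_t out[16], const uint8_t in[16],
		   const uint8_t idx[16])
{
	for (int i = 0; i < 16; i++)
		out[i] = in[idx[i] & 0x0f];
}

int main(void)
{
	/* illustrative index vector: plain ShiftRows on a column-major
	 * state, i.e. row r rotated left by r positions */
	uint8_t idx[16], in[16], out[16];

	for (int i = 0; i < 16; i++) {
		int row = i % 4, col = i / 4;

		idx[i] = (uint8_t)(row + 4 * ((col + row) % 4));
		in[i] = (uint8_t)i;
	}
	vtbl16(out, in, idx);
	for (int i = 0; i < 16; i++)
		printf("%02x%c", out[i], i == 15 ? '\n' : ' ');
	return 0;
}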
375 | |||
376 | sub MixColumns { | ||
377 | # modified to emit output in order suitable for feeding back to aesenc[last] | ||
378 | my @x=@_[0..7]; | ||
379 | my @t=@_[8..15]; | ||
380 | my $inv=@_[16]; # optional | ||
381 | $code.=<<___; | ||
382 | vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 | ||
383 | vext.8 @t[1], @x[1], @x[1], #12 | ||
384 | veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) | ||
385 | vext.8 @t[2], @x[2], @x[2], #12 | ||
386 | veor @x[1], @x[1], @t[1] | ||
387 | vext.8 @t[3], @x[3], @x[3], #12 | ||
388 | veor @x[2], @x[2], @t[2] | ||
389 | vext.8 @t[4], @x[4], @x[4], #12 | ||
390 | veor @x[3], @x[3], @t[3] | ||
391 | vext.8 @t[5], @x[5], @x[5], #12 | ||
392 | veor @x[4], @x[4], @t[4] | ||
393 | vext.8 @t[6], @x[6], @x[6], #12 | ||
394 | veor @x[5], @x[5], @t[5] | ||
395 | vext.8 @t[7], @x[7], @x[7], #12 | ||
396 | veor @x[6], @x[6], @t[6] | ||
397 | |||
398 | veor @t[1], @t[1], @x[0] | ||
399 | veor @x[7], @x[7], @t[7] | ||
400 | 	vext.8	@x[0], @x[0], @x[0], #8	@ (x0 ^ (x0 <<< 32)) <<< 64 | ||
401 | veor @t[2], @t[2], @x[1] | ||
402 | veor @t[0], @t[0], @x[7] | ||
403 | veor @t[1], @t[1], @x[7] | ||
404 | vext.8 @x[1], @x[1], @x[1], #8 | ||
405 | veor @t[5], @t[5], @x[4] | ||
406 | veor @x[0], @x[0], @t[0] | ||
407 | veor @t[6], @t[6], @x[5] | ||
408 | veor @x[1], @x[1], @t[1] | ||
409 | vext.8 @t[0], @x[4], @x[4], #8 | ||
410 | veor @t[4], @t[4], @x[3] | ||
411 | vext.8 @t[1], @x[5], @x[5], #8 | ||
412 | veor @t[7], @t[7], @x[6] | ||
413 | vext.8 @x[4], @x[3], @x[3], #8 | ||
414 | veor @t[3], @t[3], @x[2] | ||
415 | vext.8 @x[5], @x[7], @x[7], #8 | ||
416 | veor @t[4], @t[4], @x[7] | ||
417 | vext.8 @x[3], @x[6], @x[6], #8 | ||
418 | veor @t[3], @t[3], @x[7] | ||
419 | vext.8 @x[6], @x[2], @x[2], #8 | ||
420 | veor @x[7], @t[1], @t[5] | ||
421 | ___ | ||
422 | $code.=<<___ if (!$inv); | ||
423 | veor @x[2], @t[0], @t[4] | ||
424 | veor @x[4], @x[4], @t[3] | ||
425 | veor @x[5], @x[5], @t[7] | ||
426 | veor @x[3], @x[3], @t[6] | ||
427 | @ vmov @x[2], @t[0] | ||
428 | veor @x[6], @x[6], @t[2] | ||
429 | @ vmov @x[7], @t[1] | ||
430 | ___ | ||
431 | $code.=<<___ if ($inv); | ||
432 | veor @t[3], @t[3], @x[4] | ||
433 | veor @x[5], @x[5], @t[7] | ||
434 | veor @x[2], @x[3], @t[6] | ||
435 | veor @x[3], @t[0], @t[4] | ||
436 | veor @x[4], @x[6], @t[2] | ||
437 | vmov @x[6], @t[3] | ||
438 | @ vmov @x[7], @t[1] | ||
439 | ___ | ||
440 | } | ||
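The rotation network above evaluates MixColumns on eight states at once, one bit-plane per register; the `x0 ^ (x0 <<< 32)` terms are that map rewritten as 32/64-bit rotations of the sliced rows. For reference, the same linear transform on one 4-byte column in the conventional byte-oriented form (checked against the FIPS-197 example column):

#include <stdint.h>
#include <stdio.h>

/* multiply by x (i.e. by 0x02) in GF(2^8) */
static uint8_t xtime(uint8_t x)
{
	return (uint8_t)((x << 1) ^ ((x >> 7) * 0x1b));
}

/* one column of MixColumns in byte form; the NEON code above computes
 * the same map on all columns of eight blocks simultaneously */
static void mix_column(uint8_t c[4])
{
	uint8_t a0 = c[0], a1 = c[1], a2 = c[2], a3 = c[3];
	uint8_t t = a0 ^ a1 ^ a2 ^ a3;

	c[0] ^= t ^ xtime(a0 ^ a1);	/* 02*a0 ^ 03*a1 ^ 01*a2 ^ 01*a3 */
	c[1] ^= t ^ xtime(a1 ^ a2);
	c[2] ^= t ^ xtime(a2 ^ a3);
	c[3] ^= t ^ xtime(a3 ^ a0);
}

int main(void)
{
	uint8_t col[4] = { 0xdb, 0x13, 0x53, 0x45 };	/* FIPS-197 example */

	mix_column(col);
	printf("%02x %02x %02x %02x\n",		/* expect 8e 4d a1 bc */
	       col[0], col[1], col[2], col[3]);
	return 0;
}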
441 | |||
442 | sub InvMixColumns_orig { | ||
443 | my @x=@_[0..7]; | ||
444 | my @t=@_[8..15]; | ||
445 | |||
446 | $code.=<<___; | ||
447 | @ multiplication by 0x0e | ||
448 | vext.8 @t[7], @x[7], @x[7], #12 | ||
449 | vmov @t[2], @x[2] | ||
450 | veor @x[2], @x[2], @x[5] @ 2 5 | ||
451 | veor @x[7], @x[7], @x[5] @ 7 5 | ||
452 | vext.8 @t[0], @x[0], @x[0], #12 | ||
453 | vmov @t[5], @x[5] | ||
454 | veor @x[5], @x[5], @x[0] @ 5 0 [1] | ||
455 | veor @x[0], @x[0], @x[1] @ 0 1 | ||
456 | vext.8 @t[1], @x[1], @x[1], #12 | ||
457 | veor @x[1], @x[1], @x[2] @ 1 25 | ||
458 | veor @x[0], @x[0], @x[6] @ 01 6 [2] | ||
459 | vext.8 @t[3], @x[3], @x[3], #12 | ||
460 | veor @x[1], @x[1], @x[3] @ 125 3 [4] | ||
461 | veor @x[2], @x[2], @x[0] @ 25 016 [3] | ||
462 | veor @x[3], @x[3], @x[7] @ 3 75 | ||
463 | veor @x[7], @x[7], @x[6] @ 75 6 [0] | ||
464 | vext.8 @t[6], @x[6], @x[6], #12 | ||
465 | vmov @t[4], @x[4] | ||
466 | veor @x[6], @x[6], @x[4] @ 6 4 | ||
467 | veor @x[4], @x[4], @x[3] @ 4 375 [6] | ||
468 | veor @x[3], @x[3], @x[7] @ 375 756=36 | ||
469 | veor @x[6], @x[6], @t[5] @ 64 5 [7] | ||
470 | veor @x[3], @x[3], @t[2] @ 36 2 | ||
471 | vext.8 @t[5], @t[5], @t[5], #12 | ||
472 | veor @x[3], @x[3], @t[4] @ 362 4 [5] | ||
473 | ___ | ||
474 | my @y = @x[7,5,0,2,1,3,4,6]; | ||
475 | $code.=<<___; | ||
476 | @ multiplication by 0x0b | ||
477 | veor @y[1], @y[1], @y[0] | ||
478 | veor @y[0], @y[0], @t[0] | ||
479 | vext.8 @t[2], @t[2], @t[2], #12 | ||
480 | veor @y[1], @y[1], @t[1] | ||
481 | veor @y[0], @y[0], @t[5] | ||
482 | vext.8 @t[4], @t[4], @t[4], #12 | ||
483 | veor @y[1], @y[1], @t[6] | ||
484 | veor @y[0], @y[0], @t[7] | ||
485 | veor @t[7], @t[7], @t[6] @ clobber t[7] | ||
486 | |||
487 | veor @y[3], @y[3], @t[0] | ||
488 | veor @y[1], @y[1], @y[0] | ||
489 | vext.8 @t[0], @t[0], @t[0], #12 | ||
490 | veor @y[2], @y[2], @t[1] | ||
491 | veor @y[4], @y[4], @t[1] | ||
492 | vext.8 @t[1], @t[1], @t[1], #12 | ||
493 | veor @y[2], @y[2], @t[2] | ||
494 | veor @y[3], @y[3], @t[2] | ||
495 | veor @y[5], @y[5], @t[2] | ||
496 | veor @y[2], @y[2], @t[7] | ||
497 | vext.8 @t[2], @t[2], @t[2], #12 | ||
498 | veor @y[3], @y[3], @t[3] | ||
499 | veor @y[6], @y[6], @t[3] | ||
500 | veor @y[4], @y[4], @t[3] | ||
501 | veor @y[7], @y[7], @t[4] | ||
502 | vext.8 @t[3], @t[3], @t[3], #12 | ||
503 | veor @y[5], @y[5], @t[4] | ||
504 | veor @y[7], @y[7], @t[7] | ||
505 | veor @t[7], @t[7], @t[5] @ clobber t[7] even more | ||
506 | veor @y[3], @y[3], @t[5] | ||
507 | veor @y[4], @y[4], @t[4] | ||
508 | |||
509 | veor @y[5], @y[5], @t[7] | ||
510 | vext.8 @t[4], @t[4], @t[4], #12 | ||
511 | veor @y[6], @y[6], @t[7] | ||
512 | veor @y[4], @y[4], @t[7] | ||
513 | |||
514 | veor @t[7], @t[7], @t[5] | ||
515 | vext.8 @t[5], @t[5], @t[5], #12 | ||
516 | |||
517 | @ multiplication by 0x0d | ||
518 | veor @y[4], @y[4], @y[7] | ||
519 | veor @t[7], @t[7], @t[6] @ restore t[7] | ||
520 | veor @y[7], @y[7], @t[4] | ||
521 | vext.8 @t[6], @t[6], @t[6], #12 | ||
522 | veor @y[2], @y[2], @t[0] | ||
523 | veor @y[7], @y[7], @t[5] | ||
524 | vext.8 @t[7], @t[7], @t[7], #12 | ||
525 | veor @y[2], @y[2], @t[2] | ||
526 | |||
527 | veor @y[3], @y[3], @y[1] | ||
528 | veor @y[1], @y[1], @t[1] | ||
529 | veor @y[0], @y[0], @t[0] | ||
530 | veor @y[3], @y[3], @t[0] | ||
531 | veor @y[1], @y[1], @t[5] | ||
532 | veor @y[0], @y[0], @t[5] | ||
533 | vext.8 @t[0], @t[0], @t[0], #12 | ||
534 | veor @y[1], @y[1], @t[7] | ||
535 | veor @y[0], @y[0], @t[6] | ||
536 | veor @y[3], @y[3], @y[1] | ||
537 | veor @y[4], @y[4], @t[1] | ||
538 | vext.8 @t[1], @t[1], @t[1], #12 | ||
539 | |||
540 | veor @y[7], @y[7], @t[7] | ||
541 | veor @y[4], @y[4], @t[2] | ||
542 | veor @y[5], @y[5], @t[2] | ||
543 | veor @y[2], @y[2], @t[6] | ||
544 | veor @t[6], @t[6], @t[3] @ clobber t[6] | ||
545 | vext.8 @t[2], @t[2], @t[2], #12 | ||
546 | veor @y[4], @y[4], @y[7] | ||
547 | veor @y[3], @y[3], @t[6] | ||
548 | |||
549 | veor @y[6], @y[6], @t[6] | ||
550 | veor @y[5], @y[5], @t[5] | ||
551 | vext.8 @t[5], @t[5], @t[5], #12 | ||
552 | veor @y[6], @y[6], @t[4] | ||
553 | vext.8 @t[4], @t[4], @t[4], #12 | ||
554 | veor @y[5], @y[5], @t[6] | ||
555 | veor @y[6], @y[6], @t[7] | ||
556 | vext.8 @t[7], @t[7], @t[7], #12 | ||
557 | veor @t[6], @t[6], @t[3] @ restore t[6] | ||
558 | vext.8 @t[3], @t[3], @t[3], #12 | ||
559 | |||
560 | @ multiplication by 0x09 | ||
561 | veor @y[4], @y[4], @y[1] | ||
562 | veor @t[1], @t[1], @y[1] @ t[1]=y[1] | ||
563 | veor @t[0], @t[0], @t[5] @ clobber t[0] | ||
564 | vext.8 @t[6], @t[6], @t[6], #12 | ||
565 | veor @t[1], @t[1], @t[5] | ||
566 | veor @y[3], @y[3], @t[0] | ||
567 | veor @t[0], @t[0], @y[0] @ t[0]=y[0] | ||
568 | veor @t[1], @t[1], @t[6] | ||
569 | veor @t[6], @t[6], @t[7] @ clobber t[6] | ||
570 | veor @y[4], @y[4], @t[1] | ||
571 | veor @y[7], @y[7], @t[4] | ||
572 | veor @y[6], @y[6], @t[3] | ||
573 | veor @y[5], @y[5], @t[2] | ||
574 | veor @t[4], @t[4], @y[4] @ t[4]=y[4] | ||
575 | veor @t[3], @t[3], @y[3] @ t[3]=y[3] | ||
576 | veor @t[5], @t[5], @y[5] @ t[5]=y[5] | ||
577 | veor @t[2], @t[2], @y[2] @ t[2]=y[2] | ||
578 | veor @t[3], @t[3], @t[7] | ||
579 | veor @XMM[5], @t[5], @t[6] | ||
580 | veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] | ||
581 | veor @XMM[2], @t[2], @t[6] | ||
582 | veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] | ||
583 | |||
584 | vmov @XMM[0], @t[0] | ||
585 | vmov @XMM[1], @t[1] | ||
586 | @ vmov @XMM[2], @t[2] | ||
587 | vmov @XMM[3], @t[3] | ||
588 | vmov @XMM[4], @t[4] | ||
589 | @ vmov @XMM[5], @t[5] | ||
590 | @ vmov @XMM[6], @t[6] | ||
591 | @ vmov @XMM[7], @t[7] | ||
592 | ___ | ||
593 | } | ||
594 | |||
595 | sub InvMixColumns { | ||
596 | my @x=@_[0..7]; | ||
597 | my @t=@_[8..15]; | ||
598 | |||
599 | # Thanks to Jussi Kivilinna for providing a pointer to | ||
600 | # | ||
601 | # | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | | ||
602 | # | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | | ||
603 | # | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | | ||
604 | # | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | | ||
605 | |||
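The factorization lets InvMixColumns reuse the cheap MixColumns network after a pre-multiplication by the sparse 05-00-04-00 circulant. It can be checked mechanically: circulant matrices multiply by GF(2^8) convolution of their first rows. A small verification sketch:

#include <stdint.h>
#include <assert.h>

static uint8_t gmul(uint8_t a, uint8_t b)
{
	uint8_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0));
		b >>= 1;
	}
	return r;
}

int main(void)
{
	/* first rows of the three 4x4 circulants in the comment above */
	const uint8_t inv_mc[4] = { 0x0e, 0x0b, 0x0d, 0x09 };
	const uint8_t mc[4]     = { 0x02, 0x03, 0x01, 0x01 };
	const uint8_t pre[4]    = { 0x05, 0x00, 0x04, 0x00 };

	/* entry k of the product's first row is a convolution mod 4 */
	for (int k = 0; k < 4; k++) {
		uint8_t c = 0;

		for (int j = 0; j < 4; j++)
			c ^= gmul(mc[j], pre[(k - j) & 3]);
		assert(c == inv_mc[k]);
	}
	return 0;
}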
606 | $code.=<<___; | ||
607 | @ multiplication by 0x05-0x00-0x04-0x00 | ||
608 | vext.8 @t[0], @x[0], @x[0], #8 | ||
609 | vext.8 @t[6], @x[6], @x[6], #8 | ||
610 | vext.8 @t[7], @x[7], @x[7], #8 | ||
611 | veor @t[0], @t[0], @x[0] | ||
612 | vext.8 @t[1], @x[1], @x[1], #8 | ||
613 | veor @t[6], @t[6], @x[6] | ||
614 | vext.8 @t[2], @x[2], @x[2], #8 | ||
615 | veor @t[7], @t[7], @x[7] | ||
616 | vext.8 @t[3], @x[3], @x[3], #8 | ||
617 | veor @t[1], @t[1], @x[1] | ||
618 | vext.8 @t[4], @x[4], @x[4], #8 | ||
619 | veor @t[2], @t[2], @x[2] | ||
620 | vext.8 @t[5], @x[5], @x[5], #8 | ||
621 | veor @t[3], @t[3], @x[3] | ||
622 | veor @t[4], @t[4], @x[4] | ||
623 | veor @t[5], @t[5], @x[5] | ||
624 | |||
625 | veor @x[0], @x[0], @t[6] | ||
626 | veor @x[1], @x[1], @t[6] | ||
627 | veor @x[2], @x[2], @t[0] | ||
628 | veor @x[4], @x[4], @t[2] | ||
629 | veor @x[3], @x[3], @t[1] | ||
630 | veor @x[1], @x[1], @t[7] | ||
631 | veor @x[2], @x[2], @t[7] | ||
632 | veor @x[4], @x[4], @t[6] | ||
633 | veor @x[5], @x[5], @t[3] | ||
634 | veor @x[3], @x[3], @t[6] | ||
635 | veor @x[6], @x[6], @t[4] | ||
636 | veor @x[4], @x[4], @t[7] | ||
637 | veor @x[5], @x[5], @t[7] | ||
638 | veor @x[7], @x[7], @t[5] | ||
639 | ___ | ||
640 | &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 | ||
641 | } | ||
642 | |||
643 | sub swapmove { | ||
644 | my ($a,$b,$n,$mask,$t)=@_; | ||
645 | $code.=<<___; | ||
646 | vshr.u64 $t, $b, #$n | ||
647 | veor $t, $t, $a | ||
648 | vand $t, $t, $mask | ||
649 | veor $a, $a, $t | ||
650 | vshl.u64 $t, $t, #$n | ||
651 | veor $b, $b, $t | ||
652 | ___ | ||
653 | } | ||
654 | sub swapmove2x { | ||
655 | my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; | ||
656 | $code.=<<___; | ||
657 | vshr.u64 $t0, $b0, #$n | ||
658 | vshr.u64 $t1, $b1, #$n | ||
659 | veor $t0, $t0, $a0 | ||
660 | veor $t1, $t1, $a1 | ||
661 | vand $t0, $t0, $mask | ||
662 | vand $t1, $t1, $mask | ||
663 | veor $a0, $a0, $t0 | ||
664 | vshl.u64 $t0, $t0, #$n | ||
665 | veor $a1, $a1, $t1 | ||
666 | vshl.u64 $t1, $t1, #$n | ||
667 | veor $b0, $b0, $t0 | ||
668 | veor $b1, $b1, $t1 | ||
669 | ___ | ||
670 | } | ||
671 | |||
672 | sub bitslice { | ||
673 | my @x=reverse(@_[0..7]); | ||
674 | my ($t0,$t1,$t2,$t3)=@_[8..11]; | ||
675 | $code.=<<___; | ||
676 | vmov.i8 $t0,#0x55 @ compose .LBS0 | ||
677 | vmov.i8 $t1,#0x33 @ compose .LBS1 | ||
678 | ___ | ||
679 | &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); | ||
680 | &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); | ||
681 | $code.=<<___; | ||
682 | vmov.i8 $t0,#0x0f @ compose .LBS2 | ||
683 | ___ | ||
684 | &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); | ||
685 | &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); | ||
686 | |||
687 | &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); | ||
688 | &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); | ||
689 | } | ||
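swapmove is the classic masked delta-swap, and three passes of it at shifts 1, 2 and 4 (masks 0x55..., 0x33..., 0x0f...) transpose an 8x8 bit matrix across the eight registers -- that is the whole bit-slicing step. A scalar model of the same network, with the register pairings mirroring the swapmove2x calls above:

#include <stdint.h>
#include <stdio.h>

/* masked delta-swap: exchange the bits of *a selected by mask with the
 * bits of *b sitting n positions higher -- same dataflow as the
 * vshr/veor/vand/veor/vshl/veor sequence above */
static void swapmove(uint64_t *a, uint64_t *b, int n, uint64_t mask)
{
	uint64_t t = ((*b >> n) ^ *a) & mask;

	*a ^= t;
	*b ^= t << n;
}

int main(void)
{
	/* after the three passes each x[i] holds one bit-plane of the
	 * input bytes -- an 8x8 bit-matrix transpose */
	uint64_t x[8] = { 0x0123456789abcdefULL, 1, 2, 3, 4, 5, 6, 7 };

	swapmove(&x[0], &x[1], 1, 0x5555555555555555ULL);
	swapmove(&x[2], &x[3], 1, 0x5555555555555555ULL);
	swapmove(&x[4], &x[5], 1, 0x5555555555555555ULL);
	swapmove(&x[6], &x[7], 1, 0x5555555555555555ULL);

	swapmove(&x[0], &x[2], 2, 0x3333333333333333ULL);
	swapmove(&x[1], &x[3], 2, 0x3333333333333333ULL);
	swapmove(&x[4], &x[6], 2, 0x3333333333333333ULL);
	swapmove(&x[5], &x[7], 2, 0x3333333333333333ULL);

	swapmove(&x[0], &x[4], 4, 0x0f0f0f0f0f0f0f0fULL);
	swapmove(&x[1], &x[5], 4, 0x0f0f0f0f0f0f0f0fULL);
	swapmove(&x[2], &x[6], 4, 0x0f0f0f0f0f0f0f0fULL);
	swapmove(&x[3], &x[7], 4, 0x0f0f0f0f0f0f0f0fULL);

	for (int i = 0; i < 8; i++)
		printf("x[%d] = %016llx\n", i, (unsigned long long)x[i]);
	return 0;
}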
690 | |||
691 | $code.=<<___; | ||
692 | #ifndef __KERNEL__ | ||
693 | # include "arm_arch.h" | ||
694 | |||
695 | # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} | ||
696 | # define VFP_ABI_POP vldmia sp!,{d8-d15} | ||
697 | # define VFP_ABI_FRAME 0x40 | ||
698 | #else | ||
699 | # define VFP_ABI_PUSH | ||
700 | # define VFP_ABI_POP | ||
701 | # define VFP_ABI_FRAME 0 | ||
702 | # define BSAES_ASM_EXTENDED_KEY | ||
703 | # define XTS_CHAIN_TWEAK | ||
704 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
705 | #endif | ||
706 | |||
707 | #ifdef __thumb__ | ||
708 | # define adrl adr | ||
709 | #endif | ||
710 | |||
711 | #if __ARM_ARCH__>=7 | ||
712 | .text | ||
713 | .syntax unified @ ARMv7-capable assembler is expected to handle this | ||
714 | #ifdef __thumb2__ | ||
715 | .thumb | ||
716 | #else | ||
717 | .code 32 | ||
718 | #endif | ||
719 | |||
720 | .fpu neon | ||
721 | |||
722 | .type _bsaes_decrypt8,%function | ||
723 | .align 4 | ||
724 | _bsaes_decrypt8: | ||
725 | adr $const,_bsaes_decrypt8 | ||
726 | vldmia $key!, {@XMM[9]} @ round 0 key | ||
727 | add $const,$const,#.LM0ISR-_bsaes_decrypt8 | ||
728 | |||
729 | vldmia $const!, {@XMM[8]} @ .LM0ISR | ||
730 | veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key | ||
731 | veor @XMM[11], @XMM[1], @XMM[9] | ||
732 | vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` | ||
733 | vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` | ||
734 | veor @XMM[12], @XMM[2], @XMM[9] | ||
735 | vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` | ||
736 | vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` | ||
737 | veor @XMM[13], @XMM[3], @XMM[9] | ||
738 | vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` | ||
739 | vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` | ||
740 | veor @XMM[14], @XMM[4], @XMM[9] | ||
741 | vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` | ||
742 | vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` | ||
743 | veor @XMM[15], @XMM[5], @XMM[9] | ||
744 | vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` | ||
745 | vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` | ||
746 | veor @XMM[10], @XMM[6], @XMM[9] | ||
747 | vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` | ||
748 | vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` | ||
749 | veor @XMM[11], @XMM[7], @XMM[9] | ||
750 | vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` | ||
751 | vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` | ||
752 | vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` | ||
753 | vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` | ||
754 | ___ | ||
755 | &bitslice (@XMM[0..7, 8..11]); | ||
756 | $code.=<<___; | ||
757 | sub $rounds,$rounds,#1 | ||
758 | b .Ldec_sbox | ||
759 | .align 4 | ||
760 | .Ldec_loop: | ||
761 | ___ | ||
762 | &ShiftRows (@XMM[0..7, 8..12]); | ||
763 | $code.=".Ldec_sbox:\n"; | ||
764 | &InvSbox (@XMM[0..7, 8..15]); | ||
765 | $code.=<<___; | ||
766 | subs $rounds,$rounds,#1 | ||
767 | bcc .Ldec_done | ||
768 | ___ | ||
769 | &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); | ||
770 | $code.=<<___; | ||
771 | vldmia $const, {@XMM[12]} @ .LISR | ||
772 | ite eq @ Thumb2 thing, sanity check in ARM | ||
773 | addeq $const,$const,#0x10 | ||
774 | bne .Ldec_loop | ||
775 | vldmia $const, {@XMM[12]} @ .LISRM0 | ||
776 | b .Ldec_loop | ||
777 | .align 4 | ||
778 | .Ldec_done: | ||
779 | ___ | ||
780 | &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); | ||
781 | $code.=<<___; | ||
782 | vldmia $key, {@XMM[8]} @ last round key | ||
783 | veor @XMM[6], @XMM[6], @XMM[8] | ||
784 | veor @XMM[4], @XMM[4], @XMM[8] | ||
785 | veor @XMM[2], @XMM[2], @XMM[8] | ||
786 | veor @XMM[7], @XMM[7], @XMM[8] | ||
787 | veor @XMM[3], @XMM[3], @XMM[8] | ||
788 | veor @XMM[5], @XMM[5], @XMM[8] | ||
789 | veor @XMM[0], @XMM[0], @XMM[8] | ||
790 | veor @XMM[1], @XMM[1], @XMM[8] | ||
791 | bx lr | ||
792 | .size _bsaes_decrypt8,.-_bsaes_decrypt8 | ||
793 | |||
794 | .type _bsaes_const,%object | ||
795 | .align 6 | ||
796 | _bsaes_const: | ||
797 | .LM0ISR: @ InvShiftRows constants | ||
798 | .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 | ||
799 | .LISR: | ||
800 | .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 | ||
801 | .LISRM0: | ||
802 | .quad 0x01040b0e0205080f, 0x0306090c00070a0d | ||
803 | .LM0SR: @ ShiftRows constants | ||
804 | .quad 0x0a0e02060f03070b, 0x0004080c05090d01 | ||
805 | .LSR: | ||
806 | .quad 0x0504070600030201, 0x0f0e0d0c0a09080b | ||
807 | .LSRM0: | ||
808 | .quad 0x0304090e00050a0f, 0x01060b0c0207080d | ||
809 | .LM0: | ||
810 | .quad 0x02060a0e03070b0f, 0x0004080c0105090d | ||
811 | .LREVM0SR: | ||
812 | .quad 0x090d01050c000408, 0x03070b0f060a0e02 | ||
813 | .asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" | ||
814 | .align 6 | ||
815 | .size _bsaes_const,.-_bsaes_const | ||
816 | |||
817 | .type _bsaes_encrypt8,%function | ||
818 | .align 4 | ||
819 | _bsaes_encrypt8: | ||
820 | adr $const,_bsaes_encrypt8 | ||
821 | vldmia $key!, {@XMM[9]} @ round 0 key | ||
822 | sub $const,$const,#_bsaes_encrypt8-.LM0SR | ||
823 | |||
824 | vldmia $const!, {@XMM[8]} @ .LM0SR | ||
825 | _bsaes_encrypt8_alt: | ||
826 | veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key | ||
827 | veor @XMM[11], @XMM[1], @XMM[9] | ||
828 | vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` | ||
829 | vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` | ||
830 | veor @XMM[12], @XMM[2], @XMM[9] | ||
831 | vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` | ||
832 | vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` | ||
833 | veor @XMM[13], @XMM[3], @XMM[9] | ||
834 | vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` | ||
835 | vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` | ||
836 | veor @XMM[14], @XMM[4], @XMM[9] | ||
837 | vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` | ||
838 | vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` | ||
839 | veor @XMM[15], @XMM[5], @XMM[9] | ||
840 | vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` | ||
841 | vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` | ||
842 | veor @XMM[10], @XMM[6], @XMM[9] | ||
843 | vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` | ||
844 | vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` | ||
845 | veor @XMM[11], @XMM[7], @XMM[9] | ||
846 | vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` | ||
847 | vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` | ||
848 | vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` | ||
849 | vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` | ||
850 | _bsaes_encrypt8_bitslice: | ||
851 | ___ | ||
852 | &bitslice (@XMM[0..7, 8..11]); | ||
853 | $code.=<<___; | ||
854 | sub $rounds,$rounds,#1 | ||
855 | b .Lenc_sbox | ||
856 | .align 4 | ||
857 | .Lenc_loop: | ||
858 | ___ | ||
859 | &ShiftRows (@XMM[0..7, 8..12]); | ||
860 | $code.=".Lenc_sbox:\n"; | ||
861 | &Sbox (@XMM[0..7, 8..15]); | ||
862 | $code.=<<___; | ||
863 | subs $rounds,$rounds,#1 | ||
864 | bcc .Lenc_done | ||
865 | ___ | ||
866 | &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); | ||
867 | $code.=<<___; | ||
868 | vldmia $const, {@XMM[12]} @ .LSR | ||
869 | 	ite	eq			@ Thumb2 thing, sanity check in ARM | ||
870 | addeq $const,$const,#0x10 | ||
871 | bne .Lenc_loop | ||
872 | vldmia $const, {@XMM[12]} @ .LSRM0 | ||
873 | b .Lenc_loop | ||
874 | .align 4 | ||
875 | .Lenc_done: | ||
876 | ___ | ||
877 | # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb | ||
878 | &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); | ||
879 | $code.=<<___; | ||
880 | vldmia $key, {@XMM[8]} @ last round key | ||
881 | veor @XMM[4], @XMM[4], @XMM[8] | ||
882 | veor @XMM[6], @XMM[6], @XMM[8] | ||
883 | veor @XMM[3], @XMM[3], @XMM[8] | ||
884 | veor @XMM[7], @XMM[7], @XMM[8] | ||
885 | veor @XMM[2], @XMM[2], @XMM[8] | ||
886 | veor @XMM[5], @XMM[5], @XMM[8] | ||
887 | veor @XMM[0], @XMM[0], @XMM[8] | ||
888 | veor @XMM[1], @XMM[1], @XMM[8] | ||
889 | bx lr | ||
890 | .size _bsaes_encrypt8,.-_bsaes_encrypt8 | ||
891 | ___ | ||
892 | } | ||
893 | { | ||
894 | my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); | ||
895 | |||
896 | sub bitslice_key { | ||
897 | my @x=reverse(@_[0..7]); | ||
898 | my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; | ||
899 | |||
900 | &swapmove (@x[0,1],1,$bs0,$t2,$t3); | ||
901 | $code.=<<___; | ||
902 | @ &swapmove(@x[2,3],1,$t0,$t2,$t3); | ||
903 | vmov @x[2], @x[0] | ||
904 | vmov @x[3], @x[1] | ||
905 | ___ | ||
906 | #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); | ||
907 | |||
908 | &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); | ||
909 | $code.=<<___; | ||
910 | @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); | ||
911 | vmov @x[4], @x[0] | ||
912 | vmov @x[6], @x[2] | ||
913 | vmov @x[5], @x[1] | ||
914 | vmov @x[7], @x[3] | ||
915 | ___ | ||
916 | &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); | ||
917 | &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); | ||
918 | } | ||
919 | |||
920 | $code.=<<___; | ||
921 | .type _bsaes_key_convert,%function | ||
922 | .align 4 | ||
923 | _bsaes_key_convert: | ||
924 | adr $const,_bsaes_key_convert | ||
925 | vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key | ||
926 | sub $const,$const,#_bsaes_key_convert-.LM0 | ||
927 | vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key | ||
928 | |||
929 | vmov.i8 @XMM[8], #0x01 @ bit masks | ||
930 | vmov.i8 @XMM[9], #0x02 | ||
931 | vmov.i8 @XMM[10], #0x04 | ||
932 | vmov.i8 @XMM[11], #0x08 | ||
933 | vmov.i8 @XMM[12], #0x10 | ||
934 | vmov.i8 @XMM[13], #0x20 | ||
935 | vldmia $const, {@XMM[14]} @ .LM0 | ||
936 | |||
937 | #ifdef __ARMEL__ | ||
938 | vrev32.8 @XMM[7], @XMM[7] | ||
939 | vrev32.8 @XMM[15], @XMM[15] | ||
940 | #endif | ||
941 | sub $rounds,$rounds,#1 | ||
942 | vstmia $out!, {@XMM[7]} @ save round 0 key | ||
943 | b .Lkey_loop | ||
944 | |||
945 | .align 4 | ||
946 | .Lkey_loop: | ||
947 | vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` | ||
948 | vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` | ||
949 | vmov.i8 @XMM[6], #0x40 | ||
950 | vmov.i8 @XMM[15], #0x80 | ||
951 | |||
952 | vtst.8 @XMM[0], @XMM[7], @XMM[8] | ||
953 | vtst.8 @XMM[1], @XMM[7], @XMM[9] | ||
954 | vtst.8 @XMM[2], @XMM[7], @XMM[10] | ||
955 | vtst.8 @XMM[3], @XMM[7], @XMM[11] | ||
956 | vtst.8 @XMM[4], @XMM[7], @XMM[12] | ||
957 | vtst.8 @XMM[5], @XMM[7], @XMM[13] | ||
958 | vtst.8 @XMM[6], @XMM[7], @XMM[6] | ||
959 | vtst.8 @XMM[7], @XMM[7], @XMM[15] | ||
960 | vld1.8 {@XMM[15]}, [$inp]! @ load next round key | ||
961 | vmvn @XMM[0], @XMM[0] @ "pnot" | ||
962 | vmvn @XMM[1], @XMM[1] | ||
963 | vmvn @XMM[5], @XMM[5] | ||
964 | vmvn @XMM[6], @XMM[6] | ||
965 | #ifdef __ARMEL__ | ||
966 | vrev32.8 @XMM[15], @XMM[15] | ||
967 | #endif | ||
968 | subs $rounds,$rounds,#1 | ||
969 | vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key | ||
970 | bne .Lkey_loop | ||
971 | |||
972 | vmov.i8 @XMM[7],#0x63 @ compose .L63 | ||
973 | @ don't save last round key | ||
974 | bx lr | ||
975 | .size _bsaes_key_convert,.-_bsaes_key_convert | ||
976 | ___ | ||
977 | } | ||
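_bsaes_key_convert expands each 16-byte round key into eight all-ones/all-zeros bit-plane masks (one vtst per bit position), complementing planes 0, 1, 5 and 6 so that the S-box affine constant 0x63 (= bits 0, 1, 5, 6) is absorbed into the schedule -- hence the lone `compose .L63` at the end. A scalar sketch of the per-key transform (the real code also routes the key bytes through the .LM0 permutation first and byte-swaps on little-endian, both omitted here):

#include <stdint.h>

/* plane[b][i] = 0xff iff bit b of key byte i is set, with planes
 * 0, 1, 5, 6 complemented to fold in the S-box constant 0x63 */
void key_to_bitplanes(uint8_t planes[8][16], const uint8_t rk[16])
{
	for (int b = 0; b < 8; b++) {
		int invert = (0x63 >> b) & 1;

		for (int i = 0; i < 16; i++) {
			uint8_t bit = (rk[i] >> b) & 1;

			planes[b][i] = (bit ^ invert) ? 0xff : 0x00;
		}
	}
}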
978 | |||
979 | if (0) {	# the following four functions are an unsupported interface | ||
980 | # used for benchmarking... | ||
981 | $code.=<<___; | ||
982 | .globl bsaes_enc_key_convert | ||
983 | .type bsaes_enc_key_convert,%function | ||
984 | .align 4 | ||
985 | bsaes_enc_key_convert: | ||
986 | stmdb sp!,{r4-r6,lr} | ||
987 | vstmdb sp!,{d8-d15} @ ABI specification says so | ||
988 | |||
989 | ldr r5,[$inp,#240] @ pass rounds | ||
990 | mov r4,$inp @ pass key | ||
991 | mov r12,$out @ pass key schedule | ||
992 | bl _bsaes_key_convert | ||
993 | veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key | ||
994 | vstmia r12, {@XMM[7]} @ save last round key | ||
995 | |||
996 | vldmia sp!,{d8-d15} | ||
997 | ldmia sp!,{r4-r6,pc} | ||
998 | .size bsaes_enc_key_convert,.-bsaes_enc_key_convert | ||
999 | |||
1000 | .globl bsaes_encrypt_128 | ||
1001 | .type bsaes_encrypt_128,%function | ||
1002 | .align 4 | ||
1003 | bsaes_encrypt_128: | ||
1004 | stmdb sp!,{r4-r6,lr} | ||
1005 | vstmdb sp!,{d8-d15} @ ABI specification says so | ||
1006 | .Lenc128_loop: | ||
1007 | vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input | ||
1008 | vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! | ||
1009 | mov r4,$key @ pass the key | ||
1010 | vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! | ||
1011 | mov r5,#10 @ pass rounds | ||
1012 | vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! | ||
1013 | |||
1014 | bl _bsaes_encrypt8 | ||
1015 | |||
1016 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1017 | vst1.8 {@XMM[4]}, [$out]! | ||
1018 | vst1.8 {@XMM[6]}, [$out]! | ||
1019 | vst1.8 {@XMM[3]}, [$out]! | ||
1020 | vst1.8 {@XMM[7]}, [$out]! | ||
1021 | vst1.8 {@XMM[2]}, [$out]! | ||
1022 | subs $len,$len,#0x80 | ||
1023 | vst1.8 {@XMM[5]}, [$out]! | ||
1024 | bhi .Lenc128_loop | ||
1025 | |||
1026 | vldmia sp!,{d8-d15} | ||
1027 | ldmia sp!,{r4-r6,pc} | ||
1028 | .size bsaes_encrypt_128,.-bsaes_encrypt_128 | ||
1029 | |||
1030 | .globl bsaes_dec_key_convert | ||
1031 | .type bsaes_dec_key_convert,%function | ||
1032 | .align 4 | ||
1033 | bsaes_dec_key_convert: | ||
1034 | stmdb sp!,{r4-r6,lr} | ||
1035 | vstmdb sp!,{d8-d15} @ ABI specification says so | ||
1036 | |||
1037 | ldr r5,[$inp,#240] @ pass rounds | ||
1038 | mov r4,$inp @ pass key | ||
1039 | mov r12,$out @ pass key schedule | ||
1040 | bl _bsaes_key_convert | ||
1041 | vldmia $out, {@XMM[6]} | ||
1042 | vstmia r12, {@XMM[15]} @ save last round key | ||
1043 | veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key | ||
1044 | vstmia $out, {@XMM[7]} | ||
1045 | |||
1046 | vldmia sp!,{d8-d15} | ||
1047 | ldmia sp!,{r4-r6,pc} | ||
1048 | .size bsaes_dec_key_convert,.-bsaes_dec_key_convert | ||
1049 | |||
1050 | .globl bsaes_decrypt_128 | ||
1051 | .type bsaes_decrypt_128,%function | ||
1052 | .align 4 | ||
1053 | bsaes_decrypt_128: | ||
1054 | stmdb sp!,{r4-r6,lr} | ||
1055 | vstmdb sp!,{d8-d15} @ ABI specification says so | ||
1056 | .Ldec128_loop: | ||
1057 | vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input | ||
1058 | vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! | ||
1059 | mov r4,$key @ pass the key | ||
1060 | vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! | ||
1061 | mov r5,#10 @ pass rounds | ||
1062 | vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! | ||
1063 | |||
1064 | bl _bsaes_decrypt8 | ||
1065 | |||
1066 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1067 | vst1.8 {@XMM[6]}, [$out]! | ||
1068 | vst1.8 {@XMM[4]}, [$out]! | ||
1069 | vst1.8 {@XMM[2]}, [$out]! | ||
1070 | vst1.8 {@XMM[7]}, [$out]! | ||
1071 | vst1.8 {@XMM[3]}, [$out]! | ||
1072 | subs $len,$len,#0x80 | ||
1073 | vst1.8 {@XMM[5]}, [$out]! | ||
1074 | bhi .Ldec128_loop | ||
1075 | |||
1076 | vldmia sp!,{d8-d15} | ||
1077 | ldmia sp!,{r4-r6,pc} | ||
1078 | .size bsaes_decrypt_128,.-bsaes_decrypt_128 | ||
1079 | ___ | ||
1080 | } | ||
1081 | { | ||
1082 | my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); | ||
1083 | my ($keysched)=("sp"); | ||
1084 | |||
1085 | $code.=<<___; | ||
1086 | .extern AES_cbc_encrypt | ||
1087 | .extern AES_decrypt | ||
1088 | |||
1089 | .global bsaes_cbc_encrypt | ||
1090 | .type bsaes_cbc_encrypt,%function | ||
1091 | .align 5 | ||
1092 | bsaes_cbc_encrypt: | ||
1093 | #ifndef __KERNEL__ | ||
1094 | cmp $len, #128 | ||
1095 | #ifndef __thumb__ | ||
1096 | blo AES_cbc_encrypt | ||
1097 | #else | ||
1098 | bhs 1f | ||
1099 | b AES_cbc_encrypt | ||
1100 | 1: | ||
1101 | #endif | ||
1102 | #endif | ||
1103 | |||
1104 | @ it is up to the caller to make sure we are called with enc == 0 | ||
1105 | |||
1106 | mov ip, sp | ||
1107 | stmdb sp!, {r4-r10, lr} | ||
1108 | VFP_ABI_PUSH | ||
1109 | ldr $ivp, [ip] @ IV is 1st arg on the stack | ||
1110 | mov $len, $len, lsr#4 @ len in 16 byte blocks | ||
1111 | sub sp, #0x10 @ scratch space to carry over the IV | ||
1112 | mov $fp, sp @ save sp | ||
1113 | |||
1114 | ldr $rounds, [$key, #240] @ get # of rounds | ||
1115 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1116 | @ allocate the key schedule on the stack | ||
1117 | sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key | ||
1118 | 	add	r12, #`128-32`	@ size of bit-sliced key schedule | ||
1119 | |||
1120 | @ populate the key schedule | ||
1121 | mov r4, $key @ pass key | ||
1122 | mov r5, $rounds @ pass # of rounds | ||
1123 | mov sp, r12 @ sp is $keysched | ||
1124 | bl _bsaes_key_convert | ||
1125 | vldmia $keysched, {@XMM[6]} | ||
1126 | vstmia r12, {@XMM[15]} @ save last round key | ||
1127 | veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key | ||
1128 | vstmia $keysched, {@XMM[7]} | ||
1129 | #else | ||
1130 | ldr r12, [$key, #244] | ||
1131 | eors r12, #1 | ||
1132 | beq 0f | ||
1133 | |||
1134 | @ populate the key schedule | ||
1135 | str r12, [$key, #244] | ||
1136 | mov r4, $key @ pass key | ||
1137 | mov r5, $rounds @ pass # of rounds | ||
1138 | add r12, $key, #248 @ pass key schedule | ||
1139 | bl _bsaes_key_convert | ||
1140 | add r4, $key, #248 | ||
1141 | vldmia r4, {@XMM[6]} | ||
1142 | vstmia r12, {@XMM[15]} @ save last round key | ||
1143 | veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key | ||
1144 | vstmia r4, {@XMM[7]} | ||
1145 | |||
1146 | .align 2 | ||
1147 | 0: | ||
1148 | #endif | ||
1149 | |||
1150 | vld1.8 {@XMM[15]}, [$ivp] @ load IV | ||
1151 | b .Lcbc_dec_loop | ||
1152 | |||
1153 | .align 4 | ||
1154 | .Lcbc_dec_loop: | ||
1155 | subs $len, $len, #0x8 | ||
1156 | bmi .Lcbc_dec_loop_finish | ||
1157 | |||
1158 | vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input | ||
1159 | vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! | ||
1160 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1161 | mov r4, $keysched @ pass the key | ||
1162 | #else | ||
1163 | add r4, $key, #248 | ||
1164 | #endif | ||
1165 | vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! | ||
1166 | mov r5, $rounds | ||
1167 | vld1.8 {@XMM[6]-@XMM[7]}, [$inp] | ||
1168 | sub $inp, $inp, #0x60 | ||
1169 | vstmia $fp, {@XMM[15]} @ put aside IV | ||
1170 | |||
1171 | bl _bsaes_decrypt8 | ||
1172 | |||
1173 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1174 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1175 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1176 | vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! | ||
1177 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1178 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1179 | vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! | ||
1180 | veor @XMM[4], @XMM[4], @XMM[10] | ||
1181 | veor @XMM[2], @XMM[2], @XMM[11] | ||
1182 | vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! | ||
1183 | veor @XMM[7], @XMM[7], @XMM[12] | ||
1184 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1185 | veor @XMM[3], @XMM[3], @XMM[13] | ||
1186 | vst1.8 {@XMM[6]}, [$out]! | ||
1187 | veor @XMM[5], @XMM[5], @XMM[14] | ||
1188 | vst1.8 {@XMM[4]}, [$out]! | ||
1189 | vst1.8 {@XMM[2]}, [$out]! | ||
1190 | vst1.8 {@XMM[7]}, [$out]! | ||
1191 | vst1.8 {@XMM[3]}, [$out]! | ||
1192 | vst1.8 {@XMM[5]}, [$out]! | ||
1193 | |||
1194 | b .Lcbc_dec_loop | ||
1195 | |||
1196 | .Lcbc_dec_loop_finish: | ||
1197 | adds $len, $len, #8 | ||
1198 | beq .Lcbc_dec_done | ||
1199 | |||
1200 | vld1.8 {@XMM[0]}, [$inp]! @ load input | ||
1201 | cmp $len, #2 | ||
1202 | blo .Lcbc_dec_one | ||
1203 | vld1.8 {@XMM[1]}, [$inp]! | ||
1204 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1205 | mov r4, $keysched @ pass the key | ||
1206 | #else | ||
1207 | add r4, $key, #248 | ||
1208 | #endif | ||
1209 | mov r5, $rounds | ||
1210 | vstmia $fp, {@XMM[15]} @ put aside IV | ||
1211 | beq .Lcbc_dec_two | ||
1212 | vld1.8 {@XMM[2]}, [$inp]! | ||
1213 | cmp $len, #4 | ||
1214 | blo .Lcbc_dec_three | ||
1215 | vld1.8 {@XMM[3]}, [$inp]! | ||
1216 | beq .Lcbc_dec_four | ||
1217 | vld1.8 {@XMM[4]}, [$inp]! | ||
1218 | cmp $len, #6 | ||
1219 | blo .Lcbc_dec_five | ||
1220 | vld1.8 {@XMM[5]}, [$inp]! | ||
1221 | beq .Lcbc_dec_six | ||
1222 | vld1.8 {@XMM[6]}, [$inp]! | ||
1223 | sub $inp, $inp, #0x70 | ||
1224 | |||
1225 | bl _bsaes_decrypt8 | ||
1226 | |||
1227 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1228 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1229 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1230 | vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! | ||
1231 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1232 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1233 | vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! | ||
1234 | veor @XMM[4], @XMM[4], @XMM[10] | ||
1235 | veor @XMM[2], @XMM[2], @XMM[11] | ||
1236 | vld1.8 {@XMM[15]}, [$inp]! | ||
1237 | veor @XMM[7], @XMM[7], @XMM[12] | ||
1238 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1239 | veor @XMM[3], @XMM[3], @XMM[13] | ||
1240 | vst1.8 {@XMM[6]}, [$out]! | ||
1241 | vst1.8 {@XMM[4]}, [$out]! | ||
1242 | vst1.8 {@XMM[2]}, [$out]! | ||
1243 | vst1.8 {@XMM[7]}, [$out]! | ||
1244 | vst1.8 {@XMM[3]}, [$out]! | ||
1245 | b .Lcbc_dec_done | ||
1246 | .align 4 | ||
1247 | .Lcbc_dec_six: | ||
1248 | sub $inp, $inp, #0x60 | ||
1249 | bl _bsaes_decrypt8 | ||
1250 | vldmia $fp,{@XMM[14]} @ reload IV | ||
1251 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1252 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1253 | vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! | ||
1254 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1255 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1256 | vld1.8 {@XMM[12]}, [$inp]! | ||
1257 | veor @XMM[4], @XMM[4], @XMM[10] | ||
1258 | veor @XMM[2], @XMM[2], @XMM[11] | ||
1259 | vld1.8 {@XMM[15]}, [$inp]! | ||
1260 | veor @XMM[7], @XMM[7], @XMM[12] | ||
1261 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1262 | vst1.8 {@XMM[6]}, [$out]! | ||
1263 | vst1.8 {@XMM[4]}, [$out]! | ||
1264 | vst1.8 {@XMM[2]}, [$out]! | ||
1265 | vst1.8 {@XMM[7]}, [$out]! | ||
1266 | b .Lcbc_dec_done | ||
1267 | .align 4 | ||
1268 | .Lcbc_dec_five: | ||
1269 | sub $inp, $inp, #0x50 | ||
1270 | bl _bsaes_decrypt8 | ||
1271 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1272 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1273 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1274 | vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! | ||
1275 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1276 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1277 | vld1.8 {@XMM[15]}, [$inp]! | ||
1278 | veor @XMM[4], @XMM[4], @XMM[10] | ||
1279 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1280 | veor @XMM[2], @XMM[2], @XMM[11] | ||
1281 | vst1.8 {@XMM[6]}, [$out]! | ||
1282 | vst1.8 {@XMM[4]}, [$out]! | ||
1283 | vst1.8 {@XMM[2]}, [$out]! | ||
1284 | b .Lcbc_dec_done | ||
1285 | .align 4 | ||
1286 | .Lcbc_dec_four: | ||
1287 | sub $inp, $inp, #0x40 | ||
1288 | bl _bsaes_decrypt8 | ||
1289 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1290 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1291 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1292 | vld1.8 {@XMM[10]}, [$inp]! | ||
1293 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1294 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1295 | vld1.8 {@XMM[15]}, [$inp]! | ||
1296 | veor @XMM[4], @XMM[4], @XMM[10] | ||
1297 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1298 | vst1.8 {@XMM[6]}, [$out]! | ||
1299 | vst1.8 {@XMM[4]}, [$out]! | ||
1300 | b .Lcbc_dec_done | ||
1301 | .align 4 | ||
1302 | .Lcbc_dec_three: | ||
1303 | sub $inp, $inp, #0x30 | ||
1304 | bl _bsaes_decrypt8 | ||
1305 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1306 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input | ||
1307 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1308 | vld1.8 {@XMM[15]}, [$inp]! | ||
1309 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1310 | veor @XMM[6], @XMM[6], @XMM[9] | ||
1311 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1312 | vst1.8 {@XMM[6]}, [$out]! | ||
1313 | b .Lcbc_dec_done | ||
1314 | .align 4 | ||
1315 | .Lcbc_dec_two: | ||
1316 | sub $inp, $inp, #0x20 | ||
1317 | bl _bsaes_decrypt8 | ||
1318 | vldmia $fp, {@XMM[14]} @ reload IV | ||
1319 | vld1.8 {@XMM[8]}, [$inp]! @ reload input | ||
1320 | veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV | ||
1321 | vld1.8 {@XMM[15]}, [$inp]! @ reload input | ||
1322 | veor @XMM[1], @XMM[1], @XMM[8] | ||
1323 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1324 | b .Lcbc_dec_done | ||
1325 | .align 4 | ||
1326 | .Lcbc_dec_one: | ||
1327 | sub $inp, $inp, #0x10 | ||
1328 | mov $rounds, $out @ save original out pointer | ||
1329 | mov $out, $fp @ use the iv scratch space as out buffer | ||
1330 | mov r2, $key | ||
1331 | vmov @XMM[4],@XMM[15] @ just in case ensure that IV | ||
1332 | vmov @XMM[5],@XMM[0] @ and input are preserved | ||
1333 | bl AES_decrypt | ||
1334 | vld1.8 {@XMM[0]}, [$fp,:64] @ load result | ||
1335 | veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV | ||
1336 | vmov @XMM[15], @XMM[5] @ @XMM[5] holds input | ||
1337 | vst1.8 {@XMM[0]}, [$rounds] @ write output | ||
1338 | |||
1339 | .Lcbc_dec_done: | ||
1340 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1341 | vmov.i32 q0, #0 | ||
1342 | vmov.i32 q1, #0 | ||
1343 | .Lcbc_dec_bzero: @ wipe key schedule [if any] | ||
1344 | vstmia $keysched!, {q0-q1} | ||
1345 | cmp $keysched, $fp | ||
1346 | bne .Lcbc_dec_bzero | ||
1347 | #endif | ||
1348 | |||
1349 | mov sp, $fp | ||
1350 | add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb | ||
1351 | vst1.8 {@XMM[15]}, [$ivp] @ return IV | ||
1352 | VFP_ABI_POP | ||
1353 | ldmia sp!, {r4-r10, pc} | ||
1354 | .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt | ||
1355 | ___ | ||
1356 | } | ||
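Strip away the 8-way batching and the tail cases and the loop above is plain CBC decryption: decrypt a batch of ciphertext blocks, XOR each plaintext with the previous ciphertext block (the IV for the first), and carry the last ciphertext forward as the next IV -- the "put aside IV"/"reload IV" stack slot. A reference sketch around a hypothetical one-block decrypt primitive (aes_decrypt_block is a stand-in, not a kernel API):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* hypothetical stand-in for _bsaes_decrypt8 / AES_decrypt: one 16-byte
 * block of raw (ECB) AES decryption */
void aes_decrypt_block(uint8_t out[16], const uint8_t in[16],
		       const void *key);

/* CBC decryption of nblocks whole blocks, in-place safe; the same
 * dataflow as the code above minus the 8-way batching */
void cbc_decrypt(uint8_t *out, const uint8_t *in, size_t nblocks,
		 const void *key, uint8_t iv[16])
{
	uint8_t prev[16], cblk[16], tmp[16];

	memcpy(prev, iv, 16);			/* "put aside IV" */
	for (size_t n = 0; n < nblocks; n++) {
		memcpy(cblk, in + 16 * n, 16);	/* ciphertext = next IV */
		aes_decrypt_block(tmp, cblk, key);
		for (int i = 0; i < 16; i++)
			out[16 * n + i] = tmp[i] ^ prev[i];
		memcpy(prev, cblk, 16);
	}
	memcpy(iv, prev, 16);			/* "return IV" for chaining */
}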
1357 | { | ||
1358 | my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); | ||
1359 | my $const = "r6"; # shared with _bsaes_encrypt8_alt | ||
1360 | my $keysched = "sp"; | ||
1361 | |||
1362 | $code.=<<___; | ||
1363 | .extern AES_encrypt | ||
1364 | .global bsaes_ctr32_encrypt_blocks | ||
1365 | .type bsaes_ctr32_encrypt_blocks,%function | ||
1366 | .align 5 | ||
1367 | bsaes_ctr32_encrypt_blocks: | ||
1368 | cmp $len, #8 @ use plain AES for | ||
1369 | blo .Lctr_enc_short @ small sizes | ||
1370 | |||
1371 | mov ip, sp | ||
1372 | stmdb sp!, {r4-r10, lr} | ||
1373 | VFP_ABI_PUSH | ||
1374 | ldr $ctr, [ip] @ ctr is 1st arg on the stack | ||
1375 | sub sp, sp, #0x10 @ scratch space to carry over the ctr | ||
1376 | mov $fp, sp @ save sp | ||
1377 | |||
1378 | ldr $rounds, [$key, #240] @ get # of rounds | ||
1379 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1380 | @ allocate the key schedule on the stack | ||
1381 | sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key | ||
1382 | add r12, #`128-32` @ size of bit-sliced key schedule | ||
1383 | |||
1384 | @ populate the key schedule | ||
1385 | mov r4, $key @ pass key | ||
1386 | mov r5, $rounds @ pass # of rounds | ||
1387 | mov sp, r12 @ sp is $keysched | ||
1388 | bl _bsaes_key_convert | ||
1389 | veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key | ||
1390 | vstmia r12, {@XMM[7]} @ save last round key | ||
1391 | |||
1392 | vld1.8 {@XMM[0]}, [$ctr] @ load counter | ||
1393 | add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr | ||
1394 | vldmia $keysched, {@XMM[4]} @ load round0 key | ||
1395 | #else | ||
1396 | ldr r12, [$key, #244] | ||
1397 | eors r12, #1 | ||
1398 | beq 0f | ||
1399 | |||
1400 | @ populate the key schedule | ||
1401 | str r12, [$key, #244] | ||
1402 | mov r4, $key @ pass key | ||
1403 | mov r5, $rounds @ pass # of rounds | ||
1404 | add r12, $key, #248 @ pass key schedule | ||
1405 | bl _bsaes_key_convert | ||
1406 | veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key | ||
1407 | vstmia r12, {@XMM[7]} @ save last round key | ||
1408 | |||
1409 | .align 2 | ||
1410 | 0: add r12, $key, #248 | ||
1411 | vld1.8 {@XMM[0]}, [$ctr] @ load counter | ||
1412 | adrl $ctr, .LREVM0SR @ borrow $ctr | ||
1413 | vldmia r12, {@XMM[4]} @ load round0 key | ||
1414 | sub sp, #0x10 @ place for adjusted round0 key | ||
1415 | #endif | ||
1416 | |||
1417 | vmov.i32 @XMM[8],#1 @ compose 1<<96 | ||
1418 | veor @XMM[9],@XMM[9],@XMM[9] | ||
1419 | vrev32.8 @XMM[0],@XMM[0] | ||
1420 | vext.8 @XMM[8],@XMM[9],@XMM[8],#4 | ||
1421 | vrev32.8 @XMM[4],@XMM[4] | ||
1422 | vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 | ||
1423 | vstmia $keysched, {@XMM[4]} @ save adjusted round0 key | ||
1424 | b .Lctr_enc_loop | ||
1425 | |||
1426 | .align 4 | ||
1427 | .Lctr_enc_loop: | ||
1428 | vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 | ||
1429 | vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 | ||
1430 | vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 | ||
1431 | vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 | ||
1432 | vadd.u32 @XMM[4], @XMM[1], @XMM[10] | ||
1433 | vadd.u32 @XMM[5], @XMM[2], @XMM[10] | ||
1434 | vadd.u32 @XMM[6], @XMM[3], @XMM[10] | ||
1435 | vadd.u32 @XMM[7], @XMM[4], @XMM[10] | ||
1436 | vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter | ||
1437 | |||
1438 | @ Borrow prologue from _bsaes_encrypt8 to use the opportunity | ||
1439 | @ to flip byte order in 32-bit counter | ||
1440 | |||
1441 | vldmia $keysched, {@XMM[9]} @ load round0 key | ||
1442 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1443 | add r4, $keysched, #0x10 @ pass next round key | ||
1444 | #else | ||
1445 | add r4, $key, #`248+16` | ||
1446 | #endif | ||
1447 | vldmia $ctr, {@XMM[8]} @ .LREVM0SR | ||
1448 | mov r5, $rounds @ pass rounds | ||
1449 | vstmia $fp, {@XMM[10]} @ save next counter | ||
1450 | sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants | ||
1451 | |||
1452 | bl _bsaes_encrypt8_alt | ||
1453 | |||
1454 | subs $len, $len, #8 | ||
1455 | blo .Lctr_enc_loop_done | ||
1456 | |||
1457 | vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input | ||
1458 | vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! | ||
1459 | veor @XMM[0], @XMM[8] | ||
1460 | veor @XMM[1], @XMM[9] | ||
1461 | vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! | ||
1462 | veor @XMM[4], @XMM[10] | ||
1463 | veor @XMM[6], @XMM[11] | ||
1464 | vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! | ||
1465 | veor @XMM[3], @XMM[12] | ||
1466 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output | ||
1467 | veor @XMM[7], @XMM[13] | ||
1468 | veor @XMM[2], @XMM[14] | ||
1469 | vst1.8 {@XMM[4]}, [$out]! | ||
1470 | veor @XMM[5], @XMM[15] | ||
1471 | vst1.8 {@XMM[6]}, [$out]! | ||
1472 | vmov.i32 @XMM[8], #1 @ compose 1<<96 | ||
1473 | vst1.8 {@XMM[3]}, [$out]! | ||
1474 | veor @XMM[9], @XMM[9], @XMM[9] | ||
1475 | vst1.8 {@XMM[7]}, [$out]! | ||
1476 | vext.8 @XMM[8], @XMM[9], @XMM[8], #4 | ||
1477 | vst1.8 {@XMM[2]}, [$out]! | ||
1478 | vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 | ||
1479 | vst1.8 {@XMM[5]}, [$out]! | ||
1480 | vldmia $fp, {@XMM[0]} @ load counter | ||
1481 | |||
1482 | bne .Lctr_enc_loop | ||
1483 | b .Lctr_enc_done | ||
1484 | |||
1485 | .align 4 | ||
1486 | .Lctr_enc_loop_done: | ||
1487 | add $len, $len, #8 | ||
1488 | vld1.8 {@XMM[8]}, [$inp]! @ load input | ||
1489 | veor @XMM[0], @XMM[8] | ||
1490 | vst1.8 {@XMM[0]}, [$out]! @ write output | ||
1491 | cmp $len, #2 | ||
1492 | blo .Lctr_enc_done | ||
1493 | vld1.8 {@XMM[9]}, [$inp]! | ||
1494 | veor @XMM[1], @XMM[9] | ||
1495 | vst1.8 {@XMM[1]}, [$out]! | ||
1496 | beq .Lctr_enc_done | ||
1497 | vld1.8 {@XMM[10]}, [$inp]! | ||
1498 | veor @XMM[4], @XMM[10] | ||
1499 | vst1.8 {@XMM[4]}, [$out]! | ||
1500 | cmp $len, #4 | ||
1501 | blo .Lctr_enc_done | ||
1502 | vld1.8 {@XMM[11]}, [$inp]! | ||
1503 | veor @XMM[6], @XMM[11] | ||
1504 | vst1.8 {@XMM[6]}, [$out]! | ||
1505 | beq .Lctr_enc_done | ||
1506 | vld1.8 {@XMM[12]}, [$inp]! | ||
1507 | veor @XMM[3], @XMM[12] | ||
1508 | vst1.8 {@XMM[3]}, [$out]! | ||
1509 | cmp $len, #6 | ||
1510 | blo .Lctr_enc_done | ||
1511 | vld1.8 {@XMM[13]}, [$inp]! | ||
1512 | veor @XMM[7], @XMM[13] | ||
1513 | vst1.8 {@XMM[7]}, [$out]! | ||
1514 | beq .Lctr_enc_done | ||
1515 | vld1.8 {@XMM[14]}, [$inp] | ||
1516 | veor @XMM[2], @XMM[14] | ||
1517 | vst1.8 {@XMM[2]}, [$out]! | ||
1518 | |||
1519 | .Lctr_enc_done: | ||
1520 | vmov.i32 q0, #0 | ||
1521 | vmov.i32 q1, #0 | ||
1522 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1523 | .Lctr_enc_bzero: @ wipe key schedule [if any] | ||
1524 | vstmia $keysched!, {q0-q1} | ||
1525 | cmp $keysched, $fp | ||
1526 | bne .Lctr_enc_bzero | ||
1527 | #else | ||
1528 | vstmia $keysched, {q0-q1} | ||
1529 | #endif | ||
1530 | |||
1531 | mov sp, $fp | ||
1532 | add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb | ||
1533 | VFP_ABI_POP | ||
1534 | ldmia sp!, {r4-r10, pc} @ return | ||
1535 | |||
1536 | .align 4 | ||
1537 | .Lctr_enc_short: | ||
1538 | 	ldr	ip, [sp]		@ ctr pointer is passed on the stack | ||
1539 | stmdb sp!, {r4-r8, lr} | ||
1540 | |||
1541 | mov r4, $inp @ copy arguments | ||
1542 | mov r5, $out | ||
1543 | mov r6, $len | ||
1544 | mov r7, $key | ||
1545 | ldr r8, [ip, #12] @ load counter LSW | ||
1546 | vld1.8 {@XMM[1]}, [ip] @ load whole counter value | ||
1547 | #ifdef __ARMEL__ | ||
1548 | rev r8, r8 | ||
1549 | #endif | ||
1550 | sub sp, sp, #0x10 | ||
1551 | vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value | ||
1552 | sub sp, sp, #0x10 | ||
1553 | |||
1554 | .Lctr_enc_short_loop: | ||
1555 | add r0, sp, #0x10 @ input counter value | ||
1556 | mov r1, sp @ output on the stack | ||
1557 | mov r2, r7 @ key | ||
1558 | |||
1559 | bl AES_encrypt | ||
1560 | |||
1561 | vld1.8 {@XMM[0]}, [r4]! @ load input | ||
1562 | vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter | ||
1563 | add r8, r8, #1 | ||
1564 | #ifdef __ARMEL__ | ||
1565 | rev r0, r8 | ||
1566 | str r0, [sp, #0x1c] @ next counter value | ||
1567 | #else | ||
1568 | str r8, [sp, #0x1c] @ next counter value | ||
1569 | #endif | ||
1570 | veor @XMM[0],@XMM[0],@XMM[1] | ||
1571 | vst1.8 {@XMM[0]}, [r5]! @ store output | ||
1572 | subs r6, r6, #1 | ||
1573 | bne .Lctr_enc_short_loop | ||
1574 | |||
1575 | vmov.i32 q0, #0 | ||
1576 | vmov.i32 q1, #0 | ||
1577 | vstmia sp!, {q0-q1} | ||
1578 | |||
1579 | ldmia sp!, {r4-r8, pc} | ||
1580 | .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks | ||
1581 | ___ | ||
1582 | } | ||
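"ctr32" means only the last big-endian 32-bit word of the counter block increments; the `compose 1<<96` constants and the vrev32 byte flips above implement exactly that increment vector-wide, eight counters at a time. A scalar sketch (aes_encrypt_block is a hypothetical stand-in for AES_encrypt / _bsaes_encrypt8):

#include <stddef.h>
#include <stdint.h>

/* hypothetical single-block AES encryption primitive */
void aes_encrypt_block(uint8_t out[16], const uint8_t in[16],
		       const void *key);

/* CTR mode with a 32-bit big-endian counter in bytes 12..15, as in
 * bsaes_ctr32_encrypt_blocks; nblocks counts whole 16-byte blocks */
void ctr32_encrypt(uint8_t *out, const uint8_t *in, size_t nblocks,
		   const void *key, uint8_t ctr[16])
{
	uint8_t ks[16];

	for (size_t n = 0; n < nblocks; n++) {
		aes_encrypt_block(ks, ctr, key);
		for (int i = 0; i < 16; i++)
			out[16 * n + i] = in[16 * n + i] ^ ks[i];

		/* big-endian increment of the low 32-bit word only */
		for (int i = 15; i >= 12; i--)
			if (++ctr[i] != 0)
				break;
	}
}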
1583 | { | ||
1584 | ###################################################################### | ||
1585 | # void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, | ||
1586 | # const AES_KEY *key1, const AES_KEY *key2, | ||
1587 | # const unsigned char iv[16]); | ||
1588 | # | ||
1589 | my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); | ||
1590 | my $const="r6"; # returned by _bsaes_key_convert | ||
1591 | my $twmask=@XMM[5]; | ||
1592 | my @T=@XMM[6..7]; | ||
1593 | |||
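XTS derives the tweak for each successive block by multiplying the encrypted IV by x in GF(2^128). The vshr.s64 #63 / vand / vswp / veor sequences in the loops below compute exactly that doubling on the 128-bit tweak, with {1, 0x87} from .Lxts_magic supplying the carry masks and the reduction constant. A byte-wise sketch of one doubling step:

#include <stdint.h>

/* GF(2^128) multiplication by x on a little-endian 16-byte tweak --
 * the update the NEON sequence performs with the .Lxts_magic constant */
void xts_mult_x(uint8_t t[16])
{
	uint8_t carry = 0;

	for (int i = 0; i < 16; i++) {
		uint8_t c = t[i] >> 7;

		t[i] = (uint8_t)((t[i] << 1) | carry);
		carry = c;
	}
	if (carry)
		t[0] ^= 0x87;	/* reduce by x^128 + x^7 + x^2 + x + 1 */
}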
1594 | $code.=<<___; | ||
1595 | .globl bsaes_xts_encrypt | ||
1596 | .type bsaes_xts_encrypt,%function | ||
1597 | .align 4 | ||
1598 | bsaes_xts_encrypt: | ||
1599 | mov ip, sp | ||
1600 | stmdb sp!, {r4-r10, lr} @ 0x20 | ||
1601 | VFP_ABI_PUSH | ||
1602 | mov r6, sp @ future $fp | ||
1603 | |||
1604 | mov $inp, r0 | ||
1605 | mov $out, r1 | ||
1606 | mov $len, r2 | ||
1607 | mov $key, r3 | ||
1608 | |||
1609 | sub r0, sp, #0x10 @ 0x10 | ||
1610 | bic r0, #0xf @ align at 16 bytes | ||
1611 | mov sp, r0 | ||
1612 | |||
1613 | #ifdef XTS_CHAIN_TWEAK | ||
1614 | ldr r0, [ip] @ pointer to input tweak | ||
1615 | #else | ||
1616 | @ generate initial tweak | ||
1617 | ldr r0, [ip, #4] @ iv[] | ||
1618 | mov r1, sp | ||
1619 | ldr r2, [ip, #0] @ key2 | ||
1620 | bl AES_encrypt | ||
1621 | mov r0,sp @ pointer to initial tweak | ||
1622 | #endif | ||
1623 | |||
1624 | ldr $rounds, [$key, #240] @ get # of rounds | ||
1625 | mov $fp, r6 | ||
1626 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1627 | @ allocate the key schedule on the stack | ||
1628 | sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key | ||
1629 | @ add r12, #`128-32` @ size of bit-sliced key schedule | ||
1630 | sub r12, #`32+16` @ place for tweak[9] | ||
1631 | |||
1632 | @ populate the key schedule | ||
1633 | mov r4, $key @ pass key | ||
1634 | mov r5, $rounds @ pass # of rounds | ||
1635 | mov sp, r12 | ||
1636 | add r12, #0x90 @ pass key schedule | ||
1637 | bl _bsaes_key_convert | ||
1638 | veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key | ||
1639 | vstmia r12, {@XMM[7]} @ save last round key | ||
1640 | #else | ||
1641 | ldr r12, [$key, #244] | ||
1642 | eors r12, #1 | ||
1643 | beq 0f | ||
1644 | |||
1645 | str r12, [$key, #244] | ||
1646 | mov r4, $key @ pass key | ||
1647 | mov r5, $rounds @ pass # of rounds | ||
1648 | add r12, $key, #248 @ pass key schedule | ||
1649 | bl _bsaes_key_convert | ||
1650 | veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key | ||
1651 | vstmia r12, {@XMM[7]} | ||
1652 | |||
1653 | .align 2 | ||
1654 | 0: sub sp, #0x90 @ place for tweak[9] | ||
1655 | #endif | ||
1656 | |||
1657 | vld1.8 {@XMM[8]}, [r0] @ initial tweak | ||
1658 | adr $magic, .Lxts_magic | ||
1659 | |||
1660 | subs $len, #0x80 | ||
1661 | blo .Lxts_enc_short | ||
1662 | b .Lxts_enc_loop | ||
1663 | |||
1664 | .align 4 | ||
1665 | .Lxts_enc_loop: | ||
1666 | vldmia $magic, {$twmask} @ load XTS magic | ||
1667 | vshr.s64 @T[0], @XMM[8], #63 | ||
1668 | mov r0, sp | ||
1669 | vand @T[0], @T[0], $twmask | ||
1670 | ___ | ||
1671 | for($i=9;$i<16;$i++) { | ||
1672 | $code.=<<___; | ||
1673 | vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] | ||
1674 | vst1.64 {@XMM[$i-1]}, [r0,:128]! | ||
1675 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
1676 | vshr.s64 @T[1], @XMM[$i], #63 | ||
1677 | veor @XMM[$i], @XMM[$i], @T[0] | ||
1678 | vand @T[1], @T[1], $twmask | ||
1679 | ___ | ||
1680 | @T=reverse(@T); | ||
1681 | |||
1682 | $code.=<<___ if ($i>=10); | ||
1683 | vld1.8 {@XMM[$i-10]}, [$inp]! | ||
1684 | ___ | ||
1685 | $code.=<<___ if ($i>=11); | ||
1686 | veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] | ||
1687 | ___ | ||
1688 | } | ||
1689 | $code.=<<___; | ||
1690 | vadd.u64 @XMM[8], @XMM[15], @XMM[15] | ||
1691 | vst1.64 {@XMM[15]}, [r0,:128]! | ||
1692 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
1693 | veor @XMM[8], @XMM[8], @T[0] | ||
1694 | vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1695 | |||
1696 | vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! | ||
1697 | veor @XMM[5], @XMM[5], @XMM[13] | ||
1698 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1699 | add r4, sp, #0x90 @ pass key schedule | ||
1700 | #else | ||
1701 | add r4, $key, #248 @ pass key schedule | ||
1702 | #endif | ||
1703 | veor @XMM[6], @XMM[6], @XMM[14] | ||
1704 | mov r5, $rounds @ pass rounds | ||
1705 | veor @XMM[7], @XMM[7], @XMM[15] | ||
1706 | mov r0, sp | ||
1707 | |||
1708 | bl _bsaes_encrypt8 | ||
1709 | |||
1710 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
1711 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
1712 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1713 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
1714 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1715 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1716 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1717 | veor @XMM[9], @XMM[6], @XMM[11] | ||
1718 | vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! | ||
1719 | veor @XMM[10], @XMM[3], @XMM[12] | ||
1720 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
1721 | veor @XMM[11], @XMM[7], @XMM[13] | ||
1722 | veor @XMM[12], @XMM[2], @XMM[14] | ||
1723 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
1724 | veor @XMM[13], @XMM[5], @XMM[15] | ||
1725 | vst1.8 {@XMM[12]-@XMM[13]}, [$out]! | ||
1726 | |||
1727 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1728 | |||
1729 | subs $len, #0x80 | ||
1730 | bpl .Lxts_enc_loop | ||
1731 | |||
1732 | .Lxts_enc_short: | ||
1733 | adds $len, #0x70 | ||
1734 | bmi .Lxts_enc_done | ||
1735 | |||
1736 | vldmia $magic, {$twmask} @ load XTS magic | ||
1737 | vshr.s64 @T[0], @XMM[8], #63 | ||
1738 | mov r0, sp | ||
1739 | vand @T[0], @T[0], $twmask | ||
1740 | ___ | ||
1741 | for($i=9;$i<16;$i++) { | ||
1742 | $code.=<<___; | ||
1743 | vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] | ||
1744 | vst1.64 {@XMM[$i-1]}, [r0,:128]! | ||
1745 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
1746 | vshr.s64 @T[1], @XMM[$i], #63 | ||
1747 | veor @XMM[$i], @XMM[$i], @T[0] | ||
1748 | vand @T[1], @T[1], $twmask | ||
1749 | ___ | ||
1750 | @T=reverse(@T); | ||
1751 | |||
1752 | $code.=<<___ if ($i>=10); | ||
1753 | vld1.8 {@XMM[$i-10]}, [$inp]! | ||
1754 | subs $len, #0x10 | ||
1755 | bmi .Lxts_enc_`$i-9` | ||
1756 | ___ | ||
1757 | $code.=<<___ if ($i>=11); | ||
1758 | veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] | ||
1759 | ___ | ||
1760 | } | ||
1761 | $code.=<<___; | ||
1762 | sub $len, #0x10 | ||
1763 | vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak | ||
1764 | |||
1765 | vld1.8 {@XMM[6]}, [$inp]! | ||
1766 | veor @XMM[5], @XMM[5], @XMM[13] | ||
1767 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1768 | add r4, sp, #0x90 @ pass key schedule | ||
1769 | #else | ||
1770 | add r4, $key, #248 @ pass key schedule | ||
1771 | #endif | ||
1772 | veor @XMM[6], @XMM[6], @XMM[14] | ||
1773 | mov r5, $rounds @ pass rounds | ||
1774 | mov r0, sp | ||
1775 | |||
1776 | bl _bsaes_encrypt8 | ||
1777 | |||
1778 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
1779 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
1780 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1781 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
1782 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1783 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1784 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1785 | veor @XMM[9], @XMM[6], @XMM[11] | ||
1786 | vld1.64 {@XMM[14]}, [r0,:128]! | ||
1787 | veor @XMM[10], @XMM[3], @XMM[12] | ||
1788 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
1789 | veor @XMM[11], @XMM[7], @XMM[13] | ||
1790 | veor @XMM[12], @XMM[2], @XMM[14] | ||
1791 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
1792 | vst1.8 {@XMM[12]}, [$out]! | ||
1793 | |||
1794 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1795 | b .Lxts_enc_done | ||
1796 | .align 4 | ||
1797 | .Lxts_enc_6: | ||
1798 | vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak | ||
1799 | |||
1800 | veor @XMM[4], @XMM[4], @XMM[12] | ||
1801 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1802 | add r4, sp, #0x90 @ pass key schedule | ||
1803 | #else | ||
1804 | add r4, $key, #248 @ pass key schedule | ||
1805 | #endif | ||
1806 | veor @XMM[5], @XMM[5], @XMM[13] | ||
1807 | mov r5, $rounds @ pass rounds | ||
1808 | mov r0, sp | ||
1809 | |||
1810 | bl _bsaes_encrypt8 | ||
1811 | |||
1812 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
1813 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
1814 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1815 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
1816 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1817 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1818 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1819 | veor @XMM[9], @XMM[6], @XMM[11] | ||
1820 | veor @XMM[10], @XMM[3], @XMM[12] | ||
1821 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
1822 | veor @XMM[11], @XMM[7], @XMM[13] | ||
1823 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
1824 | |||
1825 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1826 | b .Lxts_enc_done | ||
1827 | |||
1828 | @ put this in range for both ARM and Thumb mode adr instructions | ||
1829 | .align 5 | ||
1830 | .Lxts_magic: | ||
1831 | .quad 1, 0x87 | ||
1832 | |||
1833 | .align 5 | ||
1834 | .Lxts_enc_5: | ||
1835 | vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak | ||
1836 | |||
1837 | veor @XMM[3], @XMM[3], @XMM[11] | ||
1838 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1839 | add r4, sp, #0x90 @ pass key schedule | ||
1840 | #else | ||
1841 | add r4, $key, #248 @ pass key schedule | ||
1842 | #endif | ||
1843 | veor @XMM[4], @XMM[4], @XMM[12] | ||
1844 | mov r5, $rounds @ pass rounds | ||
1845 | mov r0, sp | ||
1846 | |||
1847 | bl _bsaes_encrypt8 | ||
1848 | |||
1849 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
1850 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
1851 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1852 | vld1.64 {@XMM[12]}, [r0,:128]! | ||
1853 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1854 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1855 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1856 | veor @XMM[9], @XMM[6], @XMM[11] | ||
1857 | veor @XMM[10], @XMM[3], @XMM[12] | ||
1858 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
1859 | vst1.8 {@XMM[10]}, [$out]! | ||
1860 | |||
1861 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1862 | b .Lxts_enc_done | ||
1863 | .align 4 | ||
1864 | .Lxts_enc_4: | ||
1865 | vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak | ||
1866 | |||
1867 | veor @XMM[2], @XMM[2], @XMM[10] | ||
1868 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1869 | add r4, sp, #0x90 @ pass key schedule | ||
1870 | #else | ||
1871 | add r4, $key, #248 @ pass key schedule | ||
1872 | #endif | ||
1873 | veor @XMM[3], @XMM[3], @XMM[11] | ||
1874 | mov r5, $rounds @ pass rounds | ||
1875 | mov r0, sp | ||
1876 | |||
1877 | bl _bsaes_encrypt8 | ||
1878 | |||
1879 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
1880 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
1881 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1882 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1883 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1884 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1885 | veor @XMM[9], @XMM[6], @XMM[11] | ||
1886 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
1887 | |||
1888 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1889 | b .Lxts_enc_done | ||
1890 | .align 4 | ||
1891 | .Lxts_enc_3: | ||
1892 | vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak | ||
1893 | |||
1894 | veor @XMM[1], @XMM[1], @XMM[9] | ||
1895 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1896 | add r4, sp, #0x90 @ pass key schedule | ||
1897 | #else | ||
1898 | add r4, $key, #248 @ pass key schedule | ||
1899 | #endif | ||
1900 | veor @XMM[2], @XMM[2], @XMM[10] | ||
1901 | mov r5, $rounds @ pass rounds | ||
1902 | mov r0, sp | ||
1903 | |||
1904 | bl _bsaes_encrypt8 | ||
1905 | |||
1906 | vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! | ||
1907 | vld1.64 {@XMM[10]}, [r0,:128]! | ||
1908 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1909 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1910 | veor @XMM[8], @XMM[4], @XMM[10] | ||
1911 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1912 | vst1.8 {@XMM[8]}, [$out]! | ||
1913 | |||
1914 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1915 | b .Lxts_enc_done | ||
1916 | .align 4 | ||
1917 | .Lxts_enc_2: | ||
1918 | vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak | ||
1919 | |||
1920 | veor @XMM[0], @XMM[0], @XMM[8] | ||
1921 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
1922 | add r4, sp, #0x90 @ pass key schedule | ||
1923 | #else | ||
1924 | add r4, $key, #248 @ pass key schedule | ||
1925 | #endif | ||
1926 | veor @XMM[1], @XMM[1], @XMM[9] | ||
1927 | mov r5, $rounds @ pass rounds | ||
1928 | mov r0, sp | ||
1929 | |||
1930 | bl _bsaes_encrypt8 | ||
1931 | |||
1932 | vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! | ||
1933 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
1934 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
1935 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
1936 | |||
1937 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
1938 | b .Lxts_enc_done | ||
1939 | .align 4 | ||
1940 | .Lxts_enc_1: | ||
1941 | mov r0, sp | ||
1942 | veor @XMM[0], @XMM[8] | ||
1943 | mov r1, sp | ||
1944 | vst1.8 {@XMM[0]}, [sp,:128] | ||
1945 | mov r2, $key | ||
1946 | mov r4, $fp @ preserve fp | ||
1947 | |||
1948 | bl AES_encrypt | ||
1949 | |||
1950 | vld1.8 {@XMM[0]}, [sp,:128] | ||
1951 | veor @XMM[0], @XMM[0], @XMM[8] | ||
1952 | vst1.8 {@XMM[0]}, [$out]! | ||
1953 | mov $fp, r4 | ||
1954 | |||
1955 | vmov @XMM[8], @XMM[9] @ next round tweak | ||
1956 | |||
1957 | .Lxts_enc_done: | ||
1958 | #ifndef XTS_CHAIN_TWEAK | ||
1959 | adds $len, #0x10 | ||
1960 | beq .Lxts_enc_ret | ||
1961 | sub r6, $out, #0x10 | ||
1962 | |||
1963 | .Lxts_enc_steal: | ||
1964 | ldrb r0, [$inp], #1 | ||
1965 | ldrb r1, [$out, #-0x10] | ||
1966 | strb r0, [$out, #-0x10] | ||
1967 | strb r1, [$out], #1 | ||
1968 | |||
1969 | subs $len, #1 | ||
1970 | bhi .Lxts_enc_steal | ||
1971 | |||
1972 | vld1.8 {@XMM[0]}, [r6] | ||
1973 | mov r0, sp | ||
1974 | veor @XMM[0], @XMM[0], @XMM[8] | ||
1975 | mov r1, sp | ||
1976 | vst1.8 {@XMM[0]}, [sp,:128] | ||
1977 | mov r2, $key | ||
1978 | mov r4, $fp @ preserve fp | ||
1979 | |||
1980 | bl AES_encrypt | ||
1981 | |||
1982 | vld1.8 {@XMM[0]}, [sp,:128] | ||
1983 | veor @XMM[0], @XMM[0], @XMM[8] | ||
1984 | vst1.8 {@XMM[0]}, [r6] | ||
1985 | mov $fp, r4 | ||
1986 | #endif | ||
1987 | |||
1988 | .Lxts_enc_ret: | ||
1989 | bic r0, $fp, #0xf | ||
1990 | vmov.i32 q0, #0 | ||
1991 | vmov.i32 q1, #0 | ||
1992 | #ifdef XTS_CHAIN_TWEAK | ||
1993 | ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak | ||
1994 | #endif | ||
1995 | .Lxts_enc_bzero: @ wipe key schedule [if any] | ||
1996 | vstmia sp!, {q0-q1} | ||
1997 | cmp sp, r0 | ||
1998 | bne .Lxts_enc_bzero | ||
1999 | |||
2000 | mov sp, $fp | ||
2001 | #ifdef XTS_CHAIN_TWEAK | ||
2002 | vst1.8 {@XMM[8]}, [r1] | ||
2003 | #endif | ||
2004 | VFP_ABI_POP | ||
2005 | ldmia sp!, {r4-r10, pc} @ return | ||
2006 | |||
2007 | .size bsaes_xts_encrypt,.-bsaes_xts_encrypt | ||
2008 | |||
2009 | .globl bsaes_xts_decrypt | ||
2010 | .type bsaes_xts_decrypt,%function | ||
2011 | .align 4 | ||
2012 | bsaes_xts_decrypt: | ||
2013 | mov ip, sp | ||
2014 | stmdb sp!, {r4-r10, lr} @ 0x20 | ||
2015 | VFP_ABI_PUSH | ||
2016 | mov r6, sp @ future $fp | ||
2017 | |||
2018 | mov $inp, r0 | ||
2019 | mov $out, r1 | ||
2020 | mov $len, r2 | ||
2021 | mov $key, r3 | ||
2022 | |||
2023 | sub r0, sp, #0x10 @ 0x10 | ||
2024 | bic r0, #0xf @ align at 16 bytes | ||
2025 | mov sp, r0 | ||
2026 | |||
2027 | #ifdef XTS_CHAIN_TWEAK | ||
2028 | ldr r0, [ip] @ pointer to input tweak | ||
2029 | #else | ||
2030 | @ generate initial tweak | ||
2031 | ldr r0, [ip, #4] @ iv[] | ||
2032 | mov r1, sp | ||
2033 | ldr r2, [ip, #0] @ key2 | ||
2034 | bl AES_encrypt | ||
2035 | mov r0, sp @ pointer to initial tweak | ||
2036 | #endif | ||
2037 | |||
2038 | ldr $rounds, [$key, #240] @ get # of rounds | ||
2039 | mov $fp, r6 | ||
2040 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2041 | @ allocate the key schedule on the stack | ||
2042 | sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key | ||
2043 | @ add r12, #`128-32` @ size of bit-sliced key schedule | ||
2044 | sub r12, #`32+16` @ place for tweak[9] | ||
2045 | |||
2046 | @ populate the key schedule | ||
2047 | mov r4, $key @ pass key | ||
2048 | mov r5, $rounds @ pass # of rounds | ||
2049 | mov sp, r12 | ||
2050 | add r12, #0x90 @ pass key schedule | ||
2051 | bl _bsaes_key_convert | ||
2052 | add r4, sp, #0x90 | ||
2053 | vldmia r4, {@XMM[6]} | ||
2054 | vstmia r12, {@XMM[15]} @ save last round key | ||
2055 | veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key | ||
2056 | vstmia r4, {@XMM[7]} | ||
2057 | #else | ||
2058 | ldr r12, [$key, #244] | ||
2059 | eors r12, #1 | ||
2060 | beq 0f | ||
2061 | |||
2062 | str r12, [$key, #244] | ||
2063 | mov r4, $key @ pass key | ||
2064 | mov r5, $rounds @ pass # of rounds | ||
2065 | add r12, $key, #248 @ pass key schedule | ||
2066 | bl _bsaes_key_convert | ||
2067 | add r4, $key, #248 | ||
2068 | vldmia r4, {@XMM[6]} | ||
2069 | vstmia r12, {@XMM[15]} @ save last round key | ||
2070 | veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key | ||
2071 | vstmia r4, {@XMM[7]} | ||
2072 | |||
2073 | .align 2 | ||
2074 | 0: sub sp, #0x90 @ place for tweak[9] | ||
2075 | #endif | ||
2076 | vld1.8 {@XMM[8]}, [r0] @ initial tweak | ||
2077 | adr $magic, .Lxts_magic | ||
2078 | |||
2079 | tst $len, #0xf @ if not multiple of 16 | ||
2080 | it ne @ Thumb2 thing, sanity check in ARM | ||
2081 | subne $len, #0x10 @ subtract another 16 bytes | ||
2082 | subs $len, #0x80 | ||
2083 | |||
2084 | blo .Lxts_dec_short | ||
2085 | b .Lxts_dec_loop | ||
2086 | |||
2087 | .align 4 | ||
2088 | .Lxts_dec_loop: | ||
2089 | vldmia $magic, {$twmask} @ load XTS magic | ||
2090 | vshr.s64 @T[0], @XMM[8], #63 | ||
2091 | mov r0, sp | ||
2092 | vand @T[0], @T[0], $twmask | ||
2093 | ___ | ||
2094 | for($i=9;$i<16;$i++) { | ||
2095 | $code.=<<___; | ||
2096 | vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] | ||
2097 | vst1.64 {@XMM[$i-1]}, [r0,:128]! | ||
2098 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
2099 | vshr.s64 @T[1], @XMM[$i], #63 | ||
2100 | veor @XMM[$i], @XMM[$i], @T[0] | ||
2101 | vand @T[1], @T[1], $twmask | ||
2102 | ___ | ||
2103 | @T=reverse(@T); | ||
2104 | |||
2105 | $code.=<<___ if ($i>=10); | ||
2106 | vld1.8 {@XMM[$i-10]}, [$inp]! | ||
2107 | ___ | ||
2108 | $code.=<<___ if ($i>=11); | ||
2109 | veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] | ||
2110 | ___ | ||
2111 | } | ||
2112 | $code.=<<___; | ||
2113 | vadd.u64 @XMM[8], @XMM[15], @XMM[15] | ||
2114 | vst1.64 {@XMM[15]}, [r0,:128]! | ||
2115 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
2116 | veor @XMM[8], @XMM[8], @T[0] | ||
2117 | vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2118 | |||
2119 | vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! | ||
2120 | veor @XMM[5], @XMM[5], @XMM[13] | ||
2121 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2122 | add r4, sp, #0x90 @ pass key schedule | ||
2123 | #else | ||
2124 | add r4, $key, #248 @ pass key schedule | ||
2125 | #endif | ||
2126 | veor @XMM[6], @XMM[6], @XMM[14] | ||
2127 | mov r5, $rounds @ pass rounds | ||
2128 | veor @XMM[7], @XMM[7], @XMM[15] | ||
2129 | mov r0, sp | ||
2130 | |||
2131 | bl _bsaes_decrypt8 | ||
2132 | |||
2133 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
2134 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
2135 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2136 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
2137 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2138 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2139 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2140 | veor @XMM[9], @XMM[4], @XMM[11] | ||
2141 | vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! | ||
2142 | veor @XMM[10], @XMM[2], @XMM[12] | ||
2143 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
2144 | veor @XMM[11], @XMM[7], @XMM[13] | ||
2145 | veor @XMM[12], @XMM[3], @XMM[14] | ||
2146 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
2147 | veor @XMM[13], @XMM[5], @XMM[15] | ||
2148 | vst1.8 {@XMM[12]-@XMM[13]}, [$out]! | ||
2149 | |||
2150 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2151 | |||
2152 | subs $len, #0x80 | ||
2153 | bpl .Lxts_dec_loop | ||
2154 | |||
2155 | .Lxts_dec_short: | ||
2156 | adds $len, #0x70 | ||
2157 | bmi .Lxts_dec_done | ||
2158 | |||
2159 | vldmia $magic, {$twmask} @ load XTS magic | ||
2160 | vshr.s64 @T[0], @XMM[8], #63 | ||
2161 | mov r0, sp | ||
2162 | vand @T[0], @T[0], $twmask | ||
2163 | ___ | ||
2164 | for($i=9;$i<16;$i++) { | ||
2165 | $code.=<<___; | ||
2166 | vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] | ||
2167 | vst1.64 {@XMM[$i-1]}, [r0,:128]! | ||
2168 | vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` | ||
2169 | vshr.s64 @T[1], @XMM[$i], #63 | ||
2170 | veor @XMM[$i], @XMM[$i], @T[0] | ||
2171 | vand @T[1], @T[1], $twmask | ||
2172 | ___ | ||
2173 | @T=reverse(@T); | ||
2174 | |||
2175 | $code.=<<___ if ($i>=10); | ||
2176 | vld1.8 {@XMM[$i-10]}, [$inp]! | ||
2177 | subs $len, #0x10 | ||
2178 | bmi .Lxts_dec_`$i-9` | ||
2179 | ___ | ||
2180 | $code.=<<___ if ($i>=11); | ||
2181 | veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] | ||
2182 | ___ | ||
2183 | } | ||
2184 | $code.=<<___; | ||
2185 | sub $len, #0x10 | ||
2186 | vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak | ||
2187 | |||
2188 | vld1.8 {@XMM[6]}, [$inp]! | ||
2189 | veor @XMM[5], @XMM[5], @XMM[13] | ||
2190 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2191 | add r4, sp, #0x90 @ pass key schedule | ||
2192 | #else | ||
2193 | add r4, $key, #248 @ pass key schedule | ||
2194 | #endif | ||
2195 | veor @XMM[6], @XMM[6], @XMM[14] | ||
2196 | mov r5, $rounds @ pass rounds | ||
2197 | mov r0, sp | ||
2198 | |||
2199 | bl _bsaes_decrypt8 | ||
2200 | |||
2201 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
2202 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
2203 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2204 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
2205 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2206 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2207 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2208 | veor @XMM[9], @XMM[4], @XMM[11] | ||
2209 | vld1.64 {@XMM[14]}, [r0,:128]! | ||
2210 | veor @XMM[10], @XMM[2], @XMM[12] | ||
2211 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
2212 | veor @XMM[11], @XMM[7], @XMM[13] | ||
2213 | veor @XMM[12], @XMM[3], @XMM[14] | ||
2214 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
2215 | vst1.8 {@XMM[12]}, [$out]! | ||
2216 | |||
2217 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2218 | b .Lxts_dec_done | ||
2219 | .align 4 | ||
2220 | .Lxts_dec_6: | ||
2221 | vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak | ||
2222 | |||
2223 | veor @XMM[4], @XMM[4], @XMM[12] | ||
2224 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2225 | add r4, sp, #0x90 @ pass key schedule | ||
2226 | #else | ||
2227 | add r4, $key, #248 @ pass key schedule | ||
2228 | #endif | ||
2229 | veor @XMM[5], @XMM[5], @XMM[13] | ||
2230 | mov r5, $rounds @ pass rounds | ||
2231 | mov r0, sp | ||
2232 | |||
2233 | bl _bsaes_decrypt8 | ||
2234 | |||
2235 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
2236 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
2237 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2238 | vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! | ||
2239 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2240 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2241 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2242 | veor @XMM[9], @XMM[4], @XMM[11] | ||
2243 | veor @XMM[10], @XMM[2], @XMM[12] | ||
2244 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
2245 | veor @XMM[11], @XMM[7], @XMM[13] | ||
2246 | vst1.8 {@XMM[10]-@XMM[11]}, [$out]! | ||
2247 | |||
2248 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2249 | b .Lxts_dec_done | ||
2250 | .align 4 | ||
2251 | .Lxts_dec_5: | ||
2252 | vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak | ||
2253 | |||
2254 | veor @XMM[3], @XMM[3], @XMM[11] | ||
2255 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2256 | add r4, sp, #0x90 @ pass key schedule | ||
2257 | #else | ||
2258 | add r4, $key, #248 @ pass key schedule | ||
2259 | #endif | ||
2260 | veor @XMM[4], @XMM[4], @XMM[12] | ||
2261 | mov r5, $rounds @ pass rounds | ||
2262 | mov r0, sp | ||
2263 | |||
2264 | bl _bsaes_decrypt8 | ||
2265 | |||
2266 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
2267 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
2268 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2269 | vld1.64 {@XMM[12]}, [r0,:128]! | ||
2270 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2271 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2272 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2273 | veor @XMM[9], @XMM[4], @XMM[11] | ||
2274 | veor @XMM[10], @XMM[2], @XMM[12] | ||
2275 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
2276 | vst1.8 {@XMM[10]}, [$out]! | ||
2277 | |||
2278 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2279 | b .Lxts_dec_done | ||
2280 | .align 4 | ||
2281 | .Lxts_dec_4: | ||
2282 | vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak | ||
2283 | |||
2284 | veor @XMM[2], @XMM[2], @XMM[10] | ||
2285 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2286 | add r4, sp, #0x90 @ pass key schedule | ||
2287 | #else | ||
2288 | add r4, $key, #248 @ pass key schedule | ||
2289 | #endif | ||
2290 | veor @XMM[3], @XMM[3], @XMM[11] | ||
2291 | mov r5, $rounds @ pass rounds | ||
2292 | mov r0, sp | ||
2293 | |||
2294 | bl _bsaes_decrypt8 | ||
2295 | |||
2296 | vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! | ||
2297 | vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! | ||
2298 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2299 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2300 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2301 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2302 | veor @XMM[9], @XMM[4], @XMM[11] | ||
2303 | vst1.8 {@XMM[8]-@XMM[9]}, [$out]! | ||
2304 | |||
2305 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2306 | b .Lxts_dec_done | ||
2307 | .align 4 | ||
2308 | .Lxts_dec_3: | ||
2309 | vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak | ||
2310 | |||
2311 | veor @XMM[1], @XMM[1], @XMM[9] | ||
2312 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2313 | add r4, sp, #0x90 @ pass key schedule | ||
2314 | #else | ||
2315 | add r4, $key, #248 @ pass key schedule | ||
2316 | #endif | ||
2317 | veor @XMM[2], @XMM[2], @XMM[10] | ||
2318 | mov r5, $rounds @ pass rounds | ||
2319 | mov r0, sp | ||
2320 | |||
2321 | bl _bsaes_decrypt8 | ||
2322 | |||
2323 | vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! | ||
2324 | vld1.64 {@XMM[10]}, [r0,:128]! | ||
2325 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2326 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2327 | veor @XMM[8], @XMM[6], @XMM[10] | ||
2328 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2329 | vst1.8 {@XMM[8]}, [$out]! | ||
2330 | |||
2331 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2332 | b .Lxts_dec_done | ||
2333 | .align 4 | ||
2334 | .Lxts_dec_2: | ||
2335 | vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak | ||
2336 | |||
2337 | veor @XMM[0], @XMM[0], @XMM[8] | ||
2338 | #ifndef BSAES_ASM_EXTENDED_KEY | ||
2339 | add r4, sp, #0x90 @ pass key schedule | ||
2340 | #else | ||
2341 | add r4, $key, #248 @ pass key schedule | ||
2342 | #endif | ||
2343 | veor @XMM[1], @XMM[1], @XMM[9] | ||
2344 | mov r5, $rounds @ pass rounds | ||
2345 | mov r0, sp | ||
2346 | |||
2347 | bl _bsaes_decrypt8 | ||
2348 | |||
2349 | vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! | ||
2350 | veor @XMM[0], @XMM[0], @XMM[ 8] | ||
2351 | veor @XMM[1], @XMM[1], @XMM[ 9] | ||
2352 | vst1.8 {@XMM[0]-@XMM[1]}, [$out]! | ||
2353 | |||
2354 | vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak | ||
2355 | b .Lxts_dec_done | ||
2356 | .align 4 | ||
2357 | .Lxts_dec_1: | ||
2358 | mov r0, sp | ||
2359 | veor @XMM[0], @XMM[8] | ||
2360 | mov r1, sp | ||
2361 | vst1.8 {@XMM[0]}, [sp,:128] | ||
2362 | mov r2, $key | ||
2363 | mov r4, $fp @ preserve fp | ||
2364 | mov r5, $magic @ preserve magic | ||
2365 | |||
2366 | bl AES_decrypt | ||
2367 | |||
2368 | vld1.8 {@XMM[0]}, [sp,:128] | ||
2369 | veor @XMM[0], @XMM[0], @XMM[8] | ||
2370 | vst1.8 {@XMM[0]}, [$out]! | ||
2371 | mov $fp, r4 | ||
2372 | mov $magic, r5 | ||
2373 | |||
2374 | vmov @XMM[8], @XMM[9] @ next round tweak | ||
2375 | |||
2376 | .Lxts_dec_done: | ||
2377 | #ifndef XTS_CHAIN_TWEAK | ||
2378 | adds $len, #0x10 | ||
2379 | beq .Lxts_dec_ret | ||
2380 | |||
2381 | @ calculate one round of extra tweak for the stolen ciphertext | ||
2382 | vldmia $magic, {$twmask} | ||
2383 | vshr.s64 @XMM[6], @XMM[8], #63 | ||
2384 | vand @XMM[6], @XMM[6], $twmask | ||
2385 | vadd.u64 @XMM[9], @XMM[8], @XMM[8] | ||
2386 | vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` | ||
2387 | veor @XMM[9], @XMM[9], @XMM[6] | ||
2388 | |||
2389 | @ perform the final decryption with the last tweak value | ||
2390 | vld1.8 {@XMM[0]}, [$inp]! | ||
2391 | mov r0, sp | ||
2392 | veor @XMM[0], @XMM[0], @XMM[9] | ||
2393 | mov r1, sp | ||
2394 | vst1.8 {@XMM[0]}, [sp,:128] | ||
2395 | mov r2, $key | ||
2396 | mov r4, $fp @ preserve fp | ||
2397 | |||
2398 | bl AES_decrypt | ||
2399 | |||
2400 | vld1.8 {@XMM[0]}, [sp,:128] | ||
2401 | veor @XMM[0], @XMM[0], @XMM[9] | ||
2402 | vst1.8 {@XMM[0]}, [$out] | ||
2403 | |||
2404 | mov r6, $out | ||
2405 | .Lxts_dec_steal: | ||
2406 | ldrb r1, [$out] | ||
2407 | ldrb r0, [$inp], #1 | ||
2408 | strb r1, [$out, #0x10] | ||
2409 | strb r0, [$out], #1 | ||
2410 | |||
2411 | subs $len, #1 | ||
2412 | bhi .Lxts_dec_steal | ||
2413 | |||
2414 | vld1.8 {@XMM[0]}, [r6] | ||
2415 | mov r0, sp | ||
2416 | veor @XMM[0], @XMM[8] | ||
2417 | mov r1, sp | ||
2418 | vst1.8 {@XMM[0]}, [sp,:128] | ||
2419 | mov r2, $key | ||
2420 | |||
2421 | bl AES_decrypt | ||
2422 | |||
2423 | vld1.8 {@XMM[0]}, [sp,:128] | ||
2424 | veor @XMM[0], @XMM[0], @XMM[8] | ||
2425 | vst1.8 {@XMM[0]}, [r6] | ||
2426 | mov $fp, r4 | ||
2427 | #endif | ||
2428 | |||
2429 | .Lxts_dec_ret: | ||
2430 | bic r0, $fp, #0xf | ||
2431 | vmov.i32 q0, #0 | ||
2432 | vmov.i32 q1, #0 | ||
2433 | #ifdef XTS_CHAIN_TWEAK | ||
2434 | ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak | ||
2435 | #endif | ||
2436 | .Lxts_dec_bzero: @ wipe key schedule [if any] | ||
2437 | vstmia sp!, {q0-q1} | ||
2438 | cmp sp, r0 | ||
2439 | bne .Lxts_dec_bzero | ||
2440 | |||
2441 | mov sp, $fp | ||
2442 | #ifdef XTS_CHAIN_TWEAK | ||
2443 | vst1.8 {@XMM[8]}, [r1] | ||
2444 | #endif | ||
2445 | VFP_ABI_POP | ||
2446 | ldmia sp!, {r4-r10, pc} @ return | ||
2447 | |||
2448 | .size bsaes_xts_decrypt,.-bsaes_xts_decrypt | ||
2449 | ___ | ||
2450 | } | ||
2451 | $code.=<<___; | ||
2452 | #endif | ||
2453 | ___ | ||
2454 | |||
2455 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; | ||
2456 | |||
2457 | open SELF,$0; | ||
2458 | while(<SELF>) { | ||
2459 | next if (/^#!/); | ||
2460 | last if (!s/^#/@/ and !/^$/); | ||
2461 | print; | ||
2462 | } | ||
2463 | close SELF; | ||
2464 | |||
2465 | print $code; | ||
2466 | |||
2467 | close STDOUT; | ||
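
The tweak-update sequence in the loops above (vshr.s64 / vand / vswp / vadd.u64 / veor against the {1, 0x87} mask loaded from .Lxts_magic) multiplies the 128-bit XTS tweak by x in GF(2^128), reduced modulo x^128 + x^7 + x^2 + x + 1. A minimal C model of that step, written here for illustration and not part of the patch:

    #include <stdint.h>

    /* Double a 128-bit XTS tweak in GF(2^128): shift left one bit and,
     * if bit 127 fell off, xor in the reduction constant 0x87 (the
     * second quadword of .Lxts_magic).  t[0] is the low 64 bits. */
    static void xts_mul_x(uint64_t t[2])
    {
        uint64_t carry = t[1] >> 63;             /* bit 127 of the tweak */

        t[1] = (t[1] << 1) | (t[0] >> 63);       /* 128-bit left shift */
        t[0] = (t[0] << 1) ^ (carry * 0x87);     /* conditional reduction */
    }

The NEON version computes the same thing branch-free: the arithmetic right shift by 63 replicates each lane's sign bit into a mask, and the vswp routes the low lane's carry into the high lane before the veor applies it.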
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 1a7024b41351..c38b58c80202 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild | |||
@@ -24,6 +24,7 @@ generic-y += sembuf.h | |||
24 | generic-y += serial.h | 24 | generic-y += serial.h |
25 | generic-y += shmbuf.h | 25 | generic-y += shmbuf.h |
26 | generic-y += siginfo.h | 26 | generic-y += siginfo.h |
27 | generic-y += simd.h | ||
27 | generic-y += sizes.h | 28 | generic-y += sizes.h |
28 | generic-y += socket.h | 29 | generic-y += socket.h |
29 | generic-y += sockios.h | 30 | generic-y += sockios.h |
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index fcc1b5bf6979..5c2285160575 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h | |||
@@ -53,6 +53,13 @@ | |||
53 | #define put_byte_3 lsl #0 | 53 | #define put_byte_3 lsl #0 |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | /* Select code for any configuration running in BE8 mode */ | ||
57 | #ifdef CONFIG_CPU_ENDIAN_BE8 | ||
58 | #define ARM_BE8(code...) code | ||
59 | #else | ||
60 | #define ARM_BE8(code...) | ||
61 | #endif | ||
62 | |||
56 | /* | 63 | /* |
57 | * Data preload for architectures that support it | 64 | * Data preload for architectures that support it |
58 | */ | 65 | */ |
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index da1c77d39327..62d2cb53b069 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #define __ASM_ARM_ATOMIC_H | 12 | #define __ASM_ARM_ATOMIC_H |
13 | 13 | ||
14 | #include <linux/compiler.h> | 14 | #include <linux/compiler.h> |
15 | #include <linux/prefetch.h> | ||
15 | #include <linux/types.h> | 16 | #include <linux/types.h> |
16 | #include <linux/irqflags.h> | 17 | #include <linux/irqflags.h> |
17 | #include <asm/barrier.h> | 18 | #include <asm/barrier.h> |
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v) | |||
41 | unsigned long tmp; | 42 | unsigned long tmp; |
42 | int result; | 43 | int result; |
43 | 44 | ||
45 | prefetchw(&v->counter); | ||
44 | __asm__ __volatile__("@ atomic_add\n" | 46 | __asm__ __volatile__("@ atomic_add\n" |
45 | "1: ldrex %0, [%3]\n" | 47 | "1: ldrex %0, [%3]\n" |
46 | " add %0, %0, %4\n" | 48 | " add %0, %0, %4\n" |
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v) | |||
79 | unsigned long tmp; | 81 | unsigned long tmp; |
80 | int result; | 82 | int result; |
81 | 83 | ||
84 | prefetchw(&v->counter); | ||
82 | __asm__ __volatile__("@ atomic_sub\n" | 85 | __asm__ __volatile__("@ atomic_sub\n" |
83 | "1: ldrex %0, [%3]\n" | 86 | "1: ldrex %0, [%3]\n" |
84 | " sub %0, %0, %4\n" | 87 | " sub %0, %0, %4\n" |
@@ -114,7 +117,8 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
114 | 117 | ||
115 | static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) | 118 | static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) |
116 | { | 119 | { |
117 | unsigned long oldval, res; | 120 | int oldval; |
121 | unsigned long res; | ||
118 | 122 | ||
119 | smp_mb(); | 123 | smp_mb(); |
120 | 124 | ||
@@ -134,21 +138,6 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) | |||
134 | return oldval; | 138 | return oldval; |
135 | } | 139 | } |
136 | 140 | ||
137 | static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) | ||
138 | { | ||
139 | unsigned long tmp, tmp2; | ||
140 | |||
141 | __asm__ __volatile__("@ atomic_clear_mask\n" | ||
142 | "1: ldrex %0, [%3]\n" | ||
143 | " bic %0, %0, %4\n" | ||
144 | " strex %1, %0, [%3]\n" | ||
145 | " teq %1, #0\n" | ||
146 | " bne 1b" | ||
147 | : "=&r" (tmp), "=&r" (tmp2), "+Qo" (*addr) | ||
148 | : "r" (addr), "Ir" (mask) | ||
149 | : "cc"); | ||
150 | } | ||
151 | |||
152 | #else /* ARM_ARCH_6 */ | 141 | #else /* ARM_ARCH_6 */ |
153 | 142 | ||
154 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
@@ -197,15 +186,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) | |||
197 | return ret; | 186 | return ret; |
198 | } | 187 | } |
199 | 188 | ||
200 | static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) | ||
201 | { | ||
202 | unsigned long flags; | ||
203 | |||
204 | raw_local_irq_save(flags); | ||
205 | *addr &= ~mask; | ||
206 | raw_local_irq_restore(flags); | ||
207 | } | ||
208 | |||
209 | #endif /* __LINUX_ARM_ARCH__ */ | 189 | #endif /* __LINUX_ARM_ARCH__ */ |
210 | 190 | ||
211 | #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) | 191 | #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) |
@@ -238,15 +218,15 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) | |||
238 | 218 | ||
239 | #ifndef CONFIG_GENERIC_ATOMIC64 | 219 | #ifndef CONFIG_GENERIC_ATOMIC64 |
240 | typedef struct { | 220 | typedef struct { |
241 | u64 __aligned(8) counter; | 221 | long long counter; |
242 | } atomic64_t; | 222 | } atomic64_t; |
243 | 223 | ||
244 | #define ATOMIC64_INIT(i) { (i) } | 224 | #define ATOMIC64_INIT(i) { (i) } |
245 | 225 | ||
246 | #ifdef CONFIG_ARM_LPAE | 226 | #ifdef CONFIG_ARM_LPAE |
247 | static inline u64 atomic64_read(const atomic64_t *v) | 227 | static inline long long atomic64_read(const atomic64_t *v) |
248 | { | 228 | { |
249 | u64 result; | 229 | long long result; |
250 | 230 | ||
251 | __asm__ __volatile__("@ atomic64_read\n" | 231 | __asm__ __volatile__("@ atomic64_read\n" |
252 | " ldrd %0, %H0, [%1]" | 232 | " ldrd %0, %H0, [%1]" |
@@ -257,7 +237,7 @@ static inline u64 atomic64_read(const atomic64_t *v) | |||
257 | return result; | 237 | return result; |
258 | } | 238 | } |
259 | 239 | ||
260 | static inline void atomic64_set(atomic64_t *v, u64 i) | 240 | static inline void atomic64_set(atomic64_t *v, long long i) |
261 | { | 241 | { |
262 | __asm__ __volatile__("@ atomic64_set\n" | 242 | __asm__ __volatile__("@ atomic64_set\n" |
263 | " strd %2, %H2, [%1]" | 243 | " strd %2, %H2, [%1]" |
@@ -266,9 +246,9 @@ static inline void atomic64_set(atomic64_t *v, u64 i) | |||
266 | ); | 246 | ); |
267 | } | 247 | } |
268 | #else | 248 | #else |
269 | static inline u64 atomic64_read(const atomic64_t *v) | 249 | static inline long long atomic64_read(const atomic64_t *v) |
270 | { | 250 | { |
271 | u64 result; | 251 | long long result; |
272 | 252 | ||
273 | __asm__ __volatile__("@ atomic64_read\n" | 253 | __asm__ __volatile__("@ atomic64_read\n" |
274 | " ldrexd %0, %H0, [%1]" | 254 | " ldrexd %0, %H0, [%1]" |
@@ -279,10 +259,11 @@ static inline u64 atomic64_read(const atomic64_t *v) | |||
279 | return result; | 259 | return result; |
280 | } | 260 | } |
281 | 261 | ||
282 | static inline void atomic64_set(atomic64_t *v, u64 i) | 262 | static inline void atomic64_set(atomic64_t *v, long long i) |
283 | { | 263 | { |
284 | u64 tmp; | 264 | long long tmp; |
285 | 265 | ||
266 | prefetchw(&v->counter); | ||
286 | __asm__ __volatile__("@ atomic64_set\n" | 267 | __asm__ __volatile__("@ atomic64_set\n" |
287 | "1: ldrexd %0, %H0, [%2]\n" | 268 | "1: ldrexd %0, %H0, [%2]\n" |
288 | " strexd %0, %3, %H3, [%2]\n" | 269 | " strexd %0, %3, %H3, [%2]\n" |
@@ -294,15 +275,16 @@ static inline void atomic64_set(atomic64_t *v, u64 i) | |||
294 | } | 275 | } |
295 | #endif | 276 | #endif |
296 | 277 | ||
297 | static inline void atomic64_add(u64 i, atomic64_t *v) | 278 | static inline void atomic64_add(long long i, atomic64_t *v) |
298 | { | 279 | { |
299 | u64 result; | 280 | long long result; |
300 | unsigned long tmp; | 281 | unsigned long tmp; |
301 | 282 | ||
283 | prefetchw(&v->counter); | ||
302 | __asm__ __volatile__("@ atomic64_add\n" | 284 | __asm__ __volatile__("@ atomic64_add\n" |
303 | "1: ldrexd %0, %H0, [%3]\n" | 285 | "1: ldrexd %0, %H0, [%3]\n" |
304 | " adds %0, %0, %4\n" | 286 | " adds %Q0, %Q0, %Q4\n" |
305 | " adc %H0, %H0, %H4\n" | 287 | " adc %R0, %R0, %R4\n" |
306 | " strexd %1, %0, %H0, [%3]\n" | 288 | " strexd %1, %0, %H0, [%3]\n" |
307 | " teq %1, #0\n" | 289 | " teq %1, #0\n" |
308 | " bne 1b" | 290 | " bne 1b" |
@@ -311,17 +293,17 @@ static inline void atomic64_add(u64 i, atomic64_t *v) | |||
311 | : "cc"); | 293 | : "cc"); |
312 | } | 294 | } |
313 | 295 | ||
314 | static inline u64 atomic64_add_return(u64 i, atomic64_t *v) | 296 | static inline long long atomic64_add_return(long long i, atomic64_t *v) |
315 | { | 297 | { |
316 | u64 result; | 298 | long long result; |
317 | unsigned long tmp; | 299 | unsigned long tmp; |
318 | 300 | ||
319 | smp_mb(); | 301 | smp_mb(); |
320 | 302 | ||
321 | __asm__ __volatile__("@ atomic64_add_return\n" | 303 | __asm__ __volatile__("@ atomic64_add_return\n" |
322 | "1: ldrexd %0, %H0, [%3]\n" | 304 | "1: ldrexd %0, %H0, [%3]\n" |
323 | " adds %0, %0, %4\n" | 305 | " adds %Q0, %Q0, %Q4\n" |
324 | " adc %H0, %H0, %H4\n" | 306 | " adc %R0, %R0, %R4\n" |
325 | " strexd %1, %0, %H0, [%3]\n" | 307 | " strexd %1, %0, %H0, [%3]\n" |
326 | " teq %1, #0\n" | 308 | " teq %1, #0\n" |
327 | " bne 1b" | 309 | " bne 1b" |
@@ -334,15 +316,16 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) | |||
334 | return result; | 316 | return result; |
335 | } | 317 | } |
336 | 318 | ||
337 | static inline void atomic64_sub(u64 i, atomic64_t *v) | 319 | static inline void atomic64_sub(long long i, atomic64_t *v) |
338 | { | 320 | { |
339 | u64 result; | 321 | long long result; |
340 | unsigned long tmp; | 322 | unsigned long tmp; |
341 | 323 | ||
324 | prefetchw(&v->counter); | ||
342 | __asm__ __volatile__("@ atomic64_sub\n" | 325 | __asm__ __volatile__("@ atomic64_sub\n" |
343 | "1: ldrexd %0, %H0, [%3]\n" | 326 | "1: ldrexd %0, %H0, [%3]\n" |
344 | " subs %0, %0, %4\n" | 327 | " subs %Q0, %Q0, %Q4\n" |
345 | " sbc %H0, %H0, %H4\n" | 328 | " sbc %R0, %R0, %R4\n" |
346 | " strexd %1, %0, %H0, [%3]\n" | 329 | " strexd %1, %0, %H0, [%3]\n" |
347 | " teq %1, #0\n" | 330 | " teq %1, #0\n" |
348 | " bne 1b" | 331 | " bne 1b" |
@@ -351,17 +334,17 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) | |||
351 | : "cc"); | 334 | : "cc"); |
352 | } | 335 | } |
353 | 336 | ||
354 | static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) | 337 | static inline long long atomic64_sub_return(long long i, atomic64_t *v) |
355 | { | 338 | { |
356 | u64 result; | 339 | long long result; |
357 | unsigned long tmp; | 340 | unsigned long tmp; |
358 | 341 | ||
359 | smp_mb(); | 342 | smp_mb(); |
360 | 343 | ||
361 | __asm__ __volatile__("@ atomic64_sub_return\n" | 344 | __asm__ __volatile__("@ atomic64_sub_return\n" |
362 | "1: ldrexd %0, %H0, [%3]\n" | 345 | "1: ldrexd %0, %H0, [%3]\n" |
363 | " subs %0, %0, %4\n" | 346 | " subs %Q0, %Q0, %Q4\n" |
364 | " sbc %H0, %H0, %H4\n" | 347 | " sbc %R0, %R0, %R4\n" |
365 | " strexd %1, %0, %H0, [%3]\n" | 348 | " strexd %1, %0, %H0, [%3]\n" |
366 | " teq %1, #0\n" | 349 | " teq %1, #0\n" |
367 | " bne 1b" | 350 | " bne 1b" |
@@ -374,9 +357,10 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) | |||
374 | return result; | 357 | return result; |
375 | } | 358 | } |
376 | 359 | ||
377 | static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) | 360 | static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old, |
361 | long long new) | ||
378 | { | 362 | { |
379 | u64 oldval; | 363 | long long oldval; |
380 | unsigned long res; | 364 | unsigned long res; |
381 | 365 | ||
382 | smp_mb(); | 366 | smp_mb(); |
@@ -398,9 +382,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) | |||
398 | return oldval; | 382 | return oldval; |
399 | } | 383 | } |
400 | 384 | ||
401 | static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) | 385 | static inline long long atomic64_xchg(atomic64_t *ptr, long long new) |
402 | { | 386 | { |
403 | u64 result; | 387 | long long result; |
404 | unsigned long tmp; | 388 | unsigned long tmp; |
405 | 389 | ||
406 | smp_mb(); | 390 | smp_mb(); |
@@ -419,18 +403,18 @@ static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) | |||
419 | return result; | 403 | return result; |
420 | } | 404 | } |
421 | 405 | ||
422 | static inline u64 atomic64_dec_if_positive(atomic64_t *v) | 406 | static inline long long atomic64_dec_if_positive(atomic64_t *v) |
423 | { | 407 | { |
424 | u64 result; | 408 | long long result; |
425 | unsigned long tmp; | 409 | unsigned long tmp; |
426 | 410 | ||
427 | smp_mb(); | 411 | smp_mb(); |
428 | 412 | ||
429 | __asm__ __volatile__("@ atomic64_dec_if_positive\n" | 413 | __asm__ __volatile__("@ atomic64_dec_if_positive\n" |
430 | "1: ldrexd %0, %H0, [%3]\n" | 414 | "1: ldrexd %0, %H0, [%3]\n" |
431 | " subs %0, %0, #1\n" | 415 | " subs %Q0, %Q0, #1\n" |
432 | " sbc %H0, %H0, #0\n" | 416 | " sbc %R0, %R0, #0\n" |
433 | " teq %H0, #0\n" | 417 | " teq %R0, #0\n" |
434 | " bmi 2f\n" | 418 | " bmi 2f\n" |
435 | " strexd %1, %0, %H0, [%3]\n" | 419 | " strexd %1, %0, %H0, [%3]\n" |
436 | " teq %1, #0\n" | 420 | " teq %1, #0\n" |
@@ -445,9 +429,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v) | |||
445 | return result; | 429 | return result; |
446 | } | 430 | } |
447 | 431 | ||
448 | static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) | 432 | static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) |
449 | { | 433 | { |
450 | u64 val; | 434 | long long val; |
451 | unsigned long tmp; | 435 | unsigned long tmp; |
452 | int ret = 1; | 436 | int ret = 1; |
453 | 437 | ||
@@ -459,8 +443,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) | |||
459 | " teqeq %H0, %H5\n" | 443 | " teqeq %H0, %H5\n" |
460 | " moveq %1, #0\n" | 444 | " moveq %1, #0\n" |
461 | " beq 2f\n" | 445 | " beq 2f\n" |
462 | " adds %0, %0, %6\n" | 446 | " adds %Q0, %Q0, %Q6\n" |
463 | " adc %H0, %H0, %H6\n" | 447 | " adc %R0, %R0, %R6\n" |
464 | " strexd %2, %0, %H0, [%4]\n" | 448 | " strexd %2, %0, %H0, [%4]\n" |
465 | " teq %2, #0\n" | 449 | " teq %2, #0\n" |
466 | " bne 1b\n" | 450 | " bne 1b\n" |
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 000000000000..1714800fa113 --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * arch/arm/include/asm/bL_switcher.h | ||
3 | * | ||
4 | * Created by: Nicolas Pitre, April 2012 | ||
5 | * Copyright: (C) 2012-2013 Linaro Limited | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #ifndef ASM_BL_SWITCHER_H | ||
13 | #define ASM_BL_SWITCHER_H | ||
14 | |||
15 | #include <linux/compiler.h> | ||
16 | #include <linux/types.h> | ||
17 | |||
18 | typedef void (*bL_switch_completion_handler)(void *cookie); | ||
19 | |||
20 | int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, | ||
21 | bL_switch_completion_handler completer, | ||
22 | void *completer_cookie); | ||
23 | static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) | ||
24 | { | ||
25 | return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Register here to be notified about runtime enabling/disabling of | ||
30 | * the switcher. | ||
31 | * | ||
32 | * The notifier chain is called with the switcher activation lock held: | ||
33 | * the switcher will not be enabled or disabled during callbacks. | ||
34 | * Callbacks must not call bL_switcher_{get,put}_enabled(). | ||
35 | */ | ||
36 | #define BL_NOTIFY_PRE_ENABLE 0 | ||
37 | #define BL_NOTIFY_POST_ENABLE 1 | ||
38 | #define BL_NOTIFY_PRE_DISABLE 2 | ||
39 | #define BL_NOTIFY_POST_DISABLE 3 | ||
40 | |||
41 | #ifdef CONFIG_BL_SWITCHER | ||
42 | |||
43 | int bL_switcher_register_notifier(struct notifier_block *nb); | ||
44 | int bL_switcher_unregister_notifier(struct notifier_block *nb); | ||
45 | |||
46 | /* | ||
47 | * Use these functions to temporarily prevent enabling/disabling of | ||
48 | * the switcher. | ||
49 | * bL_switcher_get_enabled() returns true if the switcher is currently | ||
50 | * enabled. Each call to bL_switcher_get_enabled() must be followed | ||
51 | * by a call to bL_switcher_put_enabled(). These functions are not | ||
52 | * recursive. | ||
53 | */ | ||
54 | bool bL_switcher_get_enabled(void); | ||
55 | void bL_switcher_put_enabled(void); | ||
56 | |||
57 | int bL_switcher_trace_trigger(void); | ||
58 | int bL_switcher_get_logical_index(u32 mpidr); | ||
59 | |||
60 | #else | ||
61 | static inline int bL_switcher_register_notifier(struct notifier_block *nb) | ||
62 | { | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) | ||
67 | { | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static inline bool bL_switcher_get_enabled(void) { return false; } | ||
72 | static inline void bL_switcher_put_enabled(void) { } | ||
73 | static inline int bL_switcher_trace_trigger(void) { return 0; } | ||
74 | static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } | ||
75 | #endif /* CONFIG_BL_SWITCHER */ | ||
76 | |||
77 | #endif | ||
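
For context, a client of this header would typically pair the notifier with the get/put pinning as in the hedged sketch below; the my_* names are hypothetical and not part of the patch:

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <asm/bL_switcher.h>

    static void my_pause(void)  { /* quiesce hardware; hypothetical */ }
    static void my_resume(void) { /* resume it again */ }

    /* Called with the switcher activation lock held, so the switcher
     * cannot change state while we run (and we must not call
     * bL_switcher_{get,put}_enabled() from here). */
    static int my_bl_notify(struct notifier_block *nb,
                            unsigned long action, void *data)
    {
        switch (action) {
        case BL_NOTIFY_PRE_ENABLE:
        case BL_NOTIFY_PRE_DISABLE:
            my_pause();
            break;
        case BL_NOTIFY_POST_ENABLE:
        case BL_NOTIFY_POST_DISABLE:
            my_resume();
            break;
        }
        return NOTIFY_OK;
    }

    static struct notifier_block my_bl_nb = {
        .notifier_call = my_bl_notify,
    };

    static int __init my_client_init(void)
    {
        int err = bL_switcher_register_notifier(&my_bl_nb);

        if (err)
            return err;
        if (bL_switcher_get_enabled()) {
            /* switcher is enabled and pinned within this window */
            bL_switcher_put_enabled();
        }
        return 0;
    }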
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 7af5c6c3653a..b274bde24905 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define _ASMARM_BUG_H | 2 | #define _ASMARM_BUG_H |
3 | 3 | ||
4 | #include <linux/linkage.h> | 4 | #include <linux/linkage.h> |
5 | #include <linux/types.h> | ||
6 | #include <asm/opcodes.h> | ||
5 | 7 | ||
6 | #ifdef CONFIG_BUG | 8 | #ifdef CONFIG_BUG |
7 | 9 | ||
@@ -12,10 +14,10 @@ | |||
12 | */ | 14 | */ |
13 | #ifdef CONFIG_THUMB2_KERNEL | 15 | #ifdef CONFIG_THUMB2_KERNEL |
14 | #define BUG_INSTR_VALUE 0xde02 | 16 | #define BUG_INSTR_VALUE 0xde02 |
15 | #define BUG_INSTR_TYPE ".hword " | 17 | #define BUG_INSTR(__value) __inst_thumb16(__value) |
16 | #else | 18 | #else |
17 | #define BUG_INSTR_VALUE 0xe7f001f2 | 19 | #define BUG_INSTR_VALUE 0xe7f001f2 |
18 | #define BUG_INSTR_TYPE ".word " | 20 | #define BUG_INSTR(__value) __inst_arm(__value) |
19 | #endif | 21 | #endif |
20 | 22 | ||
21 | 23 | ||
@@ -33,7 +35,7 @@ | |||
33 | 35 | ||
34 | #define __BUG(__file, __line, __value) \ | 36 | #define __BUG(__file, __line, __value) \ |
35 | do { \ | 37 | do { \ |
36 | asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \ | 38 | asm volatile("1:\t" BUG_INSTR(__value) "\n" \ |
37 | ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ | 39 | ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ |
38 | "2:\t.asciz " #__file "\n" \ | 40 | "2:\t.asciz " #__file "\n" \ |
39 | ".popsection\n" \ | 41 | ".popsection\n" \ |
@@ -48,7 +50,7 @@ do { \ | |||
48 | 50 | ||
49 | #define __BUG(__file, __line, __value) \ | 51 | #define __BUG(__file, __line, __value) \ |
50 | do { \ | 52 | do { \ |
51 | asm volatile(BUG_INSTR_TYPE #__value); \ | 53 | asm volatile(BUG_INSTR(__value) "\n"); \ |
52 | unreachable(); \ | 54 | unreachable(); \ |
53 | } while (0) | 55 | } while (0) |
54 | #endif /* CONFIG_DEBUG_BUGVERBOSE */ | 56 | #endif /* CONFIG_DEBUG_BUGVERBOSE */ |
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 15f2d5bf8875..ee753f1749cd 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h | |||
@@ -435,4 +435,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) | |||
435 | #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) | 435 | #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) |
436 | #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) | 436 | #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) |
437 | 437 | ||
438 | /* | ||
439 | * Disabling cache access for one CPU in an ARMv7 SMP system is tricky. | ||
440 | * To do so we must: | ||
441 | * | ||
442 | * - Clear the SCTLR.C bit to prevent further cache allocations | ||
443 | * - Flush the desired level of cache | ||
444 | * - Clear the ACTLR "SMP" bit to disable local coherency | ||
445 | * | ||
446 | * ... and do so without any intervening memory access between those steps, | ||
447 | * not even to the stack. | ||
448 | * | ||
449 | * WARNING -- After this has been called: | ||
450 | * | ||
451 | * - No ldrex/strex (and similar) instructions must be used. | ||
452 | * - The CPU is obviously no longer coherent with the other CPUs. | ||
453 | * - This is unlikely to work as expected if Linux is running non-secure. | ||
454 | * | ||
455 | * Note: | ||
456 | * | ||
457 | * - This is known to apply to several ARMv7 processor implementations; | ||
458 | * however, some exceptions may exist. Caveat emptor. | ||
459 | * | ||
460 | * - The clobber list is dictated by the call to v7_flush_dcache_*. | ||
461 | * fp is preserved to the stack explicitly prior to disabling the cache | ||
462 | * since adding it to the clobber list is incompatible with having | ||
463 | * CONFIG_FRAME_POINTER=y. ip is saved as well in case r12-clobbering | ||
464 | * trampolines are inserted by the linker, and to keep sp 64-bit aligned. | ||
465 | */ | ||
466 | #define v7_exit_coherency_flush(level) \ | ||
467 | asm volatile( \ | ||
468 | "stmfd sp!, {fp, ip} \n\t" \ | ||
469 | "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ | ||
470 | "bic r0, r0, #"__stringify(CR_C)" \n\t" \ | ||
471 | "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ | ||
472 | "isb \n\t" \ | ||
473 | "bl v7_flush_dcache_"__stringify(level)" \n\t" \ | ||
474 | "clrex \n\t" \ | ||
475 | "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \ | ||
476 | "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ | ||
477 | "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ | ||
478 | "isb \n\t" \ | ||
479 | "dsb \n\t" \ | ||
480 | "ldmfd sp!, {fp, ip}" \ | ||
481 | : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ | ||
482 | "r9","r10","lr","memory" ) | ||
483 | |||
438 | #endif | 484 | #endif |
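
A typical call site for the new macro is a platform's CPU power-down path. A hedged sketch (the wrapper function is illustrative, not from the patch):

    #include <linux/types.h>
    #include <asm/cacheflush.h>

    /* Last steps before power-off: "louis" flushes to the Level Of
     * Unification Inner Shareable (enough for one CPU going down),
     * "all" flushes the whole hierarchy (for the last CPU in a
     * cluster).  The macro stringifies the argument into a call to
     * v7_flush_dcache_louis / v7_flush_dcache_all. */
    static void my_exit_coherency(bool last_man)
    {
        if (last_man)
            v7_exit_coherency_flush(all);
        else
            v7_exit_coherency_flush(louis);
        /* From here on: no ldrex/strex, no coherent data access. */
    }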
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 4f009c10540d..df2fbba7efc8 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h | |||
@@ -223,6 +223,42 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, | |||
223 | return ret; | 223 | return ret; |
224 | } | 224 | } |
225 | 225 | ||
226 | static inline unsigned long long __cmpxchg64(unsigned long long *ptr, | ||
227 | unsigned long long old, | ||
228 | unsigned long long new) | ||
229 | { | ||
230 | unsigned long long oldval; | ||
231 | unsigned long res; | ||
232 | |||
233 | __asm__ __volatile__( | ||
234 | "1: ldrexd %1, %H1, [%3]\n" | ||
235 | " teq %1, %4\n" | ||
236 | " teqeq %H1, %H4\n" | ||
237 | " bne 2f\n" | ||
238 | " strexd %0, %5, %H5, [%3]\n" | ||
239 | " teq %0, #0\n" | ||
240 | " bne 1b\n" | ||
241 | "2:" | ||
242 | : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr) | ||
243 | : "r" (ptr), "r" (old), "r" (new) | ||
244 | : "cc"); | ||
245 | |||
246 | return oldval; | ||
247 | } | ||
248 | |||
249 | static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, | ||
250 | unsigned long long old, | ||
251 | unsigned long long new) | ||
252 | { | ||
253 | unsigned long long ret; | ||
254 | |||
255 | smp_mb(); | ||
256 | ret = __cmpxchg64(ptr, old, new); | ||
257 | smp_mb(); | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
226 | #define cmpxchg_local(ptr,o,n) \ | 262 | #define cmpxchg_local(ptr,o,n) \ |
227 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ | 263 | ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ |
228 | (unsigned long)(o), \ | 264 | (unsigned long)(o), \ |
@@ -230,18 +266,16 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, | |||
230 | sizeof(*(ptr)))) | 266 | sizeof(*(ptr)))) |
231 | 267 | ||
232 | #define cmpxchg64(ptr, o, n) \ | 268 | #define cmpxchg64(ptr, o, n) \ |
233 | ((__typeof__(*(ptr)))atomic64_cmpxchg(container_of((ptr), \ | 269 | ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ |
234 | atomic64_t, \ | 270 | (unsigned long long)(o), \ |
235 | counter), \ | 271 | (unsigned long long)(n))) |
236 | (unsigned long long)(o), \ | 272 | |
237 | (unsigned long long)(n))) | 273 | #define cmpxchg64_relaxed(ptr, o, n) \ |
238 | 274 | ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ | |
239 | #define cmpxchg64_local(ptr, o, n) \ | 275 | (unsigned long long)(o), \ |
240 | ((__typeof__(*(ptr)))local64_cmpxchg(container_of((ptr), \ | 276 | (unsigned long long)(n))) |
241 | local64_t, \ | 277 | |
242 | a), \ | 278 | #define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) |
243 | (unsigned long long)(o), \ | ||
244 | (unsigned long long)(n))) | ||
245 | 279 | ||
246 | #endif /* __LINUX_ARM_ARCH__ >= 6 */ | 280 | #endif /* __LINUX_ARM_ARCH__ >= 6 */ |
247 | 281 | ||
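
With this change cmpxchg64() is implemented directly on ldrexd/strexd instead of being routed through atomic64_cmpxchg(), and a barrier-free cmpxchg64_relaxed() is exposed for callers that handle ordering themselves. Usage is the familiar compare-and-swap retry loop; a minimal sketch:

    #include <linux/types.h>

    /* Lock-free 64-bit accumulate built on cmpxchg64(); the loop
     * retries whenever another CPU updated *p between the snapshot
     * and the swap. */
    static void my_add_u64(u64 *p, u64 delta)
    {
        u64 old;

        do {
            old = *p;
        } while (cmpxchg64(p, old, old + delta) != old);
    }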
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 9672e978d50d..acdde76b39bb 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #define CPUID_TLBTYPE 3 | 10 | #define CPUID_TLBTYPE 3 |
11 | #define CPUID_MPUIR 4 | 11 | #define CPUID_MPUIR 4 |
12 | #define CPUID_MPIDR 5 | 12 | #define CPUID_MPIDR 5 |
13 | #define CPUID_REVIDR 6 | ||
13 | 14 | ||
14 | #ifdef CONFIG_CPU_V7M | 15 | #ifdef CONFIG_CPU_V7M |
15 | #define CPUID_EXT_PFR0 0x40 | 16 | #define CPUID_EXT_PFR0 0x40 |
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 2740c2a2df63..fe3ea776dc34 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/threads.h> | 5 | #include <linux/threads.h> |
6 | #include <asm/irq.h> | 6 | #include <asm/irq.h> |
7 | 7 | ||
8 | #define NR_IPI 6 | 8 | #define NR_IPI 8 |
9 | 9 | ||
10 | typedef struct { | 10 | typedef struct { |
11 | unsigned int __softirq_pending; | 11 | unsigned int __softirq_pending; |
diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 0cf7a6b842ff..ad774f37c47c 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h | |||
@@ -24,8 +24,8 @@ | |||
24 | #define TRACER_TIMEOUT 10000 | 24 | #define TRACER_TIMEOUT 10000 |
25 | 25 | ||
26 | #define etm_writel(t, v, x) \ | 26 | #define etm_writel(t, v, x) \ |
27 | (__raw_writel((v), (t)->etm_regs + (x))) | 27 | (writel_relaxed((v), (t)->etm_regs + (x))) |
28 | #define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x))) | 28 | #define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x))) |
29 | 29 | ||
30 | /* CoreSight Management Registers */ | 30 | /* CoreSight Management Registers */ |
31 | #define CSMR_LOCKACCESS 0xfb0 | 31 | #define CSMR_LOCKACCESS 0xfb0 |
@@ -142,8 +142,8 @@ | |||
142 | #define ETBFF_TRIGFL BIT(10) | 142 | #define ETBFF_TRIGFL BIT(10) |
143 | 143 | ||
144 | #define etb_writel(t, v, x) \ | 144 | #define etb_writel(t, v, x) \ |
145 | (__raw_writel((v), (t)->etb_regs + (x))) | 145 | (writel_relaxed((v), (t)->etb_regs + (x))) |
146 | #define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x))) | 146 | #define etb_readl(t, x) (readl_relaxed((t)->etb_regs + (x))) |
147 | 147 | ||
148 | #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) | 148 | #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) |
149 | #define etm_unlock(t) \ | 149 | #define etm_unlock(t) \ |
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h index 48066ce9ea34..0a9d5dd93294 100644 --- a/arch/arm/include/asm/kgdb.h +++ b/arch/arm/include/asm/kgdb.h | |||
@@ -11,6 +11,7 @@ | |||
11 | #define __ARM_KGDB_H__ | 11 | #define __ARM_KGDB_H__ |
12 | 12 | ||
13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
14 | #include <asm/opcodes.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * GDB assumes that we're a user process being debugged, so | 17 | * GDB assumes that we're a user process being debugged, so |
@@ -41,7 +42,7 @@ | |||
41 | 42 | ||
42 | static inline void arch_kgdb_breakpoint(void) | 43 | static inline void arch_kgdb_breakpoint(void) |
43 | { | 44 | { |
44 | asm(".word 0xe7ffdeff"); | 45 | asm(__inst_arm(0xe7ffdeff)); |
45 | } | 46 | } |
46 | 47 | ||
47 | extern void kgdb_handle_bus_error(void); | 48 | extern void kgdb_handle_bus_error(void); |
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 402a2bc6aa68..17a3fa2979e8 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h | |||
@@ -49,6 +49,7 @@ struct machine_desc { | |||
49 | bool (*smp_init)(void); | 49 | bool (*smp_init)(void); |
50 | void (*fixup)(struct tag *, char **, | 50 | void (*fixup)(struct tag *, char **, |
51 | struct meminfo *); | 51 | struct meminfo *); |
52 | void (*init_meminfo)(void); | ||
52 | void (*reserve)(void);/* reserve mem blocks */ | 53 | void (*reserve)(void);/* reserve mem blocks */ |
53 | void (*map_io)(void);/* IO mapping function */ | 54 | void (*map_io)(void);/* IO mapping function */ |
54 | void (*init_early)(void); | 55 | void (*init_early)(void); |
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index fc82a88f5b69..608516ebabfe 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h | |||
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); | |||
42 | void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); | 42 | void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * This sets an early poke i.e a value to be poked into some address | ||
46 | * from very early assembly code before the CPU is ungated. The | ||
47 | * address must be physical, and if 0 then nothing will happen. | ||
48 | */ | ||
49 | void mcpm_set_early_poke(unsigned cpu, unsigned cluster, | ||
50 | unsigned long poke_phys_addr, unsigned long poke_val); | ||
51 | |||
52 | /* | ||
45 | * CPU/cluster power operations API for higher subsystems to use. | 53 | * CPU/cluster power operations API for higher subsystems to use. |
46 | */ | 54 | */ |
47 | 55 | ||
@@ -81,10 +89,40 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster); | |||
81 | * | 89 | * |
82 | * This will return if mcpm_platform_register() has not been called | 90 | * This will return if mcpm_platform_register() has not been called |
83 | * previously in which case the caller should take appropriate action. | 91 | * previously in which case the caller should take appropriate action. |
92 | * | ||
93 | * On success, the CPU is not guaranteed to be truly halted until | ||
94 | * mcpm_cpu_power_down_finish() subsequently returns zero for the | ||
95 | * specified cpu. Until then, other CPUs should make sure they do not | ||
96 | * trash memory the target CPU might be executing/accessing. | ||
84 | */ | 97 | */ |
85 | void mcpm_cpu_power_down(void); | 98 | void mcpm_cpu_power_down(void); |
86 | 99 | ||
87 | /** | 100 | /** |
101 | * mcpm_cpu_power_down_finish - wait for a specified CPU to halt, and | ||
102 | * make sure it is powered off | ||
103 | * | ||
104 | * @cpu: CPU number within given cluster | ||
105 | * @cluster: cluster number for the CPU | ||
106 | * | ||
107 | * Call this function to ensure that a pending powerdown has taken | ||
108 | * effect and the CPU is safely parked before performing non-mcpm | ||
109 | * operations that may affect the CPU (such as kexec trashing the | ||
110 | * kernel text). | ||
111 | * | ||
112 | * It is *not* necessary to call this function if you only need to | ||
113 | * serialise a pending powerdown with mcpm_cpu_power_up() or a wakeup | ||
114 | * event. | ||
115 | * | ||
116 | * Do not call this function unless the specified CPU has already | ||
117 | * called mcpm_cpu_power_down() or has committed to doing so. | ||
118 | * | ||
119 | * @return: | ||
120 | * - zero if the CPU is in a safely parked state | ||
121 | * - nonzero otherwise (e.g., timeout) | ||
122 | */ | ||
123 | int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster); | ||
124 | |||
125 | /** | ||
88 | * mcpm_cpu_suspend - bring the calling CPU in a suspended state | 126 | * mcpm_cpu_suspend - bring the calling CPU in a suspended state |
89 | * | 127 | * |
90 | * @expected_residency: duration in microseconds the CPU is expected | 128 | * @expected_residency: duration in microseconds the CPU is expected |
@@ -126,6 +164,7 @@ int mcpm_cpu_powered_up(void); | |||
126 | struct mcpm_platform_ops { | 164 | struct mcpm_platform_ops { |
127 | int (*power_up)(unsigned int cpu, unsigned int cluster); | 165 | int (*power_up)(unsigned int cpu, unsigned int cluster); |
128 | void (*power_down)(void); | 166 | void (*power_down)(void); |
167 | int (*power_down_finish)(unsigned int cpu, unsigned int cluster); | ||
129 | void (*suspend)(u64); | 168 | void (*suspend)(u64); |
130 | void (*powered_up)(void); | 169 | void (*powered_up)(void); |
131 | }; | 170 | }; |
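
The new power_down_finish hook splits "request powerdown" from "confirm it happened": the dying CPU calls mcpm_cpu_power_down(), and a surviving CPU polls the finish call before touching anything the dying CPU could still be executing from. A hedged sketch of the survivor's side (helper name illustrative):

    #include <linux/errno.h>
    #include <asm/mcpm.h>

    /* For e.g. a kexec or hotplug path: only report the CPU dead once
     * MCPM says it is safely parked and powered off. */
    static int my_confirm_cpu_dead(unsigned int cpu, unsigned int cluster)
    {
        if (mcpm_cpu_power_down_finish(cpu, cluster))
            return -EBUSY;   /* not parked yet (e.g. timeout) */
        return 0;            /* parked: its memory may be reused */
    }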
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index e750a938fd3c..4dd21457ef9d 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h | |||
@@ -172,8 +172,13 @@ | |||
172 | * so that all we need to do is modify the 8-bit constant field. | 172 | * so that all we need to do is modify the 8-bit constant field. |
173 | */ | 173 | */ |
174 | #define __PV_BITS_31_24 0x81000000 | 174 | #define __PV_BITS_31_24 0x81000000 |
175 | #define __PV_BITS_7_0 0x81 | ||
176 | |||
177 | extern u64 __pv_phys_offset; | ||
178 | extern u64 __pv_offset; | ||
179 | extern void fixup_pv_table(const void *, unsigned long); | ||
180 | extern const void *__pv_table_begin, *__pv_table_end; | ||
175 | 181 | ||
176 | extern unsigned long __pv_phys_offset; | ||
177 | #define PHYS_OFFSET __pv_phys_offset | 182 | #define PHYS_OFFSET __pv_phys_offset |
178 | 183 | ||
179 | #define __pv_stub(from,to,instr,type) \ | 184 | #define __pv_stub(from,to,instr,type) \ |
@@ -185,22 +190,58 @@ extern unsigned long __pv_phys_offset; | |||
185 | : "=r" (to) \ | 190 | : "=r" (to) \ |
186 | : "r" (from), "I" (type)) | 191 | : "r" (from), "I" (type)) |
187 | 192 | ||
188 | static inline unsigned long __virt_to_phys(unsigned long x) | 193 | #define __pv_stub_mov_hi(t) \ |
194 | __asm__ volatile("@ __pv_stub_mov\n" \ | ||
195 | "1: mov %R0, %1\n" \ | ||
196 | " .pushsection .pv_table,\"a\"\n" \ | ||
197 | " .long 1b\n" \ | ||
198 | " .popsection\n" \ | ||
199 | : "=r" (t) \ | ||
200 | : "I" (__PV_BITS_7_0)) | ||
201 | |||
202 | #define __pv_add_carry_stub(x, y) \ | ||
203 | __asm__ volatile("@ __pv_add_carry_stub\n" \ | ||
204 | "1: adds %Q0, %1, %2\n" \ | ||
205 | " adc %R0, %R0, #0\n" \ | ||
206 | " .pushsection .pv_table,\"a\"\n" \ | ||
207 | " .long 1b\n" \ | ||
208 | " .popsection\n" \ | ||
209 | : "+r" (y) \ | ||
210 | : "r" (x), "I" (__PV_BITS_31_24) \ | ||
211 | : "cc") | ||
212 | |||
213 | static inline phys_addr_t __virt_to_phys(unsigned long x) | ||
189 | { | 214 | { |
190 | unsigned long t; | 215 | phys_addr_t t; |
191 | __pv_stub(x, t, "add", __PV_BITS_31_24); | 216 | |
217 | if (sizeof(phys_addr_t) == 4) { | ||
218 | __pv_stub(x, t, "add", __PV_BITS_31_24); | ||
219 | } else { | ||
220 | __pv_stub_mov_hi(t); | ||
221 | __pv_add_carry_stub(x, t); | ||
222 | } | ||
192 | return t; | 223 | return t; |
193 | } | 224 | } |
194 | 225 | ||
195 | static inline unsigned long __phys_to_virt(unsigned long x) | 226 | static inline unsigned long __phys_to_virt(phys_addr_t x) |
196 | { | 227 | { |
197 | unsigned long t; | 228 | unsigned long t; |
198 | __pv_stub(x, t, "sub", __PV_BITS_31_24); | 229 | __pv_stub(x, t, "sub", __PV_BITS_31_24); |
199 | return t; | 230 | return t; |
200 | } | 231 | } |
232 | |||
201 | #else | 233 | #else |
202 | #define __virt_to_phys(x) ((x) - PAGE_OFFSET + PHYS_OFFSET) | 234 | |
203 | #define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET) | 235 | static inline phys_addr_t __virt_to_phys(unsigned long x) |
236 | { | ||
237 | return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET; | ||
238 | } | ||
239 | |||
240 | static inline unsigned long __phys_to_virt(phys_addr_t x) | ||
241 | { | ||
242 | return x - PHYS_OFFSET + PAGE_OFFSET; | ||
243 | } | ||
244 | |||
204 | #endif | 245 | #endif |
205 | #endif | 246 | #endif |
206 | #endif /* __ASSEMBLY__ */ | 247 | #endif /* __ASSEMBLY__ */ |
@@ -238,16 +279,33 @@ static inline phys_addr_t virt_to_phys(const volatile void *x) | |||
238 | 279 | ||
239 | static inline void *phys_to_virt(phys_addr_t x) | 280 | static inline void *phys_to_virt(phys_addr_t x) |
240 | { | 281 | { |
241 | return (void *)(__phys_to_virt((unsigned long)(x))); | 282 | return (void *)__phys_to_virt(x); |
242 | } | 283 | } |
243 | 284 | ||
244 | /* | 285 | /* |
245 | * Drivers should NOT use these either. | 286 | * Drivers should NOT use these either. |
246 | */ | 287 | */ |
247 | #define __pa(x) __virt_to_phys((unsigned long)(x)) | 288 | #define __pa(x) __virt_to_phys((unsigned long)(x)) |
248 | #define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) | 289 | #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) |
249 | #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) | 290 | #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) |
250 | 291 | ||
292 | extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); | ||
293 | |||
294 | /* | ||
295 | * These are for systems that have a hardware-interconnect-supported alias of | ||
296 | * physical memory for idmap purposes. Most cases should leave these | ||
297 | * untouched. | ||
298 | */ | ||
299 | static inline phys_addr_t __virt_to_idmap(unsigned long x) | ||
300 | { | ||
301 | if (arch_virt_to_idmap) | ||
302 | return arch_virt_to_idmap(x); | ||
303 | else | ||
304 | return __virt_to_phys(x); | ||
305 | } | ||
306 | |||
307 | #define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x)) | ||
308 | |||
251 | /* | 309 | /* |
252 | * Virtual <-> DMA view memory address translations | 310 | * Virtual <-> DMA view memory address translations |
253 | * Again, these are *only* valid on the kernel direct mapped RAM | 311 | * Again, these are *only* valid on the kernel direct mapped RAM |
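
The LPAE-aware __virt_to_phys() can no longer be patched as a single add of an 8-bit rotated constant: the offset may exceed 32 bits, so the stubs emit a mov of the offset's high word (__pv_stub_mov_hi, marked with __PV_BITS_7_0) plus an adds/adc pair for the low word (__pv_add_carry_stub). The arithmetic, modeled in plain C with the two patched words passed in explicitly for illustration:

    #include <linux/types.h>

    /* C model of the patched LPAE translation; the real kernel
     * rewrites the immediates in place at boot. */
    static phys_addr_t model_virt_to_phys(unsigned long x, u64 pv_offset)
    {
        u32 lo = (u32)pv_offset;          /* patched into "adds" */
        u32 hi = (u32)(pv_offset >> 32);  /* patched into "mov %R0" */
        u64 t  = (u64)hi << 32;           /* __pv_stub_mov_hi */

        t += (u64)x + lo;                 /* adds %Q0 / adc %R0 pair */
        return (phys_addr_t)t;
    }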
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 6f18da09668b..64fd15159b7d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h | |||
@@ -16,7 +16,7 @@ typedef struct { | |||
16 | #ifdef CONFIG_CPU_HAS_ASID | 16 | #ifdef CONFIG_CPU_HAS_ASID |
17 | #define ASID_BITS 8 | 17 | #define ASID_BITS 8 |
18 | #define ASID_MASK ((~0ULL) << ASID_BITS) | 18 | #define ASID_MASK ((~0ULL) << ASID_BITS) |
19 | #define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) | 19 | #define ASID(mm) ((unsigned int)((mm)->context.id.counter & ~ASID_MASK)) |
20 | #else | 20 | #else |
21 | #define ASID(mm) (0) | 21 | #define ASID(mm) (0) |
22 | #endif | 22 | #endif |
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index f97ee02386ee..86a659a19526 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h | |||
@@ -181,6 +181,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) | |||
181 | 181 | ||
182 | #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) | 182 | #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) |
183 | 183 | ||
184 | /* | ||
185 | * We don't have huge page support for short descriptors, so for the | ||
186 | * moment define empty stubs for use by pin_page_for_write. | ||
187 | */ | ||
188 | #define pmd_hugewillfault(pmd) (0) | ||
189 | #define pmd_thp_or_huge(pmd) (0) | ||
190 | |||
184 | #endif /* __ASSEMBLY__ */ | 191 | #endif /* __ASSEMBLY__ */ |
185 | 192 | ||
186 | #endif /* _ASM_PGTABLE_2LEVEL_H */ | 193 | #endif /* _ASM_PGTABLE_2LEVEL_H */ |
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5689c18c85f5..39c54cfa03e9 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h | |||
@@ -206,6 +206,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) | |||
206 | #define __HAVE_ARCH_PMD_WRITE | 206 | #define __HAVE_ARCH_PMD_WRITE |
207 | #define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) | 207 | #define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) |
208 | 208 | ||
209 | #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) | ||
210 | #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) | ||
211 | |||
209 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 212 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
210 | #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) | 213 | #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) |
211 | #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) | 214 | #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) |
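The pmd_hugewillfault()/pmd_thp_or_huge() pair added to both page-table flavours lets a fast user-access path decide, without taking page-table locks, whether a huge mapping would fault on write. A hedged kernel-context sketch of the intended use (the actual consumer, pin_page_for_write(), is not part of these hunks, and this is not compilable stand-alone):

    static int pin_would_fault(pmd_t *pmd)
    {
            pmd_t pmdval = *pmd;

            if (pmd_none(pmdval))
                    return 1;                          /* nothing mapped          */
            if (pmd_thp_or_huge(pmdval))               /* always 0 on 2-level     */
                    return pmd_hugewillfault(pmdval);  /* old or read-only entry  */
            return 0;                                  /* small pages: check ptes */
    }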
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 413f3876341c..c3d5fc124a05 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/hw_breakpoint.h> | 22 | #include <asm/hw_breakpoint.h> |
23 | #include <asm/ptrace.h> | 23 | #include <asm/ptrace.h> |
24 | #include <asm/types.h> | 24 | #include <asm/types.h> |
25 | #include <asm/unified.h> | ||
25 | 26 | ||
26 | #ifdef __KERNEL__ | 27 | #ifdef __KERNEL__ |
27 | #define STACK_TOP ((current->personality & ADDR_LIMIT_32BIT) ? \ | 28 | #define STACK_TOP ((current->personality & ADDR_LIMIT_32BIT) ? \ |
@@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p); | |||
87 | #define KSTK_EIP(tsk) task_pt_regs(tsk)->ARM_pc | 88 | #define KSTK_EIP(tsk) task_pt_regs(tsk)->ARM_pc |
88 | #define KSTK_ESP(tsk) task_pt_regs(tsk)->ARM_sp | 89 | #define KSTK_ESP(tsk) task_pt_regs(tsk)->ARM_sp |
89 | 90 | ||
91 | #ifdef CONFIG_SMP | ||
92 | #define __ALT_SMP_ASM(smp, up) \ | ||
93 | "9998: " smp "\n" \ | ||
94 | " .pushsection \".alt.smp.init\", \"a\"\n" \ | ||
95 | " .long 9998b\n" \ | ||
96 | " " up "\n" \ | ||
97 | " .popsection\n" | ||
98 | #else | ||
99 | #define __ALT_SMP_ASM(smp, up) up | ||
100 | #endif | ||
101 | |||
90 | /* | 102 | /* |
91 | * Prefetching support - only ARMv5. | 103 | * Prefetching support - only ARMv5. |
92 | */ | 104 | */ |
@@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr) | |||
97 | { | 109 | { |
98 | __asm__ __volatile__( | 110 | __asm__ __volatile__( |
99 | "pld\t%a0" | 111 | "pld\t%a0" |
100 | : | 112 | :: "p" (ptr)); |
101 | : "p" (ptr) | ||
102 | : "cc"); | ||
103 | } | 113 | } |
104 | 114 | ||
115 | #if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) | ||
105 | #define ARCH_HAS_PREFETCHW | 116 | #define ARCH_HAS_PREFETCHW |
106 | #define prefetchw(ptr) prefetch(ptr) | 117 | static inline void prefetchw(const void *ptr) |
107 | 118 | { | |
108 | #define ARCH_HAS_SPINLOCK_PREFETCH | 119 | __asm__ __volatile__( |
109 | #define spin_lock_prefetch(x) do { } while (0) | 120 | ".arch_extension mp\n" |
110 | 121 | __ALT_SMP_ASM( | |
122 | WASM(pldw) "\t%a0", | ||
123 | WASM(pld) "\t%a0" | ||
124 | ) | ||
125 | :: "p" (ptr)); | ||
126 | } | ||
127 | #endif | ||
111 | #endif | 128 | #endif |
112 | 129 | ||
113 | #define HAVE_ARCH_PICK_MMAP_LAYOUT | 130 | #define HAVE_ARCH_PICK_MMAP_LAYOUT |
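__ALT_SMP_ASM generalises the SMP-alternatives trick spinlock.h used privately: the SMP instruction is assembled in place and its address is recorded in .alt.smp.init next to the UP replacement, so a kernel booted on a uniprocessor can patch the site away. An illustration (not from the patch) of what __ALT_SMP_ASM("sev", "nop") expands to on an SMP build:

    static inline void sev_example(void)
    {
            __asm__ __volatile__(
                    "9998:  sev\n"                  /* SMP instruction, assembled in place */
                    "       .pushsection \".alt.smp.init\", \"a\"\n"
                    "       .long   9998b\n"        /* address of the site to patch */
                    "       nop\n"                  /* UP replacement written at boot */
                    "       .popsection\n");
    }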
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index c50f05609501..8d6a089dfb76 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h | |||
@@ -49,7 +49,7 @@ extern struct meminfo meminfo; | |||
49 | #define bank_phys_end(bank) ((bank)->start + (bank)->size) | 49 | #define bank_phys_end(bank) ((bank)->start + (bank)->size) |
50 | #define bank_phys_size(bank) (bank)->size | 50 | #define bank_phys_size(bank) (bank)->size |
51 | 51 | ||
52 | extern int arm_add_memory(phys_addr_t start, phys_addr_t size); | 52 | extern int arm_add_memory(u64 start, u64 size); |
53 | extern void early_print(const char *str, ...); | 53 | extern void early_print(const char *str, ...); |
54 | extern void dump_machine_table(void); | 54 | extern void dump_machine_table(void); |
55 | 55 | ||
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index a8cae71caceb..22a3b9b5d4a1 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h | |||
@@ -84,6 +84,8 @@ extern void arch_send_call_function_single_ipi(int cpu); | |||
84 | extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); | 84 | extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); |
85 | extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); | 85 | extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); |
86 | 86 | ||
87 | extern int register_ipi_completion(struct completion *completion, int cpu); | ||
88 | |||
87 | struct smp_operations { | 89 | struct smp_operations { |
88 | #ifdef CONFIG_SMP | 90 | #ifdef CONFIG_SMP |
89 | /* | 91 | /* |
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4f2c28060c9a..ef3c6072aa45 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h | |||
@@ -5,21 +5,13 @@ | |||
5 | #error SMP not supported on pre-ARMv6 CPUs | 5 | #error SMP not supported on pre-ARMv6 CPUs |
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | #include <asm/processor.h> | 8 | #include <linux/prefetch.h> |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K | 11 | * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K |
12 | * extensions, so when running on UP, we have to patch these instructions away. | 12 | * extensions, so when running on UP, we have to patch these instructions away. |
13 | */ | 13 | */ |
14 | #define ALT_SMP(smp, up) \ | ||
15 | "9998: " smp "\n" \ | ||
16 | " .pushsection \".alt.smp.init\", \"a\"\n" \ | ||
17 | " .long 9998b\n" \ | ||
18 | " " up "\n" \ | ||
19 | " .popsection\n" | ||
20 | |||
21 | #ifdef CONFIG_THUMB2_KERNEL | 14 | #ifdef CONFIG_THUMB2_KERNEL |
22 | #define SEV ALT_SMP("sev.w", "nop.w") | ||
23 | /* | 15 | /* |
24 | * For Thumb-2, special care is needed to ensure that the conditional WFE | 16 | * For Thumb-2, special care is needed to ensure that the conditional WFE |
25 | * instruction really does assemble to exactly 4 bytes (as required by | 17 | * instruction really does assemble to exactly 4 bytes (as required by |
@@ -31,17 +23,18 @@ | |||
31 | * the assembler won't change IT instructions which are explicitly present | 23 | * the assembler won't change IT instructions which are explicitly present |
32 | * in the input. | 24 | * in the input. |
33 | */ | 25 | */ |
34 | #define WFE(cond) ALT_SMP( \ | 26 | #define WFE(cond) __ALT_SMP_ASM( \ |
35 | "it " cond "\n\t" \ | 27 | "it " cond "\n\t" \ |
36 | "wfe" cond ".n", \ | 28 | "wfe" cond ".n", \ |
37 | \ | 29 | \ |
38 | "nop.w" \ | 30 | "nop.w" \ |
39 | ) | 31 | ) |
40 | #else | 32 | #else |
41 | #define SEV ALT_SMP("sev", "nop") | 33 | #define WFE(cond) __ALT_SMP_ASM("wfe" cond, "nop") |
42 | #define WFE(cond) ALT_SMP("wfe" cond, "nop") | ||
43 | #endif | 34 | #endif |
44 | 35 | ||
36 | #define SEV __ALT_SMP_ASM(WASM(sev), WASM(nop)) | ||
37 | |||
45 | static inline void dsb_sev(void) | 38 | static inline void dsb_sev(void) |
46 | { | 39 | { |
47 | #if __LINUX_ARM_ARCH__ >= 7 | 40 | #if __LINUX_ARM_ARCH__ >= 7 |
@@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
77 | u32 newval; | 70 | u32 newval; |
78 | arch_spinlock_t lockval; | 71 | arch_spinlock_t lockval; |
79 | 72 | ||
73 | prefetchw(&lock->slock); | ||
80 | __asm__ __volatile__( | 74 | __asm__ __volatile__( |
81 | "1: ldrex %0, [%3]\n" | 75 | "1: ldrex %0, [%3]\n" |
82 | " add %1, %0, %4\n" | 76 | " add %1, %0, %4\n" |
@@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) | |||
100 | unsigned long contended, res; | 94 | unsigned long contended, res; |
101 | u32 slock; | 95 | u32 slock; |
102 | 96 | ||
97 | prefetchw(&lock->slock); | ||
103 | do { | 98 | do { |
104 | __asm__ __volatile__( | 99 | __asm__ __volatile__( |
105 | " ldrex %0, [%3]\n" | 100 | " ldrex %0, [%3]\n" |
@@ -127,10 +122,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) | |||
127 | dsb_sev(); | 122 | dsb_sev(); |
128 | } | 123 | } |
129 | 124 | ||
125 | static inline int arch_spin_value_unlocked(arch_spinlock_t lock) | ||
126 | { | ||
127 | return lock.tickets.owner == lock.tickets.next; | ||
128 | } | ||
129 | |||
130 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 130 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
131 | { | 131 | { |
132 | struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); | 132 | return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); |
133 | return tickets.owner != tickets.next; | ||
134 | } | 133 | } |
135 | 134 | ||
136 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | 135 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
@@ -152,6 +151,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) | |||
152 | { | 151 | { |
153 | unsigned long tmp; | 152 | unsigned long tmp; |
154 | 153 | ||
154 | prefetchw(&rw->lock); | ||
155 | __asm__ __volatile__( | 155 | __asm__ __volatile__( |
156 | "1: ldrex %0, [%1]\n" | 156 | "1: ldrex %0, [%1]\n" |
157 | " teq %0, #0\n" | 157 | " teq %0, #0\n" |
@@ -170,6 +170,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) | |||
170 | { | 170 | { |
171 | unsigned long contended, res; | 171 | unsigned long contended, res; |
172 | 172 | ||
173 | prefetchw(&rw->lock); | ||
173 | do { | 174 | do { |
174 | __asm__ __volatile__( | 175 | __asm__ __volatile__( |
175 | " ldrex %0, [%2]\n" | 176 | " ldrex %0, [%2]\n" |
@@ -203,7 +204,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) | |||
203 | } | 204 | } |
204 | 205 | ||
205 | /* write_can_lock - would write_trylock() succeed? */ | 206 | /* write_can_lock - would write_trylock() succeed? */ |
206 | #define arch_write_can_lock(x) ((x)->lock == 0) | 207 | #define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) |
207 | 208 | ||
208 | /* | 209 | /* |
209 | * Read locks are a bit more hairy: | 210 | * Read locks are a bit more hairy: |
@@ -221,6 +222,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) | |||
221 | { | 222 | { |
222 | unsigned long tmp, tmp2; | 223 | unsigned long tmp, tmp2; |
223 | 224 | ||
225 | prefetchw(&rw->lock); | ||
224 | __asm__ __volatile__( | 226 | __asm__ __volatile__( |
225 | "1: ldrex %0, [%2]\n" | 227 | "1: ldrex %0, [%2]\n" |
226 | " adds %0, %0, #1\n" | 228 | " adds %0, %0, #1\n" |
@@ -241,6 +243,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) | |||
241 | 243 | ||
242 | smp_mb(); | 244 | smp_mb(); |
243 | 245 | ||
246 | prefetchw(&rw->lock); | ||
244 | __asm__ __volatile__( | 247 | __asm__ __volatile__( |
245 | "1: ldrex %0, [%2]\n" | 248 | "1: ldrex %0, [%2]\n" |
246 | " sub %0, %0, #1\n" | 249 | " sub %0, %0, #1\n" |
@@ -259,6 +262,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) | |||
259 | { | 262 | { |
260 | unsigned long contended, res; | 263 | unsigned long contended, res; |
261 | 264 | ||
265 | prefetchw(&rw->lock); | ||
262 | do { | 266 | do { |
263 | __asm__ __volatile__( | 267 | __asm__ __volatile__( |
264 | " ldrex %0, [%2]\n" | 268 | " ldrex %0, [%2]\n" |
@@ -280,7 +284,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) | |||
280 | } | 284 | } |
281 | 285 | ||
282 | /* read_can_lock - would read_trylock() succeed? */ | 286 | /* read_can_lock - would read_trylock() succeed? */ |
283 | #define arch_read_can_lock(x) ((x)->lock < 0x80000000) | 287 | #define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) |
284 | 288 | ||
285 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) | 289 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) |
286 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) | 290 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) |
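arch_spin_value_unlocked() takes a lock value rather than a pointer, so a caller holding only a snapshot (lockless users such as the cmpxchg-based lockref code) can test it without touching the cacheline again. A stand-alone C model of the ticket state it inspects (field layout assumed from the owner/next accesses above):

    #include <stdint.h>

    struct tickets { uint16_t owner, next; };   /* mirrors arch_spinlock_t */

    /* Free when every ticket handed out has been served. */
    static int model_value_unlocked(struct tickets t)
    {
            return t.owner == t.next;
    }

    static int model_is_locked(struct tickets t)
    {
            return !model_value_unlocked(t);
    }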
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index b262d2f8b478..47663fcb10ad 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h | |||
@@ -25,7 +25,7 @@ typedef struct { | |||
25 | #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } | 25 | #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } |
26 | 26 | ||
27 | typedef struct { | 27 | typedef struct { |
28 | volatile unsigned int lock; | 28 | u32 lock; |
29 | } arch_rwlock_t; | 29 | } arch_rwlock_t; |
30 | 30 | ||
31 | #define __ARCH_RW_LOCK_UNLOCKED { 0 } | 31 | #define __ARCH_RW_LOCK_UNLOCKED { 0 } |
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 38960264040c..def9e570199f 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h | |||
@@ -560,37 +560,6 @@ static inline void __flush_bp_all(void) | |||
560 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); | 560 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); |
561 | } | 561 | } |
562 | 562 | ||
563 | #include <asm/cputype.h> | ||
564 | #ifdef CONFIG_ARM_ERRATA_798181 | ||
565 | static inline int erratum_a15_798181(void) | ||
566 | { | ||
567 | unsigned int midr = read_cpuid_id(); | ||
568 | |||
569 | /* Cortex-A15 r0p0..r3p2 affected */ | ||
570 | if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) | ||
571 | return 0; | ||
572 | return 1; | ||
573 | } | ||
574 | |||
575 | static inline void dummy_flush_tlb_a15_erratum(void) | ||
576 | { | ||
577 | /* | ||
578 | * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. | ||
579 | */ | ||
580 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); | ||
581 | dsb(ish); | ||
582 | } | ||
583 | #else | ||
584 | static inline int erratum_a15_798181(void) | ||
585 | { | ||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | static inline void dummy_flush_tlb_a15_erratum(void) | ||
590 | { | ||
591 | } | ||
592 | #endif | ||
593 | |||
594 | /* | 563 | /* |
595 | * flush_pmd_entry | 564 | * flush_pmd_entry |
596 | * | 565 | * |
@@ -697,4 +666,21 @@ extern void flush_bp_all(void); | |||
697 | 666 | ||
698 | #endif | 667 | #endif |
699 | 668 | ||
669 | #ifndef __ASSEMBLY__ | ||
670 | #ifdef CONFIG_ARM_ERRATA_798181 | ||
671 | extern void erratum_a15_798181_init(void); | ||
672 | #else | ||
673 | static inline void erratum_a15_798181_init(void) {} | ||
674 | #endif | ||
675 | extern bool (*erratum_a15_798181_handler)(void); | ||
676 | |||
677 | static inline bool erratum_a15_798181(void) | ||
678 | { | ||
679 | if (unlikely(IS_ENABLED(CONFIG_ARM_ERRATA_798181) && | ||
680 | erratum_a15_798181_handler)) | ||
681 | return erratum_a15_798181_handler(); | ||
682 | return false; | ||
683 | } | ||
684 | #endif | ||
685 | |||
700 | #endif | 686 | #endif |
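The erratum test is now a function-pointer check instead of a static inline that read the MIDR on every call, so the check stays cheap on unaffected CPUs and the workaround logic can live out of line. A hedged sketch of the registration side, which lives outside this header (my_tlb_flush_workaround is hypothetical; the MIDR test is the one the removed inline used):

    bool (*erratum_a15_798181_handler)(void);

    static bool cpu_affected(void)
    {
            unsigned int midr = read_cpuid_id();

            /* Cortex-A15 r0p0..r3p2 affected */
            return (midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2;
    }

    void __init erratum_a15_798181_init(void)
    {
            if (cpu_affected())
                    erratum_a15_798181_handler = my_tlb_flush_workaround;
    }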
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index f5989f46b4d2..b88beaba6b4a 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h | |||
@@ -38,6 +38,8 @@ | |||
38 | #ifdef __ASSEMBLY__ | 38 | #ifdef __ASSEMBLY__ |
39 | #define W(instr) instr.w | 39 | #define W(instr) instr.w |
40 | #define BSYM(sym) sym + 1 | 40 | #define BSYM(sym) sym + 1 |
41 | #else | ||
42 | #define WASM(instr) #instr ".w" | ||
41 | #endif | 43 | #endif |
42 | 44 | ||
43 | #else /* !CONFIG_THUMB2_KERNEL */ | 45 | #else /* !CONFIG_THUMB2_KERNEL */ |
@@ -50,6 +52,8 @@ | |||
50 | #ifdef __ASSEMBLY__ | 52 | #ifdef __ASSEMBLY__ |
51 | #define W(instr) instr | 53 | #define W(instr) instr |
52 | #define BSYM(sym) sym | 54 | #define BSYM(sym) sym |
55 | #else | ||
56 | #define WASM(instr) #instr | ||
53 | #endif | 57 | #endif |
54 | 58 | ||
55 | #endif /* CONFIG_THUMB2_KERNEL */ | 59 | #endif /* CONFIG_THUMB2_KERNEL */ |
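WASM is the C-string counterpart of the assembler-side W() macro: it stringifies an instruction for inline asm and appends .w on Thumb-2 kernels, so the emitted encoding is always 4 bytes wide. Illustratively:

    /* Thumb-2 kernel:  WASM(sev) -> "sev" ".w"  (assembles as sev.w)
     * ARM kernel:      WASM(sev) -> "sev"
     * which is how spinlock.h now builds its SEV definition:
     */
    #define SEV __ALT_SMP_ASM(WASM(sev), WASM(nop))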
diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S new file mode 100644 index 000000000000..2265a199280c --- /dev/null +++ b/arch/arm/include/debug/efm32.S | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2013 Pengutronix | ||
3 | * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #define UARTn_CMD 0x000c | ||
11 | #define UARTn_CMD_TXEN 0x0004 | ||
12 | |||
13 | #define UARTn_STATUS 0x0010 | ||
14 | #define UARTn_STATUS_TXC 0x0020 | ||
15 | #define UARTn_STATUS_TXBL 0x0040 | ||
16 | |||
17 | #define UARTn_TXDATA 0x0034 | ||
18 | |||
19 | .macro addruart, rx, tmp | ||
20 | ldr \rx, =(CONFIG_DEBUG_UART_PHYS) | ||
21 | |||
22 | /* | ||
23 | * Enable TX. The driver might disable it to save energy, and we | ||
24 | * don't bother disabling it at the end: while debugging, power | ||
25 | * consumption isn't that important. | ||
26 | */ | ||
27 | ldr \tmp, =(UARTn_CMD_TXEN) | ||
28 | str \tmp, [\rx, #UARTn_CMD] | ||
29 | .endm | ||
30 | |||
31 | .macro senduart,rd,rx | ||
32 | strb \rd, [\rx, #UARTn_TXDATA] | ||
33 | .endm | ||
34 | |||
35 | .macro waituart,rd,rx | ||
36 | 1001: ldr \rd, [\rx, #UARTn_STATUS] | ||
37 | tst \rd, #UARTn_STATUS_TXBL | ||
38 | beq 1001b | ||
39 | .endm | ||
40 | |||
41 | .macro busyuart,rd,rx | ||
42 | 1001: ldr \rd, [\rx, #UARTn_STATUS] | ||
43 | tst \rd, #UARTn_STATUS_TXC | ||
44 | bne 1001b | ||
45 | .endm | ||
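A C model of the polled output path these macros implement (illustration only; the offsets are the UARTn_* values defined at the top of the file and the base address comes from CONFIG_DEBUG_UART_PHYS):

    #include <stdint.h>

    #define UARTn_CMD           0x000c
    #define UARTn_CMD_TXEN      0x0004
    #define UARTn_STATUS        0x0010
    #define UARTn_STATUS_TXBL   0x0040
    #define UARTn_TXDATA        0x0034

    static void efm32_debug_putc(volatile uint32_t *uart, char c)
    {
            uart[UARTn_CMD / 4] = UARTn_CMD_TXEN;       /* addruart: force TX on     */
            while (!(uart[UARTn_STATUS / 4] & UARTn_STATUS_TXBL))
                    ;                                   /* waituart: room in TX FIFO */
            uart[UARTn_TXDATA / 4] = (uint8_t)c;        /* senduart                  */
    }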
diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S index 9166e1bc470e..9d653d475903 100644 --- a/arch/arm/include/debug/msm.S +++ b/arch/arm/include/debug/msm.S | |||
@@ -46,6 +46,11 @@ | |||
46 | #define MSM_DEBUG_UART_PHYS 0x16440000 | 46 | #define MSM_DEBUG_UART_PHYS 0x16440000 |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #ifdef CONFIG_DEBUG_MSM8974_UART | ||
50 | #define MSM_DEBUG_UART_BASE 0xFA71E000 | ||
51 | #define MSM_DEBUG_UART_PHYS 0xF991E000 | ||
52 | #endif | ||
53 | |||
49 | .macro addruart, rp, rv, tmp | 54 | .macro addruart, rp, rv, tmp |
50 | #ifdef MSM_DEBUG_UART_PHYS | 55 | #ifdef MSM_DEBUG_UART_PHYS |
51 | ldr \rp, =MSM_DEBUG_UART_PHYS | 56 | ldr \rp, =MSM_DEBUG_UART_PHYS |
diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S index 37c6895b87e6..92ef808a2337 100644 --- a/arch/arm/include/debug/pl01x.S +++ b/arch/arm/include/debug/pl01x.S | |||
@@ -25,12 +25,14 @@ | |||
25 | 25 | ||
26 | .macro waituart,rd,rx | 26 | .macro waituart,rd,rx |
27 | 1001: ldr \rd, [\rx, #UART01x_FR] | 27 | 1001: ldr \rd, [\rx, #UART01x_FR] |
28 | ARM_BE8( rev \rd, \rd ) | ||
28 | tst \rd, #UART01x_FR_TXFF | 29 | tst \rd, #UART01x_FR_TXFF |
29 | bne 1001b | 30 | bne 1001b |
30 | .endm | 31 | .endm |
31 | 32 | ||
32 | .macro busyuart,rd,rx | 33 | .macro busyuart,rd,rx |
33 | 1001: ldr \rd, [\rx, #UART01x_FR] | 34 | 1001: ldr \rd, [\rx, #UART01x_FR] |
35 | ARM_BE8( rev \rd, \rd ) | ||
34 | tst \rd, #UART01x_FR_BUSY | 36 | tst \rd, #UART01x_FR_BUSY |
35 | bne 1001b | 37 | bne 1001b |
36 | .endm | 38 | .endm |
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 18d76fd5a2af..70a1c9da30ca 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild | |||
@@ -7,6 +7,7 @@ header-y += hwcap.h | |||
7 | header-y += ioctls.h | 7 | header-y += ioctls.h |
8 | header-y += kvm_para.h | 8 | header-y += kvm_para.h |
9 | header-y += mman.h | 9 | header-y += mman.h |
10 | header-y += perf_regs.h | ||
10 | header-y += posix_types.h | 11 | header-y += posix_types.h |
11 | header-y += ptrace.h | 12 | header-y += ptrace.h |
12 | header-y += setup.h | 13 | header-y += setup.h |
diff --git a/arch/arm/include/uapi/asm/perf_regs.h b/arch/arm/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..ce59448458b2 --- /dev/null +++ b/arch/arm/include/uapi/asm/perf_regs.h | |||
@@ -0,0 +1,23 @@ | |||
1 | #ifndef _ASM_ARM_PERF_REGS_H | ||
2 | #define _ASM_ARM_PERF_REGS_H | ||
3 | |||
4 | enum perf_event_arm_regs { | ||
5 | PERF_REG_ARM_R0, | ||
6 | PERF_REG_ARM_R1, | ||
7 | PERF_REG_ARM_R2, | ||
8 | PERF_REG_ARM_R3, | ||
9 | PERF_REG_ARM_R4, | ||
10 | PERF_REG_ARM_R5, | ||
11 | PERF_REG_ARM_R6, | ||
12 | PERF_REG_ARM_R7, | ||
13 | PERF_REG_ARM_R8, | ||
14 | PERF_REG_ARM_R9, | ||
15 | PERF_REG_ARM_R10, | ||
16 | PERF_REG_ARM_FP, | ||
17 | PERF_REG_ARM_IP, | ||
18 | PERF_REG_ARM_SP, | ||
19 | PERF_REG_ARM_LR, | ||
20 | PERF_REG_ARM_PC, | ||
21 | PERF_REG_ARM_MAX, | ||
22 | }; | ||
23 | #endif /* _ASM_ARM_PERF_REGS_H */ | ||
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5140df5f23aa..a30fc9be9e9e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile | |||
@@ -17,7 +17,8 @@ CFLAGS_REMOVE_return_address.o = -pg | |||
17 | 17 | ||
18 | obj-y := elf.o entry-common.o irq.o opcodes.o \ | 18 | obj-y := elf.o entry-common.o irq.o opcodes.o \ |
19 | process.o ptrace.o return_address.o \ | 19 | process.o ptrace.o return_address.o \ |
20 | setup.o signal.o stacktrace.o sys_arm.o time.o traps.o | 20 | setup.o signal.o sigreturn_codes.o \ |
21 | stacktrace.o sys_arm.o time.o traps.o | ||
21 | 22 | ||
22 | obj-$(CONFIG_ATAGS) += atags_parse.o | 23 | obj-$(CONFIG_ATAGS) += atags_parse.o |
23 | obj-$(CONFIG_ATAGS_PROC) += atags_proc.o | 24 | obj-$(CONFIG_ATAGS_PROC) += atags_proc.o |
@@ -78,6 +79,7 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o | |||
78 | obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o | 79 | obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o |
79 | obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o | 80 | obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o |
80 | obj-$(CONFIG_IWMMXT) += iwmmxt.o | 81 | obj-$(CONFIG_IWMMXT) += iwmmxt.o |
82 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | ||
81 | obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o | 83 | obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o |
82 | AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt | 84 | AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt |
83 | obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o | 85 | obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o |
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 60d3b738d420..1f031ddd0667 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c | |||
@@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc); | |||
155 | 155 | ||
156 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT | 156 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT |
157 | EXPORT_SYMBOL(__pv_phys_offset); | 157 | EXPORT_SYMBOL(__pv_phys_offset); |
158 | EXPORT_SYMBOL(__pv_offset); | ||
158 | #endif | 159 | #endif |
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 9cbe70c8b0ef..b3fb8c9e1ff2 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S | |||
@@ -192,6 +192,7 @@ __dabt_svc: | |||
192 | svc_entry | 192 | svc_entry |
193 | mov r2, sp | 193 | mov r2, sp |
194 | dabt_helper | 194 | dabt_helper |
195 | THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR | ||
195 | svc_exit r5 @ return from exception | 196 | svc_exit r5 @ return from exception |
196 | UNWIND(.fnend ) | 197 | UNWIND(.fnend ) |
197 | ENDPROC(__dabt_svc) | 198 | ENDPROC(__dabt_svc) |
@@ -416,9 +417,8 @@ __und_usr: | |||
416 | bne __und_usr_thumb | 417 | bne __und_usr_thumb |
417 | sub r4, r2, #4 @ ARM instr at LR - 4 | 418 | sub r4, r2, #4 @ ARM instr at LR - 4 |
418 | 1: ldrt r0, [r4] | 419 | 1: ldrt r0, [r4] |
419 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 420 | ARM_BE8(rev r0, r0) @ little endian instruction |
420 | rev r0, r0 @ little endian instruction | 421 | |
421 | #endif | ||
422 | @ r0 = 32-bit ARM instruction which caused the exception | 422 | @ r0 = 32-bit ARM instruction which caused the exception |
423 | @ r2 = PC value for the following instruction (:= regs->ARM_pc) | 423 | @ r2 = PC value for the following instruction (:= regs->ARM_pc) |
424 | @ r4 = PC value for the faulting instruction | 424 | @ r4 = PC value for the faulting instruction |
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index bc6bd9683ba4..a2dcafdf1bc8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S | |||
@@ -393,9 +393,7 @@ ENTRY(vector_swi) | |||
393 | #else | 393 | #else |
394 | USER( ldr r10, [lr, #-4] ) @ get SWI instruction | 394 | USER( ldr r10, [lr, #-4] ) @ get SWI instruction |
395 | #endif | 395 | #endif |
396 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 396 | ARM_BE8(rev r10, r10) @ little endian instruction |
397 | rev r10, r10 @ little endian instruction | ||
398 | #endif | ||
399 | 397 | ||
400 | #elif defined(CONFIG_AEABI) | 398 | #elif defined(CONFIG_AEABI) |
401 | 399 | ||
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 476de57dcef2..7801866e626a 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S | |||
@@ -77,6 +77,7 @@ | |||
77 | 77 | ||
78 | __HEAD | 78 | __HEAD |
79 | ENTRY(stext) | 79 | ENTRY(stext) |
80 | ARM_BE8(setend be ) @ ensure we are in BE8 mode | ||
80 | 81 | ||
81 | THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. | 82 | THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. |
82 | THUMB( bx r9 ) @ If this is a Thumb-2 kernel, | 83 | THUMB( bx r9 ) @ If this is a Thumb-2 kernel, |
@@ -352,6 +353,9 @@ ENTRY(secondary_startup) | |||
352 | * the processor type - there is no need to check the machine type | 353 | * the processor type - there is no need to check the machine type |
353 | * as it has already been validated by the primary processor. | 354 | * as it has already been validated by the primary processor. |
354 | */ | 355 | */ |
356 | |||
357 | ARM_BE8(setend be) @ ensure we are in BE8 mode | ||
358 | |||
355 | #ifdef CONFIG_ARM_VIRT_EXT | 359 | #ifdef CONFIG_ARM_VIRT_EXT |
356 | bl __hyp_stub_install_secondary | 360 | bl __hyp_stub_install_secondary |
357 | #endif | 361 | #endif |
@@ -555,6 +559,14 @@ ENTRY(fixup_smp) | |||
555 | ldmfd sp!, {r4 - r6, pc} | 559 | ldmfd sp!, {r4 - r6, pc} |
556 | ENDPROC(fixup_smp) | 560 | ENDPROC(fixup_smp) |
557 | 561 | ||
562 | #ifdef __ARMEB__ | ||
563 | #define LOW_OFFSET 0x4 | ||
564 | #define HIGH_OFFSET 0x0 | ||
565 | #else | ||
566 | #define LOW_OFFSET 0x0 | ||
567 | #define HIGH_OFFSET 0x4 | ||
568 | #endif | ||
569 | |||
558 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT | 570 | #ifdef CONFIG_ARM_PATCH_PHYS_VIRT |
559 | 571 | ||
560 | /* __fixup_pv_table - patch the stub instructions with the delta between | 572 | /* __fixup_pv_table - patch the stub instructions with the delta between |
@@ -565,17 +577,20 @@ ENDPROC(fixup_smp) | |||
565 | __HEAD | 577 | __HEAD |
566 | __fixup_pv_table: | 578 | __fixup_pv_table: |
567 | adr r0, 1f | 579 | adr r0, 1f |
568 | ldmia r0, {r3-r5, r7} | 580 | ldmia r0, {r3-r7} |
569 | sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET | 581 | mvn ip, #0 |
582 | subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET | ||
570 | add r4, r4, r3 @ adjust table start address | 583 | add r4, r4, r3 @ adjust table start address |
571 | add r5, r5, r3 @ adjust table end address | 584 | add r5, r5, r3 @ adjust table end address |
572 | add r7, r7, r3 @ adjust __pv_phys_offset address | 585 | add r6, r6, r3 @ adjust __pv_phys_offset address |
573 | str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset | 586 | add r7, r7, r3 @ adjust __pv_offset address |
587 | str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset | ||
588 | strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits | ||
574 | mov r6, r3, lsr #24 @ constant for add/sub instructions | 589 | mov r6, r3, lsr #24 @ constant for add/sub instructions |
575 | teq r3, r6, lsl #24 @ must be 16MiB aligned | 590 | teq r3, r6, lsl #24 @ must be 16MiB aligned |
576 | THUMB( it ne @ cross section branch ) | 591 | THUMB( it ne @ cross section branch ) |
577 | bne __error | 592 | bne __error |
578 | str r6, [r7, #4] @ save to __pv_offset | 593 | str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits |
579 | b __fixup_a_pv_table | 594 | b __fixup_a_pv_table |
580 | ENDPROC(__fixup_pv_table) | 595 | ENDPROC(__fixup_pv_table) |
581 | 596 | ||
@@ -584,10 +599,19 @@ ENDPROC(__fixup_pv_table) | |||
584 | .long __pv_table_begin | 599 | .long __pv_table_begin |
585 | .long __pv_table_end | 600 | .long __pv_table_end |
586 | 2: .long __pv_phys_offset | 601 | 2: .long __pv_phys_offset |
602 | .long __pv_offset | ||
587 | 603 | ||
588 | .text | 604 | .text |
589 | __fixup_a_pv_table: | 605 | __fixup_a_pv_table: |
606 | adr r0, 3f | ||
607 | ldr r6, [r0] | ||
608 | add r6, r6, r3 | ||
609 | ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word | ||
610 | ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word | ||
611 | mov r6, r6, lsr #24 | ||
612 | cmn r0, #1 | ||
590 | #ifdef CONFIG_THUMB2_KERNEL | 613 | #ifdef CONFIG_THUMB2_KERNEL |
614 | moveq r0, #0x200000 @ set bit 21, mov to mvn instruction | ||
591 | lsls r6, #24 | 615 | lsls r6, #24 |
592 | beq 2f | 616 | beq 2f |
593 | clz r7, r6 | 617 | clz r7, r6 |
@@ -601,18 +625,42 @@ __fixup_a_pv_table: | |||
601 | b 2f | 625 | b 2f |
602 | 1: add r7, r3 | 626 | 1: add r7, r3 |
603 | ldrh ip, [r7, #2] | 627 | ldrh ip, [r7, #2] |
604 | and ip, 0x8f00 | 628 | ARM_BE8(rev16 ip, ip) |
605 | orr ip, r6 @ mask in offset bits 31-24 | 629 | tst ip, #0x4000 |
630 | and ip, #0x8f00 | ||
631 | orrne ip, r6 @ mask in offset bits 31-24 | ||
632 | orreq ip, r0 @ mask in offset bits 7-0 | ||
633 | ARM_BE8(rev16 ip, ip) | ||
606 | strh ip, [r7, #2] | 634 | strh ip, [r7, #2] |
635 | bne 2f | ||
636 | ldrh ip, [r7] | ||
637 | ARM_BE8(rev16 ip, ip) | ||
638 | bic ip, #0x20 | ||
639 | orr ip, ip, r0, lsr #16 | ||
640 | ARM_BE8(rev16 ip, ip) | ||
641 | strh ip, [r7] | ||
607 | 2: cmp r4, r5 | 642 | 2: cmp r4, r5 |
608 | ldrcc r7, [r4], #4 @ use branch for delay slot | 643 | ldrcc r7, [r4], #4 @ use branch for delay slot |
609 | bcc 1b | 644 | bcc 1b |
610 | bx lr | 645 | bx lr |
611 | #else | 646 | #else |
647 | moveq r0, #0x400000 @ set bit 22, mov to mvn instruction | ||
612 | b 2f | 648 | b 2f |
613 | 1: ldr ip, [r7, r3] | 649 | 1: ldr ip, [r7, r3] |
650 | #ifdef CONFIG_CPU_ENDIAN_BE8 | ||
651 | @ in BE8, we load data in BE, but instructions are still in LE | ||
652 | bic ip, ip, #0xff000000 | ||
653 | tst ip, #0x000f0000 @ check the rotation field | ||
654 | orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 | ||
655 | biceq ip, ip, #0x00004000 @ clear bit 22 | ||
656 | orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0 | ||
657 | #else | ||
614 | bic ip, ip, #0x000000ff | 658 | bic ip, ip, #0x000000ff |
615 | orr ip, ip, r6 @ mask in offset bits 31-24 | 659 | tst ip, #0xf00 @ check the rotation field |
660 | orrne ip, ip, r6 @ mask in offset bits 31-24 | ||
661 | biceq ip, ip, #0x400000 @ clear bit 22 | ||
662 | orreq ip, ip, r0 @ mask in offset bits 7-0 | ||
663 | #endif | ||
616 | str ip, [r7, r3] | 664 | str ip, [r7, r3] |
617 | 2: cmp r4, r5 | 665 | 2: cmp r4, r5 |
618 | ldrcc r7, [r4], #4 @ use branch for delay slot | 666 | ldrcc r7, [r4], #4 @ use branch for delay slot |
@@ -621,28 +669,30 @@ __fixup_a_pv_table: | |||
621 | #endif | 669 | #endif |
622 | ENDPROC(__fixup_a_pv_table) | 670 | ENDPROC(__fixup_a_pv_table) |
623 | 671 | ||
672 | .align | ||
673 | 3: .long __pv_offset | ||
674 | |||
624 | ENTRY(fixup_pv_table) | 675 | ENTRY(fixup_pv_table) |
625 | stmfd sp!, {r4 - r7, lr} | 676 | stmfd sp!, {r4 - r7, lr} |
626 | ldr r2, 2f @ get address of __pv_phys_offset | ||
627 | mov r3, #0 @ no offset | 677 | mov r3, #0 @ no offset |
628 | mov r4, r0 @ r0 = table start | 678 | mov r4, r0 @ r0 = table start |
629 | add r5, r0, r1 @ r1 = table size | 679 | add r5, r0, r1 @ r1 = table size |
630 | ldr r6, [r2, #4] @ get __pv_offset | ||
631 | bl __fixup_a_pv_table | 680 | bl __fixup_a_pv_table |
632 | ldmfd sp!, {r4 - r7, pc} | 681 | ldmfd sp!, {r4 - r7, pc} |
633 | ENDPROC(fixup_pv_table) | 682 | ENDPROC(fixup_pv_table) |
634 | 683 | ||
635 | .align | ||
636 | 2: .long __pv_phys_offset | ||
637 | |||
638 | .data | 684 | .data |
639 | .globl __pv_phys_offset | 685 | .globl __pv_phys_offset |
640 | .type __pv_phys_offset, %object | 686 | .type __pv_phys_offset, %object |
641 | __pv_phys_offset: | 687 | __pv_phys_offset: |
642 | .long 0 | 688 | .quad 0 |
643 | .size __pv_phys_offset, . - __pv_phys_offset | 689 | .size __pv_phys_offset, . -__pv_phys_offset |
690 | |||
691 | .globl __pv_offset | ||
692 | .type __pv_offset, %object | ||
644 | __pv_offset: | 693 | __pv_offset: |
645 | .long 0 | 694 | .quad 0 |
695 | .size __pv_offset, . -__pv_offset | ||
646 | #endif | 696 | #endif |
647 | 697 | ||
648 | #include "head-common.S" | 698 | #include "head-common.S" |
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7b95de601357..3d446605cbf8 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c | |||
@@ -344,13 +344,13 @@ int arch_install_hw_breakpoint(struct perf_event *bp) | |||
344 | /* Breakpoint */ | 344 | /* Breakpoint */ |
345 | ctrl_base = ARM_BASE_BCR; | 345 | ctrl_base = ARM_BASE_BCR; |
346 | val_base = ARM_BASE_BVR; | 346 | val_base = ARM_BASE_BVR; |
347 | slots = (struct perf_event **)__get_cpu_var(bp_on_reg); | 347 | slots = this_cpu_ptr(bp_on_reg); |
348 | max_slots = core_num_brps; | 348 | max_slots = core_num_brps; |
349 | } else { | 349 | } else { |
350 | /* Watchpoint */ | 350 | /* Watchpoint */ |
351 | ctrl_base = ARM_BASE_WCR; | 351 | ctrl_base = ARM_BASE_WCR; |
352 | val_base = ARM_BASE_WVR; | 352 | val_base = ARM_BASE_WVR; |
353 | slots = (struct perf_event **)__get_cpu_var(wp_on_reg); | 353 | slots = this_cpu_ptr(wp_on_reg); |
354 | max_slots = core_num_wrps; | 354 | max_slots = core_num_wrps; |
355 | } | 355 | } |
356 | 356 | ||
@@ -396,12 +396,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) | |||
396 | if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { | 396 | if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { |
397 | /* Breakpoint */ | 397 | /* Breakpoint */ |
398 | base = ARM_BASE_BCR; | 398 | base = ARM_BASE_BCR; |
399 | slots = (struct perf_event **)__get_cpu_var(bp_on_reg); | 399 | slots = this_cpu_ptr(bp_on_reg); |
400 | max_slots = core_num_brps; | 400 | max_slots = core_num_brps; |
401 | } else { | 401 | } else { |
402 | /* Watchpoint */ | 402 | /* Watchpoint */ |
403 | base = ARM_BASE_WCR; | 403 | base = ARM_BASE_WCR; |
404 | slots = (struct perf_event **)__get_cpu_var(wp_on_reg); | 404 | slots = this_cpu_ptr(wp_on_reg); |
405 | max_slots = core_num_wrps; | 405 | max_slots = core_num_wrps; |
406 | } | 406 | } |
407 | 407 | ||
@@ -697,7 +697,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, | |||
697 | struct arch_hw_breakpoint *info; | 697 | struct arch_hw_breakpoint *info; |
698 | struct arch_hw_breakpoint_ctrl ctrl; | 698 | struct arch_hw_breakpoint_ctrl ctrl; |
699 | 699 | ||
700 | slots = (struct perf_event **)__get_cpu_var(wp_on_reg); | 700 | slots = this_cpu_ptr(wp_on_reg); |
701 | 701 | ||
702 | for (i = 0; i < core_num_wrps; ++i) { | 702 | for (i = 0; i < core_num_wrps; ++i) { |
703 | rcu_read_lock(); | 703 | rcu_read_lock(); |
@@ -768,7 +768,7 @@ static void watchpoint_single_step_handler(unsigned long pc) | |||
768 | struct perf_event *wp, **slots; | 768 | struct perf_event *wp, **slots; |
769 | struct arch_hw_breakpoint *info; | 769 | struct arch_hw_breakpoint *info; |
770 | 770 | ||
771 | slots = (struct perf_event **)__get_cpu_var(wp_on_reg); | 771 | slots = this_cpu_ptr(wp_on_reg); |
772 | 772 | ||
773 | for (i = 0; i < core_num_wrps; ++i) { | 773 | for (i = 0; i < core_num_wrps; ++i) { |
774 | rcu_read_lock(); | 774 | rcu_read_lock(); |
@@ -802,7 +802,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) | |||
802 | struct arch_hw_breakpoint *info; | 802 | struct arch_hw_breakpoint *info; |
803 | struct arch_hw_breakpoint_ctrl ctrl; | 803 | struct arch_hw_breakpoint_ctrl ctrl; |
804 | 804 | ||
805 | slots = (struct perf_event **)__get_cpu_var(bp_on_reg); | 805 | slots = this_cpu_ptr(bp_on_reg); |
806 | 806 | ||
807 | /* The exception entry code places the amended lr in the PC. */ | 807 | /* The exception entry code places the amended lr in the PC. */ |
808 | addr = regs->ARM_pc; | 808 | addr = regs->ARM_pc; |
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 170e9f34003f..a7b621ece23d 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c | |||
@@ -171,13 +171,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | |||
171 | 171 | ||
172 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | 172 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) |
173 | { | 173 | { |
174 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | 174 | __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); |
175 | kcb->kprobe_status = kcb->prev_kprobe.status; | 175 | kcb->kprobe_status = kcb->prev_kprobe.status; |
176 | } | 176 | } |
177 | 177 | ||
178 | static void __kprobes set_current_kprobe(struct kprobe *p) | 178 | static void __kprobes set_current_kprobe(struct kprobe *p) |
179 | { | 179 | { |
180 | __get_cpu_var(current_kprobe) = p; | 180 | __this_cpu_write(current_kprobe, p); |
181 | } | 181 | } |
182 | 182 | ||
183 | static void __kprobes | 183 | static void __kprobes |
@@ -421,10 +421,10 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
421 | continue; | 421 | continue; |
422 | 422 | ||
423 | if (ri->rp && ri->rp->handler) { | 423 | if (ri->rp && ri->rp->handler) { |
424 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | 424 | __this_cpu_write(current_kprobe, &ri->rp->kp); |
425 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | 425 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; |
426 | ri->rp->handler(ri, regs); | 426 | ri->rp->handler(ri, regs); |
427 | __get_cpu_var(current_kprobe) = NULL; | 427 | __this_cpu_write(current_kprobe, NULL); |
428 | } | 428 | } |
429 | 429 | ||
430 | orig_ret_address = (unsigned long)ri->ret_addr; | 430 | orig_ret_address = (unsigned long)ri->ret_addr; |
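The hw_breakpoint and kprobes hunks are the same mechanical conversion: the deprecated __get_cpu_var() lvalue macro is replaced by the dedicated this-CPU operations. The pattern, as a kernel-context sketch (curr is an illustrative variable, not from the patch):

    static DEFINE_PER_CPU(struct kprobe *, curr);

    static void pattern(struct kprobe *p)
    {
            struct kprobe **slot;

            __this_cpu_write(curr, p);        /* was: __get_cpu_var(curr) = p; */
            slot = this_cpu_ptr(&curr);       /* was: &__get_cpu_var(curr)     */
    }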
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index c9dfff3b8008..45e478157278 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/sections.h> | 24 | #include <asm/sections.h> |
25 | #include <asm/smp_plat.h> | 25 | #include <asm/smp_plat.h> |
26 | #include <asm/unwind.h> | 26 | #include <asm/unwind.h> |
27 | #include <asm/opcodes.h> | ||
27 | 28 | ||
28 | #ifdef CONFIG_XIP_KERNEL | 29 | #ifdef CONFIG_XIP_KERNEL |
29 | /* | 30 | /* |
@@ -60,6 +61,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
60 | Elf32_Sym *sym; | 61 | Elf32_Sym *sym; |
61 | const char *symname; | 62 | const char *symname; |
62 | s32 offset; | 63 | s32 offset; |
64 | u32 tmp; | ||
63 | #ifdef CONFIG_THUMB2_KERNEL | 65 | #ifdef CONFIG_THUMB2_KERNEL |
64 | u32 upper, lower, sign, j1, j2; | 66 | u32 upper, lower, sign, j1, j2; |
65 | #endif | 67 | #endif |
@@ -95,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
95 | case R_ARM_PC24: | 97 | case R_ARM_PC24: |
96 | case R_ARM_CALL: | 98 | case R_ARM_CALL: |
97 | case R_ARM_JUMP24: | 99 | case R_ARM_JUMP24: |
98 | offset = (*(u32 *)loc & 0x00ffffff) << 2; | 100 | offset = __mem_to_opcode_arm(*(u32 *)loc); |
101 | offset = (offset & 0x00ffffff) << 2; | ||
99 | if (offset & 0x02000000) | 102 | if (offset & 0x02000000) |
100 | offset -= 0x04000000; | 103 | offset -= 0x04000000; |
101 | 104 | ||
@@ -111,9 +114,10 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
111 | } | 114 | } |
112 | 115 | ||
113 | offset >>= 2; | 116 | offset >>= 2; |
117 | offset &= 0x00ffffff; | ||
114 | 118 | ||
115 | *(u32 *)loc &= 0xff000000; | 119 | *(u32 *)loc &= __opcode_to_mem_arm(0xff000000); |
116 | *(u32 *)loc |= offset & 0x00ffffff; | 120 | *(u32 *)loc |= __opcode_to_mem_arm(offset); |
117 | break; | 121 | break; |
118 | 122 | ||
119 | case R_ARM_V4BX: | 123 | case R_ARM_V4BX: |
@@ -121,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
121 | * other bits to re-code instruction as | 125 | * other bits to re-code instruction as |
122 | * MOV PC,Rm. | 126 | * MOV PC,Rm. |
123 | */ | 127 | */ |
124 | *(u32 *)loc &= 0xf000000f; | 128 | *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f); |
125 | *(u32 *)loc |= 0x01a0f000; | 129 | *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000); |
126 | break; | 130 | break; |
127 | 131 | ||
128 | case R_ARM_PREL31: | 132 | case R_ARM_PREL31: |
@@ -132,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
132 | 136 | ||
133 | case R_ARM_MOVW_ABS_NC: | 137 | case R_ARM_MOVW_ABS_NC: |
134 | case R_ARM_MOVT_ABS: | 138 | case R_ARM_MOVT_ABS: |
135 | offset = *(u32 *)loc; | 139 | offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); |
136 | offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); | 140 | offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); |
137 | offset = (offset ^ 0x8000) - 0x8000; | 141 | offset = (offset ^ 0x8000) - 0x8000; |
138 | 142 | ||
@@ -140,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
140 | if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) | 144 | if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) |
141 | offset >>= 16; | 145 | offset >>= 16; |
142 | 146 | ||
143 | *(u32 *)loc &= 0xfff0f000; | 147 | tmp &= 0xfff0f000; |
144 | *(u32 *)loc |= ((offset & 0xf000) << 4) | | 148 | tmp |= ((offset & 0xf000) << 4) | |
145 | (offset & 0x0fff); | 149 | (offset & 0x0fff); |
150 | |||
151 | *(u32 *)loc = __opcode_to_mem_arm(tmp); | ||
146 | break; | 152 | break; |
147 | 153 | ||
148 | #ifdef CONFIG_THUMB2_KERNEL | 154 | #ifdef CONFIG_THUMB2_KERNEL |
149 | case R_ARM_THM_CALL: | 155 | case R_ARM_THM_CALL: |
150 | case R_ARM_THM_JUMP24: | 156 | case R_ARM_THM_JUMP24: |
151 | upper = *(u16 *)loc; | 157 | upper = __mem_to_opcode_thumb16(*(u16 *)loc); |
152 | lower = *(u16 *)(loc + 2); | 158 | lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); |
153 | 159 | ||
154 | /* | 160 | /* |
155 | * 25 bit signed address range (Thumb-2 BL and B.W | 161 | * 25 bit signed address range (Thumb-2 BL and B.W |
@@ -198,17 +204,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
198 | sign = (offset >> 24) & 1; | 204 | sign = (offset >> 24) & 1; |
199 | j1 = sign ^ (~(offset >> 23) & 1); | 205 | j1 = sign ^ (~(offset >> 23) & 1); |
200 | j2 = sign ^ (~(offset >> 22) & 1); | 206 | j2 = sign ^ (~(offset >> 22) & 1); |
201 | *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) | | 207 | upper = (u16)((upper & 0xf800) | (sign << 10) | |
202 | ((offset >> 12) & 0x03ff)); | 208 | ((offset >> 12) & 0x03ff)); |
203 | *(u16 *)(loc + 2) = (u16)((lower & 0xd000) | | 209 | lower = (u16)((lower & 0xd000) | |
204 | (j1 << 13) | (j2 << 11) | | 210 | (j1 << 13) | (j2 << 11) | |
205 | ((offset >> 1) & 0x07ff)); | 211 | ((offset >> 1) & 0x07ff)); |
212 | |||
213 | *(u16 *)loc = __opcode_to_mem_thumb16(upper); | ||
214 | *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); | ||
206 | break; | 215 | break; |
207 | 216 | ||
208 | case R_ARM_THM_MOVW_ABS_NC: | 217 | case R_ARM_THM_MOVW_ABS_NC: |
209 | case R_ARM_THM_MOVT_ABS: | 218 | case R_ARM_THM_MOVT_ABS: |
210 | upper = *(u16 *)loc; | 219 | upper = __mem_to_opcode_thumb16(*(u16 *)loc); |
211 | lower = *(u16 *)(loc + 2); | 220 | lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); |
212 | 221 | ||
213 | /* | 222 | /* |
214 | * MOVT/MOVW instructions encoding in Thumb-2: | 223 | * MOVT/MOVW instructions encoding in Thumb-2: |
@@ -229,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, | |||
229 | if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) | 238 | if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) |
230 | offset >>= 16; | 239 | offset >>= 16; |
231 | 240 | ||
232 | *(u16 *)loc = (u16)((upper & 0xfbf0) | | 241 | upper = (u16)((upper & 0xfbf0) | |
233 | ((offset & 0xf000) >> 12) | | 242 | ((offset & 0xf000) >> 12) | |
234 | ((offset & 0x0800) >> 1)); | 243 | ((offset & 0x0800) >> 1)); |
235 | *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) | | 244 | lower = (u16)((lower & 0x8f00) | |
236 | ((offset & 0x0700) << 4) | | 245 | ((offset & 0x0700) << 4) | |
237 | (offset & 0x00ff)); | 246 | (offset & 0x00ff)); |
247 | *(u16 *)loc = __opcode_to_mem_thumb16(upper); | ||
248 | *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); | ||
238 | break; | 249 | break; |
239 | #endif | 250 | #endif |
240 | 251 | ||
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e186ee1e63f6..bc3f2efa0d86 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -256,12 +256,11 @@ validate_event(struct pmu_hw_events *hw_events, | |||
256 | struct perf_event *event) | 256 | struct perf_event *event) |
257 | { | 257 | { |
258 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); | 258 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); |
259 | struct pmu *leader_pmu = event->group_leader->pmu; | ||
260 | 259 | ||
261 | if (is_software_event(event)) | 260 | if (is_software_event(event)) |
262 | return 1; | 261 | return 1; |
263 | 262 | ||
264 | if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) | 263 | if (event->state < PERF_EVENT_STATE_OFF) |
265 | return 1; | 264 | return 1; |
266 | 265 | ||
267 | if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) | 266 | if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) |
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 8d6147b2001f..d85055cd24ba 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c | |||
@@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(perf_num_counters); | |||
68 | 68 | ||
69 | static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) | 69 | static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) |
70 | { | 70 | { |
71 | return &__get_cpu_var(cpu_hw_events); | 71 | return this_cpu_ptr(&cpu_hw_events); |
72 | } | 72 | } |
73 | 73 | ||
74 | static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) | 74 | static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) |
diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c new file mode 100644 index 000000000000..6e4379c67cbc --- /dev/null +++ b/arch/arm/kernel/perf_regs.c | |||
@@ -0,0 +1,30 @@ | |||
1 | |||
2 | #include <linux/errno.h> | ||
3 | #include <linux/kernel.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include <linux/bug.h> | ||
6 | #include <asm/perf_regs.h> | ||
7 | #include <asm/ptrace.h> | ||
8 | |||
9 | u64 perf_reg_value(struct pt_regs *regs, int idx) | ||
10 | { | ||
11 | if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM_MAX)) | ||
12 | return 0; | ||
13 | |||
14 | return regs->uregs[idx]; | ||
15 | } | ||
16 | |||
17 | #define REG_RESERVED (~((1ULL << PERF_REG_ARM_MAX) - 1)) | ||
18 | |||
19 | int perf_reg_validate(u64 mask) | ||
20 | { | ||
21 | if (!mask || mask & REG_RESERVED) | ||
22 | return -EINVAL; | ||
23 | |||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | u64 perf_reg_abi(struct task_struct *task) | ||
28 | { | ||
29 | return PERF_SAMPLE_REGS_ABI_32; | ||
30 | } | ||
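With perf_regs.c in place, userspace can request a register dump with each sample. A usage illustration (standard perf_event_open() attribute setup, not part of this patch):

    #include <linux/perf_event.h>
    #include <asm/perf_regs.h>

    static void request_arm_regs(struct perf_event_attr *attr)
    {
            attr->sample_type     |= PERF_SAMPLE_REGS_USER;
            attr->sample_regs_user = (1ULL << PERF_REG_ARM_PC) |
                                     (1ULL << PERF_REG_ARM_SP) |
                                     (1ULL << PERF_REG_ARM_LR);
            /* perf_reg_validate() rejects a zero mask or any bit at or
             * above PERF_REG_ARM_MAX. */
    }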
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 5d65438685d8..6a1b8a81b1ae 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c | |||
@@ -73,6 +73,8 @@ __setup("fpe=", fpe_setup); | |||
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | extern void paging_init(const struct machine_desc *desc); | 75 | extern void paging_init(const struct machine_desc *desc); |
76 | extern void early_paging_init(const struct machine_desc *, | ||
77 | struct proc_info_list *); | ||
76 | extern void sanity_check_meminfo(void); | 78 | extern void sanity_check_meminfo(void); |
77 | extern enum reboot_mode reboot_mode; | 79 | extern enum reboot_mode reboot_mode; |
78 | extern void setup_dma_zone(const struct machine_desc *desc); | 80 | extern void setup_dma_zone(const struct machine_desc *desc); |
@@ -599,6 +601,8 @@ static void __init setup_processor(void) | |||
599 | elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); | 601 | elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); |
600 | #endif | 602 | #endif |
601 | 603 | ||
604 | erratum_a15_798181_init(); | ||
605 | |||
602 | feat_v6_fixup(); | 606 | feat_v6_fixup(); |
603 | 607 | ||
604 | cacheid_init(); | 608 | cacheid_init(); |
@@ -619,9 +623,10 @@ void __init dump_machine_table(void) | |||
619 | /* can't use cpu_relax() here as it may require MMU setup */; | 623 | /* can't use cpu_relax() here as it may require MMU setup */; |
620 | } | 624 | } |
621 | 625 | ||
622 | int __init arm_add_memory(phys_addr_t start, phys_addr_t size) | 626 | int __init arm_add_memory(u64 start, u64 size) |
623 | { | 627 | { |
624 | struct membank *bank = &meminfo.bank[meminfo.nr_banks]; | 628 | struct membank *bank = &meminfo.bank[meminfo.nr_banks]; |
629 | u64 aligned_start; | ||
625 | 630 | ||
626 | if (meminfo.nr_banks >= NR_BANKS) { | 631 | if (meminfo.nr_banks >= NR_BANKS) { |
627 | printk(KERN_CRIT "NR_BANKS too low, " | 632 | printk(KERN_CRIT "NR_BANKS too low, " |
@@ -634,10 +639,16 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) | |||
634 | * Size is appropriately rounded down, start is rounded up. | 639 | * Size is appropriately rounded down, start is rounded up. |
635 | */ | 640 | */ |
636 | size -= start & ~PAGE_MASK; | 641 | size -= start & ~PAGE_MASK; |
637 | bank->start = PAGE_ALIGN(start); | 642 | aligned_start = PAGE_ALIGN(start); |
638 | 643 | ||
639 | #ifndef CONFIG_ARM_LPAE | 644 | #ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT |
640 | if (bank->start + size < bank->start) { | 645 | if (aligned_start > ULONG_MAX) { |
646 | printk(KERN_CRIT "Ignoring memory at 0x%08llx outside " | ||
647 | "32-bit physical address space\n", (long long)start); | ||
648 | return -EINVAL; | ||
649 | } | ||
650 | |||
651 | if (aligned_start + size > ULONG_MAX) { | ||
641 | printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " | 652 | printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " |
642 | "32-bit physical address space\n", (long long)start); | 653 | "32-bit physical address space\n", (long long)start); |
643 | /* | 654 | /* |
@@ -645,10 +656,11 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) | |||
645 | * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB. | 656 | * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB. |
646 | * This means we lose a page after masking. | 657 | * This means we lose a page after masking. |
647 | */ | 658 | */ |
648 | size = ULONG_MAX - bank->start; | 659 | size = ULONG_MAX - aligned_start; |
649 | } | 660 | } |
650 | #endif | 661 | #endif |
651 | 662 | ||
663 | bank->start = aligned_start; | ||
652 | bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); | 664 | bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); |
653 | 665 | ||
654 | /* | 666 | /* |
@@ -669,8 +681,8 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) | |||
669 | static int __init early_mem(char *p) | 681 | static int __init early_mem(char *p) |
670 | { | 682 | { |
671 | static int usermem __initdata = 0; | 683 | static int usermem __initdata = 0; |
672 | phys_addr_t size; | 684 | u64 size; |
673 | phys_addr_t start; | 685 | u64 start; |
674 | char *endp; | 686 | char *endp; |
675 | 687 | ||
676 | /* | 688 | /* |
@@ -878,6 +890,8 @@ void __init setup_arch(char **cmdline_p) | |||
878 | parse_early_param(); | 890 | parse_early_param(); |
879 | 891 | ||
880 | sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); | 892 | sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); |
893 | |||
894 | early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); | ||
881 | sanity_check_meminfo(); | 895 | sanity_check_meminfo(); |
882 | arm_memblock_init(&meminfo, mdesc); | 896 | arm_memblock_init(&meminfo, mdesc); |
883 | 897 | ||
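A worked example of the new clamping in arm_add_memory() on a non-LPAE kernel (32-bit phys_addr_t), for a hypothetical mem=512M@0xF0000000 bank:

    /* aligned_start + size = 0xF0000000 + 0x20000000 overflows 32 bits,
     * so the bank is truncated:
     *     size       = ULONG_MAX - 0xF0000000 = 0x0FFFFFFF
     *     bank->size = 0x0FFFFFFF & PAGE_MASK = 0x0FFFF000  (one page lost)
     * A bank starting entirely above 4GB (aligned_start > ULONG_MAX) is
     * rejected with -EINVAL instead.
     */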
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab3304225272..04d63880037f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c | |||
@@ -21,29 +21,7 @@ | |||
21 | #include <asm/unistd.h> | 21 | #include <asm/unistd.h> |
22 | #include <asm/vfp.h> | 22 | #include <asm/vfp.h> |
23 | 23 | ||
24 | /* | 24 | extern const unsigned long sigreturn_codes[7]; |
25 | * For ARM syscalls, we encode the syscall number into the instruction. | ||
26 | */ | ||
27 | #define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) | ||
28 | #define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) | ||
29 | |||
30 | /* | ||
31 | * With EABI, the syscall number has to be loaded into r7. | ||
32 | */ | ||
33 | #define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE)) | ||
34 | #define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) | ||
35 | |||
36 | /* | ||
37 | * For Thumb syscalls, we pass the syscall number via r7. We therefore | ||
38 | * need two 16-bit instructions. | ||
39 | */ | ||
40 | #define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) | ||
41 | #define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) | ||
42 | |||
43 | static const unsigned long sigreturn_codes[7] = { | ||
44 | MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, | ||
45 | MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, | ||
46 | }; | ||
47 | 25 | ||
48 | static unsigned long signal_return_offset; | 26 | static unsigned long signal_return_offset; |
49 | 27 | ||
@@ -375,12 +353,18 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, | |||
375 | */ | 353 | */ |
376 | thumb = handler & 1; | 354 | thumb = handler & 1; |
377 | 355 | ||
378 | if (thumb) { | ||
379 | cpsr |= PSR_T_BIT; | ||
380 | #if __LINUX_ARM_ARCH__ >= 7 | 356 | #if __LINUX_ARM_ARCH__ >= 7 |
381 | /* clear the If-Then Thumb-2 execution state */ | 357 | /* |
382 | cpsr &= ~PSR_IT_MASK; | 358 | * Clear the If-Then Thumb-2 execution state |
359 | * The ARM spec requires this to be all zeros in ARM mode; | ||
360 | * Snapdragon S4/Krait misbehaves on a Thumb=>ARM | ||
361 | * signal transition without this. | ||
362 | */ | ||
363 | cpsr &= ~PSR_IT_MASK; | ||
383 | #endif | 364 | #endif |
365 | |||
366 | if (thumb) { | ||
367 | cpsr |= PSR_T_BIT; | ||
384 | } else | 368 | } else |
385 | cpsr &= ~PSR_T_BIT; | 369 | cpsr &= ~PSR_T_BIT; |
386 | } | 370 | } |
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S new file mode 100644 index 000000000000..3c5d0f2170fd --- /dev/null +++ b/arch/arm/kernel/sigreturn_codes.S | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * sigreturn_codes.S - code snippets for sigreturn syscalls | ||
3 | * | ||
4 | * Created by: Victor Kamensky, 2013-08-13 | ||
5 | * Copyright: (C) 2013 Linaro Limited | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | |||
17 | #include <asm/unistd.h> | ||
18 | |||
19 | /* | ||
20 | * For ARM syscalls, we encode the syscall number into the instruction. | ||
21 | * With EABI, the syscall number has to be loaded into r7. As a result, | ||
22 | * the ARM syscall sequence snippet has a move and an svc in .arm encoding. | ||
23 | * | ||
24 | * For Thumb syscalls, we pass the syscall number via r7. We therefore | ||
25 | * need two 16-bit instructions in .thumb encoding. | ||
26 | * | ||
27 | * Please note that the sigreturn_codes snippets are not executed in | ||
28 | * place. Instead they are copied by the kernel into the appropriate | ||
29 | * places. The code in arch/arm/kernel/signal.c is very sensitive to | ||
30 | * the layout of these snippets. | ||
31 | */ | ||
32 | |||
33 | #if __LINUX_ARM_ARCH__ <= 4 | ||
34 | /* | ||
35 | * Note that we manually set the minimum arch version that | ||
36 | * supports the required Thumb opcodes. It is OK for this | ||
37 | * file to be used in combination with other, lower arch | ||
38 | * variants, since these code snippets are only used as | ||
39 | * input data. | ||
40 | */ | ||
41 | .arch armv4t | ||
42 | #endif | ||
43 | |||
44 | .section .rodata | ||
45 | .global sigreturn_codes | ||
46 | .type sigreturn_codes, #object | ||
47 | |||
48 | .arm | ||
49 | |||
50 | sigreturn_codes: | ||
51 | |||
52 | /* ARM sigreturn syscall code snippet */ | ||
53 | mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) | ||
54 | swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) | ||
55 | |||
56 | /* Thumb sigreturn syscall code snippet */ | ||
57 | .thumb | ||
58 | movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) | ||
59 | swi #0 | ||
60 | |||
61 | /* ARM sigreturn_rt syscall code snippet */ | ||
62 | .arm | ||
63 | mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) | ||
64 | swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) | ||
65 | |||
66 | /* Thumb sigreturn_rt syscall code snippet */ | ||
67 | .thumb | ||
68 | movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) | ||
69 | swi #0 | ||
70 | |||
71 | /* | ||
72 | * Note on additional space: the setup_return algorithm | ||
73 | * in signal.c always copies two words, regardless of | ||
74 | * whether it is the Thumb case or not, so we need an | ||
75 | * additional word after the real last entry. | ||
76 | */ | ||
77 | .arm | ||
78 | .space 4 | ||
79 | |||
80 | .size sigreturn_codes, . - sigreturn_codes | ||
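
The layout sensitivity called out above comes from how setup_return() in signal.c picks a snippet: it computes a word index from the Thumb and rt flags and always copies two words. A plain-C sketch of that indexing (the helper name and memcpy-based copy are illustrative, not the kernel's exact code):

    #include <string.h>

    /*
     * Illustrative sketch of the indexing implied by the layout above:
     * words 0-2 hold the sigreturn snippets (ARM mov, ARM swi, Thumb
     * pair), words 3-5 the rt_sigreturn ones, and word 6 is the pad
     * that keeps the unconditional two-word copy in bounds.
     */
    static void copy_sigreturn_snippet(unsigned long *frame,
                                       const unsigned long codes[7],
                                       int thumb, int rt)
    {
            unsigned int idx = (thumb ? 2 : 0) + (rt ? 3 : 0);

            /* two words are always copied, ARM or Thumb */
            memcpy(frame, &codes[idx], 2 * sizeof(unsigned long));
    }
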
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index db1536b8b30b..b907d9b790ab 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S | |||
@@ -55,6 +55,7 @@ | |||
55 | * specific registers and some other data for resume. | 55 | * specific registers and some other data for resume. |
56 | * r0 = suspend function arg0 | 56 | * r0 = suspend function arg0 |
57 | * r1 = suspend function | 57 | * r1 = suspend function |
58 | * r2 = MPIDR value the resuming CPU will use | ||
58 | */ | 59 | */ |
59 | ENTRY(__cpu_suspend) | 60 | ENTRY(__cpu_suspend) |
60 | stmfd sp!, {r4 - r11, lr} | 61 | stmfd sp!, {r4 - r11, lr} |
@@ -67,23 +68,18 @@ ENTRY(__cpu_suspend) | |||
67 | mov r5, sp @ current virtual SP | 68 | mov r5, sp @ current virtual SP |
68 | add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn | 69 | add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn |
69 | sub sp, sp, r4 @ allocate CPU state on stack | 70 | sub sp, sp, r4 @ allocate CPU state on stack |
70 | stmfd sp!, {r0, r1} @ save suspend func arg and pointer | ||
71 | add r0, sp, #8 @ save pointer to save block | ||
72 | mov r1, r4 @ size of save block | ||
73 | mov r2, r5 @ virtual SP | ||
74 | ldr r3, =sleep_save_sp | 71 | ldr r3, =sleep_save_sp |
72 | stmfd sp!, {r0, r1} @ save suspend func arg and pointer | ||
75 | ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] | 73 | ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] |
76 | ALT_SMP(mrc p15, 0, r9, c0, c0, 5) | 74 | ALT_SMP(ldr r0, =mpidr_hash) |
77 | ALT_UP_B(1f) | 75 | ALT_UP_B(1f) |
78 | ldr r8, =mpidr_hash | 76 | /* This ldmia relies on the memory layout of the mpidr_hash struct */ |
79 | /* | 77 | ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts |
80 | * This ldmia relies on the memory layout of the mpidr_hash | 78 | compute_mpidr_hash r0, r6, r7, r8, r2, r1 |
81 | * struct mpidr_hash. | 79 | add r3, r3, r0, lsl #2 |
82 | */ | 80 | 1: mov r2, r5 @ virtual SP |
83 | ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts | 81 | mov r1, r4 @ size of save block |
84 | compute_mpidr_hash lr, r5, r6, r7, r9, r4 | 82 | add r0, sp, #8 @ pointer to save block |
85 | add r3, r3, lr, lsl #2 | ||
86 | 1: | ||
87 | bl __cpu_suspend_save | 83 | bl __cpu_suspend_save |
88 | adr lr, BSYM(cpu_suspend_abort) | 84 | adr lr, BSYM(cpu_suspend_abort) |
89 | ldmfd sp!, {r0, pc} @ call suspend fn | 85 | ldmfd sp!, {r0, pc} @ call suspend fn |
@@ -130,6 +126,7 @@ ENDPROC(cpu_resume_after_mmu) | |||
130 | .data | 126 | .data |
131 | .align | 127 | .align |
132 | ENTRY(cpu_resume) | 128 | ENTRY(cpu_resume) |
129 | ARM_BE8(setend be) @ ensure we are in BE mode | ||
133 | mov r1, #0 | 130 | mov r1, #0 |
134 | ALT_SMP(mrc p15, 0, r0, c0, c0, 5) | 131 | ALT_SMP(mrc p15, 0, r0, c0, c0, 5) |
135 | ALT_UP_B(1f) | 132 | ALT_UP_B(1f) |
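
The rewritten __cpu_suspend sequence above indexes the sleep_save_sp array by a hash of the MPIDR now passed in r2. A C rendering of what compute_mpidr_hash produces (a sketch; the mask and per-level shifts are the mpidr_hash struct fields the ldmia loads):

    #include <stdint.h>

    /*
     * Sketch of the hash the compute_mpidr_hash macro implements: each
     * affinity byte of the MPIDR is masked and shifted so that the live
     * bits pack into a small dense index. The shifts (l[0..2]) come from
     * struct mpidr_hash.
     */
    static unsigned int mpidr_hash_index(uint32_t mpidr, uint32_t mask,
                                         unsigned int s0, unsigned int s1,
                                         unsigned int s2)
    {
            uint32_t m = mpidr & mask;

            return ((m & 0x0000ffU) >> s0) |
                   ((m & 0x00ff00U) >> s1) |
                   ((m & 0xff0000U) >> s2);
    }

    /* The save slot is then sleep_save_sp[index], an array of 32-bit words. */
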
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 72024ea8a3a6..dc894ab3622b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/clockchips.h> | 25 | #include <linux/clockchips.h> |
26 | #include <linux/completion.h> | 26 | #include <linux/completion.h> |
27 | #include <linux/cpufreq.h> | 27 | #include <linux/cpufreq.h> |
28 | #include <linux/irq_work.h> | ||
28 | 29 | ||
29 | #include <linux/atomic.h> | 30 | #include <linux/atomic.h> |
30 | #include <asm/smp.h> | 31 | #include <asm/smp.h> |
@@ -66,6 +67,8 @@ enum ipi_msg_type { | |||
66 | IPI_CALL_FUNC, | 67 | IPI_CALL_FUNC, |
67 | IPI_CALL_FUNC_SINGLE, | 68 | IPI_CALL_FUNC_SINGLE, |
68 | IPI_CPU_STOP, | 69 | IPI_CPU_STOP, |
70 | IPI_IRQ_WORK, | ||
71 | IPI_COMPLETION, | ||
69 | }; | 72 | }; |
70 | 73 | ||
71 | static DECLARE_COMPLETION(cpu_running); | 74 | static DECLARE_COMPLETION(cpu_running); |
@@ -80,7 +83,7 @@ void __init smp_set_ops(struct smp_operations *ops) | |||
80 | 83 | ||
81 | static unsigned long get_arch_pgd(pgd_t *pgd) | 84 | static unsigned long get_arch_pgd(pgd_t *pgd) |
82 | { | 85 | { |
83 | phys_addr_t pgdir = virt_to_phys(pgd); | 86 | phys_addr_t pgdir = virt_to_idmap(pgd); |
84 | BUG_ON(pgdir & ARCH_PGD_MASK); | 87 | BUG_ON(pgdir & ARCH_PGD_MASK); |
85 | return pgdir >> ARCH_PGD_SHIFT; | 88 | return pgdir >> ARCH_PGD_SHIFT; |
86 | } | 89 | } |
@@ -448,6 +451,14 @@ void arch_send_call_function_single_ipi(int cpu) | |||
448 | smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); | 451 | smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); |
449 | } | 452 | } |
450 | 453 | ||
454 | #ifdef CONFIG_IRQ_WORK | ||
455 | void arch_irq_work_raise(void) | ||
456 | { | ||
457 | if (is_smp()) | ||
458 | smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); | ||
459 | } | ||
460 | #endif | ||
461 | |||
451 | static const char *ipi_types[NR_IPI] = { | 462 | static const char *ipi_types[NR_IPI] = { |
452 | #define S(x,s) [x] = s | 463 | #define S(x,s) [x] = s |
453 | S(IPI_WAKEUP, "CPU wakeup interrupts"), | 464 | S(IPI_WAKEUP, "CPU wakeup interrupts"), |
@@ -456,6 +467,8 @@ static const char *ipi_types[NR_IPI] = { | |||
456 | S(IPI_CALL_FUNC, "Function call interrupts"), | 467 | S(IPI_CALL_FUNC, "Function call interrupts"), |
457 | S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), | 468 | S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), |
458 | S(IPI_CPU_STOP, "CPU stop interrupts"), | 469 | S(IPI_CPU_STOP, "CPU stop interrupts"), |
470 | S(IPI_IRQ_WORK, "IRQ work interrupts"), | ||
471 | S(IPI_COMPLETION, "completion interrupts"), | ||
459 | }; | 472 | }; |
460 | 473 | ||
461 | void show_ipi_list(struct seq_file *p, int prec) | 474 | void show_ipi_list(struct seq_file *p, int prec) |
@@ -515,6 +528,19 @@ static void ipi_cpu_stop(unsigned int cpu) | |||
515 | cpu_relax(); | 528 | cpu_relax(); |
516 | } | 529 | } |
517 | 530 | ||
531 | static DEFINE_PER_CPU(struct completion *, cpu_completion); | ||
532 | |||
533 | int register_ipi_completion(struct completion *completion, int cpu) | ||
534 | { | ||
535 | per_cpu(cpu_completion, cpu) = completion; | ||
536 | return IPI_COMPLETION; | ||
537 | } | ||
538 | |||
539 | static void ipi_complete(unsigned int cpu) | ||
540 | { | ||
541 | complete(per_cpu(cpu_completion, cpu)); | ||
542 | } | ||
543 | |||
518 | /* | 544 | /* |
519 | * Main handler for inter-processor interrupts | 545 | * Main handler for inter-processor interrupts |
520 | */ | 546 | */ |
@@ -565,6 +591,20 @@ void handle_IPI(int ipinr, struct pt_regs *regs) | |||
565 | irq_exit(); | 591 | irq_exit(); |
566 | break; | 592 | break; |
567 | 593 | ||
594 | #ifdef CONFIG_IRQ_WORK | ||
595 | case IPI_IRQ_WORK: | ||
596 | irq_enter(); | ||
597 | irq_work_run(); | ||
598 | irq_exit(); | ||
599 | break; | ||
600 | #endif | ||
601 | |||
602 | case IPI_COMPLETION: | ||
603 | irq_enter(); | ||
604 | ipi_complete(cpu); | ||
605 | irq_exit(); | ||
606 | break; | ||
607 | |||
568 | default: | 608 | default: |
569 | printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", | 609 | printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", |
570 | cpu, ipinr); | 610 | cpu, ipinr); |
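
register_ipi_completion() and ipi_complete() above give a sender a way to block until a target CPU has taken an IPI_COMPLETION interrupt. A hypothetical caller sketch (the helper name is made up; smp_cross_call() is assumed reachable from the call site):

    #include <linux/completion.h>
    #include <linux/smp.h>

    /*
     * Hypothetical caller: wait until @cpu services IPI_COMPLETION.
     * register_ipi_completion() stores the completion in the target
     * CPU's per-cpu slot and returns the IPI number to raise.
     */
    static void wait_for_cpu_ack(int cpu)
    {
            struct completion done;
            int ipi;

            init_completion(&done);
            ipi = register_ipi_completion(&done, cpu);
            smp_cross_call(cpumask_of(cpu), ipi);   /* assumed visible here */
            wait_for_completion(&done);             /* released by ipi_complete() */
    }
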
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 5bc1a63284e3..1aafa0d785eb 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c | |||
@@ -28,7 +28,7 @@ | |||
28 | */ | 28 | */ |
29 | unsigned int __init scu_get_core_count(void __iomem *scu_base) | 29 | unsigned int __init scu_get_core_count(void __iomem *scu_base) |
30 | { | 30 | { |
31 | unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG); | 31 | unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG); |
32 | return (ncores & 0x03) + 1; | 32 | return (ncores & 0x03) + 1; |
33 | } | 33 | } |
34 | 34 | ||
@@ -42,19 +42,19 @@ void scu_enable(void __iomem *scu_base) | |||
42 | #ifdef CONFIG_ARM_ERRATA_764369 | 42 | #ifdef CONFIG_ARM_ERRATA_764369 |
43 | /* Cortex-A9 only */ | 43 | /* Cortex-A9 only */ |
44 | if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { | 44 | if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { |
45 | scu_ctrl = __raw_readl(scu_base + 0x30); | 45 | scu_ctrl = readl_relaxed(scu_base + 0x30); |
46 | if (!(scu_ctrl & 1)) | 46 | if (!(scu_ctrl & 1)) |
47 | __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); | 47 | writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30); |
48 | } | 48 | } |
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | scu_ctrl = __raw_readl(scu_base + SCU_CTRL); | 51 | scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); |
52 | /* already enabled? */ | 52 | /* already enabled? */ |
53 | if (scu_ctrl & 1) | 53 | if (scu_ctrl & 1) |
54 | return; | 54 | return; |
55 | 55 | ||
56 | scu_ctrl |= 1; | 56 | scu_ctrl |= 1; |
57 | __raw_writel(scu_ctrl, scu_base + SCU_CTRL); | 57 | writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Ensure that the data accessed by CPU0 before the SCU was | 60 | * Ensure that the data accessed by CPU0 before the SCU was |
@@ -80,9 +80,9 @@ int scu_power_mode(void __iomem *scu_base, unsigned int mode) | |||
80 | if (mode > 3 || mode == 1 || cpu > 3) | 80 | if (mode > 3 || mode == 1 || cpu > 3) |
81 | return -EINVAL; | 81 | return -EINVAL; |
82 | 82 | ||
83 | val = __raw_readb(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; | 83 | val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; |
84 | val |= mode; | 84 | val |= mode; |
85 | __raw_writeb(val, scu_base + SCU_CPU_STATUS + cpu); | 85 | writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu); |
86 | 86 | ||
87 | return 0; | 87 | return 0; |
88 | } | 88 | } |
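
The switch away from the __raw_* accessors matters for the big-endian support this series enables: __raw_readl is a native-endian load, while readl_relaxed byte-swaps on a BE8 kernel so the little-endian SCU registers read back correctly; neither form implies a barrier. A rough sketch of the relationship (open-coded for illustration, not the kernel's macros):

    #include <stdint.h>

    /*
     * Why readl_relaxed matters on a big-endian kernel: device
     * registers are little-endian, so the raw load must be
     * byte-swapped when the CPU runs BE8. No memory barrier is
     * implied by either form.
     */
    static inline uint32_t my_readl_relaxed(const volatile uint32_t *addr)
    {
            uint32_t v = *addr;                     /* __raw_readl equivalent */
    #ifdef MY_BIG_ENDIAN                            /* stand-in for CPU_ENDIAN_BE8 */
            v = __builtin_bswap32(v);               /* le32_to_cpu */
    #endif
            return v;
    }
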
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 83ccca303df8..95d063620b76 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c | |||
@@ -70,6 +70,40 @@ static inline void ipi_flush_bp_all(void *ignored) | |||
70 | local_flush_bp_all(); | 70 | local_flush_bp_all(); |
71 | } | 71 | } |
72 | 72 | ||
73 | #ifdef CONFIG_ARM_ERRATA_798181 | ||
74 | bool (*erratum_a15_798181_handler)(void); | ||
75 | |||
76 | static bool erratum_a15_798181_partial(void) | ||
77 | { | ||
78 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); | ||
79 | dsb(ish); | ||
80 | return false; | ||
81 | } | ||
82 | |||
83 | static bool erratum_a15_798181_broadcast(void) | ||
84 | { | ||
85 | asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); | ||
86 | dsb(ish); | ||
87 | return true; | ||
88 | } | ||
89 | |||
90 | void erratum_a15_798181_init(void) | ||
91 | { | ||
92 | unsigned int midr = read_cpuid_id(); | ||
93 | unsigned int revidr = read_cpuid(CPUID_REVIDR); | ||
94 | |||
95 | /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ | ||
96 | if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 || | ||
97 | (revidr & 0x210) == 0x210) { | ||
98 | return; | ||
99 | } | ||
100 | if (revidr & 0x10) | ||
101 | erratum_a15_798181_handler = erratum_a15_798181_partial; | ||
102 | else | ||
103 | erratum_a15_798181_handler = erratum_a15_798181_broadcast; | ||
104 | } | ||
105 | #endif | ||
106 | |||
73 | static void ipi_flush_tlb_a15_erratum(void *arg) | 107 | static void ipi_flush_tlb_a15_erratum(void *arg) |
74 | { | 108 | { |
75 | dmb(); | 109 | dmb(); |
@@ -80,7 +114,6 @@ static void broadcast_tlb_a15_erratum(void) | |||
80 | if (!erratum_a15_798181()) | 114 | if (!erratum_a15_798181()) |
81 | return; | 115 | return; |
82 | 116 | ||
83 | dummy_flush_tlb_a15_erratum(); | ||
84 | smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1); | 117 | smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1); |
85 | } | 118 | } |
86 | 119 | ||
@@ -92,7 +125,6 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) | |||
92 | if (!erratum_a15_798181()) | 125 | if (!erratum_a15_798181()) |
93 | return; | 126 | return; |
94 | 127 | ||
95 | dummy_flush_tlb_a15_erratum(); | ||
96 | this_cpu = get_cpu(); | 128 | this_cpu = get_cpu(); |
97 | a15_erratum_get_cpumask(this_cpu, mm, &mask); | 129 | a15_erratum_get_cpumask(this_cpu, mm, &mask); |
98 | smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); | 130 | smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); |
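
The handler pointer lets the erratum_a15_798181() predicate used by the two broadcast functions above collapse to an indirect call: NULL on unaffected parts, the partial variant when REVIDR shows the local ECO fix (workaround applied locally, no broadcast needed), the broadcast variant otherwise. An assumed shape for that predicate, which is defined outside this hunk:

    /*
     * Assumed shape of the erratum_a15_798181() predicate: a NULL
     * handler means the CPU is unaffected; the "partial" handler
     * applies the local workaround and suppresses the IPI broadcast;
     * the "broadcast" handler requests it.
     */
    extern bool (*erratum_a15_798181_handler)(void);

    static inline bool erratum_a15_798181(void)
    {
            if (erratum_a15_798181_handler)
                    return erratum_a15_798181_handler();
            return false;
    }
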
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2985c9f0905d..6591e26fc13f 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c | |||
@@ -45,7 +45,7 @@ static void twd_set_mode(enum clock_event_mode mode, | |||
45 | case CLOCK_EVT_MODE_PERIODIC: | 45 | case CLOCK_EVT_MODE_PERIODIC: |
46 | ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE | 46 | ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE |
47 | | TWD_TIMER_CONTROL_PERIODIC; | 47 | | TWD_TIMER_CONTROL_PERIODIC; |
48 | __raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), | 48 | writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), |
49 | twd_base + TWD_TIMER_LOAD); | 49 | twd_base + TWD_TIMER_LOAD); |
50 | break; | 50 | break; |
51 | case CLOCK_EVT_MODE_ONESHOT: | 51 | case CLOCK_EVT_MODE_ONESHOT: |
@@ -58,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode, | |||
58 | ctrl = 0; | 58 | ctrl = 0; |
59 | } | 59 | } |
60 | 60 | ||
61 | __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); | 61 | writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); |
62 | } | 62 | } |
63 | 63 | ||
64 | static int twd_set_next_event(unsigned long evt, | 64 | static int twd_set_next_event(unsigned long evt, |
65 | struct clock_event_device *unused) | 65 | struct clock_event_device *unused) |
66 | { | 66 | { |
67 | unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); | 67 | unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL); |
68 | 68 | ||
69 | ctrl |= TWD_TIMER_CONTROL_ENABLE; | 69 | ctrl |= TWD_TIMER_CONTROL_ENABLE; |
70 | 70 | ||
71 | __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); | 71 | writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER); |
72 | __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); | 72 | writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); |
73 | 73 | ||
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
@@ -82,8 +82,8 @@ static int twd_set_next_event(unsigned long evt, | |||
82 | */ | 82 | */ |
83 | static int twd_timer_ack(void) | 83 | static int twd_timer_ack(void) |
84 | { | 84 | { |
85 | if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { | 85 | if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) { |
86 | __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); | 86 | writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); |
87 | return 1; | 87 | return 1; |
88 | } | 88 | } |
89 | 89 | ||
@@ -211,15 +211,15 @@ static void twd_calibrate_rate(void) | |||
211 | waitjiffies += 5; | 211 | waitjiffies += 5; |
212 | 212 | ||
213 | /* enable, no interrupt or reload */ | 213 | /* enable, no interrupt or reload */ |
214 | __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); | 214 | writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL); |
215 | 215 | ||
216 | /* maximum value */ | 216 | /* maximum value */ |
217 | __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); | 217 | writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); |
218 | 218 | ||
219 | while (get_jiffies_64() < waitjiffies) | 219 | while (get_jiffies_64() < waitjiffies) |
220 | udelay(10); | 220 | udelay(10); |
221 | 221 | ||
222 | count = __raw_readl(twd_base + TWD_TIMER_COUNTER); | 222 | count = readl_relaxed(twd_base + TWD_TIMER_COUNTER); |
223 | 223 | ||
224 | twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); | 224 | twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); |
225 | 225 | ||
@@ -277,7 +277,7 @@ static void twd_timer_setup(void) | |||
277 | * bother with the below. | 277 | * bother with the below. |
278 | */ | 278 | */ |
279 | if (per_cpu(percpu_setup_called, cpu)) { | 279 | if (per_cpu(percpu_setup_called, cpu)) { |
280 | __raw_writel(0, twd_base + TWD_TIMER_CONTROL); | 280 | writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); |
281 | clockevents_register_device(clk); | 281 | clockevents_register_device(clk); |
282 | enable_percpu_irq(clk->irq, 0); | 282 | enable_percpu_irq(clk->irq, 0); |
283 | return; | 283 | return; |
@@ -290,7 +290,7 @@ static void twd_timer_setup(void) | |||
290 | * The following is done once per CPU the first time .setup() is | 290 | * The following is done once per CPU the first time .setup() is |
291 | * called. | 291 | * called. |
292 | */ | 292 | */ |
293 | __raw_writel(0, twd_base + TWD_TIMER_CONTROL); | 293 | writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); |
294 | 294 | ||
295 | clk->name = "local_timer"; | 295 | clk->name = "local_timer"; |
296 | clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | | 296 | clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | |
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 41cf3cbf756d..2835d35234ca 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <asm/suspend.h> | 10 | #include <asm/suspend.h> |
11 | #include <asm/tlbflush.h> | 11 | #include <asm/tlbflush.h> |
12 | 12 | ||
13 | extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); | 13 | extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); |
14 | extern void cpu_resume_mmu(void); | 14 | extern void cpu_resume_mmu(void); |
15 | 15 | ||
16 | #ifdef CONFIG_MMU | 16 | #ifdef CONFIG_MMU |
@@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void); | |||
21 | int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) | 21 | int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) |
22 | { | 22 | { |
23 | struct mm_struct *mm = current->active_mm; | 23 | struct mm_struct *mm = current->active_mm; |
24 | u32 __mpidr = cpu_logical_map(smp_processor_id()); | ||
24 | int ret; | 25 | int ret; |
25 | 26 | ||
26 | if (!idmap_pgd) | 27 | if (!idmap_pgd) |
@@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) | |||
32 | * resume (indicated by a zero return code), we need to switch | 33 | * resume (indicated by a zero return code), we need to switch |
33 | * back to the correct page tables. | 34 | * back to the correct page tables. |
34 | */ | 35 | */ |
35 | ret = __cpu_suspend(arg, fn); | 36 | ret = __cpu_suspend(arg, fn, __mpidr); |
36 | if (ret == 0) { | 37 | if (ret == 0) { |
37 | cpu_switch_mm(mm->pgd, mm); | 38 | cpu_switch_mm(mm->pgd, mm); |
38 | local_flush_bp_all(); | 39 | local_flush_bp_all(); |
@@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) | |||
44 | #else | 45 | #else |
45 | int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) | 46 | int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) |
46 | { | 47 | { |
47 | return __cpu_suspend(arg, fn); | 48 | u32 __mpidr = cpu_logical_map(smp_processor_id()); |
49 | return __cpu_suspend(arg, fn, __mpidr); | ||
48 | } | 50 | } |
49 | #define idmap_pgd NULL | 51 | #define idmap_pgd NULL |
50 | #endif | 52 | #endif |
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 8fcda140358d..6125f259b7b5 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <asm/unwind.h> | 34 | #include <asm/unwind.h> |
35 | #include <asm/tls.h> | 35 | #include <asm/tls.h> |
36 | #include <asm/system_misc.h> | 36 | #include <asm/system_misc.h> |
37 | #include <asm/opcodes.h> | ||
37 | 38 | ||
38 | static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; | 39 | static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; |
39 | 40 | ||
@@ -341,15 +342,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs, | |||
341 | int is_valid_bugaddr(unsigned long pc) | 342 | int is_valid_bugaddr(unsigned long pc) |
342 | { | 343 | { |
343 | #ifdef CONFIG_THUMB2_KERNEL | 344 | #ifdef CONFIG_THUMB2_KERNEL |
344 | unsigned short bkpt; | 345 | u16 bkpt; |
346 | u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); | ||
345 | #else | 347 | #else |
346 | unsigned long bkpt; | 348 | u32 bkpt; |
349 | u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); | ||
347 | #endif | 350 | #endif |
348 | 351 | ||
349 | if (probe_kernel_address((unsigned *)pc, bkpt)) | 352 | if (probe_kernel_address((unsigned *)pc, bkpt)) |
350 | return 0; | 353 | return 0; |
351 | 354 | ||
352 | return bkpt == BUG_INSTR_VALUE; | 355 | return bkpt == insn; |
353 | } | 356 | } |
354 | 357 | ||
355 | #endif | 358 | #endif |
@@ -402,25 +405,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) | |||
402 | if (processor_mode(regs) == SVC_MODE) { | 405 | if (processor_mode(regs) == SVC_MODE) { |
403 | #ifdef CONFIG_THUMB2_KERNEL | 406 | #ifdef CONFIG_THUMB2_KERNEL |
404 | if (thumb_mode(regs)) { | 407 | if (thumb_mode(regs)) { |
405 | instr = ((u16 *)pc)[0]; | 408 | instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]); |
406 | if (is_wide_instruction(instr)) { | 409 | if (is_wide_instruction(instr)) { |
407 | instr <<= 16; | 410 | u16 inst2; |
408 | instr |= ((u16 *)pc)[1]; | 411 | inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]); |
412 | instr = __opcode_thumb32_compose(instr, inst2); | ||
409 | } | 413 | } |
410 | } else | 414 | } else |
411 | #endif | 415 | #endif |
412 | instr = *(u32 *) pc; | 416 | instr = __mem_to_opcode_arm(*(u32 *) pc); |
413 | } else if (thumb_mode(regs)) { | 417 | } else if (thumb_mode(regs)) { |
414 | if (get_user(instr, (u16 __user *)pc)) | 418 | if (get_user(instr, (u16 __user *)pc)) |
415 | goto die_sig; | 419 | goto die_sig; |
420 | instr = __mem_to_opcode_thumb16(instr); | ||
416 | if (is_wide_instruction(instr)) { | 421 | if (is_wide_instruction(instr)) { |
417 | unsigned int instr2; | 422 | unsigned int instr2; |
418 | if (get_user(instr2, (u16 __user *)pc+1)) | 423 | if (get_user(instr2, (u16 __user *)pc+1)) |
419 | goto die_sig; | 424 | goto die_sig; |
420 | instr <<= 16; | 425 | instr2 = __mem_to_opcode_thumb16(instr2); |
421 | instr |= instr2; | 426 | instr = __opcode_thumb32_compose(instr, instr2); |
422 | } | 427 | } |
423 | } else if (get_user(instr, (u32 __user *)pc)) { | 428 | } else if (get_user(instr, (u32 __user *)pc)) { |
429 | instr = __mem_to_opcode_arm(instr); | ||
424 | goto die_sig; | 430 | goto die_sig; |
425 | } | 431 | } |
426 | 432 | ||
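
The __mem_to_opcode_*() helpers used above normalize instructions read from memory into canonical form, which matters once BE8 kernels store code little-endian, and __opcode_thumb32_compose() replaces the open-coded shift-and-or. A sketch of the composition, assuming the usual asm/opcodes.h semantics:

    #include <stdint.h>

    /*
     * Sketch of the asm/opcodes.h helpers (semantics assumed): a
     * 32-bit Thumb-2 opcode is the first (most-significant) halfword
     * followed by the second.
     */
    static inline uint32_t opcode_thumb32_compose(uint16_t first, uint16_t second)
    {
            return ((uint32_t)first << 16) | second;
    }

    /*
     * On a BE8 kernel __mem_to_opcode_thumb16() would additionally
     * byte-swap the halfword loaded from memory; on LE it is a no-op.
     */
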
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..aea7ccb8d397 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c | |||
@@ -65,7 +65,7 @@ static bool vgic_present; | |||
65 | static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) | 65 | static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) |
66 | { | 66 | { |
67 | BUG_ON(preemptible()); | 67 | BUG_ON(preemptible()); |
68 | __get_cpu_var(kvm_arm_running_vcpu) = vcpu; | 68 | __this_cpu_write(kvm_arm_running_vcpu, vcpu); |
69 | } | 69 | } |
70 | 70 | ||
71 | /** | 71 | /** |
@@ -75,7 +75,7 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) | |||
75 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void) | 75 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void) |
76 | { | 76 | { |
77 | BUG_ON(preemptible()); | 77 | BUG_ON(preemptible()); |
78 | return __get_cpu_var(kvm_arm_running_vcpu); | 78 | return __this_cpu_read(kvm_arm_running_vcpu); |
79 | } | 79 | } |
80 | 80 | ||
81 | /** | 81 | /** |
@@ -815,7 +815,7 @@ static void cpu_init_hyp_mode(void *dummy) | |||
815 | 815 | ||
816 | boot_pgd_ptr = kvm_mmu_get_boot_httbr(); | 816 | boot_pgd_ptr = kvm_mmu_get_boot_httbr(); |
817 | pgd_ptr = kvm_mmu_get_httbr(); | 817 | pgd_ptr = kvm_mmu_get_httbr(); |
818 | stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); | 818 | stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); |
819 | hyp_stack_ptr = stack_page + PAGE_SIZE; | 819 | hyp_stack_ptr = stack_page + PAGE_SIZE; |
820 | vector_ptr = (unsigned long)__kvm_hyp_vector; | 820 | vector_ptr = (unsigned long)__kvm_hyp_vector; |
821 | 821 | ||
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index d6408d1ee543..e0c68d5bb7dc 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h | |||
@@ -10,6 +10,11 @@ UNWIND( .fnstart ) | |||
10 | and r3, r0, #31 @ Get bit offset | 10 | and r3, r0, #31 @ Get bit offset |
11 | mov r0, r0, lsr #5 | 11 | mov r0, r0, lsr #5 |
12 | add r1, r1, r0, lsl #2 @ Get word offset | 12 | add r1, r1, r0, lsl #2 @ Get word offset |
13 | #if __LINUX_ARM_ARCH__ >= 7 | ||
14 | .arch_extension mp | ||
15 | ALT_SMP(W(pldw) [r1]) | ||
16 | ALT_UP(W(nop)) | ||
17 | #endif | ||
13 | mov r3, r2, lsl r3 | 18 | mov r3, r2, lsl r3 |
14 | 1: ldrex r2, [r1] | 19 | 1: ldrex r2, [r1] |
15 | \instr r2, r2, r3 | 20 | \instr r2, r2, r3 |
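
The pldw added above (a preload-for-write from the MP extensions) hints that the cache line should be fetched in a writable state before the ldrex/strex loop, reducing line ping-pong between cores contending on the same bitmap word. In C terms the hint corresponds roughly to a write prefetch:

    /*
     * C analogue of the ALT_SMP(pldw) hint above: prefetch the word
     * for writing before entering a load-exclusive/store-exclusive
     * retry loop. Illustration only; the kernel emits pldw directly.
     */
    static inline void prefetch_for_write(const void *p)
    {
            __builtin_prefetch(p, 1 /* write */, 3 /* high locality */);
    }
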
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 025f742dd4df..3e58d710013c 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/hardirq.h> /* for in_atomic() */ | 18 | #include <linux/hardirq.h> /* for in_atomic() */ |
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
20 | #include <linux/highmem.h> | 20 | #include <linux/highmem.h> |
21 | #include <linux/hugetlb.h> | ||
21 | #include <asm/current.h> | 22 | #include <asm/current.h> |
22 | #include <asm/page.h> | 23 | #include <asm/page.h> |
23 | 24 | ||
@@ -40,7 +41,35 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) | |||
40 | return 0; | 41 | return 0; |
41 | 42 | ||
42 | pmd = pmd_offset(pud, addr); | 43 | pmd = pmd_offset(pud, addr); |
43 | if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) | 44 | if (unlikely(pmd_none(*pmd))) |
45 | return 0; | ||
46 | |||
47 | /* | ||
48 | * A pmd can be bad if it refers to a HugeTLB or THP page. | ||
49 | * | ||
50 | * Both THP and HugeTLB pages have the same pmd layout | ||
51 | * and should not be manipulated by the pte functions. | ||
52 | * | ||
53 | * Lock the page table for the destination and check | ||
54 | * to see that it's still huge and whether or not we will | ||
55 | * need to fault on write, or if we have a splitting THP. | ||
56 | */ | ||
57 | if (unlikely(pmd_thp_or_huge(*pmd))) { | ||
58 | ptl = ¤t->mm->page_table_lock; | ||
59 | spin_lock(ptl); | ||
60 | if (unlikely(!pmd_thp_or_huge(*pmd) | ||
61 | || pmd_hugewillfault(*pmd) | ||
62 | || pmd_trans_splitting(*pmd))) { | ||
63 | spin_unlock(ptl); | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | *ptep = NULL; | ||
68 | *ptlp = ptl; | ||
69 | return 1; | ||
70 | } | ||
71 | |||
72 | if (unlikely(pmd_bad(*pmd))) | ||
44 | return 0; | 73 | return 0; |
45 | 74 | ||
46 | pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); | 75 | pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); |
@@ -94,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) | |||
94 | from += tocopy; | 123 | from += tocopy; |
95 | n -= tocopy; | 124 | n -= tocopy; |
96 | 125 | ||
97 | pte_unmap_unlock(pte, ptl); | 126 | if (pte) |
127 | pte_unmap_unlock(pte, ptl); | ||
128 | else | ||
129 | spin_unlock(ptl); | ||
98 | } | 130 | } |
99 | if (!atomic) | 131 | if (!atomic) |
100 | up_read(¤t->mm->mmap_sem); | 132 | up_read(¤t->mm->mmap_sem); |
@@ -147,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n) | |||
147 | addr += tocopy; | 179 | addr += tocopy; |
148 | n -= tocopy; | 180 | n -= tocopy; |
149 | 181 | ||
150 | pte_unmap_unlock(pte, ptl); | 182 | if (pte) |
183 | pte_unmap_unlock(pte, ptl); | ||
184 | else | ||
185 | spin_unlock(ptl); | ||
151 | } | 186 | } |
152 | up_read(¤t->mm->mmap_sem); | 187 | up_read(¤t->mm->mmap_sem); |
153 | 188 | ||
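
After this change pin_page_for_write() effectively has three outcomes, which the unlock sites above mirror. A condensed sketch of the contract (helper name illustrative):

    /*
     * Contract after this change (condensed from the code above):
     *
     *   ret == 0                 -> caller must fault the page in
     *   ret == 1, *ptep != NULL  -> pte mapped, pte-level lock held
     *   ret == 1, *ptep == NULL  -> huge/THP pmd, page_table_lock held
     */
    static void unpin_page(pte_t *pte, spinlock_t *ptl)
    {
            if (pte)
                    pte_unmap_unlock(pte, ptl);
            else
                    spin_unlock(ptl);       /* the huge-pmd case */
    }
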
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 1fd2cf097e30..eb1fa5c84723 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c | |||
@@ -692,14 +692,14 @@ static void netwinder_led_set(struct led_classdev *cdev, | |||
692 | unsigned long flags; | 692 | unsigned long flags; |
693 | u32 reg; | 693 | u32 reg; |
694 | 694 | ||
695 | spin_lock_irqsave(&nw_gpio_lock, flags); | 695 | raw_spin_lock_irqsave(&nw_gpio_lock, flags); |
696 | reg = nw_gpio_read(); | 696 | reg = nw_gpio_read(); |
697 | if (b != LED_OFF) | 697 | if (b != LED_OFF) |
698 | reg &= ~led->mask; | 698 | reg &= ~led->mask; |
699 | else | 699 | else |
700 | reg |= led->mask; | 700 | reg |= led->mask; |
701 | nw_gpio_modify_op(led->mask, reg); | 701 | nw_gpio_modify_op(led->mask, reg); |
702 | spin_unlock_irqrestore(&nw_gpio_lock, flags); | 702 | raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); |
703 | } | 703 | } |
704 | 704 | ||
705 | static enum led_brightness netwinder_led_get(struct led_classdev *cdev) | 705 | static enum led_brightness netwinder_led_get(struct led_classdev *cdev) |
@@ -709,9 +709,9 @@ static enum led_brightness netwinder_led_get(struct led_classdev *cdev) | |||
709 | unsigned long flags; | 709 | unsigned long flags; |
710 | u32 reg; | 710 | u32 reg; |
711 | 711 | ||
712 | spin_lock_irqsave(&nw_gpio_lock, flags); | 712 | raw_spin_lock_irqsave(&nw_gpio_lock, flags); |
713 | reg = nw_gpio_read(); | 713 | reg = nw_gpio_read(); |
714 | spin_unlock_irqrestore(&nw_gpio_lock, flags); | 714 | raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); |
715 | 715 | ||
716 | return (reg & led->mask) ? LED_OFF : LED_FULL; | 716 | return (reg & led->mask) ? LED_OFF : LED_FULL; |
717 | } | 717 | } |
diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index fe98df44579c..08332d841440 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig | |||
@@ -4,11 +4,12 @@ config ARCH_HIGHBANK | |||
4 | select ARCH_HAS_CPUFREQ | 4 | select ARCH_HAS_CPUFREQ |
5 | select ARCH_HAS_HOLES_MEMORYMODEL | 5 | select ARCH_HAS_HOLES_MEMORYMODEL |
6 | select ARCH_HAS_OPP | 6 | select ARCH_HAS_OPP |
7 | select ARCH_SUPPORTS_BIG_ENDIAN | ||
7 | select ARCH_WANT_OPTIONAL_GPIOLIB | 8 | select ARCH_WANT_OPTIONAL_GPIOLIB |
8 | select ARM_AMBA | 9 | select ARM_AMBA |
9 | select ARM_ERRATA_764369 | 10 | select ARM_ERRATA_764369 |
10 | select ARM_ERRATA_775420 | 11 | select ARM_ERRATA_775420 |
11 | select ARM_ERRATA_798181 | 12 | select ARM_ERRATA_798181 if SMP |
12 | select ARM_GIC | 13 | select ARM_GIC |
13 | select ARM_PSCI | 14 | select ARM_PSCI |
14 | select ARM_TIMER_SP804 | 15 | select ARM_TIMER_SP804 |
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index 30e1ebe3a891..c342dc4e8a45 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig | |||
@@ -1,9 +1,5 @@ | |||
1 | if ARCH_IXP4XX | 1 | if ARCH_IXP4XX |
2 | 2 | ||
3 | config ARCH_SUPPORTS_BIG_ENDIAN | ||
4 | bool | ||
5 | default y | ||
6 | |||
7 | menu "Intel IXP4xx Implementation Options" | 3 | menu "Intel IXP4xx Implementation Options" |
8 | 4 | ||
9 | comment "IXP4xx Platforms" | 5 | comment "IXP4xx Platforms" |
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 9eb63d724602..5e269d7263ce 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config ARCH_MVEBU | 1 | config ARCH_MVEBU |
2 | bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7 | 2 | bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7 |
3 | select ARCH_SUPPORTS_BIG_ENDIAN | ||
3 | select CLKSRC_MMIO | 4 | select CLKSRC_MMIO |
4 | select COMMON_CLK | 5 | select COMMON_CLK |
5 | select GENERIC_CLOCKEVENTS | 6 | select GENERIC_CLOCKEVENTS |
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 5476669ba905..ee7598fe75db 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S | |||
@@ -20,6 +20,8 @@ | |||
20 | #define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 | 20 | #define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 |
21 | #define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 | 21 | #define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 |
22 | 22 | ||
23 | #include <asm/assembler.h> | ||
24 | |||
23 | .text | 25 | .text |
24 | /* | 26 | /* |
25 | * r0: Coherency fabric base register address | 27 | * r0: Coherency fabric base register address |
@@ -29,6 +31,7 @@ ENTRY(ll_set_cpu_coherent) | |||
29 | /* Create bit by cpu index */ | 31 | /* Create bit by cpu index */ |
30 | mov r3, #(1 << 24) | 32 | mov r3, #(1 << 24) |
31 | lsl r1, r3, r1 | 33 | lsl r1, r3, r1 |
34 | ARM_BE8(rev r1, r1) | ||
32 | 35 | ||
33 | /* Add CPU to SMP group - Atomic */ | 36 | /* Add CPU to SMP group - Atomic */ |
34 | add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET | 37 | add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET |
diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S index 8a1b0c96e9ec..3dd80df428f7 100644 --- a/arch/arm/mach-mvebu/headsmp.S +++ b/arch/arm/mach-mvebu/headsmp.S | |||
@@ -21,12 +21,16 @@ | |||
21 | #include <linux/linkage.h> | 21 | #include <linux/linkage.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | 23 | ||
24 | #include <asm/assembler.h> | ||
25 | |||
24 | /* | 26 | /* |
25 | * Armada XP specific entry point for secondary CPUs. | 27 | * Armada XP specific entry point for secondary CPUs. |
26 | * We add the CPU to the coherency fabric and then jump to secondary | 28 | * We add the CPU to the coherency fabric and then jump to secondary |
27 | * startup | 29 | * startup |
28 | */ | 30 | */ |
29 | ENTRY(armada_xp_secondary_startup) | 31 | ENTRY(armada_xp_secondary_startup) |
32 | ARM_BE8(setend be ) @ go BE8 if entered LE | ||
33 | |||
30 | /* Get coherency fabric base physical address */ | 34 | /* Get coherency fabric base physical address */ |
31 | adr r0, 1f | 35 | adr r0, 1f |
32 | ldr r1, [r0] | 36 | ldr r1, [r0] |
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index e838ba27e443..c9808c684152 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c | |||
@@ -512,6 +512,9 @@ static void __init assabet_map_io(void) | |||
512 | * It's called GPCLKR0 in my SA1110 manual. | 512 | * It's called GPCLKR0 in my SA1110 manual. |
513 | */ | 513 | */ |
514 | Ser1SDCR0 |= SDCR0_SUS; | 514 | Ser1SDCR0 |= SDCR0_SUS; |
515 | MSC1 = (MSC1 & ~0xffff) | | ||
516 | MSC_NonBrst | MSC_32BitStMem | | ||
517 | MSC_RdAcc(2) | MSC_WrAcc(2) | MSC_Rec(0); | ||
515 | 518 | ||
516 | if (!machine_has_neponset()) | 519 | if (!machine_has_neponset()) |
517 | sa1100_register_uart_fns(&assabet_port_fns); | 520 | sa1100_register_uart_fns(&assabet_port_fns); |
diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h deleted file mode 100644 index 6a9eecf3137e..000000000000 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | /* | ||
2 | * arch/arm/mach-sa1100/include/mach/gpio.h | ||
3 | * | ||
4 | * SA1100 GPIO wrappers for arch-neutral GPIO calls | ||
5 | * | ||
6 | * Written by Philipp Zabel <philipp.zabel@gmail.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef __ASM_ARCH_SA1100_GPIO_H | ||
25 | #define __ASM_ARCH_SA1100_GPIO_H | ||
26 | |||
27 | #include <linux/io.h> | ||
28 | #include <mach/hardware.h> | ||
29 | #include <asm/irq.h> | ||
30 | #include <asm-generic/gpio.h> | ||
31 | |||
32 | #define __ARM_GPIOLIB_COMPLEX | ||
33 | |||
34 | static inline int gpio_get_value(unsigned gpio) | ||
35 | { | ||
36 | if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX)) | ||
37 | return GPLR & GPIO_GPIO(gpio); | ||
38 | else | ||
39 | return __gpio_get_value(gpio); | ||
40 | } | ||
41 | |||
42 | static inline void gpio_set_value(unsigned gpio, int value) | ||
43 | { | ||
44 | if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX)) | ||
45 | if (value) | ||
46 | GPSR = GPIO_GPIO(gpio); | ||
47 | else | ||
48 | GPCR = GPIO_GPIO(gpio); | ||
49 | else | ||
50 | __gpio_set_value(gpio, value); | ||
51 | } | ||
52 | |||
53 | #define gpio_cansleep __gpio_cansleep | ||
54 | |||
55 | #endif | ||
diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h index 7d9df16f04a2..c810620db53d 100644 --- a/arch/arm/mach-sa1100/include/mach/h3xxx.h +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h | |||
@@ -13,6 +13,8 @@ | |||
13 | #ifndef _INCLUDE_H3XXX_H_ | 13 | #ifndef _INCLUDE_H3XXX_H_ |
14 | #define _INCLUDE_H3XXX_H_ | 14 | #define _INCLUDE_H3XXX_H_ |
15 | 15 | ||
16 | #include "hardware.h" /* Gives GPIO_MAX */ | ||
17 | |||
16 | /* Physical memory regions corresponding to chip selects */ | 18 | /* Physical memory regions corresponding to chip selects */ |
17 | #define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) | 19 | #define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) |
18 | #define H3600_BANK_2_PHYS SA1100_CS2_PHYS | 20 | #define H3600_BANK_2_PHYS SA1100_CS2_PHYS |
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index bcbc94540e45..41e476e571d7 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c | |||
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | #include <mach/hardware.h> | 20 | #include <mach/hardware.h> |
21 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
22 | #include <asm/irq.h> | ||
22 | 23 | ||
23 | #include <asm/mach-types.h> | 24 | #include <asm/mach-types.h> |
24 | #include <asm/mach/arch.h> | 25 | #include <asm/mach/arch.h> |
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 0bf04a0bca9d..09e740f58b27 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig | |||
@@ -51,7 +51,7 @@ config ARCH_TEGRA_3x_SOC | |||
51 | 51 | ||
52 | config ARCH_TEGRA_114_SOC | 52 | config ARCH_TEGRA_114_SOC |
53 | bool "Enable support for Tegra114 family" | 53 | bool "Enable support for Tegra114 family" |
54 | select ARM_ERRATA_798181 | 54 | select ARM_ERRATA_798181 if SMP |
55 | select ARM_L1_CACHE_SHIFT_6 | 55 | select ARM_L1_CACHE_SHIFT_6 |
56 | select HAVE_ARM_ARCH_TIMER | 56 | select HAVE_ARM_ARCH_TIMER |
57 | select PINCTRL_TEGRA114 | 57 | select PINCTRL_TEGRA114 |
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index d7e7422527ca..cbbb81e0e509 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config ARCH_VEXPRESS | 1 | config ARCH_VEXPRESS |
2 | bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 | 2 | bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 |
3 | select ARCH_REQUIRE_GPIOLIB | 3 | select ARCH_REQUIRE_GPIOLIB |
4 | select ARCH_SUPPORTS_BIG_ENDIAN | ||
4 | select ARM_AMBA | 5 | select ARM_AMBA |
5 | select ARM_GIC | 6 | select ARM_GIC |
6 | select ARM_TIMER_SP804 | 7 | select ARM_TIMER_SP804 |
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 3a6384c6c435..14d499688736 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c | |||
@@ -133,38 +133,8 @@ static void dcscb_power_down(void) | |||
133 | if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { | 133 | if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { |
134 | arch_spin_unlock(&dcscb_lock); | 134 | arch_spin_unlock(&dcscb_lock); |
135 | 135 | ||
136 | /* | 136 | /* Flush all cache levels for this cluster. */ |
137 | * Flush all cache levels for this cluster. | 137 | v7_exit_coherency_flush(all); |
138 | * | ||
139 | * To do so we do: | ||
140 | * - Clear the SCTLR.C bit to prevent further cache allocations | ||
141 | * - Flush the whole cache | ||
142 | * - Clear the ACTLR "SMP" bit to disable local coherency | ||
143 | * | ||
144 | * Let's do it in the safest possible way i.e. with | ||
145 | * no memory access within the following sequence | ||
146 | * including to the stack. | ||
147 | * | ||
148 | * Note: fp is preserved to the stack explicitly prior doing | ||
149 | * this since adding it to the clobber list is incompatible | ||
150 | * with having CONFIG_FRAME_POINTER=y. | ||
151 | */ | ||
152 | asm volatile( | ||
153 | "str fp, [sp, #-4]! \n\t" | ||
154 | "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" | ||
155 | "bic r0, r0, #"__stringify(CR_C)" \n\t" | ||
156 | "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" | ||
157 | "isb \n\t" | ||
158 | "bl v7_flush_dcache_all \n\t" | ||
159 | "clrex \n\t" | ||
160 | "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" | ||
161 | "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" | ||
162 | "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" | ||
163 | "isb \n\t" | ||
164 | "dsb \n\t" | ||
165 | "ldr fp, [sp], #4" | ||
166 | : : : "r0","r1","r2","r3","r4","r5","r6","r7", | ||
167 | "r9","r10","lr","memory"); | ||
168 | 138 | ||
169 | /* | 139 | /* |
170 | * This is a harmless no-op. On platforms with a real | 140 | * This is a harmless no-op. On platforms with a real |
@@ -183,26 +153,8 @@ static void dcscb_power_down(void) | |||
183 | } else { | 153 | } else { |
184 | arch_spin_unlock(&dcscb_lock); | 154 | arch_spin_unlock(&dcscb_lock); |
185 | 155 | ||
186 | /* | 156 | /* Disable and flush the local CPU cache. */ |
187 | * Flush the local CPU cache. | 157 | v7_exit_coherency_flush(louis); |
188 | * Let's do it in the safest possible way as above. | ||
189 | */ | ||
190 | asm volatile( | ||
191 | "str fp, [sp, #-4]! \n\t" | ||
192 | "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" | ||
193 | "bic r0, r0, #"__stringify(CR_C)" \n\t" | ||
194 | "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" | ||
195 | "isb \n\t" | ||
196 | "bl v7_flush_dcache_louis \n\t" | ||
197 | "clrex \n\t" | ||
198 | "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" | ||
199 | "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" | ||
200 | "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" | ||
201 | "isb \n\t" | ||
202 | "dsb \n\t" | ||
203 | "ldr fp, [sp], #4" | ||
204 | : : : "r0","r1","r2","r3","r4","r5","r6","r7", | ||
205 | "r9","r10","lr","memory"); | ||
206 | } | 158 | } |
207 | 159 | ||
208 | __mcpm_cpu_down(cpu, cluster); | 160 | __mcpm_cpu_down(cpu, cluster); |
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index e6eb48192912..4eb92ebfd953 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c | |||
@@ -156,32 +156,7 @@ static void tc2_pm_down(u64 residency) | |||
156 | : : "r" (0x400) ); | 156 | : : "r" (0x400) ); |
157 | } | 157 | } |
158 | 158 | ||
159 | /* | 159 | v7_exit_coherency_flush(all); |
160 | * We need to disable and flush the whole (L1 and L2) cache. | ||
161 | * Let's do it in the safest possible way i.e. with | ||
162 | * no memory access within the following sequence | ||
163 | * including the stack. | ||
164 | * | ||
165 | * Note: fp is preserved to the stack explicitly prior doing | ||
166 | * this since adding it to the clobber list is incompatible | ||
167 | * with having CONFIG_FRAME_POINTER=y. | ||
168 | */ | ||
169 | asm volatile( | ||
170 | "str fp, [sp, #-4]! \n\t" | ||
171 | "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" | ||
172 | "bic r0, r0, #"__stringify(CR_C)" \n\t" | ||
173 | "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" | ||
174 | "isb \n\t" | ||
175 | "bl v7_flush_dcache_all \n\t" | ||
176 | "clrex \n\t" | ||
177 | "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" | ||
178 | "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" | ||
179 | "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" | ||
180 | "isb \n\t" | ||
181 | "dsb \n\t" | ||
182 | "ldr fp, [sp], #4" | ||
183 | : : : "r0","r1","r2","r3","r4","r5","r6","r7", | ||
184 | "r9","r10","lr","memory"); | ||
185 | 160 | ||
186 | cci_disable_port_by_cpu(mpidr); | 161 | cci_disable_port_by_cpu(mpidr); |
187 | 162 | ||
@@ -197,26 +172,7 @@ static void tc2_pm_down(u64 residency) | |||
197 | 172 | ||
198 | arch_spin_unlock(&tc2_pm_lock); | 173 | arch_spin_unlock(&tc2_pm_lock); |
199 | 174 | ||
200 | /* | 175 | v7_exit_coherency_flush(louis); |
201 | * We need to disable and flush only the L1 cache. | ||
202 | * Let's do it in the safest possible way as above. | ||
203 | */ | ||
204 | asm volatile( | ||
205 | "str fp, [sp, #-4]! \n\t" | ||
206 | "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" | ||
207 | "bic r0, r0, #"__stringify(CR_C)" \n\t" | ||
208 | "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" | ||
209 | "isb \n\t" | ||
210 | "bl v7_flush_dcache_louis \n\t" | ||
211 | "clrex \n\t" | ||
212 | "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" | ||
213 | "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" | ||
214 | "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" | ||
215 | "isb \n\t" | ||
216 | "dsb \n\t" | ||
217 | "ldr fp, [sp], #4" | ||
218 | : : : "r0","r1","r2","r3","r4","r5","r6","r7", | ||
219 | "r9","r10","lr","memory"); | ||
220 | } | 176 | } |
221 | 177 | ||
222 | __mcpm_cpu_down(cpu, cluster); | 178 | __mcpm_cpu_down(cpu, cluster); |
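
Both the dcscb.c and tc2_pm.c call sites now share v7_exit_coherency_flush(), added elsewhere in this series (in asm/cacheflush.h), which packages exactly the sequence removed above; the level argument selects the flush routine. For reference, the steps it encapsulates:

    /*
     * Steps performed by v7_exit_coherency_flush(level), matching the
     * removed inline asm (level is the literal token "all" or "louis",
     * selecting v7_flush_dcache_all vs v7_flush_dcache_louis):
     *
     *  1. clear SCTLR.C    - stop further data-cache allocation
     *  2. flush dcache     - whole cache ("all") or to LoUIS ("louis")
     *  3. clrex            - drop any exclusive monitor state
     *  4. clear ACTLR.SMP  - leave SMP coherency
     *  5. isb; dsb         - synchronize, with no memory access
     *                        (including the stack) in between
     */
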
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cd2c88e7a8f7..1f8fed94c2a4 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig | |||
@@ -952,3 +952,9 @@ config ARCH_HAS_BARRIERS | |||
952 | help | 952 | help |
953 | This option allows the use of custom mandatory barriers | 953 | This option allows the use of custom mandatory barriers |
954 | included via the mach/barriers.h file. | 954 | included via the mach/barriers.h file. |
955 | |||
956 | config ARCH_SUPPORTS_BIG_ENDIAN | ||
957 | bool | ||
958 | help | ||
959 | This option specifies that the architecture can support | ||
960 | big-endian operation. | ||
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 80741992a9fc..3815a8262af0 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S | |||
@@ -38,9 +38,8 @@ ENTRY(v6_early_abort) | |||
38 | bne do_DataAbort | 38 | bne do_DataAbort |
39 | bic r1, r1, #1 << 11 @ clear bit 11 of FSR | 39 | bic r1, r1, #1 << 11 @ clear bit 11 of FSR |
40 | ldr r3, [r4] @ read aborted ARM instruction | 40 | ldr r3, [r4] @ read aborted ARM instruction |
41 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 41 | ARM_BE8(rev r3, r3) |
42 | rev r3, r3 | 42 | |
43 | #endif | ||
44 | do_ldrd_abort tmp=ip, insn=r3 | 43 | do_ldrd_abort tmp=ip, insn=r3 |
45 | tst r3, #1 << 20 @ L = 0 -> write | 44 | tst r3, #1 << 20 @ L = 0 -> write |
46 | orreq r1, r1, #1 << 11 @ yes. | 45 | orreq r1, r1, #1 << 11 @ yes. |
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b89078..924036473b16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/cp15.h> | 25 | #include <asm/cp15.h> |
26 | #include <asm/system_info.h> | 26 | #include <asm/system_info.h> |
27 | #include <asm/unaligned.h> | 27 | #include <asm/unaligned.h> |
28 | #include <asm/opcodes.h> | ||
28 | 29 | ||
29 | #include "fault.h" | 30 | #include "fault.h" |
30 | 31 | ||
@@ -762,21 +763,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |||
762 | if (thumb_mode(regs)) { | 763 | if (thumb_mode(regs)) { |
763 | u16 *ptr = (u16 *)(instrptr & ~1); | 764 | u16 *ptr = (u16 *)(instrptr & ~1); |
764 | fault = probe_kernel_address(ptr, tinstr); | 765 | fault = probe_kernel_address(ptr, tinstr); |
766 | tinstr = __mem_to_opcode_thumb16(tinstr); | ||
765 | if (!fault) { | 767 | if (!fault) { |
766 | if (cpu_architecture() >= CPU_ARCH_ARMv7 && | 768 | if (cpu_architecture() >= CPU_ARCH_ARMv7 && |
767 | IS_T32(tinstr)) { | 769 | IS_T32(tinstr)) { |
768 | /* Thumb-2 32-bit */ | 770 | /* Thumb-2 32-bit */ |
769 | u16 tinst2 = 0; | 771 | u16 tinst2 = 0; |
770 | fault = probe_kernel_address(ptr + 1, tinst2); | 772 | fault = probe_kernel_address(ptr + 1, tinst2); |
771 | instr = (tinstr << 16) | tinst2; | 773 | tinst2 = __mem_to_opcode_thumb16(tinst2); |
774 | instr = __opcode_thumb32_compose(tinstr, tinst2); | ||
772 | thumb2_32b = 1; | 775 | thumb2_32b = 1; |
773 | } else { | 776 | } else { |
774 | isize = 2; | 777 | isize = 2; |
775 | instr = thumb2arm(tinstr); | 778 | instr = thumb2arm(tinstr); |
776 | } | 779 | } |
777 | } | 780 | } |
778 | } else | 781 | } else { |
779 | fault = probe_kernel_address(instrptr, instr); | 782 | fault = probe_kernel_address(instrptr, instr); |
783 | instr = __mem_to_opcode_arm(instr); | ||
784 | } | ||
780 | 785 | ||
781 | if (fault) { | 786 | if (fault) { |
782 | type = TYPE_FAULT; | 787 | type = TYPE_FAULT; |
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 644d91f73b00..79f8b39801a8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c | |||
@@ -707,7 +707,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | |||
707 | void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | 707 | void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, |
708 | gfp_t gfp, struct dma_attrs *attrs) | 708 | gfp_t gfp, struct dma_attrs *attrs) |
709 | { | 709 | { |
710 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | 710 | pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); |
711 | void *memory; | 711 | void *memory; |
712 | 712 | ||
713 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | 713 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) |
@@ -720,7 +720,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | |||
720 | static void *arm_coherent_dma_alloc(struct device *dev, size_t size, | 720 | static void *arm_coherent_dma_alloc(struct device *dev, size_t size, |
721 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) | 721 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) |
722 | { | 722 | { |
723 | pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); | 723 | pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); |
724 | void *memory; | 724 | void *memory; |
725 | 725 | ||
726 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | 726 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) |
diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9d285626bc7d..312e15e6d00b 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c | |||
@@ -9,8 +9,13 @@ int fixup_exception(struct pt_regs *regs) | |||
9 | const struct exception_table_entry *fixup; | 9 | const struct exception_table_entry *fixup; |
10 | 10 | ||
11 | fixup = search_exception_tables(instruction_pointer(regs)); | 11 | fixup = search_exception_tables(instruction_pointer(regs)); |
12 | if (fixup) | 12 | if (fixup) { |
13 | regs->ARM_pc = fixup->fixup; | 13 | regs->ARM_pc = fixup->fixup; |
14 | #ifdef CONFIG_THUMB2_KERNEL | ||
15 | /* Clear the IT state to avoid nasty surprises in the fixup */ | ||
16 | regs->ARM_cpsr &= ~PSR_IT_MASK; | ||
17 | #endif | ||
18 | } | ||
14 | 19 | ||
15 | return fixup != NULL; | 20 | return fixup != NULL; |
16 | } | 21 | } |
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac27095..8e0e52eb76b5 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <asm/system_info.h> | 10 | #include <asm/system_info.h> |
11 | 11 | ||
12 | pgd_t *idmap_pgd; | 12 | pgd_t *idmap_pgd; |
13 | phys_addr_t (*arch_virt_to_idmap) (unsigned long x); | ||
13 | 14 | ||
14 | #ifdef CONFIG_ARM_LPAE | 15 | #ifdef CONFIG_ARM_LPAE |
15 | static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, | 16 | static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, |
@@ -67,8 +68,9 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, | |||
67 | unsigned long addr, end; | 68 | unsigned long addr, end; |
68 | unsigned long next; | 69 | unsigned long next; |
69 | 70 | ||
70 | addr = virt_to_phys(text_start); | 71 | addr = virt_to_idmap(text_start); |
71 | end = virt_to_phys(text_end); | 72 | end = virt_to_idmap(text_end); |
73 | pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end); | ||
72 | 74 | ||
73 | prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; | 75 | prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; |
74 | 76 | ||
@@ -90,8 +92,6 @@ static int __init init_static_idmap(void) | |||
90 | if (!idmap_pgd) | 92 | if (!idmap_pgd) |
91 | return -ENOMEM; | 93 | return -ENOMEM; |
92 | 94 | ||
93 | pr_info("Setting up static identity map for 0x%p - 0x%p\n", | ||
94 | __idmap_text_start, __idmap_text_end); | ||
95 | identity_mapping_add(idmap_pgd, __idmap_text_start, | 95 | identity_mapping_add(idmap_pgd, __idmap_text_start, |
96 | __idmap_text_end, 0); | 96 | __idmap_text_end, 0); |
97 | 97 | ||
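
The arch_virt_to_idmap hook introduced above lets a platform substitute its own virtual-to-idmap translation (for instance when RAM is aliased at a different physical address for booting); when the hook is unset, virt_to_idmap() is expected to fall back to virt_to_phys(). A sketch of that fallback, assumed to match the asm/memory.h definition in this series:

    /*
     * Assumed fallback behaviour of virt_to_idmap(): use the platform
     * hook when installed, otherwise the ordinary linear translation.
     */
    extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);

    static inline phys_addr_t my_virt_to_idmap(unsigned long x)
    {
            if (arch_virt_to_idmap)
                    return arch_virt_to_idmap(x);
            return virt_to_phys((void *)x);
    }
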
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 0c6356255fe3..d27158c38eb0 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c | |||
@@ -202,13 +202,11 @@ int valid_phys_addr_range(phys_addr_t addr, size_t size) | |||
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * We don't use supersection mappings for mmap() on /dev/mem, which | 205 | * Do not allow /dev/mem mappings beyond the supported physical range. |
206 | * means that we can't map the memory area above the 4G barrier into | ||
207 | * userspace. | ||
208 | */ | 206 | */ |
209 | int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) | 207 | int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) |
210 | { | 208 | { |
211 | return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); | 209 | return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT)); |
212 | } | 210 | } |
213 | 211 | ||
214 | #ifdef CONFIG_STRICT_DEVMEM | 212 | #ifdef CONFIG_STRICT_DEVMEM |
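
The old check hard-coded 0x00100000 pages, i.e. a 4GB ceiling (0x100000 pages of 4KB), while the new bound derives the limit from PHYS_MASK, so LPAE kernels can map /dev/mem across the full supported physical range. A worked example (the LPAE PHYS_MASK value is an assumption here):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Worked example of the new bound, with 4K pages:
     *   non-LPAE: PHYS_MASK = 0xffffffff     -> limit = 1M pfns   (4GB)
     *   LPAE:     PHYS_MASK = (1ULL<<40) - 1 -> limit = 256M pfns (1TB)
     */
    #define PAGE_SHIFT      12
    #define PHYS_MASK       ((1ULL << 40) - 1)      /* assumed LPAE value */

    static bool my_valid_mmap_range(unsigned long pfn, uint64_t size)
    {
            return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT));
    }
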
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b1d17eeb59b8..78eeeca78f5a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #include <asm/highmem.h> | 28 | #include <asm/highmem.h> |
29 | #include <asm/system_info.h> | 29 | #include <asm/system_info.h> |
30 | #include <asm/traps.h> | 30 | #include <asm/traps.h> |
31 | #include <asm/procinfo.h> | ||
32 | #include <asm/memory.h> | ||
31 | 33 | ||
32 | #include <asm/mach/arch.h> | 34 | #include <asm/mach/arch.h> |
33 | #include <asm/mach/map.h> | 35 | #include <asm/mach/map.h> |
@@ -1315,6 +1317,86 @@ static void __init map_lowmem(void) | |||
1315 | } | 1317 | } |
1316 | } | 1318 | } |
1317 | 1319 | ||
1320 | #ifdef CONFIG_ARM_LPAE | ||
1321 | /* | ||
1322 | * early_paging_init() recreates boot time page table setup, allowing machines | ||
1323 | * to switch over to a high (>4G) address space on LPAE systems | ||
1324 | */ | ||
1325 | void __init early_paging_init(const struct machine_desc *mdesc, | ||
1326 | struct proc_info_list *procinfo) | ||
1327 | { | ||
1328 | pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; | ||
1329 | unsigned long map_start, map_end; | ||
1330 | pgd_t *pgd0, *pgdk; | ||
1331 | pud_t *pud0, *pudk, *pud_start; | ||
1332 | pmd_t *pmd0, *pmdk; | ||
1333 | phys_addr_t phys; | ||
1334 | int i; | ||
1335 | |||
1336 | if (!(mdesc->init_meminfo)) | ||
1337 | return; | ||
1338 | |||
1339 | /* remap kernel code and data */ | ||
1340 | map_start = init_mm.start_code; | ||
1341 | map_end = init_mm.brk; | ||
1342 | |||
1343 | /* get a handle on things... */ | ||
1344 | pgd0 = pgd_offset_k(0); | ||
1345 | pud_start = pud0 = pud_offset(pgd0, 0); | ||
1346 | pmd0 = pmd_offset(pud0, 0); | ||
1347 | |||
1348 | pgdk = pgd_offset_k(map_start); | ||
1349 | pudk = pud_offset(pgdk, map_start); | ||
1350 | pmdk = pmd_offset(pudk, map_start); | ||
1351 | |||
1352 | mdesc->init_meminfo(); | ||
1353 | |||
1354 | /* Run the patch stub to update the constants */ | ||
1355 | fixup_pv_table(&__pv_table_begin, | ||
1356 | (&__pv_table_end - &__pv_table_begin) << 2); | ||
1357 | |||
1358 | /* | ||
1359 | * Cache cleaning operations for self-modifying code | ||
1360 | * We should clean the entries by MVA but running a | ||
1361 | * for loop over every pv_table entry pointer would | ||
1362 | * just complicate the code. | ||
1363 | */ | ||
1364 | flush_cache_louis(); | ||
1365 | dsb(); | ||
1366 | isb(); | ||
1367 | |||
1368 | /* remap level 1 table */ | ||
1369 | for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { | ||
1370 | set_pud(pud0, | ||
1371 | __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); | ||
1372 | pmd0 += PTRS_PER_PMD; | ||
1373 | } | ||
1374 | |||
1375 | /* remap pmds for kernel mapping */ | ||
1376 | phys = __pa(map_start) & PMD_MASK; | ||
1377 | do { | ||
1378 | *pmdk++ = __pmd(phys | pmdprot); | ||
1379 | phys += PMD_SIZE; | ||
1380 | } while (phys < map_end); | ||
1381 | |||
1382 | flush_cache_all(); | ||
1383 | cpu_switch_mm(pgd0, &init_mm); | ||
1384 | cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); | ||
1385 | local_flush_bp_all(); | ||
1386 | local_flush_tlb_all(); | ||
1387 | } | ||
1388 | |||
1389 | #else | ||
1390 | |||
1391 | void __init early_paging_init(const struct machine_desc *mdesc, | ||
1392 | struct proc_info_list *procinfo) | ||
1393 | { | ||
1394 | if (mdesc->init_meminfo) | ||
1395 | mdesc->init_meminfo(); | ||
1396 | } | ||
1397 | |||
1398 | #endif | ||
1399 | |||
1318 | /* | 1400 | /* |
1319 | * paging_init() sets up the page tables, initialises the zone memory | 1401 | * paging_init() sets up the page tables, initialises the zone memory |
1320 | * maps, and sets up the zero page, bad page and bad page tables. | 1402 | * maps, and sets up the zero page, bad page and bad page tables. |
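early_paging_init() is a no-op unless the machine descriptor supplies an init_meminfo hook, so only opted-in LPAE platforms pay for the page-table rewrite. A hypothetical descriptor wiring, keystone-style; every example_* name and constant below is an assumption for illustration, not part of this series:

/* Hypothetical board whose DDR is aliased both below and above 4GB. */
static phys_addr_t example_virt_to_idmap(unsigned long x)
{
	/* The identity map must stay below 4GB, so use the low alias. */
	return (phys_addr_t)x - PAGE_OFFSET + EXAMPLE_LOW_PHYS_START;
}

static void __init example_init_meminfo(void)
{
	/* Retarget the linear map at the high alias; early_paging_init()
	 * then re-runs the P2V patch stub and rewrites the page tables. */
	arch_virt_to_idmap = example_virt_to_idmap;
	/* ...switch the physical offset to EXAMPLE_HIGH_PHYS_START... */
}

MACHINE_START(EXAMPLE, "example-lpae-board")
	.init_meminfo	= example_init_meminfo,
MACHINE_END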
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 34d4ab217bab..5c668b7a31f9 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c | |||
@@ -296,6 +296,15 @@ void __init sanity_check_meminfo(void) | |||
296 | } | 296 | } |
297 | 297 | ||
298 | /* | 298 | /* |
299 | * early_paging_init() recreates boot time page table setup, allowing machines | ||
300 | * to switch over to a high (>4G) address space on LPAE systems | ||
301 | */ | ||
302 | void __init early_paging_init(const struct machine_desc *mdesc, | ||
303 | struct proc_info_list *procinfo) | ||
304 | { | ||
305 | } | ||
306 | |||
307 | /* | ||
299 | * paging_init() sets up the page tables, initialises the zone memory | 308 | * paging_init() sets up the page tables, initialises the zone memory |
300 | * maps, and sets up the zero page, bad page and bad page tables. | 309 | * maps, and sets up the zero page, bad page and bad page tables. |
301 | */ | 310 | */ |
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 1128064fddcb..45dc29f85d56 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S | |||
@@ -220,9 +220,7 @@ __v6_setup: | |||
220 | #endif /* CONFIG_MMU */ | 220 | #endif /* CONFIG_MMU */ |
221 | adr r5, v6_crval | 221 | adr r5, v6_crval |
222 | ldmia r5, {r5, r6} | 222 | ldmia r5, {r5, r6} |
223 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 223 | ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables |
224 | orr r6, r6, #1 << 25 @ big-endian page tables | ||
225 | #endif | ||
226 | mrc p15, 0, r0, c1, c0, 0 @ read control register | 224 | mrc p15, 0, r0, c1, c0, 0 @ read control register |
227 | bic r0, r0, r5 @ clear bits them | 225 | bic r0, r0, r5 @ clear bits them |
228 | orr r0, r0, r6 @ set them | 226 | orr r0, r0, r6 @ set them |
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c63d9bdee51e..60920f62fdf5 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S | |||
@@ -367,9 +367,7 @@ __v7_setup: | |||
367 | #endif | 367 | #endif |
368 | adr r5, v7_crval | 368 | adr r5, v7_crval |
369 | ldmia r5, {r5, r6} | 369 | ldmia r5, {r5, r6} |
370 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 370 | ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables |
371 | orr r6, r6, #1 << 25 @ big-endian page tables | ||
372 | #endif | ||
373 | #ifdef CONFIG_SWP_EMULATE | 371 | #ifdef CONFIG_SWP_EMULATE |
374 | orr r5, r5, #(1 << 10) @ set SW bit in "clear" | 372 | orr r5, r5, #(1 << 10) @ set SW bit in "clear" |
375 | bic r6, r6, #(1 << 10) @ clear it in "mmuset" | 373 | bic r6, r6, #(1 << 10) @ clear it in "mmuset" |
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 99b44e0e8d86..9ed155ad0f97 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/if_vlan.h> | 19 | #include <linux/if_vlan.h> |
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/hwcap.h> | 21 | #include <asm/hwcap.h> |
22 | #include <asm/opcodes.h> | ||
22 | 23 | ||
23 | #include "bpf_jit_32.h" | 24 | #include "bpf_jit_32.h" |
24 | 25 | ||
@@ -113,8 +114,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor) | |||
113 | 114 | ||
114 | static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) | 115 | static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) |
115 | { | 116 | { |
117 | inst |= (cond << 28); | ||
118 | inst = __opcode_to_mem_arm(inst); | ||
119 | |||
116 | if (ctx->target != NULL) | 120 | if (ctx->target != NULL) |
117 | ctx->target[ctx->idx] = inst | (cond << 28); | 121 | ctx->target[ctx->idx] = inst; |
118 | 122 | ||
119 | ctx->idx++; | 123 | ctx->idx++; |
120 | } | 124 | } |
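The _emit() change folds the condition bits in first and then converts the opcode to memory order, which matters on BE8 where data accesses are big-endian but instruction fetches remain little-endian. A conceptual model of that conversion (the kernel's real macro lives in asm/opcodes.h; this stand-alone version is only an illustration):

#include <stdint.h>

#ifdef CONFIG_CPU_ENDIAN_BE8
/* BE8: instructions are fetched little-endian, so a JIT storing them
 * through (big-endian) data writes must byte-swap each word. */
static inline uint32_t opcode_to_mem_arm(uint32_t insn)
{
	return __builtin_bswap32(insn);	/* swab32() in kernel terms */
}
#else
static inline uint32_t opcode_to_mem_arm(uint32_t insn)
{
	return insn;			/* identity on little-endian kernels */
}
#endif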
diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index 2677bc3762d7..40f27e52de75 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S | |||
@@ -10,6 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/linkage.h> | 11 | #include <linux/linkage.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <asm/assembler.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * Realview/Versatile Express specific entry point for secondary CPUs. | 16 | * Realview/Versatile Express specific entry point for secondary CPUs. |
@@ -17,6 +18,7 @@ | |||
17 | * until we're ready for them to initialise. | 18 | * until we're ready for them to initialise. |
18 | */ | 19 | */ |
19 | ENTRY(versatile_secondary_startup) | 20 | ENTRY(versatile_secondary_startup) |
21 | ARM_BE8(setend be) | ||
20 | mrc p15, 0, r0, c0, c0, 5 | 22 | mrc p15, 0, r0, c0, c0, 5 |
21 | bic r0, #0xff000000 | 23 | bic r0, #0xff000000 |
22 | adr r4, 1f | 24 | adr r4, 1f |
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 52b8f40b1c73..2f37e1d6cb45 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c | |||
@@ -642,9 +642,9 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, | |||
642 | static int vfp_hotplug(struct notifier_block *b, unsigned long action, | 642 | static int vfp_hotplug(struct notifier_block *b, unsigned long action, |
643 | void *hcpu) | 643 | void *hcpu) |
644 | { | 644 | { |
645 | if (action == CPU_DYING || action == CPU_DYING_FROZEN) { | 645 | if (action == CPU_DYING || action == CPU_DYING_FROZEN) |
646 | vfp_force_reload((long)hcpu, current_thread_info()); | 646 | vfp_current_hw_state[(long)hcpu] = NULL; |
647 | } else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) | 647 | else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) |
648 | vfp_enable(NULL); | 648 | vfp_enable(NULL); |
649 | return NOTIFY_OK; | 649 | return NOTIFY_OK; |
650 | } | 650 | } |
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 836364468571..01de5aaa3edc 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h | |||
@@ -126,20 +126,6 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) | |||
126 | return oldval; | 126 | return oldval; |
127 | } | 127 | } |
128 | 128 | ||
129 | static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) | ||
130 | { | ||
131 | unsigned long tmp, tmp2; | ||
132 | |||
133 | asm volatile("// atomic_clear_mask\n" | ||
134 | "1: ldxr %0, %2\n" | ||
135 | " bic %0, %0, %3\n" | ||
136 | " stxr %w1, %0, %2\n" | ||
137 | " cbnz %w1, 1b" | ||
138 | : "=&r" (tmp), "=&r" (tmp2), "+Q" (*addr) | ||
139 | : "Ir" (mask) | ||
140 | : "cc"); | ||
141 | } | ||
142 | |||
143 | #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) | 129 | #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) |
144 | 130 | ||
145 | static inline int __atomic_add_unless(atomic_t *v, int a, int u) | 131 | static inline int __atomic_add_unless(atomic_t *v, int a, int u) |
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cbfacf7fb438..6a0a9b132d7a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
28 | 28 | ||
29 | #include <asm/debug-monitors.h> | 29 | #include <asm/debug-monitors.h> |
30 | #include <asm/local.h> | ||
31 | #include <asm/cputype.h> | 30 | #include <asm/cputype.h> |
32 | #include <asm/system_misc.h> | 31 | #include <asm/system_misc.h> |
33 | 32 | ||
@@ -89,8 +88,8 @@ early_param("nodebugmon", early_debug_disable); | |||
89 | * Keep track of debug users on each core. | 88 | * Keep track of debug users on each core. |
90 | * The ref counts are per-cpu so we use a local_t type. | 89 | * The ref counts are per-cpu so we use a local_t type. |
91 | */ | 90 | */ |
92 | static DEFINE_PER_CPU(local_t, mde_ref_count); | 91 | static DEFINE_PER_CPU(int, mde_ref_count); |
93 | static DEFINE_PER_CPU(local_t, kde_ref_count); | 92 | static DEFINE_PER_CPU(int, kde_ref_count); |
94 | 93 | ||
95 | void enable_debug_monitors(enum debug_el el) | 94 | void enable_debug_monitors(enum debug_el el) |
96 | { | 95 | { |
@@ -98,11 +97,11 @@ void enable_debug_monitors(enum debug_el el) | |||
98 | 97 | ||
99 | WARN_ON(preemptible()); | 98 | WARN_ON(preemptible()); |
100 | 99 | ||
101 | if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) | 100 | if (this_cpu_inc_return(mde_ref_count) == 1) |
102 | enable = DBG_MDSCR_MDE; | 101 | enable = DBG_MDSCR_MDE; |
103 | 102 | ||
104 | if (el == DBG_ACTIVE_EL1 && | 103 | if (el == DBG_ACTIVE_EL1 && |
105 | local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) | 104 | this_cpu_inc_return(kde_ref_count) == 1) |
106 | enable |= DBG_MDSCR_KDE; | 105 | enable |= DBG_MDSCR_KDE; |
107 | 106 | ||
108 | if (enable && debug_enabled) { | 107 | if (enable && debug_enabled) { |
@@ -118,11 +117,11 @@ void disable_debug_monitors(enum debug_el el) | |||
118 | 117 | ||
119 | WARN_ON(preemptible()); | 118 | WARN_ON(preemptible()); |
120 | 119 | ||
121 | if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) | 120 | if (this_cpu_dec_return(mde_ref_count) == 0) |
122 | disable = ~DBG_MDSCR_MDE; | 121 | disable = ~DBG_MDSCR_MDE; |
123 | 122 | ||
124 | if (el == DBG_ACTIVE_EL1 && | 123 | if (el == DBG_ACTIVE_EL1 && |
125 | local_dec_and_test(&__get_cpu_var(kde_ref_count))) | 124 | this_cpu_dec_return(kde_ref_count) == 0) |
126 | disable &= ~DBG_MDSCR_KDE; | 125 | disable &= ~DBG_MDSCR_KDE; |
127 | 126 | ||
128 | if (disable) { | 127 | if (disable) { |
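The conversion pattern used throughout this file: a local_t accessed through __get_cpu_var() becomes a plain per-cpu int driven by this_cpu ops, which are preemption-safe and generate better code on arm64. Reduced to a sketch, with the enable/disable bodies elided:

static DEFINE_PER_CPU(int, ref_count);

static void get_ref(void)
{
	if (this_cpu_inc_return(ref_count) == 1)
		;	/* first user on this CPU: switch the facility on */
}

static void put_ref(void)
{
	if (this_cpu_dec_return(ref_count) == 0)
		;	/* last user on this CPU: switch it off */
}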
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 329218ca9ffb..ff516f6691e4 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c | |||
@@ -184,14 +184,14 @@ int arch_install_hw_breakpoint(struct perf_event *bp) | |||
184 | /* Breakpoint */ | 184 | /* Breakpoint */ |
185 | ctrl_reg = AARCH64_DBG_REG_BCR; | 185 | ctrl_reg = AARCH64_DBG_REG_BCR; |
186 | val_reg = AARCH64_DBG_REG_BVR; | 186 | val_reg = AARCH64_DBG_REG_BVR; |
187 | slots = __get_cpu_var(bp_on_reg); | 187 | slots = this_cpu_ptr(bp_on_reg); |
188 | max_slots = core_num_brps; | 188 | max_slots = core_num_brps; |
189 | reg_enable = !debug_info->bps_disabled; | 189 | reg_enable = !debug_info->bps_disabled; |
190 | } else { | 190 | } else { |
191 | /* Watchpoint */ | 191 | /* Watchpoint */ |
192 | ctrl_reg = AARCH64_DBG_REG_WCR; | 192 | ctrl_reg = AARCH64_DBG_REG_WCR; |
193 | val_reg = AARCH64_DBG_REG_WVR; | 193 | val_reg = AARCH64_DBG_REG_WVR; |
194 | slots = __get_cpu_var(wp_on_reg); | 194 | slots = this_cpu_ptr(wp_on_reg); |
195 | max_slots = core_num_wrps; | 195 | max_slots = core_num_wrps; |
196 | reg_enable = !debug_info->wps_disabled; | 196 | reg_enable = !debug_info->wps_disabled; |
197 | } | 197 | } |
@@ -230,12 +230,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) | |||
230 | if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { | 230 | if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { |
231 | /* Breakpoint */ | 231 | /* Breakpoint */ |
232 | base = AARCH64_DBG_REG_BCR; | 232 | base = AARCH64_DBG_REG_BCR; |
233 | slots = __get_cpu_var(bp_on_reg); | 233 | slots = this_cpu_ptr(bp_on_reg); |
234 | max_slots = core_num_brps; | 234 | max_slots = core_num_brps; |
235 | } else { | 235 | } else { |
236 | /* Watchpoint */ | 236 | /* Watchpoint */ |
237 | base = AARCH64_DBG_REG_WCR; | 237 | base = AARCH64_DBG_REG_WCR; |
238 | slots = __get_cpu_var(wp_on_reg); | 238 | slots = this_cpu_ptr(wp_on_reg); |
239 | max_slots = core_num_wrps; | 239 | max_slots = core_num_wrps; |
240 | } | 240 | } |
241 | 241 | ||
@@ -505,11 +505,11 @@ static void toggle_bp_registers(int reg, enum debug_el el, int enable) | |||
505 | 505 | ||
506 | switch (reg) { | 506 | switch (reg) { |
507 | case AARCH64_DBG_REG_BCR: | 507 | case AARCH64_DBG_REG_BCR: |
508 | slots = __get_cpu_var(bp_on_reg); | 508 | slots = this_cpu_ptr(bp_on_reg); |
509 | max_slots = core_num_brps; | 509 | max_slots = core_num_brps; |
510 | break; | 510 | break; |
511 | case AARCH64_DBG_REG_WCR: | 511 | case AARCH64_DBG_REG_WCR: |
512 | slots = __get_cpu_var(wp_on_reg); | 512 | slots = this_cpu_ptr(wp_on_reg); |
513 | max_slots = core_num_wrps; | 513 | max_slots = core_num_wrps; |
514 | break; | 514 | break; |
515 | default: | 515 | default: |
@@ -546,7 +546,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, | |||
546 | struct debug_info *debug_info; | 546 | struct debug_info *debug_info; |
547 | struct arch_hw_breakpoint_ctrl ctrl; | 547 | struct arch_hw_breakpoint_ctrl ctrl; |
548 | 548 | ||
549 | slots = (struct perf_event **)__get_cpu_var(bp_on_reg); | 549 | slots = this_cpu_ptr(bp_on_reg); |
550 | addr = instruction_pointer(regs); | 550 | addr = instruction_pointer(regs); |
551 | debug_info = ¤t->thread.debug; | 551 | debug_info = ¤t->thread.debug; |
552 | 552 | ||
@@ -596,7 +596,7 @@ unlock: | |||
596 | user_enable_single_step(current); | 596 | user_enable_single_step(current); |
597 | } else { | 597 | } else { |
598 | toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); | 598 | toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); |
599 | kernel_step = &__get_cpu_var(stepping_kernel_bp); | 599 | kernel_step = this_cpu_ptr(&stepping_kernel_bp); |
600 | 600 | ||
601 | if (*kernel_step != ARM_KERNEL_STEP_NONE) | 601 | if (*kernel_step != ARM_KERNEL_STEP_NONE) |
602 | return 0; | 602 | return 0; |
@@ -623,7 +623,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, | |||
623 | struct arch_hw_breakpoint *info; | 623 | struct arch_hw_breakpoint *info; |
624 | struct arch_hw_breakpoint_ctrl ctrl; | 624 | struct arch_hw_breakpoint_ctrl ctrl; |
625 | 625 | ||
626 | slots = (struct perf_event **)__get_cpu_var(wp_on_reg); | 626 | slots = this_cpu_ptr(wp_on_reg); |
627 | debug_info = ¤t->thread.debug; | 627 | debug_info = ¤t->thread.debug; |
628 | 628 | ||
629 | for (i = 0; i < core_num_wrps; ++i) { | 629 | for (i = 0; i < core_num_wrps; ++i) { |
@@ -698,7 +698,7 @@ unlock: | |||
698 | user_enable_single_step(current); | 698 | user_enable_single_step(current); |
699 | } else { | 699 | } else { |
700 | toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); | 700 | toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); |
701 | kernel_step = &__get_cpu_var(stepping_kernel_bp); | 701 | kernel_step = this_cpu_ptr(&stepping_kernel_bp); |
702 | 702 | ||
703 | if (*kernel_step != ARM_KERNEL_STEP_NONE) | 703 | if (*kernel_step != ARM_KERNEL_STEP_NONE) |
704 | return 0; | 704 | return 0; |
@@ -722,7 +722,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) | |||
722 | struct debug_info *debug_info = ¤t->thread.debug; | 722 | struct debug_info *debug_info = ¤t->thread.debug; |
723 | int handled_exception = 0, *kernel_step; | 723 | int handled_exception = 0, *kernel_step; |
724 | 724 | ||
725 | kernel_step = &__get_cpu_var(stepping_kernel_bp); | 725 | kernel_step = this_cpu_ptr(&stepping_kernel_bp); |
726 | 726 | ||
727 | /* | 727 | /* |
728 | * Called from single-step exception handler. | 728 | * Called from single-step exception handler. |
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 5d14470452ac..0e63c98d224c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c | |||
@@ -1044,7 +1044,7 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) | |||
1044 | */ | 1044 | */ |
1045 | regs = get_irq_regs(); | 1045 | regs = get_irq_regs(); |
1046 | 1046 | ||
1047 | cpuc = &__get_cpu_var(cpu_hw_events); | 1047 | cpuc = this_cpu_ptr(&cpu_hw_events); |
1048 | for (idx = 0; idx < cpu_pmu->num_events; ++idx) { | 1048 | for (idx = 0; idx < cpu_pmu->num_events; ++idx) { |
1049 | struct perf_event *event = cpuc->events[idx]; | 1049 | struct perf_event *event = cpuc->events[idx]; |
1050 | struct hw_perf_event *hwc; | 1050 | struct hw_perf_event *hwc; |
@@ -1258,7 +1258,7 @@ device_initcall(register_pmu_driver); | |||
1258 | 1258 | ||
1259 | static struct pmu_hw_events *armpmu_get_cpu_events(void) | 1259 | static struct pmu_hw_events *armpmu_get_cpu_events(void) |
1260 | { | 1260 | { |
1261 | return &__get_cpu_var(cpu_hw_events); | 1261 | return this_cpu_ptr(&cpu_hw_events); |
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | static void __init cpu_pmu_init(struct arm_pmu *armpmu) | 1264 | static void __init cpu_pmu_init(struct arm_pmu *armpmu) |
diff --git a/crypto/Kconfig b/crypto/Kconfig index 69ce573f1224..71f337aefa39 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -776,6 +776,22 @@ config CRYPTO_AES_ARM | |||
776 | 776 | ||
777 | See <http://csrc.nist.gov/encryption/aes/> for more information. | 777 | See <http://csrc.nist.gov/encryption/aes/> for more information. |
778 | 778 | ||
779 | config CRYPTO_AES_ARM_BS | ||
780 | tristate "Bit sliced AES using NEON instructions" | ||
781 | depends on ARM && KERNEL_MODE_NEON | ||
782 | select CRYPTO_ALGAPI | ||
783 | select CRYPTO_AES_ARM | ||
784 | select CRYPTO_ABLK_HELPER | ||
785 | help | ||
786 | Use a faster and more secure NEON-based implementation of AES in CBC, | ||
787 | CTR and XTS modes. | ||
788 | |||
789 | Bit-sliced AES gives around a 45% speedup on Cortex-A15 for CTR mode | ||
790 | and for XTS mode encryption; CBC and XTS mode decryption speedup is | ||
791 | around 25%. (CBC encryption speed is not affected by this driver.) | ||
792 | This implementation does not rely on any lookup tables so it is | ||
793 | believed to be invulnerable to cache timing attacks. | ||
794 | |||
779 | config CRYPTO_ANUBIS | 795 | config CRYPTO_ANUBIS |
780 | tristate "Anubis cipher algorithm" | 796 | tristate "Anubis cipher algorithm" |
781 | select CRYPTO_ALGAPI | 797 | select CRYPTO_ALGAPI |
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index bb5b90e8e768..b6739cb78e32 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c | |||
@@ -852,7 +852,7 @@ asmlinkage void __naked cci_enable_port_for_self(void) | |||
852 | 852 | ||
853 | /* Enable the CCI port */ | 853 | /* Enable the CCI port */ |
854 | " ldr r0, [r0, %[offsetof_port_phys]] \n" | 854 | " ldr r0, [r0, %[offsetof_port_phys]] \n" |
855 | " mov r3, #"__stringify(CCI_ENABLE_REQ)" \n" | 855 | " mov r3, %[cci_enable_req]\n" |
856 | " str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" | 856 | " str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" |
857 | 857 | ||
858 | /* poll the status reg for completion */ | 858 | /* poll the status reg for completion */ |
@@ -860,7 +860,7 @@ asmlinkage void __naked cci_enable_port_for_self(void) | |||
860 | " ldr r0, [r1] \n" | 860 | " ldr r0, [r1] \n" |
861 | " ldr r0, [r0, r1] @ cci_ctrl_base \n" | 861 | " ldr r0, [r0, r1] @ cci_ctrl_base \n" |
862 | "4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" | 862 | "4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" |
863 | " tst r1, #1 \n" | 863 | " tst r1, %[cci_control_status_bits] \n" |
864 | " bne 4b \n" | 864 | " bne 4b \n" |
865 | 865 | ||
866 | " mov r0, #0 \n" | 866 | " mov r0, #0 \n" |
@@ -873,6 +873,8 @@ asmlinkage void __naked cci_enable_port_for_self(void) | |||
873 | "7: .word cci_ctrl_phys - . \n" | 873 | "7: .word cci_ctrl_phys - . \n" |
874 | : : | 874 | : : |
875 | [sizeof_cpu_port] "i" (sizeof(cpu_port)), | 875 | [sizeof_cpu_port] "i" (sizeof(cpu_port)), |
876 | [cci_enable_req] "i" cpu_to_le32(CCI_ENABLE_REQ), | ||
877 | [cci_control_status_bits] "i" cpu_to_le32(1), | ||
876 | #ifndef __ARMEB__ | 878 | #ifndef __ARMEB__ |
877 | [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), | 879 | [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), |
878 | #else | 880 | #else |
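Replacing the __stringify()'d immediates with named "i" operands lets the constants be run through cpu_to_le32() at compile time, so the raw str issued by a big-endian (BE8) CPU still lays the bytes out the way the little-endian CCI registers expect. The shape of the pattern, as a kernel-style sketch (port_base and the value 2 are placeholders):

static void cci_port_write_example(void __iomem *port_base)
{
	asm volatile(
	"	mov	r3, %[req]		\n"
	"	str	r3, [%[base]]		\n"
	: : [req] "i" (cpu_to_le32(2)), [base] "r" (port_base)
	: "r3", "memory");
}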
diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index 8ea3b33d4b40..a90be34e4d5c 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/gpio.h> | 10 | #include <linux/gpio.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | 13 | #include <linux/io.h> | |
14 | #include <mach/hardware.h> | 14 | #include <mach/hardware.h> |
15 | #include <mach/irqs.h> | 15 | #include <mach/irqs.h> |
16 | 16 | ||
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d0e948084eaf..9031171c141b 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c | |||
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, | |||
253 | if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) | 253 | if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) |
254 | return -EINVAL; | 254 | return -EINVAL; |
255 | 255 | ||
256 | raw_spin_lock(&irq_controller_lock); | ||
256 | mask = 0xff << shift; | 257 | mask = 0xff << shift; |
257 | bit = gic_cpu_map[cpu] << shift; | 258 | bit = gic_cpu_map[cpu] << shift; |
258 | |||
259 | raw_spin_lock(&irq_controller_lock); | ||
260 | val = readl_relaxed(reg) & ~mask; | 259 | val = readl_relaxed(reg) & ~mask; |
261 | writel_relaxed(val | bit, reg); | 260 | writel_relaxed(val | bit, reg); |
262 | raw_spin_unlock(&irq_controller_lock); | 261 | raw_spin_unlock(&irq_controller_lock); |
@@ -652,7 +651,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic) | |||
652 | void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) | 651 | void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) |
653 | { | 652 | { |
654 | int cpu; | 653 | int cpu; |
655 | unsigned long map = 0; | 654 | unsigned long flags, map = 0; |
655 | |||
656 | raw_spin_lock_irqsave(&irq_controller_lock, flags); | ||
656 | 657 | ||
657 | /* Convert our logical CPU mask into a physical one. */ | 658 | /* Convert our logical CPU mask into a physical one. */ |
658 | for_each_cpu(cpu, mask) | 659 | for_each_cpu(cpu, mask) |
@@ -666,7 +667,149 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) | |||
666 | 667 | ||
667 | /* this always happens on GIC0 */ | 668 | /* this always happens on GIC0 */ |
668 | writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); | 669 | writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); |
670 | |||
671 | raw_spin_unlock_irqrestore(&irq_controller_lock, flags); | ||
672 | } | ||
673 | #endif | ||
674 | |||
675 | #ifdef CONFIG_BL_SWITCHER | ||
676 | /* | ||
677 | * gic_send_sgi - send a SGI directly to given CPU interface number | ||
678 | * | ||
679 | * cpu_id: the ID for the destination CPU interface | ||
680 | * irq: the IPI number to send an SGI for | ||
681 | */ | ||
682 | void gic_send_sgi(unsigned int cpu_id, unsigned int irq) | ||
683 | { | ||
684 | BUG_ON(cpu_id >= NR_GIC_CPU_IF); | ||
685 | cpu_id = 1 << cpu_id; | ||
686 | /* this always happens on GIC0 */ | ||
687 | writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); | ||
688 | } | ||
689 | |||
690 | /* | ||
691 | * gic_get_cpu_id - get the CPU interface ID for the specified CPU | ||
692 | * | ||
693 | * @cpu: the logical CPU number to get the GIC ID for. | ||
694 | * | ||
695 | * Return the CPU interface ID for the given logical CPU number, | ||
696 | * or -1 if the CPU number is too large or the interface ID is | ||
697 | * unknown (more than one bit set). | ||
698 | */ | ||
699 | int gic_get_cpu_id(unsigned int cpu) | ||
700 | { | ||
701 | unsigned int cpu_bit; | ||
702 | |||
703 | if (cpu >= NR_GIC_CPU_IF) | ||
704 | return -1; | ||
705 | cpu_bit = gic_cpu_map[cpu]; | ||
706 | if (cpu_bit & (cpu_bit - 1)) | ||
707 | return -1; | ||
708 | return __ffs(cpu_bit); | ||
669 | } | 709 | } |
710 | |||
711 | /* | ||
712 | * gic_migrate_target - migrate IRQs to another CPU interface | ||
713 | * | ||
714 | * @new_cpu_id: the CPU target ID to migrate IRQs to | ||
715 | * | ||
716 | * Migrate all peripheral interrupts with a target matching the current CPU | ||
717 | * to the interface corresponding to @new_cpu_id. The CPU interface mapping | ||
718 | * is also updated. Targets to other CPU interfaces are unchanged. | ||
719 | * This must be called with IRQs locally disabled. | ||
720 | */ | ||
721 | void gic_migrate_target(unsigned int new_cpu_id) | ||
722 | { | ||
723 | unsigned int cur_cpu_id, gic_irqs, gic_nr = 0; | ||
724 | void __iomem *dist_base; | ||
725 | int i, ror_val, cpu = smp_processor_id(); | ||
726 | u32 val, cur_target_mask, active_mask; | ||
727 | |||
728 | if (gic_nr >= MAX_GIC_NR) | ||
729 | BUG(); | ||
730 | |||
731 | dist_base = gic_data_dist_base(&gic_data[gic_nr]); | ||
732 | if (!dist_base) | ||
733 | return; | ||
734 | gic_irqs = gic_data[gic_nr].gic_irqs; | ||
735 | |||
736 | cur_cpu_id = __ffs(gic_cpu_map[cpu]); | ||
737 | cur_target_mask = 0x01010101 << cur_cpu_id; | ||
738 | ror_val = (cur_cpu_id - new_cpu_id) & 31; | ||
739 | |||
740 | raw_spin_lock(&irq_controller_lock); | ||
741 | |||
742 | /* Update the target interface for this logical CPU */ | ||
743 | gic_cpu_map[cpu] = 1 << new_cpu_id; | ||
744 | |||
745 | /* | ||
746 | * Find all the peripheral interrupts targeting the current | ||
747 | * CPU interface and migrate them to the new CPU interface. | ||
748 | * We skip DIST_TARGET 0 to 7 as they are read-only. | ||
749 | */ | ||
750 | for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) { | ||
751 | val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4); | ||
752 | active_mask = val & cur_target_mask; | ||
753 | if (active_mask) { | ||
754 | val &= ~active_mask; | ||
755 | val |= ror32(active_mask, ror_val); | ||
756 | writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4); | ||
757 | } | ||
758 | } | ||
759 | |||
760 | raw_spin_unlock(&irq_controller_lock); | ||
761 | |||
762 | /* | ||
763 | * Now let's migrate and clear any potential SGIs that might be | ||
764 | * pending for us (cur_cpu_id). Since GIC_DIST_SGI_PENDING_SET | ||
765 | * is a banked register, we can only forward the SGI using | ||
766 | * GIC_DIST_SOFTINT. The original SGI source is lost but Linux | ||
767 | * doesn't use that information anyway. | ||
768 | * | ||
769 | * For the same reason we do not adjust SGI source information | ||
770 | * for SGIs we previously sent to other CPUs either. | ||
771 | */ | ||
772 | for (i = 0; i < 16; i += 4) { | ||
773 | int j; | ||
774 | val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i); | ||
775 | if (!val) | ||
776 | continue; | ||
777 | writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i); | ||
778 | for (j = i; j < i + 4; j++) { | ||
779 | if (val & 0xff) | ||
780 | writel_relaxed((1 << (new_cpu_id + 16)) | j, | ||
781 | dist_base + GIC_DIST_SOFTINT); | ||
782 | val >>= 8; | ||
783 | } | ||
784 | } | ||
785 | } | ||
786 | |||
787 | /* | ||
788 | * gic_get_sgir_physaddr - get the physical address for the SGI register | ||
789 | * | ||
790 | * Return the physical address of the SGI register to be used | ||
791 | * by some early assembly code when the kernel is not yet available. | ||
792 | */ | ||
793 | static unsigned long gic_dist_physaddr; | ||
794 | |||
795 | unsigned long gic_get_sgir_physaddr(void) | ||
796 | { | ||
797 | if (!gic_dist_physaddr) | ||
798 | return 0; | ||
799 | return gic_dist_physaddr + GIC_DIST_SOFTINT; | ||
800 | } | ||
801 | |||
802 | void __init gic_init_physaddr(struct device_node *node) | ||
803 | { | ||
804 | struct resource res; | ||
805 | if (of_address_to_resource(node, 0, &res) == 0) { | ||
806 | gic_dist_physaddr = res.start; | ||
807 | pr_info("GIC physical location is %#lx\n", gic_dist_physaddr); | ||
808 | } | ||
809 | } | ||
810 | |||
811 | #else | ||
812 | #define gic_init_physaddr(node) do { } while (0) | ||
670 | #endif | 813 | #endif |
671 | 814 | ||
672 | static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, | 815 | static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, |
@@ -850,6 +993,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent) | |||
850 | percpu_offset = 0; | 993 | percpu_offset = 0; |
851 | 994 | ||
852 | gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); | 995 | gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); |
996 | if (!gic_cnt) | ||
997 | gic_init_physaddr(node); | ||
853 | 998 | ||
854 | if (parent) { | 999 | if (parent) { |
855 | irq = irq_of_parse_and_map(node, 0); | 1000 | irq = irq_of_parse_and_map(node, 0); |
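The rotation trick in gic_migrate_target() relies on the GIC target registers packing one interface bitmap per byte: rotating the whole word by (cur_cpu_id - new_cpu_id) & 31 bits moves every matched byte's bit from the old interface to the new one in a single operation. The arithmetic checks out in a stand-alone model, with ror32 re-implemented here for user space:

#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t w, unsigned int s)
{
	s &= 31;
	return s ? (w >> s) | (w << (32 - s)) : w;
}

int main(void)
{
	uint32_t cur_target_mask = 0x01010101;	/* four IRQs on interface 0 */
	int ror_val = (0 - 1) & 31;		/* cur_cpu_id 0 -> new_cpu_id 1 */

	/* Prints 02020202: every matched IRQ now targets interface 1. */
	printf("%08x\n", ror32(cur_target_mask, ror_val));
	return 0;
}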
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index c3785edc0e92..d135c76c4855 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c | |||
@@ -62,6 +62,7 @@ static unsigned int fmax = 515633; | |||
62 | * @signal_direction: input/out direction of bus signals can be indicated | 62 | * @signal_direction: input/out direction of bus signals can be indicated |
63 | * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock | 63 | * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock |
64 | * @busy_detect: true if busy detection on dat0 is supported | 64 | * @busy_detect: true if busy detection on dat0 is supported |
65 | * @pwrreg_nopower: bits in MMCIPOWER don't control ext. power supply | ||
65 | */ | 66 | */ |
66 | struct variant_data { | 67 | struct variant_data { |
67 | unsigned int clkreg; | 68 | unsigned int clkreg; |
@@ -76,6 +77,7 @@ struct variant_data { | |||
76 | bool signal_direction; | 77 | bool signal_direction; |
77 | bool pwrreg_clkgate; | 78 | bool pwrreg_clkgate; |
78 | bool busy_detect; | 79 | bool busy_detect; |
80 | bool pwrreg_nopower; | ||
79 | }; | 81 | }; |
80 | 82 | ||
81 | static struct variant_data variant_arm = { | 83 | static struct variant_data variant_arm = { |
@@ -109,6 +111,7 @@ static struct variant_data variant_u300 = { | |||
109 | .pwrreg_powerup = MCI_PWR_ON, | 111 | .pwrreg_powerup = MCI_PWR_ON, |
110 | .signal_direction = true, | 112 | .signal_direction = true, |
111 | .pwrreg_clkgate = true, | 113 | .pwrreg_clkgate = true, |
114 | .pwrreg_nopower = true, | ||
112 | }; | 115 | }; |
113 | 116 | ||
114 | static struct variant_data variant_nomadik = { | 117 | static struct variant_data variant_nomadik = { |
@@ -121,6 +124,7 @@ static struct variant_data variant_nomadik = { | |||
121 | .pwrreg_powerup = MCI_PWR_ON, | 124 | .pwrreg_powerup = MCI_PWR_ON, |
122 | .signal_direction = true, | 125 | .signal_direction = true, |
123 | .pwrreg_clkgate = true, | 126 | .pwrreg_clkgate = true, |
127 | .pwrreg_nopower = true, | ||
124 | }; | 128 | }; |
125 | 129 | ||
126 | static struct variant_data variant_ux500 = { | 130 | static struct variant_data variant_ux500 = { |
@@ -135,6 +139,7 @@ static struct variant_data variant_ux500 = { | |||
135 | .signal_direction = true, | 139 | .signal_direction = true, |
136 | .pwrreg_clkgate = true, | 140 | .pwrreg_clkgate = true, |
137 | .busy_detect = true, | 141 | .busy_detect = true, |
142 | .pwrreg_nopower = true, | ||
138 | }; | 143 | }; |
139 | 144 | ||
140 | static struct variant_data variant_ux500v2 = { | 145 | static struct variant_data variant_ux500v2 = { |
@@ -150,6 +155,7 @@ static struct variant_data variant_ux500v2 = { | |||
150 | .signal_direction = true, | 155 | .signal_direction = true, |
151 | .pwrreg_clkgate = true, | 156 | .pwrreg_clkgate = true, |
152 | .busy_detect = true, | 157 | .busy_detect = true, |
158 | .pwrreg_nopower = true, | ||
153 | }; | 159 | }; |
154 | 160 | ||
155 | static int mmci_card_busy(struct mmc_host *mmc) | 161 | static int mmci_card_busy(struct mmc_host *mmc) |
@@ -189,6 +195,21 @@ static int mmci_validate_data(struct mmci_host *host, | |||
189 | return 0; | 195 | return 0; |
190 | } | 196 | } |
191 | 197 | ||
198 | static void mmci_reg_delay(struct mmci_host *host) | ||
199 | { | ||
200 | /* | ||
201 | * According to the spec, at least three feedback clock cycles | ||
202 | * of max 52 MHz must pass between two writes to the MMCICLOCK reg. | ||
203 | * Three MCLK clock cycles must pass between two MMCIPOWER reg writes. | ||
204 | * Worst delay time during card init is at 100 kHz => 30 us. | ||
205 | * Worst delay time when up and running is at 25 MHz => 120 ns. | ||
206 | */ | ||
207 | if (host->cclk < 25000000) | ||
208 | udelay(30); | ||
209 | else | ||
210 | ndelay(120); | ||
211 | } | ||
212 | |||
192 | /* | 213 | /* |
193 | * This must be called with host->lock held | 214 | * This must be called with host->lock held |
194 | */ | 215 | */ |
@@ -1264,6 +1285,7 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) | |||
1264 | 1285 | ||
1265 | mmci_set_clkreg(host, ios->clock); | 1286 | mmci_set_clkreg(host, ios->clock); |
1266 | mmci_write_pwrreg(host, pwr); | 1287 | mmci_write_pwrreg(host, pwr); |
1288 | mmci_reg_delay(host); | ||
1267 | 1289 | ||
1268 | spin_unlock_irqrestore(&host->lock, flags); | 1290 | spin_unlock_irqrestore(&host->lock, flags); |
1269 | 1291 | ||
@@ -1510,23 +1532,6 @@ static int mmci_probe(struct amba_device *dev, | |||
1510 | mmc->f_max = min(host->mclk, fmax); | 1532 | mmc->f_max = min(host->mclk, fmax); |
1511 | dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max); | 1533 | dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max); |
1512 | 1534 | ||
1513 | host->pinctrl = devm_pinctrl_get(&dev->dev); | ||
1514 | if (IS_ERR(host->pinctrl)) { | ||
1515 | ret = PTR_ERR(host->pinctrl); | ||
1516 | goto clk_disable; | ||
1517 | } | ||
1518 | |||
1519 | host->pins_default = pinctrl_lookup_state(host->pinctrl, | ||
1520 | PINCTRL_STATE_DEFAULT); | ||
1521 | |||
1522 | /* enable pins to be muxed in and configured */ | ||
1523 | if (!IS_ERR(host->pins_default)) { | ||
1524 | ret = pinctrl_select_state(host->pinctrl, host->pins_default); | ||
1525 | if (ret) | ||
1526 | dev_warn(&dev->dev, "could not set default pins\n"); | ||
1527 | } else | ||
1528 | dev_warn(&dev->dev, "could not get default pinstate\n"); | ||
1529 | |||
1530 | /* Get regulators and the supported OCR mask */ | 1535 | /* Get regulators and the supported OCR mask */ |
1531 | mmc_regulator_get_supply(mmc); | 1536 | mmc_regulator_get_supply(mmc); |
1532 | if (!mmc->ocr_avail) | 1537 | if (!mmc->ocr_avail) |
@@ -1760,6 +1765,41 @@ static int mmci_resume(struct device *dev) | |||
1760 | #endif | 1765 | #endif |
1761 | 1766 | ||
1762 | #ifdef CONFIG_PM_RUNTIME | 1767 | #ifdef CONFIG_PM_RUNTIME |
1768 | static void mmci_save(struct mmci_host *host) | ||
1769 | { | ||
1770 | unsigned long flags; | ||
1771 | |||
1772 | if (host->variant->pwrreg_nopower) { | ||
1773 | spin_lock_irqsave(&host->lock, flags); | ||
1774 | |||
1775 | writel(0, host->base + MMCIMASK0); | ||
1776 | writel(0, host->base + MMCIDATACTRL); | ||
1777 | writel(0, host->base + MMCIPOWER); | ||
1778 | writel(0, host->base + MMCICLOCK); | ||
1779 | mmci_reg_delay(host); | ||
1780 | |||
1781 | spin_unlock_irqrestore(&host->lock, flags); | ||
1782 | } | ||
1783 | |||
1784 | } | ||
1785 | |||
1786 | static void mmci_restore(struct mmci_host *host) | ||
1787 | { | ||
1788 | unsigned long flags; | ||
1789 | |||
1790 | if (host->variant->pwrreg_nopower) { | ||
1791 | spin_lock_irqsave(&host->lock, flags); | ||
1792 | |||
1793 | writel(host->clk_reg, host->base + MMCICLOCK); | ||
1794 | writel(host->datactrl_reg, host->base + MMCIDATACTRL); | ||
1795 | writel(host->pwr_reg, host->base + MMCIPOWER); | ||
1796 | writel(MCI_IRQENABLE, host->base + MMCIMASK0); | ||
1797 | mmci_reg_delay(host); | ||
1798 | |||
1799 | spin_unlock_irqrestore(&host->lock, flags); | ||
1800 | } | ||
1801 | } | ||
1802 | |||
1763 | static int mmci_runtime_suspend(struct device *dev) | 1803 | static int mmci_runtime_suspend(struct device *dev) |
1764 | { | 1804 | { |
1765 | struct amba_device *adev = to_amba_device(dev); | 1805 | struct amba_device *adev = to_amba_device(dev); |
@@ -1767,6 +1807,8 @@ static int mmci_runtime_suspend(struct device *dev) | |||
1767 | 1807 | ||
1768 | if (mmc) { | 1808 | if (mmc) { |
1769 | struct mmci_host *host = mmc_priv(mmc); | 1809 | struct mmci_host *host = mmc_priv(mmc); |
1810 | pinctrl_pm_select_sleep_state(dev); | ||
1811 | mmci_save(host); | ||
1770 | clk_disable_unprepare(host->clk); | 1812 | clk_disable_unprepare(host->clk); |
1771 | } | 1813 | } |
1772 | 1814 | ||
@@ -1781,6 +1823,8 @@ static int mmci_runtime_resume(struct device *dev) | |||
1781 | if (mmc) { | 1823 | if (mmc) { |
1782 | struct mmci_host *host = mmc_priv(mmc); | 1824 | struct mmci_host *host = mmc_priv(mmc); |
1783 | clk_prepare_enable(host->clk); | 1825 | clk_prepare_enable(host->clk); |
1826 | mmci_restore(host); | ||
1827 | pinctrl_pm_select_default_state(dev); | ||
1784 | } | 1828 | } |
1785 | 1829 | ||
1786 | return 0; | 1830 | return 0; |
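The two constants in mmci_reg_delay() are just the three-cycle rule evaluated at the two extremes the comment names; the arithmetic is easy to confirm:

#include <stdio.h>

int main(void)
{
	/* Three feedback-clock cycles, expressed as wall-clock time. */
	printf("card init, 100 kHz: %.0f us\n", 3.0 / 100e3 * 1e6);	/* 30 us */
	printf("running, 25 MHz: %.0f ns\n", 3.0 / 25e6 * 1e9);		/* 120 ns */
	return 0;
}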
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 69080fab6375..168bc72f7a94 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h | |||
@@ -200,10 +200,6 @@ struct mmci_host { | |||
200 | struct sg_mapping_iter sg_miter; | 200 | struct sg_mapping_iter sg_miter; |
201 | unsigned int size; | 201 | unsigned int size; |
202 | 202 | ||
203 | /* pinctrl handles */ | ||
204 | struct pinctrl *pinctrl; | ||
205 | struct pinctrl_state *pins_default; | ||
206 | |||
207 | #ifdef CONFIG_DMA_ENGINE | 203 | #ifdef CONFIG_DMA_ENGINE |
208 | /* DMA stuff */ | 204 | /* DMA stuff */ |
209 | struct dma_chan *dma_current; | 205 | struct dma_chan *dma_current; |
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 682df0e1954a..63b5eff0a80f 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/resource.h> | 21 | #include <linux/resource.h> |
22 | #include <linux/regulator/consumer.h> | 22 | #include <linux/regulator/consumer.h> |
23 | 23 | ||
24 | #define AMBA_NR_IRQS 2 | 24 | #define AMBA_NR_IRQS 9 |
25 | #define AMBA_CID 0xb105f00d | 25 | #define AMBA_CID 0xb105f00d |
26 | 26 | ||
27 | struct clk; | 27 | struct clk; |
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 0e5d9ecdb2b6..cac496b1e279 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h | |||
@@ -31,6 +31,8 @@ | |||
31 | #define GIC_DIST_TARGET 0x800 | 31 | #define GIC_DIST_TARGET 0x800 |
32 | #define GIC_DIST_CONFIG 0xc00 | 32 | #define GIC_DIST_CONFIG 0xc00 |
33 | #define GIC_DIST_SOFTINT 0xf00 | 33 | #define GIC_DIST_SOFTINT 0xf00 |
34 | #define GIC_DIST_SGI_PENDING_CLEAR 0xf10 | ||
35 | #define GIC_DIST_SGI_PENDING_SET 0xf20 | ||
34 | 36 | ||
35 | #define GICH_HCR 0x0 | 37 | #define GICH_HCR 0x0 |
36 | #define GICH_VTR 0x4 | 38 | #define GICH_VTR 0x4 |
@@ -74,6 +76,11 @@ static inline void gic_init(unsigned int nr, int start, | |||
74 | gic_init_bases(nr, start, dist, cpu, 0, NULL); | 76 | gic_init_bases(nr, start, dist, cpu, 0, NULL); |
75 | } | 77 | } |
76 | 78 | ||
79 | void gic_send_sgi(unsigned int cpu_id, unsigned int irq); | ||
80 | int gic_get_cpu_id(unsigned int cpu); | ||
81 | void gic_migrate_target(unsigned int new_cpu_id); | ||
82 | unsigned long gic_get_sgir_physaddr(void); | ||
83 | |||
77 | #endif /* __ASSEMBLY */ | 84 | #endif /* __ASSEMBLY */ |
78 | 85 | ||
79 | #endif | 86 | #endif |
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h new file mode 100644 index 000000000000..f76dd4de625e --- /dev/null +++ b/include/trace/events/power_cpu_migrate.h | |||
@@ -0,0 +1,67 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM power | ||
3 | |||
4 | #if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_POWER_CPU_MIGRATE_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | #define __cpu_migrate_proto \ | ||
10 | TP_PROTO(u64 timestamp, \ | ||
11 | u32 cpu_hwid) | ||
12 | #define __cpu_migrate_args \ | ||
13 | TP_ARGS(timestamp, \ | ||
14 | cpu_hwid) | ||
15 | |||
16 | DECLARE_EVENT_CLASS(cpu_migrate, | ||
17 | |||
18 | __cpu_migrate_proto, | ||
19 | __cpu_migrate_args, | ||
20 | |||
21 | TP_STRUCT__entry( | ||
22 | __field(u64, timestamp ) | ||
23 | __field(u32, cpu_hwid ) | ||
24 | ), | ||
25 | |||
26 | TP_fast_assign( | ||
27 | __entry->timestamp = timestamp; | ||
28 | __entry->cpu_hwid = cpu_hwid; | ||
29 | ), | ||
30 | |||
31 | TP_printk("timestamp=%llu cpu_hwid=0x%08lX", | ||
32 | (unsigned long long)__entry->timestamp, | ||
33 | (unsigned long)__entry->cpu_hwid | ||
34 | ) | ||
35 | ); | ||
36 | |||
37 | #define __define_cpu_migrate_event(name) \ | ||
38 | DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \ | ||
39 | __cpu_migrate_proto, \ | ||
40 | __cpu_migrate_args \ | ||
41 | ) | ||
42 | |||
43 | __define_cpu_migrate_event(begin); | ||
44 | __define_cpu_migrate_event(finish); | ||
45 | __define_cpu_migrate_event(current); | ||
46 | |||
47 | #undef __define_cpu_migrate | ||
48 | #undef __cpu_migrate_proto | ||
49 | #undef __cpu_migrate_args | ||
50 | |||
51 | /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */ | ||
52 | #ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING | ||
53 | #define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING | ||
54 | |||
55 | /* | ||
56 | * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate | ||
57 | * a whole-cluster migration: | ||
58 | */ | ||
59 | #define CPU_MIGRATE_ALL_CPUS 0x80000000U | ||
60 | #endif | ||
61 | |||
62 | #endif /* _TRACE_POWER_CPU_MIGRATE_H */ | ||
63 | |||
64 | /* This part must be outside protection */ | ||
65 | #undef TRACE_INCLUDE_FILE | ||
66 | #define TRACE_INCLUDE_FILE power_cpu_migrate | ||
67 | #include <trace/define_trace.h> | ||
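A sketch of how a client such as the b.L switcher might fire these events; only the event names and argument types come from the header above, the surrounding function is assumed:

/* In exactly one .c file, to instantiate the tracepoints: */
#define CREATE_TRACE_POINTS
#include <trace/events/power_cpu_migrate.h>

static void example_trace_switch(u64 timestamp, u32 cpu_hwid)
{
	trace_cpu_migrate_begin(timestamp, cpu_hwid);
	/* ...hand execution over to the inbound CPU... */
	trace_cpu_migrate_finish(timestamp, cpu_hwid);
}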
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 15130b50dfe3..fe9b61e322a5 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile | |||
@@ -2,3 +2,6 @@ ifndef NO_DWARF | |||
2 | PERF_HAVE_DWARF_REGS := 1 | 2 | PERF_HAVE_DWARF_REGS := 1 |
3 | LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o | 3 | LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o |
4 | endif | 4 | endif |
5 | ifndef NO_LIBUNWIND | ||
6 | LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o | ||
7 | endif | ||
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h new file mode 100644 index 000000000000..2a1cfde66b69 --- /dev/null +++ b/tools/perf/arch/arm/include/perf_regs.h | |||
@@ -0,0 +1,54 @@ | |||
1 | #ifndef ARCH_PERF_REGS_H | ||
2 | #define ARCH_PERF_REGS_H | ||
3 | |||
4 | #include <stdlib.h> | ||
5 | #include "../../util/types.h" | ||
6 | #include <asm/perf_regs.h> | ||
7 | |||
8 | #define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1) | ||
9 | #define PERF_REG_IP PERF_REG_ARM_PC | ||
10 | #define PERF_REG_SP PERF_REG_ARM_SP | ||
11 | |||
12 | static inline const char *perf_reg_name(int id) | ||
13 | { | ||
14 | switch (id) { | ||
15 | case PERF_REG_ARM_R0: | ||
16 | return "r0"; | ||
17 | case PERF_REG_ARM_R1: | ||
18 | return "r1"; | ||
19 | case PERF_REG_ARM_R2: | ||
20 | return "r2"; | ||
21 | case PERF_REG_ARM_R3: | ||
22 | return "r3"; | ||
23 | case PERF_REG_ARM_R4: | ||
24 | return "r4"; | ||
25 | case PERF_REG_ARM_R5: | ||
26 | return "r5"; | ||
27 | case PERF_REG_ARM_R6: | ||
28 | return "r6"; | ||
29 | case PERF_REG_ARM_R7: | ||
30 | return "r7"; | ||
31 | case PERF_REG_ARM_R8: | ||
32 | return "r8"; | ||
33 | case PERF_REG_ARM_R9: | ||
34 | return "r9"; | ||
35 | case PERF_REG_ARM_R10: | ||
36 | return "r10"; | ||
37 | case PERF_REG_ARM_FP: | ||
38 | return "fp"; | ||
39 | case PERF_REG_ARM_IP: | ||
40 | return "ip"; | ||
41 | case PERF_REG_ARM_SP: | ||
42 | return "sp"; | ||
43 | case PERF_REG_ARM_LR: | ||
44 | return "lr"; | ||
45 | case PERF_REG_ARM_PC: | ||
46 | return "pc"; | ||
47 | default: | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
51 | return NULL; | ||
52 | } | ||
53 | |||
54 | #endif /* ARCH_PERF_REGS_H */ | ||
diff --git a/tools/perf/arch/arm/util/unwind.c b/tools/perf/arch/arm/util/unwind.c new file mode 100644 index 000000000000..da3dc950550c --- /dev/null +++ b/tools/perf/arch/arm/util/unwind.c | |||
@@ -0,0 +1,48 @@ | |||
1 | |||
2 | #include <errno.h> | ||
3 | #include <libunwind.h> | ||
4 | #include "perf_regs.h" | ||
5 | #include "../../util/unwind.h" | ||
6 | |||
7 | int unwind__arch_reg_id(int regnum) | ||
8 | { | ||
9 | switch (regnum) { | ||
10 | case UNW_ARM_R0: | ||
11 | return PERF_REG_ARM_R0; | ||
12 | case UNW_ARM_R1: | ||
13 | return PERF_REG_ARM_R1; | ||
14 | case UNW_ARM_R2: | ||
15 | return PERF_REG_ARM_R2; | ||
16 | case UNW_ARM_R3: | ||
17 | return PERF_REG_ARM_R3; | ||
18 | case UNW_ARM_R4: | ||
19 | return PERF_REG_ARM_R4; | ||
20 | case UNW_ARM_R5: | ||
21 | return PERF_REG_ARM_R5; | ||
22 | case UNW_ARM_R6: | ||
23 | return PERF_REG_ARM_R6; | ||
24 | case UNW_ARM_R7: | ||
25 | return PERF_REG_ARM_R7; | ||
26 | case UNW_ARM_R8: | ||
27 | return PERF_REG_ARM_R8; | ||
28 | case UNW_ARM_R9: | ||
29 | return PERF_REG_ARM_R9; | ||
30 | case UNW_ARM_R10: | ||
31 | return PERF_REG_ARM_R10; | ||
32 | case UNW_ARM_R11: | ||
33 | return PERF_REG_ARM_FP; | ||
34 | case UNW_ARM_R12: | ||
35 | return PERF_REG_ARM_IP; | ||
36 | case UNW_ARM_R13: | ||
37 | return PERF_REG_ARM_SP; | ||
38 | case UNW_ARM_R14: | ||
39 | return PERF_REG_ARM_LR; | ||
40 | case UNW_ARM_R15: | ||
41 | return PERF_REG_ARM_PC; | ||
42 | default: | ||
43 | pr_err("unwind: invalid reg id %d\n", regnum); | ||
44 | return -EINVAL; | ||
45 | } | ||
46 | |||
47 | return -EINVAL; | ||
48 | } | ||
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 58b2d37ae23a..f5905f2b197d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile | |||
@@ -31,6 +31,10 @@ ifeq ($(ARCH),x86_64) | |||
31 | endif | 31 | endif |
32 | NO_PERF_REGS := 0 | 32 | NO_PERF_REGS := 0 |
33 | endif | 33 | endif |
34 | ifeq ($(ARCH),arm) | ||
35 | NO_PERF_REGS := 0 | ||
36 | LIBUNWIND_LIBS = -lunwind -lunwind-arm | ||
37 | endif | ||
34 | 38 | ||
35 | ifeq ($(NO_PERF_REGS),0) | 39 | ifeq ($(NO_PERF_REGS),0) |
36 | CFLAGS += -DHAVE_PERF_REGS_SUPPORT | 40 | CFLAGS += -DHAVE_PERF_REGS_SUPPORT |
@@ -305,8 +309,7 @@ ifndef NO_LIBELF | |||
305 | endif # NO_DWARF | 309 | endif # NO_DWARF |
306 | endif # NO_LIBELF | 310 | endif # NO_LIBELF |
307 | 311 | ||
308 | # There's only x86 (both 32 and 64) support for CFI unwind so far | 312 | ifeq ($(LIBUNWIND_LIBS),) |
309 | ifneq ($(ARCH),x86) | ||
310 | NO_LIBUNWIND := 1 | 313 | NO_LIBUNWIND := 1 |
311 | endif | 314 | endif |
312 | 315 | ||
@@ -322,8 +325,13 @@ ifndef NO_LIBUNWIND | |||
322 | endif | 325 | endif |
323 | 326 | ||
324 | ifneq ($(feature-libunwind), 1) | 327 | ifneq ($(feature-libunwind), 1) |
325 | msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); | 328 | msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1); |
326 | NO_LIBUNWIND := 1 | 329 | NO_LIBUNWIND := 1 |
330 | else | ||
331 | ifneq ($(feature-libunwind-debug-frame), 1) | ||
332 | msg := $(warning No debug_frame support found in libunwind); | ||
333 | CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME | ||
334 | endif | ||
327 | endif | 335 | endif |
328 | endif | 336 | endif |
329 | 337 | ||
diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index c803f17fb986..e8e195f49a4e 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile | |||
@@ -23,6 +23,7 @@ FILES= \ | |||
23 | test-libpython-version \ | 23 | test-libpython-version \ |
24 | test-libslang \ | 24 | test-libslang \ |
25 | test-libunwind \ | 25 | test-libunwind \ |
26 | test-libunwind-debug-frame \ | ||
26 | test-on-exit \ | 27 | test-on-exit \ |
27 | test-stackprotector-all \ | 28 | test-stackprotector-all \ |
28 | test-stackprotector \ | 29 | test-stackprotector \ |
diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 59e7a705e146..799865b60772 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c | |||
@@ -49,6 +49,10 @@ | |||
49 | # include "test-libunwind.c" | 49 | # include "test-libunwind.c" |
50 | #undef main | 50 | #undef main |
51 | 51 | ||
52 | #define main main_test_libunwind_debug_frame | ||
53 | # include "test-libunwind-debug-frame.c" | ||
54 | #undef main | ||
55 | |||
52 | #define main main_test_libaudit | 56 | #define main main_test_libaudit |
53 | # include "test-libaudit.c" | 57 | # include "test-libaudit.c" |
54 | #undef main | 58 | #undef main |
diff --git a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c b/tools/perf/config/feature-checks/test-libunwind-debug-frame.c new file mode 100644 index 000000000000..0ef8087a104a --- /dev/null +++ b/tools/perf/config/feature-checks/test-libunwind-debug-frame.c | |||
@@ -0,0 +1,16 @@ | |||
1 | #include <libunwind.h> | ||
2 | #include <stdlib.h> | ||
3 | |||
4 | extern int | ||
5 | UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, | ||
6 | unw_word_t ip, unw_word_t segbase, | ||
7 | const char *obj_name, unw_word_t start, | ||
8 | unw_word_t end); | ||
9 | |||
10 | #define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) | ||
11 | |||
12 | int main(void) | ||
13 | { | ||
14 | dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0); | ||
15 | return 0; | ||
16 | } | ||
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 2f891f7e70bf..5390d0b8862a 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c | |||
@@ -39,6 +39,15 @@ UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, | |||
39 | 39 | ||
40 | #define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) | 40 | #define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) |
41 | 41 | ||
42 | extern int | ||
43 | UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, | ||
44 | unw_word_t ip, | ||
45 | unw_word_t segbase, | ||
46 | const char *obj_name, unw_word_t start, | ||
47 | unw_word_t end); | ||
48 | |||
49 | #define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) | ||
50 | |||
42 | #define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ | 51 | #define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ |
43 | #define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ | 52 | #define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ |
44 | 53 | ||
@@ -245,8 +254,9 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, | |||
245 | return 0; | 254 | return 0; |
246 | } | 255 | } |
247 | 256 | ||
248 | static int read_unwind_spec(struct dso *dso, struct machine *machine, | 257 | static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, |
249 | u64 *table_data, u64 *segbase, u64 *fde_count) | 258 | u64 *table_data, u64 *segbase, |
259 | u64 *fde_count) | ||
250 | { | 260 | { |
251 | int ret = -EINVAL, fd; | 261 | int ret = -EINVAL, fd; |
252 | u64 offset; | 262 | u64 offset; |
@@ -255,6 +265,7 @@ static int read_unwind_spec(struct dso *dso, struct machine *machine, | |||
255 | if (fd < 0) | 265 | if (fd < 0) |
256 | return -EINVAL; | 266 | return -EINVAL; |
257 | 267 | ||
268 | /* Check the .eh_frame section for unwinding info */ | ||
258 | offset = elf_section_offset(fd, ".eh_frame_hdr"); | 269 | offset = elf_section_offset(fd, ".eh_frame_hdr"); |
259 | close(fd); | 270 | close(fd); |
260 | 271 | ||
@@ -263,10 +274,29 @@ static int read_unwind_spec(struct dso *dso, struct machine *machine, | |||
263 | table_data, segbase, | 274 | table_data, segbase, |
264 | fde_count); | 275 | fde_count); |
265 | 276 | ||
266 | /* TODO .debug_frame check if eh_frame_hdr fails */ | ||
267 | return ret; | 277 | return ret; |
268 | } | 278 | } |
269 | 279 | ||
280 | #ifndef NO_LIBUNWIND_DEBUG_FRAME | ||
281 | static int read_unwind_spec_debug_frame(struct dso *dso, | ||
282 | struct machine *machine, u64 *offset) | ||
283 | { | ||
284 | int fd = dso__data_fd(dso, machine); | ||
285 | |||
286 | if (fd < 0) | ||
287 | return -EINVAL; | ||
288 | |||
289 | /* Check the .debug_frame section for unwinding info */ | ||
290 | *offset = elf_section_offset(fd, ".debug_frame"); | ||
291 | close(fd); | ||
292 | |||
293 | if (*offset) | ||
294 | return 0; | ||
295 | |||
296 | return -EINVAL; | ||
297 | } | ||
298 | #endif | ||
299 | |||
270 | static struct map *find_map(unw_word_t ip, struct unwind_info *ui) | 300 | static struct map *find_map(unw_word_t ip, struct unwind_info *ui) |
271 | { | 301 | { |
272 | struct addr_location al; | 302 | struct addr_location al; |
@@ -291,20 +321,33 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, | |||
291 | 321 | ||
292 | pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); | 322 | pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); |
293 | 323 | ||
294 | if (read_unwind_spec(map->dso, ui->machine, | 324 | /* Check the .eh_frame section for unwinding info */ |
295 | &table_data, &segbase, &fde_count)) | 325 | if (!read_unwind_spec_eh_frame(map->dso, ui->machine, |
296 | return -EINVAL; | 326 | &table_data, &segbase, &fde_count)) { |
327 | memset(&di, 0, sizeof(di)); | ||
328 | di.format = UNW_INFO_FORMAT_REMOTE_TABLE; | ||
329 | di.start_ip = map->start; | ||
330 | di.end_ip = map->end; | ||
331 | di.u.rti.segbase = map->start + segbase; | ||
332 | di.u.rti.table_data = map->start + table_data; | ||
333 | di.u.rti.table_len = fde_count * sizeof(struct table_entry) | ||
334 | / sizeof(unw_word_t); | ||
335 | return dwarf_search_unwind_table(as, ip, &di, pi, | ||
336 | need_unwind_info, arg); | ||
337 | } | ||
338 | |||
339 | #ifndef NO_LIBUNWIND_DEBUG_FRAME | ||
340 | /* Check the .debug_frame section for unwinding info */ | ||
341 | if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { | ||
342 | memset(&di, 0, sizeof(di)); | ||
343 | dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, | ||
344 | map->start, map->end); | ||
345 | return dwarf_search_unwind_table(as, ip, &di, pi, | ||
346 | need_unwind_info, arg); | ||
347 | } | ||
348 | #endif | ||
297 | 349 | ||
298 | memset(&di, 0, sizeof(di)); | 350 | return -EINVAL; |
299 | di.format = UNW_INFO_FORMAT_REMOTE_TABLE; | ||
300 | di.start_ip = map->start; | ||
301 | di.end_ip = map->end; | ||
302 | di.u.rti.segbase = map->start + segbase; | ||
303 | di.u.rti.table_data = map->start + table_data; | ||
304 | di.u.rti.table_len = fde_count * sizeof(struct table_entry) | ||
305 | / sizeof(unw_word_t); | ||
306 | return dwarf_search_unwind_table(as, ip, &di, pi, | ||
307 | need_unwind_info, arg); | ||
308 | } | 351 | } |
309 | 352 | ||
310 | static int access_fpreg(unw_addr_space_t __maybe_unused as, | 353 | static int access_fpreg(unw_addr_space_t __maybe_unused as, |