From bcccc50ce8fcc833cfed4bb71ede211a6ef5b84a Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 13 Mar 2012 15:34:17 +0100 Subject: ARM: 7420/1: Improve build environment isolation Increasingly distributions are setting default build environments to have LDFLAGS with hardening options. There seems to be an assumption with those options that LDFLAGS are passed to the compiler frontend rather than used directly with ld (which the kernel build process assumes) To prevent build failures in such environments this patch changes the ARM architecture Makefile to override the LDFLAGS from the environment similar to the behaviour on other common architectures e.g. x86 Signed-off-by: Vincent Sanders Signed-off-by: Russell King --- arch/arm/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 0298b00fe241..f8ebf1e97027 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,6 +10,9 @@ # # Copyright (C) 1995-2001 by Russell King +# Ensure linker flags are correct +LDFLAGS := + LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 -- cgit v1.2.2 From bacef661acdb634170a8faddbc1cf28e8f8b9eee Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 25 May 2012 16:20:31 +0100 Subject: x86-64/efi: Use EFI to deal with platform wall clock Other than ix86, x86-64 on EFI so far didn't set the {g,s}et_wallclock accessors to the EFI routines, thus incorrectly using raw RTC accesses instead. Simply removing the #ifdef around the respective code isn't enough, however: While so far early get-time calls were done in physical mode, this doesn't work properly for x86-64, as virtual addresses would still need to be set up for all runtime regions (which wasn't the case on the system I have access to), so instead the patch moves the call to efi_enter_virtual_mode() ahead (which in turn allows to drop all code related to calling efi-get-time in physical mode). Additionally the earlier calling of efi_set_executable() requires the CPA code to cope, i.e. during early boot it must be avoided to call cpa_flush_array(), as the first thing this function does is a BUG_ON(irqs_disabled()). Also make the two EFI functions in question here static - they're not being referenced elsewhere. Signed-off-by: Jan Beulich Tested-by: Matt Fleming Acked-by: Matthew Garrett Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FBFBF5F020000780008637F@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 ++++++---- arch/x86/platform/efi/efi.c | 30 ++++-------------------------- 2 files changed, 10 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e1ebde315210..ee09aca63998 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, /* * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the - * error case we fall back to cpa_flush_all (which uses - * wbindv): + * avoid the wbindv. If the CPU does not support it, in the + * error case, and during early boot (for EFI) we fall back + * to cpa_flush_all (which uses wbinvd): */ - if (!ret && cpu_has_clflush) { + if (early_boot_irqs_disabled) + __cpa_flush_all((void *)(long)cache); + else if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e7..2dc29f51e75a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -234,22 +234,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); - efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -int efi_set_rtc_mmss(unsigned long nowtime) +static int efi_set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes; efi_status_t status; @@ -278,7 +263,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +static unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; @@ -621,18 +606,13 @@ static int __init efi_runtime_init(void) } /* * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map + * EFI runtime service before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; + early_iounmap(runtime, sizeof(efi_runtime_services_t)); return 0; @@ -720,12 +700,10 @@ void __init efi_init(void) efi_enabled = 0; return; } -#ifdef CONFIG_X86_32 if (efi_native) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } -#endif #if EFI_DEBUG print_efi_memmap(); -- cgit v1.2.2 From 2bbc89a8e9c652ee71c6c3b2e0679b7ecedb1a09 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 May 2012 15:49:59 +0200 Subject: m68k: Use Kbuild logic to import asm-generic headers Replace all headers files that just include their asm-generic version by Kbuild logic Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/Kbuild | 23 +++++++++++++++++++++++ arch/m68k/include/asm/bitsperlong.h | 1 - arch/m68k/include/asm/cputime.h | 6 ------ arch/m68k/include/asm/device.h | 7 ------- arch/m68k/include/asm/emergency-restart.h | 6 ------ arch/m68k/include/asm/errno.h | 6 ------ arch/m68k/include/asm/futex.h | 6 ------ arch/m68k/include/asm/ioctl.h | 1 - arch/m68k/include/asm/ipcbuf.h | 1 - arch/m68k/include/asm/irq_regs.h | 1 - arch/m68k/include/asm/kdebug.h | 1 - arch/m68k/include/asm/kmap_types.h | 6 ------ arch/m68k/include/asm/kvm_para.h | 1 - arch/m68k/include/asm/local.h | 6 ------ arch/m68k/include/asm/local64.h | 1 - arch/m68k/include/asm/mman.h | 1 - arch/m68k/include/asm/mutex.h | 9 --------- arch/m68k/include/asm/percpu.h | 6 ------ arch/m68k/include/asm/resource.h | 6 ------ arch/m68k/include/asm/scatterlist.h | 6 ------ arch/m68k/include/asm/siginfo.h | 6 ------ arch/m68k/include/asm/statfs.h | 6 ------ arch/m68k/include/asm/topology.h | 6 ------ arch/m68k/include/asm/xor.h | 1 - 24 files changed, 23 insertions(+), 97 deletions(-) delete mode 100644 arch/m68k/include/asm/bitsperlong.h delete mode 100644 arch/m68k/include/asm/cputime.h delete mode 100644 arch/m68k/include/asm/device.h delete mode 100644 arch/m68k/include/asm/emergency-restart.h delete mode 100644 arch/m68k/include/asm/errno.h delete mode 100644 arch/m68k/include/asm/futex.h delete mode 100644 arch/m68k/include/asm/ioctl.h delete mode 100644 arch/m68k/include/asm/ipcbuf.h delete mode 100644 arch/m68k/include/asm/irq_regs.h delete mode 100644 arch/m68k/include/asm/kdebug.h delete mode 100644 arch/m68k/include/asm/kmap_types.h delete mode 100644 arch/m68k/include/asm/kvm_para.h delete mode 100644 arch/m68k/include/asm/local.h delete mode 100644 arch/m68k/include/asm/local64.h delete mode 100644 arch/m68k/include/asm/mman.h delete mode 100644 arch/m68k/include/asm/mutex.h delete mode 100644 arch/m68k/include/asm/percpu.h delete mode 100644 arch/m68k/include/asm/resource.h delete mode 100644 arch/m68k/include/asm/scatterlist.h delete mode 100644 arch/m68k/include/asm/siginfo.h delete mode 100644 arch/m68k/include/asm/statfs.h delete mode 100644 arch/m68k/include/asm/topology.h delete mode 100644 arch/m68k/include/asm/xor.h (limited to 'arch') diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index eafa2539a8ee..4d1f8fa33091 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,4 +1,27 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h +generic-y += bitsperlong.h +generic-y += cputime.h +generic-y += device.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += futex.h +generic-y += ioctl.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += kmap_types.h +generic-y += kvm_para.h +generic-y += local64.h +generic-y += local.h +generic-y += mman.h +generic-y += mutex.h +generic-y += percpu.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += siginfo.h +generic-y += statfs.h +generic-y += topology.h generic-y += word-at-a-time.h +generic-y += xor.h diff --git a/arch/m68k/include/asm/bitsperlong.h b/arch/m68k/include/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0c13b2..000000000000 --- a/arch/m68k/include/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/cputime.h b/arch/m68k/include/asm/cputime.h deleted file mode 100644 index c79c5e892305..000000000000 --- a/arch/m68k/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __M68K_CPUTIME_H -#define __M68K_CPUTIME_H - -#include - -#endif /* __M68K_CPUTIME_H */ diff --git a/arch/m68k/include/asm/device.h b/arch/m68k/include/asm/device.h deleted file mode 100644 index d8f9872b0e2d..000000000000 --- a/arch/m68k/include/asm/device.h +++ /dev/null @@ -1,7 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#include - diff --git a/arch/m68k/include/asm/emergency-restart.h b/arch/m68k/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/m68k/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/m68k/include/asm/errno.h b/arch/m68k/include/asm/errno.h deleted file mode 100644 index 0d4e188d6ef6..000000000000 --- a/arch/m68k/include/asm/errno.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_ERRNO_H -#define _M68K_ERRNO_H - -#include - -#endif /* _M68K_ERRNO_H */ diff --git a/arch/m68k/include/asm/futex.h b/arch/m68k/include/asm/futex.h deleted file mode 100644 index 6a332a9f099c..000000000000 --- a/arch/m68k/include/asm/futex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#include - -#endif diff --git a/arch/m68k/include/asm/ioctl.h b/arch/m68k/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/m68k/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/ipcbuf.h b/arch/m68k/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/m68k/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/irq_regs.h b/arch/m68k/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/m68k/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/kdebug.h b/arch/m68k/include/asm/kdebug.h deleted file mode 100644 index 6ece1b037665..000000000000 --- a/arch/m68k/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/kmap_types.h b/arch/m68k/include/asm/kmap_types.h deleted file mode 100644 index 3413cc1390ec..000000000000 --- a/arch/m68k/include/asm/kmap_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_M68K_KMAP_TYPES_H -#define __ASM_M68K_KMAP_TYPES_H - -#include - -#endif /* __ASM_M68K_KMAP_TYPES_H */ diff --git a/arch/m68k/include/asm/kvm_para.h b/arch/m68k/include/asm/kvm_para.h deleted file mode 100644 index 14fab8f0b957..000000000000 --- a/arch/m68k/include/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/local.h b/arch/m68k/include/asm/local.h deleted file mode 100644 index 6c259263e1f0..000000000000 --- a/arch/m68k/include/asm/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_M68K_LOCAL_H -#define _ASM_M68K_LOCAL_H - -#include - -#endif /* _ASM_M68K_LOCAL_H */ diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/m68k/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h deleted file mode 100644 index 8eebf89f5ab1..000000000000 --- a/arch/m68k/include/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/mutex.h b/arch/m68k/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/m68k/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/m68k/include/asm/percpu.h b/arch/m68k/include/asm/percpu.h deleted file mode 100644 index 0859d048faf5..000000000000 --- a/arch/m68k/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_M68K_PERCPU_H -#define __ASM_M68K_PERCPU_H - -#include - -#endif /* __ASM_M68K_PERCPU_H */ diff --git a/arch/m68k/include/asm/resource.h b/arch/m68k/include/asm/resource.h deleted file mode 100644 index e7d35019f337..000000000000 --- a/arch/m68k/include/asm/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_RESOURCE_H -#define _M68K_RESOURCE_H - -#include - -#endif /* _M68K_RESOURCE_H */ diff --git a/arch/m68k/include/asm/scatterlist.h b/arch/m68k/include/asm/scatterlist.h deleted file mode 100644 index 312505452a1e..000000000000 --- a/arch/m68k/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_SCATTERLIST_H -#define _M68K_SCATTERLIST_H - -#include - -#endif /* !(_M68K_SCATTERLIST_H) */ diff --git a/arch/m68k/include/asm/siginfo.h b/arch/m68k/include/asm/siginfo.h deleted file mode 100644 index 851d3d784b53..000000000000 --- a/arch/m68k/include/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_SIGINFO_H -#define _M68K_SIGINFO_H - -#include - -#endif diff --git a/arch/m68k/include/asm/statfs.h b/arch/m68k/include/asm/statfs.h deleted file mode 100644 index 08d93f14e061..000000000000 --- a/arch/m68k/include/asm/statfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_STATFS_H -#define _M68K_STATFS_H - -#include - -#endif /* _M68K_STATFS_H */ diff --git a/arch/m68k/include/asm/topology.h b/arch/m68k/include/asm/topology.h deleted file mode 100644 index ca173e9f26ff..000000000000 --- a/arch/m68k/include/asm/topology.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_M68K_TOPOLOGY_H -#define _ASM_M68K_TOPOLOGY_H - -#include - -#endif /* _ASM_M68K_TOPOLOGY_H */ diff --git a/arch/m68k/include/asm/xor.h b/arch/m68k/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/m68k/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v1.2.2 From 2ef0d3e64f0ba9aed72c2ddd410aea83cd3261ab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 May 2012 19:50:04 +0200 Subject: m68k: Use asm-generic version of The extra definition for BITS_PER_LONG we had is also indirectly provided by , via and Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/Kbuild | 1 + arch/m68k/include/asm/types.h | 22 ---------------------- 2 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 arch/m68k/include/asm/types.h (limited to 'arch') diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 4d1f8fa33091..064f86743b34 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -23,5 +23,6 @@ generic-y += scatterlist.h generic-y += siginfo.h generic-y += statfs.h generic-y += topology.h +generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/m68k/include/asm/types.h b/arch/m68k/include/asm/types.h deleted file mode 100644 index 89705adcbd52..000000000000 --- a/arch/m68k/include/asm/types.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _M68K_TYPES_H -#define _M68K_TYPES_H - -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ -#include - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#ifdef __KERNEL__ - -#define BITS_PER_LONG 32 - -#endif /* __KERNEL__ */ - -#endif /* _M68K_TYPES_H */ -- cgit v1.2.2 From 54503b1da74afdb38c730ad6413dbe54ce119134 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 31 May 2012 21:39:13 +0200 Subject: m68knommu: Clean up printing of sections - Remove casts and unneeded address-of ('&') operators, - Use %p to format pointers, %lx to format unsigned longs. Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/kernel/setup_no.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 7dc186b7a85f..8a624ecf86b8 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -218,13 +218,10 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Motorola M5235EVB support (C)2005 Syn-tech Systems, Inc. (Jate Sujjavanich)\n"); #endif - pr_debug("KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x " - "BSS=0x%06x-0x%06x\n", (int) &_stext, (int) &_etext, - (int) &_sdata, (int) &_edata, - (int) &_sbss, (int) &_ebss); - pr_debug("MEMORY -> ROMFS=0x%06x-0x%06x MEM=0x%06x-0x%06x\n ", - (int) &_ebss, (int) memory_start, - (int) memory_start, (int) memory_end); + pr_debug("KERNEL -> TEXT=0x%p-0x%p DATA=0x%p-0x%p BSS=0x%p-0x%p\n", + _stext, _etext, _sdata, _edata, _sbss, _ebss); + pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", + _ebss, memory_start, memory_start, memory_end); /* Keep a copy of command line */ *cmdline_p = &command_line[0]; -- cgit v1.2.2 From f2378ed90572aeeecded30fd3754575c2ff1e55f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2012 17:24:47 +0200 Subject: m68k: Remove duplicate FPU config option It's also defined in arch/m68k/Kconfig.cpu Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 147120128260..bc8cc504dee1 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -62,9 +62,6 @@ config CPU_HAS_NO_MULDIV64 config CPU_HAS_ADDRESS_SPACES bool -config FPU - bool - config HZ int default 1000 if CLEOPATRA -- cgit v1.2.2 From 022613e0b0753f1261454dea9b07c2e959363af7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2012 17:26:35 +0200 Subject: m68k: Move CPU_HAS_* config options They belong together with the CPU selection Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/Kconfig | 9 --------- arch/m68k/Kconfig.cpu | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index bc8cc504dee1..87b504cfe001 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -53,15 +53,6 @@ config ZONE_DMA bool default y -config CPU_HAS_NO_BITFIELDS - bool - -config CPU_HAS_NO_MULDIV64 - bool - -config CPU_HAS_ADDRESS_SPACES - bool - config HZ int default 1000 if CLEOPATRA diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 2b53254ad994..6cd8c1a32893 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -360,6 +360,15 @@ config NODES_SHIFT default "3" depends on !SINGLE_MEMORY_CHUNK +config CPU_HAS_NO_BITFIELDS + bool + +config CPU_HAS_NO_MULDIV64 + bool + +config CPU_HAS_ADDRESS_SPACES + bool + config FPU bool -- cgit v1.2.2 From 5df58f3aac58bb0f08fa205a006c6b7840cd7ac4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2012 18:35:13 +0200 Subject: m68k: delay, muldi3 - Use CONFIG_CPU_HAS_NO_MULDIV64 instead of open coding CONFIG_M68000 || CONFIG_COLDFIRE Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/include/asm/delay.h | 2 +- arch/m68k/lib/muldi3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h index 9c09becfd4c9..12d8fe4f1d30 100644 --- a/arch/m68k/include/asm/delay.h +++ b/arch/m68k/include/asm/delay.h @@ -43,7 +43,7 @@ static inline void __delay(unsigned long loops) extern void __bad_udelay(void); -#if defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE) +#ifdef CONFIG_CPU_HAS_NO_MULDIV64 /* * The simpler m68k and ColdFire processors do not have a 32*32->64 * multiply instruction. So we need to handle them a little differently. diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c index 79e928a525d0..ee5f0b1b5c5d 100644 --- a/arch/m68k/lib/muldi3.c +++ b/arch/m68k/lib/muldi3.c @@ -19,7 +19,7 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#if defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE) +#ifdef CONFIG_CPU_HAS_NO_MULDIV64 #define SI_TYPE_SIZE 32 #define __BITS4 (SI_TYPE_SIZE / 4) -- cgit v1.2.2 From 9f1f118035b2d28bb741844537b9ee87705b4c6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2012 19:37:52 +0200 Subject: m68k: Introduce config option CPU_HAS_NO_UNALIGNED Use CONFIG_CPU_HAS_NO_UNALIGNED instead of open coding CONFIG_M68000 || CONFIG_COLDFIRE Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/Kconfig.cpu | 5 +++++ arch/m68k/include/asm/unaligned.h | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 6cd8c1a32893..c4ed65081475 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -27,6 +27,7 @@ config COLDFIRE select ARCH_HAVE_CUSTOM_GPIO_H select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_MULDIV64 + select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM endchoice @@ -37,6 +38,7 @@ config M68000 bool select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_MULDIV64 + select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM help The Freescale (was Motorola) 68000 CPU is the first generation of @@ -366,6 +368,9 @@ config CPU_HAS_NO_BITFIELDS config CPU_HAS_NO_MULDIV64 bool +config CPU_HAS_NO_UNALIGNED + bool + config CPU_HAS_ADDRESS_SPACES bool diff --git a/arch/m68k/include/asm/unaligned.h b/arch/m68k/include/asm/unaligned.h index f4043ae63db1..2b3ca0bf7a0d 100644 --- a/arch/m68k/include/asm/unaligned.h +++ b/arch/m68k/include/asm/unaligned.h @@ -2,7 +2,7 @@ #define _ASM_M68K_UNALIGNED_H -#if defined(CONFIG_COLDFIRE) || defined(CONFIG_M68000) +#ifdef CONFIG_CPU_HAS_NO_UNALIGNED #include #include #include @@ -12,7 +12,7 @@ #else /* - * The m68k can do unaligned accesses itself. + * The m68k can do unaligned accesses itself. */ #include #include -- cgit v1.2.2 From 7df0d27f3d78282e9ab97da060e85f53a5f556b5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2012 19:39:39 +0200 Subject: m68k: CPU32 does not support unaligned accesses Hence select CPU_HAS_NO_UNALIGNED Reported-by: Philippe De Muyter Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/Kconfig.cpu | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c4ed65081475..d4f3a9839cff 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -50,6 +50,7 @@ config M68000 config MCPU32 bool select CPU_HAS_NO_BITFIELDS + select CPU_HAS_NO_UNALIGNED help The Freescale (was then Motorola) CPU32 is a CPU core that is based on the 68020 processor. For the most part it is used in -- cgit v1.2.2 From 65df57743924c3d13e1fa1bcf5bf70fe874fcdfd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 24 May 2012 11:13:42 +0200 Subject: crypto: sha1 - use Kbuild supplied flags for AVX test Commit ea4d26ae ("raid5: add AVX optimized RAID5 checksumming") introduced x86/ arch wide defines for AFLAGS and CFLAGS indicating AVX support in binutils based on the same test we have in x86/crypto/ right now. To minimize duplication drop our implementation in favour to the one in x86/. Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 7 ------- arch/x86/crypto/sha1_ssse3_asm.S | 2 +- arch/x86/crypto/sha1_ssse3_glue.c | 6 +++--- 3 files changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e191ac048b59..479f95a744f7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -34,12 +34,5 @@ salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o - ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o - -# enable AVX support only when $(AS) can actually assemble the instructions -ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes) -AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT -CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT -endif sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57d70e8..49d6987a73d9 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -468,7 +468,7 @@ W_PRECALC_SSSE3 */ SHA1_VECTOR_ASM sha1_transform_ssse3 -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX .macro W_PRECALC_AVX diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499d0abe..4a11a9d72451 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -35,7 +35,7 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, unsigned int rounds); -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX asmlinkage void sha1_transform_avx(u32 *digest, const char *data, unsigned int rounds); #endif @@ -184,7 +184,7 @@ static struct shash_alg alg = { } }; -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { u64 xcr0; @@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) if (cpu_has_ssse3) sha1_transform_asm = sha1_transform_ssse3; -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) sha1_transform_asm = sha1_transform_avx; -- cgit v1.2.2 From 107778b592576c0c8e8d2ca7a2aa5415a4908223 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Mon, 28 May 2012 15:54:24 +0200 Subject: crypto: twofish - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Twofish block cipher. The implementation processes eight blocks in parallel (two 4 block chunk AVX operations). The table-lookups are done in general-purpose registers. For small blocksizes the 3way-parallel functions from the twofish-x86_64-3way module are called. A good performance increase is provided for blocksizes greater or equal to 128B. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) twofish-avx-x86_64 vs. twofish-x86_64-3way 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.97x 1.00x 0.95x 0.97x 0.97x 0.96x 0.95x 0.95x 0.98x 64B 0.99x 0.99x 1.00x 0.99x 0.98x 0.98x 0.99x 0.98x 0.99x 0.98x 256B 1.20x 1.21x 1.00x 1.19x 1.15x 1.14x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.24x 1.26x 1.28x 1.26x 1.27x 8192B 1.31x 1.32x 1.00x 1.31x 1.25x 1.25x 1.28x 1.29x 1.28x 1.30x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.96x 1.00x 0.96x 0.97x 0.98x 0.95x 0.95x 0.95x 0.96x 64B 1.00x 0.99x 1.00x 0.98x 0.98x 1.01x 0.98x 0.98x 0.98x 0.98x 256B 1.20x 1.21x 1.00x 1.21x 1.15x 1.15x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.23x 1.26x 1.27x 1.26x 1.27x 8192B 1.31x 1.33x 1.00x 1.31x 1.26x 1.26x 1.29x 1.29x 1.28x 1.30x twofish-avx-x86_64 vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.19x 1.63x ecb-dec 1.18x 1.62x cbc-enc 0.75x 1.03x cbc-dec 1.23x 1.67x ctr-enc 1.24x 1.65x ctr-dec 1.24x 1.65x lrw-enc 1.15x 1.53x lrw-dec 1.14x 1.52x xts-enc 1.16x 1.56x xts-dec 1.16x 1.56x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 301 ++++++++ arch/x86/crypto/twofish_avx_glue.c | 1086 +++++++++++++++++++++++++++ arch/x86/crypto/twofish_glue_3way.c | 2 + 4 files changed, 1391 insertions(+) create mode 100644 arch/x86/crypto/twofish-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/twofish_avx_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 479f95a744f7..3420feef0c70 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o +obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -30,6 +31,7 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o +twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..fc31b89ba4c3 --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -0,0 +1,301 @@ +/* + * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "twofish-avx-x86_64-asm_64.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 8-way AVX twofish + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 +#define RY %xmm9 + +#define RK1 %xmm10 +#define RK2 %xmm11 + +#define RID1 %rax +#define RID1b %al +#define RID2 %rbx +#define RID2b %bl + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGS1 %r8 +#define RGS1d %r8d +#define RGS2 %r9 +#define RGS2d %r9d +#define RGS3 %r10 +#define RGS3d %r10d + + +#define lookup_32bit(t0, t1, t2, t3, src, dst) \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + movl t0(CTX, RID1, 4), dst ## d; \ + xorl t1(CTX, RID2, 4), dst ## d; \ + shrq $16, src; \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + xorl t2(CTX, RID1, 4), dst ## d; \ + xorl t3(CTX, RID2, 4), dst ## d; + +#define G(a, x, t0, t1, t2, t3) \ + vmovq a, RGI1; \ + vpsrldq $8, a, x; \ + vmovq x, RGI2; \ + \ + lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ + shrq $16, RGI1; \ + lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + \ + lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ + shrq $16, RGI2; \ + lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ + shlq $32, RGS3; \ + orq RGS1, RGS3; \ + \ + vmovq RGS2, x; \ + vpinsrq $1, RGS3, x, x; + +#define encround(a, b, c, d, x, y) \ + G(a, x, s0, s1, s2, s3); \ + G(b, y, s1, s2, s3, s0); \ + vpaddd x, y, x; \ + vpaddd y, x, y; \ + vpaddd x, RK1, x; \ + vpaddd y, RK2, y; \ + vpxor x, c, c; \ + vpsrld $1, c, x; \ + vpslld $(32 - 1), c, c; \ + vpor c, x, c; \ + vpslld $1, d, x; \ + vpsrld $(32 - 1), d, d; \ + vpor d, x, d; \ + vpxor d, y, d; + +#define decround(a, b, c, d, x, y) \ + G(a, x, s0, s1, s2, s3); \ + G(b, y, s1, s2, s3, s0); \ + vpaddd x, y, x; \ + vpaddd y, x, y; \ + vpaddd y, RK2, y; \ + vpxor d, y, d; \ + vpsrld $1, d, y; \ + vpslld $(32 - 1), d, d; \ + vpor d, y, d; \ + vpslld $1, c, y; \ + vpsrld $(32 - 1), c, c; \ + vpor c, y, c; \ + vpaddd x, RK1, x; \ + vpxor x, c, c; + +#define encrypt_round(n, a, b, c, d) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ + encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define decrypt_round(n, a, b, c, d) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ + decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define encrypt_cycle(n) \ + encrypt_round((2*n), RA, RB, RC, RD); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB); + +#define decrypt_cycle(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ + decrypt_round((2*n), RA, RB, RC, RD); + + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ + vpxor (0*4*4)(in), wkey, x0; \ + vpxor (1*4*4)(in), wkey, x1; \ + vpxor (2*4*4)(in), wkey, x2; \ + vpxor (3*4*4)(in), wkey, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor x0, wkey, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor x1, wkey, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor x2, wkey, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor x3, wkey, x3; \ + vmovdqu x3, (3*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor x0, wkey, x0; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor x1, wkey, x1; \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor x2, wkey, x2; \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor x3, wkey, x3; \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.align 8 +.global __twofish_enc_blk_8way +.type __twofish_enc_blk_8way,@function; + +__twofish_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbx; + pushq %rcx; + + vmovdqu w(CTX), RK1; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + + xorq RID1, RID1; + xorq RID2, RID2; + + encrypt_cycle(0); + encrypt_cycle(1); + encrypt_cycle(2); + encrypt_cycle(3); + encrypt_cycle(4); + encrypt_cycle(5); + encrypt_cycle(6); + encrypt_cycle(7); + + vmovdqu (w+4*4)(CTX), RK1; + + popq %rcx; + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + leaq (4*4*4)(%rax), %rdx; + + testb %cl, %cl; + jnz __enc_xor8; + + outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + ret; + +__enc_xor8: + outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + ret; + +.align 8 +.global twofish_dec_blk_8way +.type twofish_dec_blk_8way,@function; + +twofish_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbx; + + vmovdqu (w+4*4)(CTX), RK1; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + xorq RID1, RID1; + xorq RID2, RID2; + + decrypt_cycle(7); + decrypt_cycle(6); + decrypt_cycle(5); + decrypt_cycle(4); + decrypt_cycle(3); + decrypt_cycle(2); + decrypt_cycle(1); + decrypt_cycle(0); + + vmovdqu (w)(CTX), RK1; + + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + + ret; diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 000000000000..599f19e4bef6 --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -0,0 +1,1086 @@ +/* + * Glue Code for AVX assembler version of Twofish Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Glue code based on serpent_sse2_glue.c by: + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define TWOFISH_PARALLEL_BLOCKS 8 + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions from twofish_x86_64-3way module */ +asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_3way_xor(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, true); +} + +/* 8-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_8way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_8way(ctx, dst, src, true); +} + +static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + twofish_dec_blk_8way(ctx, dst, src); +} + + + +struct async_twofish_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + if (fpu_enabled) + return true; + + /* AVX is only used when chunk to be processed is large enough, so + * do not enable FPU until it is necessary. + */ + if (nbytes < TF_BLOCK_SIZE * TWOFISH_PARALLEL_BLOCKS) + return false; + + kernel_fpu_begin(); + return true; +} + +static inline void twofish_fpu_end(bool fpu_enabled) +{ + if (fpu_enabled) + kernel_fpu_end(); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + if (enc) + twofish_enc_blk_xway(ctx, wdst, wsrc); + else + twofish_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * TWOFISH_PARALLEL_BLOCKS; + wdst += bsize * TWOFISH_PARALLEL_BLOCKS; + nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (enc) + twofish_enc_blk_3way(ctx, wdst, wsrc); + else + twofish_dec_blk_3way(ctx, wdst, wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + twofish_enc_blk(ctx, wdst, wsrc); + else + twofish_dec_blk(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + twofish_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; + u128 last_iv; + int i; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (TWOFISH_PARALLEL_BLOCKS - 1); + src -= TWOFISH_PARALLEL_BLOCKS - 1; + dst -= TWOFISH_PARALLEL_BLOCKS - 1; + + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) + u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * (3 - 1); + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + u128_xor(dst + 2, dst + 2, ivs + 1); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + twofish_fpu_end(fpu_enabled); + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[TF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + twofish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, TF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[TWOFISH_PARALLEL_BLOCKS]; + int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblocks[i], &ctrblk); + u128_inc(&ctrblk); + } + + twofish_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += TWOFISH_PARALLEL_BLOCKS; + dst += TWOFISH_PARALLEL_BLOCKS; + nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[2], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk_3way_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 3; + dst += 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + twofish_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +struct crypt_priv { + struct twofish_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { + twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) + twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); + + nbytes %= bsize * 3; + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_enc_blk(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { + twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) + twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); + + nbytes %= bsize * 3; + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_dec_blk(ctx->ctx, srcdst, srcdst); +} + +struct twofish_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct twofish_ctx twofish_ctx; +}; + +static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __twofish_setkey(&ctx->twofish_ctx, key, + keylen - TF_BLOCK_SIZE, &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - + TF_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->twofish_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->twofish_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, + key + keylen / 2, keylen / 2, flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static int ablk_init(struct crypto_tfm *tfm) +{ + struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} + +static struct crypto_alg twofish_algs[10] = { { + .cra_name = "__ecb-twofish-avx", + .cra_driver_name = "__driver-ecb-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-twofish-avx", + .cra_driver_name = "__driver-cbc-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-twofish-avx", + .cra_driver_name = "__driver-ctr-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-twofish-avx", + .cra_driver_name = "__driver-lrw-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + + TF_BLOCK_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = lrw_twofish_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-twofish-avx", + .cra_driver_name = "__driver-xts-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE * 2, + .max_keysize = TF_MAX_KEY_SIZE * 2, + .ivsize = TF_BLOCK_SIZE, + .setkey = xts_twofish_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(twofish)", + .cra_driver_name = "lrw-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + + TF_BLOCK_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(twofish)", + .cra_driver_name = "xts-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE * 2, + .max_keysize = TF_MAX_KEY_SIZE * 2, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init twofish_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + printk(KERN_INFO "AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + printk(KERN_INFO "AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); +} + +static void __exit twofish_exit(void) +{ + crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); +} + +module_init(twofish_init); +module_exit(twofish_exit); + +MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("twofish"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24cce31..77e4e55a2660 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -45,8 +45,10 @@ asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, /* 3-way parallel cipher functions */ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, bool xor); +EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) -- cgit v1.2.2 From 7efe4076725aeb01722445b56613681aa492c8d6 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Tue, 12 Jun 2012 16:47:43 +0800 Subject: crypto: serpent - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Serpent block cipher. The implementation is very similar to the sse2 implementation and processes eight blocks in parallel. Because of the new non-destructive three operand syntax all move-instructions can be removed and therefore a little performance increase is provided. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) serpent-avx-x86_64 vs. serpent-sse2-x86_64 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.03x 1.01x 1.01x 1.01x 1.00x 1.00x 1.00x 1.00x 1.00x 1.01x 64B 1.00x 1.00x 1.00x 1.00x 1.00x 0.99x 1.00x 1.01x 1.00x 1.00x 256B 1.05x 1.03x 1.00x 1.02x 1.05x 1.06x 1.05x 1.02x 1.05x 1.02x 1024B 1.05x 1.02x 1.00x 1.02x 1.05x 1.06x 1.05x 1.03x 1.05x 1.02x 8192B 1.05x 1.02x 1.00x 1.02x 1.06x 1.06x 1.04x 1.03x 1.04x 1.02x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.01x 1.00x 1.01x 1.01x 1.00x 1.00x 0.99x 1.03x 1.01x 1.01x 64B 1.00x 1.00x 1.00x 1.00x 1.00x 1.00x 1.00x 1.01x 1.00x 1.02x 256B 1.05x 1.02x 1.00x 1.02x 1.05x 1.02x 1.04x 1.05x 1.05x 1.02x 1024B 1.06x 1.02x 1.00x 1.02x 1.07x 1.06x 1.05x 1.04x 1.05x 1.02x 8192B 1.05x 1.02x 1.00x 1.02x 1.06x 1.06x 1.04x 1.05x 1.05x 1.02x serpent-avx-x86_64 vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.26x 1.73x ecb-dec 1.20x 1.64x cbc-enc 0.33x 0.45x cbc-dec 1.24x 1.67x ctr-enc 1.32x 1.76x ctr-dec 1.32x 1.76x lrw-enc 1.20x 1.60x lrw-dec 1.15x 1.54x xts-enc 1.22x 1.64x xts-dec 1.17x 1.57x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 704 +++++++++++++++++++++ arch/x86/crypto/serpent_avx_glue.c | 949 ++++++++++++++++++++++++++++ 3 files changed, 1655 insertions(+) create mode 100644 arch/x86/crypto/serpent-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/serpent_avx_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3420feef0c70..83caa4b948c8 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o @@ -34,6 +35,7 @@ twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..0ed47a124bac --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -0,0 +1,704 @@ +/* + * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "serpent-avx-x86_64-asm_64.S" +.text + +#define CTX %rdi + +/********************************************************************** + 8-way AVX serpent + **********************************************************************/ +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 +#define RE1 %xmm4 + +#define tp %xmm5 + +#define RA2 %xmm6 +#define RB2 %xmm7 +#define RC2 %xmm8 +#define RD2 %xmm9 +#define RE2 %xmm10 + +#define RNOT %xmm11 + +#define RK0 %xmm12 +#define RK1 %xmm13 +#define RK2 %xmm14 +#define RK3 %xmm15 + + +#define S0_1(x0, x1, x2, x3, x4) \ + vpor x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x4; \ + vpxor RNOT, x4, x4; \ + vpxor x1, tp, x3; \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpxor x0, x2, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + vpxor x3, x0, x0; \ + vpor x0, x4, x4; \ + vpxor x2, x0, x0; \ + vpand x1, x2, x2; \ + vpxor x2, x3, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x4, x2, x2; \ + vpxor x2, x1, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, tp; \ + vpxor x3, x0, x0; \ + vpxor RNOT, x3, x3; \ + vpand tp, x1, x4; \ + vpor tp, x0, x0; \ + vpxor x2, x3, x3; \ + vpxor x3, x0, x0; \ + vpxor x3, tp, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpor x4, x1, x1; \ + vpxor x2, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x2, x2; \ + vpor x0, x1, x1; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x0, x0; \ + vpxor x1, x4, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, tp; \ + vpxor x3, tp, tp; \ + vpor x0, x3, x3; \ + vpxor x1, x2, x2; \ + vpxor x1, x3, x3; \ + vpand tp, x1, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + vpxor x2, tp, tp; \ + vpand x3, x2, x2; \ + vpor x1, x3, x3; \ + vpxor RNOT, tp, tp; \ + vpxor tp, x3, x3; \ + vpxor tp, x0, x4; \ + vpxor x2, tp, x0; \ + vpor x2, x1, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, tp; \ + vpor x0, x3, x3; \ + vpand x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpxor tp, x2, x2; \ + vpand x3, tp, x1; \ + vpxor x3, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x4, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpand x3, x0, x0; \ + vpand x4, x3, x3; \ + vpxor x2, x3, x3; \ + vpor x1, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x4, x4; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor x1, x0, x0; \ + vpxor tp, x3, x4; \ + vpor x0, x2, x2; \ + vpxor x1, x2, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpand x2, x4, x4; \ + vpxor tp, x2, x2; \ + vpxor x0, x4, x4; \ + vpor x1, tp, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x0, x3, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + vpor x0, x1, tp; \ + vpxor tp, x2, x2; \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpand x4, tp, x1; \ + vpor x3, x4, x4; \ + vpxor x0, x4, x4; +#define S5_2(x0, x1, x2, x3, x4) \ + vpand x3, x0, x0; \ + vpxor x3, x1, x1; \ + vpxor x2, x3, x3; \ + vpxor x1, x0, x0; \ + vpand x4, x2, x2; \ + vpxor x2, x1, x1; \ + vpand x0, x2, x2; \ + vpxor x2, x3, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + vpxor x0, x3, x3; \ + vpxor x2, x1, tp; \ + vpxor x0, x2, x2; \ + vpand x3, x0, x0; \ + vpor x3, tp, tp; \ + vpxor RNOT, x1, x4; \ + vpxor tp, x0, x0; \ + vpxor x2, tp, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x4, x4; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x3, x3; \ + vpxor x2, x1, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x1, tp; \ + vpxor RNOT, x0, x0; \ + vpand x2, tp, x1; \ + vpxor x3, x1, x1; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x4; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x3; \ + vpor x1, x0, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + vpand x0, x2, x2; \ + vpxor x4, x0, x0; \ + vpxor x3, x4, x4; \ + vpand x0, x3, x3; \ + vpxor x1, x4, x4; \ + vpxor x4, x2, x2; \ + vpxor x1, x3, x3; \ + vpor x0, x4, x4; \ + vpxor x1, x4, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpor x1, x3, tp; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpxor tp, x2, x2; \ + vpxor x0, tp, x3; \ + vpand x1, x0, x0; \ + vpxor x2, x0, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + vpand x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x3, x2, x2; \ + vpxor x3, x1, x1; \ + vpand x0, x3, x3; \ + vpxor x0, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x4, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, tp; \ + vpxor RNOT, x2, x2; \ + vpor x1, x0, x4; \ + vpxor x3, x4, x4; \ + vpand x1, x3, x3; \ + vpxor x2, x1, x1; \ + vpand x4, x2, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + vpxor x1, x4, x4; \ + vpor x3, x1, x1; \ + vpxor tp, x3, x3; \ + vpxor tp, x2, x2; \ + vpor x4, tp, x0; \ + vpxor x4, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor x1, x4, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpxor RNOT, x3, tp; \ + vpor x2, tp, tp; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x4; \ + vpxor x1, tp, x3; \ + vpor x2, x1, x1; \ + vpxor x0, x2, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + vpxor x4, x1, x1; \ + vpor x3, x4, x4; \ + vpxor x3, x2, x2; \ + vpxor x2, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpand x2, x1, tp; \ + vpxor x0, tp, tp; \ + vpor x1, x0, x0; \ + vpxor x3, x1, x4; \ + vpxor x3, x0, x0; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x1, x0, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + vpxor x3, x2, x2; \ + vpand x1, x0, tp; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor RNOT, x0, x4; \ + vpxor tp, x1, x1; \ + vpxor x2, tp, x0; \ + vpand x4, x2, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + vpxor x0, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x0, x0; \ + vpand x2, x3, x3; \ + vpxor x3, x4, x4; \ + vpxor x1, x3, x3; \ + vpand x0, x1, x1; \ + vpxor x1, x4, x4; \ + vpxor x3, x0, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + vpor x2, x1, tp; \ + vpxor x1, x2, x2; \ + vpxor x3, tp, tp; \ + vpand x1, x3, x3; \ + vpxor x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x3, x3; \ + vpor x0, x2, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + vpxor tp, x1, x4; \ + vpxor x4, x2, x2; \ + vpand x0, x4, x4; \ + vpxor tp, x0, x0; \ + vpxor x3, tp, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpxor x2, x0, x0; \ + vpxor x4, x2, x2; \ + vpxor x3, x4, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + vpxor x2, x0, x0; \ + vpand x3, x0, tp; \ + vpxor x3, x2, x2; \ + vpxor x2, tp, tp; \ + vpxor x1, x3, x3; \ + vpor x0, x2, x2; \ + vpxor x3, x2, x2; \ + vpand tp, x3, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + vpxor RNOT, tp, tp; \ + vpxor x1, x3, x3; \ + vpand x2, x1, x1; \ + vpxor tp, x0, x4; \ + vpxor x4, x3, x3; \ + vpxor x2, x4, x4; \ + vpxor x1, tp, x0; \ + vpxor x0, x2, x2; + +#define SI7_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x2, x0, x0; \ + vpor x3, x2, x2; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpor tp, x1, x1; \ + vpxor x0, x4, x4; \ + vpand x2, x0, x0; \ + vpxor x1, x0, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + vpand x2, x1, x1; \ + vpxor x2, tp, x3; \ + vpxor x3, x4, x4; \ + vpand x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor x4, x1, x1; \ + vpxor x4, x3, x3; \ + vpand x0, x4, x4; \ + vpxor x2, x4, x4; + +#define get_key(i, j, t) \ + vbroadcastss (4*(i)+(j))*4(CTX), t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + vpslld $13, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $13, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $1, x1 ## 1, x4 ## 1; \ + vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + get_key(i, 1, RK1); \ + vpslld $1, x1 ## 2, x4 ## 2; \ + vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + get_key(i, 3, RK3); \ + vpslld $7, x3 ## 1, x4 ## 1; \ + vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + vpslld $7, x3 ## 2, x4 ## 2; \ + vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpslld $5, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $22, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpslld $5, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $22, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpsrld $5, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpsrld $22, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpsrld $5, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpsrld $22, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $1, x1 ## 1, x4 ## 1; \ + vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpsrld $1, x1 ## 2, x4 ## 2; \ + vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpsrld $7, x3 ## 1, x4 ## 1; \ + vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $7, x3 ## 2, x4 ## 2; \ + vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $13, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $3, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $13, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $3, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 3, RK3); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vmovdqu (2*4*4)(in), x2; \ + vmovdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); \ + vmovdqu x2, (2*4*4)(out); \ + vmovdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.align 8 +.global __serpent_enc_blk_8way +.type __serpent_enc_blk_8way,@function; + +__serpent_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + leaq (4*4*4)(%rsi), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +__enc_xor8: + xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +.align 8 +.global serpent_dec_blk_8way +.type serpent_dec_blk_8way,@function; + +serpent_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + leaq (4*4*4)(%rsi), %rax; + write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + ret; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 000000000000..0dc7a26535e5 --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -0,0 +1,949 @@ +/* + * Glue Code for AVX assembler versions of Serpent Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Glue code based on serpent_sse2_glue.c by: + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct async_serpent_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + if (fpu_enabled) + return true; + + /* AVX is only used when chunk to be processed is large enough, so + * do not enable FPU until it is necessary. + */ + if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) + return false; + + kernel_fpu_begin(); + return true; +} + +static inline void serpent_fpu_end(bool fpu_enabled) +{ + if (fpu_enabled) + kernel_fpu_end(); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + if (enc) + serpent_enc_blk_xway(ctx, wdst, wsrc); + else + serpent_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * SERPENT_PARALLEL_BLOCKS; + wdst += bsize * SERPENT_PARALLEL_BLOCKS; + nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + __serpent_encrypt(ctx, wdst, wsrc); + else + __serpent_decrypt(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + u128 last_iv; + int i; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); + src -= SERPENT_PARALLEL_BLOCKS - 1; + dst -= SERPENT_PARALLEL_BLOCKS - 1; + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[SERPENT_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __serpent_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; + int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblocks[i], &ctrblk); + u128_inc(&ctrblk); + } + + serpent_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += SERPENT_PARALLEL_BLOCKS; + dst += SERPENT_PARALLEL_BLOCKS; + nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +struct crypt_priv { + struct serpent_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct serpent_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct serpent_ctx serpent_ctx; +}; + +static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - + SERPENT_BLOCK_SIZE); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - + SERPENT_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct serpent_xts_ctx { + struct serpent_ctx tweak_ctx; + struct serpent_ctx crypt_ctx; +}; + +static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static int ablk_init(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} + +static struct crypto_alg serpent_algs[10] = { { + .cra_name = "__ecb-serpent-avx", + .cra_driver_name = "__driver-ecb-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-serpent-avx", + .cra_driver_name = "__driver-cbc-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-serpent-avx", + .cra_driver_name = "__driver-ctr-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-serpent-avx", + .cra_driver_name = "__driver-lrw-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = lrw_serpent_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-serpent-avx", + .cra_driver_name = "__driver-xts-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(serpent)", + .cra_driver_name = "ecb-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(serpent)", + .cra_driver_name = "cbc-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(serpent)", + .cra_driver_name = "ctr-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(serpent)", + .cra_driver_name = "lrw-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(serpent)", + .cra_driver_name = "xts-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init serpent_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + printk(KERN_INFO "AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + printk(KERN_INFO "AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +static void __exit serpent_exit(void) +{ + crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +module_init(serpent_init); +module_exit(serpent_exit); + +MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("serpent"); -- cgit v1.2.2 From d691af000274ae9658695c2a63a76b30890a8983 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 12 Jun 2012 16:50:55 +0800 Subject: crypto: s390 - clean up DES code a bit more Commit 98971f8439b1bb9a61682fe24a865ddd25167a6b ("crypto: s390 - cleanup DES code") should have also removed crypto_des.h. That file is unused and unneeded since that commit. So let's clean up that file too. Signed-off-by: Paul Bolle Acked-by: Jan Glauber Signed-off-by: Herbert Xu --- arch/s390/crypto/crypto_des.h | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 arch/s390/crypto/crypto_des.h (limited to 'arch') diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h deleted file mode 100644 index 6210457ceebb..000000000000 --- a/arch/s390/crypto/crypto_des.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#ifndef __CRYPTO_DES_H__ -#define __CRYPTO_DES_H__ - -extern int crypto_des_check_key(const u8*, unsigned int, u32*); - -#endif /*__CRYPTO_DES_H__*/ -- cgit v1.2.2 From 3387e7d69048f5ab02729825f9611754850d9a87 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 Jun 2012 10:09:03 +0800 Subject: crypto: serpent-sse2/avx - allow both to be built into kernel Rename serpent-avx assembler functions so that they do not collide with serpent-sse2 assembler functions when linking both versions in to same kernel image. Reported-by: Randy Dunlap Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 12 +++--- arch/x86/crypto/serpent_avx_glue.c | 2 +- arch/x86/crypto/serpent_sse2_glue.c | 2 +- arch/x86/include/asm/serpent-avx.h | 32 +++++++++++++++ arch/x86/include/asm/serpent-sse2.h | 63 +++++++++++++++++++++++++++++ arch/x86/include/asm/serpent.h | 63 ----------------------------- 6 files changed, 103 insertions(+), 71 deletions(-) create mode 100644 arch/x86/include/asm/serpent-avx.h create mode 100644 arch/x86/include/asm/serpent-sse2.h delete mode 100644 arch/x86/include/asm/serpent.h (limited to 'arch') diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 0ed47a124bac..504106bf04a2 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -579,10 +579,10 @@ vmovdqu x3, (3*4*4)(out); .align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; +.global __serpent_enc_blk_8way_avx +.type __serpent_enc_blk_8way_avx,@function; -__serpent_enc_blk_8way: +__serpent_enc_blk_8way_avx: /* input: * %rdi: ctx, CTX * %rsi: dst @@ -647,10 +647,10 @@ __enc_xor8: ret; .align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; +.global serpent_dec_blk_8way_avx +.type serpent_dec_blk_8way_avx,@function; -serpent_dec_blk_8way: +serpent_dec_blk_8way_avx: /* input: * %rdi: ctx, CTX * %rsi: dst diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 0dc7a26535e5..dd81bab4f11a 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be85e0a1..deecd25c1299 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/serpent-avx.h b/arch/x86/include/asm/serpent-avx.h new file mode 100644 index 000000000000..432deedd2945 --- /dev/null +++ b/arch/x86/include/asm/serpent-avx.h @@ -0,0 +1,32 @@ +#ifndef ASM_X86_SERPENT_AVX_H +#define ASM_X86_SERPENT_AVX_H + +#include +#include + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way_avx(ctx, dst, src); +} + +#endif diff --git a/arch/x86/include/asm/serpent-sse2.h b/arch/x86/include/asm/serpent-sse2.h new file mode 100644 index 000000000000..e6e77dffbdab --- /dev/null +++ b/arch/x86/include/asm/serpent-sse2.h @@ -0,0 +1,63 @@ +#ifndef ASM_X86_SERPENT_SSE2_H +#define ASM_X86_SERPENT_SSE2_H + +#include +#include + +#ifdef CONFIG_X86_32 + +#define SERPENT_PARALLEL_BLOCKS 4 + +asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_4way(ctx, dst, src); +} + +#else + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way(ctx, dst, src); +} + +#endif + +#endif diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/serpent.h deleted file mode 100644 index d3ef63fe0c81..000000000000 --- a/arch/x86/include/asm/serpent.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef ASM_X86_SERPENT_H -#define ASM_X86_SERPENT_H - -#include -#include - -#ifdef CONFIG_X86_32 - -#define SERPENT_PARALLEL_BLOCKS 4 - -asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_4way(ctx, dst, src); -} - -#else - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way(ctx, dst, src); -} - -#endif - -#endif -- cgit v1.2.2 From c35f77417ebfc7c21c02aa9c8c30aa4cecf331d6 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Mon, 11 Jun 2012 12:56:45 +0300 Subject: x86: Define early read-mostly per-cpu macros Some read-mostly per-cpu data may need to be declared or defined early, so it can be initialized and accessed before per_cpu areas are allocated. Only the data that resides in the per_cpu areas should be read-mostly, as there is little benefit in optimizing cache lines on initialization. Signed-off-by: Ido Yariv [ Added the missing declarations in !SMP code. ] Signed-off-by: Vlad Zolotarov Acked-by: Shai Fultheim Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/46188571.ddB8aVQYWo@vlad Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d9b8e3f7f42a..1104afaba52b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); { [0 ... NR_CPUS-1] = _initvalue }; \ __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map +#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ + DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map + #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) @@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); extern __typeof__(_type) *_name##_early_ptr; \ extern __typeof__(_type) _name##_early_map[] +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ @@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ DEFINE_PER_CPU(_type, _name) = _initvalue +#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ + DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue + #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) #define DECLARE_EARLY_PER_CPU(_type, _name) \ DECLARE_PER_CPU(_type, _name) +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name) + #define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) #define early_per_cpu_ptr(_name) NULL /* no early_per_cpu_map() */ -- cgit v1.2.2 From 0816b0f0365539c8f6280634d2c1778d0108d8f5 Mon Sep 17 00:00:00 2001 From: Vlad Zolotarov Date: Mon, 11 Jun 2012 12:56:52 +0300 Subject: x86: Add read_mostly declaration/definition to variables from smp.h Add "read-mostly" qualifier to the following variables in smp.h: - cpu_sibling_map - cpu_core_map - cpu_llc_shared_map - cpu_llc_id - cpu_number - x86_cpu_to_apicid - x86_bios_cpu_apicid - x86_cpu_to_logical_apicid As long as all the variables above are only written during the initialization, this change is meant to prevent the false sharing. More specifically, on vSMP Foundation platform x86_cpu_to_apicid shared the same internode_cache_line with frequently written lapic_events. From the analysis of the first 33 per_cpu variables out of 219 (memories they describe, to be more specific) the 8 have read_mostly nature (tlb_vector_offset, cpu_loops_per_jiffy, xen_debug_irq, etc.) and 25 are frequently written (irq_stack_union, gdt_page, exception_stacks, idt_desc, etc.). Assuming that the spread of the rest of the per_cpu variables is similar, identifying the read mostly memories will make more sense in terms of long-term code maintenance comparing to identifying frequently written memories. Signed-off-by: Vlad Zolotarov Acked-by: Shai Fultheim Cc: Shai Fultheim (Shai@ScaleMP.com) Cc: ido@wizery.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1719258.EYKzE4Zbq5@vlad Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/smp.h | 16 ++++++++-------- arch/x86/kernel/apic/apic.c | 6 +++--- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed96..a907d4d251a8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -537,7 +537,7 @@ static inline const struct cpumask *default_target_cpus(void) #endif } -DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); static inline unsigned int read_apic_id(void) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c37..cc1df2b5cc65 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void) return has_siblings; } -DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); -DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); /* cpus sharing the last level cache: */ -DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); -DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(int, cpu_number); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) return per_cpu(cpu_llc_shared_map, cpu); } -DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); -DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); #endif /* Static state in head.S used to set up a CPU */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094af..0443b6482214 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map; /* * Map cpu index to physical APIC ID */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); @@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); * used for the mapping. This is where the behaviors of x86_64 and 32 * actually diverge. Let's keep it ugly for now. */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); /* * Knob to control our willingness to enable the local APIC. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa272184..5cdff0357746 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -21,7 +21,7 @@ #include #include -DEFINE_PER_CPU(int, cpu_number); +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3fab55bea29b..e61110e29a8c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -104,17 +104,17 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); -DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -- cgit v1.2.2 From efd68e7254503f3207805f674a1ea1d743f5dfe2 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sun, 3 Jun 2012 22:04:33 -0700 Subject: devicetree: add helper inline for retrieving a node's full name The pattern (np ? np->full_name : "") is rather common in the kernel, but can also make for quite long lines. This patch adds a new inline function, of_node_full_name() so that the test for a valid node pointer doesn't need to be open coded at all call sites. Signed-off-by: Grant Likely Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner --- arch/microblaze/pci/pci-common.c | 6 ++---- arch/powerpc/kernel/pci-common.c | 6 ++---- arch/powerpc/kernel/vio.c | 5 ++--- arch/powerpc/platforms/cell/iommu.c | 3 +-- arch/powerpc/platforms/pseries/iommu.c | 2 +- arch/sparc/kernel/of_device_64.c | 2 +- 6 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index ed22bfc5db14..ca8f6e769960 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -249,8 +249,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev) } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", oirq.size, oirq.specifier[0], oirq.specifier[1], - oirq.controller ? oirq.controller->full_name : - ""); + of_node_full_name(oirq.controller)); virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size); @@ -1493,8 +1492,7 @@ static void __devinit pcibios_scan_phb(struct pci_controller *hose) struct pci_bus *bus; struct device_node *node = hose->dn; - pr_debug("PCI: Scanning PHB %s\n", - node ? node->full_name : ""); + pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node)); pcibios_setup_phb_resources(hose, &resources); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8e78e93c8185..886c254fd565 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -248,8 +248,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", oirq.size, oirq.specifier[0], oirq.specifier[1], - oirq.controller ? oirq.controller->full_name : - ""); + of_node_full_name(oirq.controller)); virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size); @@ -1628,8 +1627,7 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) struct device_node *node = hose->dn; int mode; - pr_debug("PCI: Scanning PHB %s\n", - node ? node->full_name : ""); + pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node)); /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cb87301ccd55..63f72ede4341 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1296,8 +1296,7 @@ static void __devinit vio_dev_release(struct device *dev) struct iommu_table *tbl = get_iommu_table_base(dev); if (tbl) - iommu_free_table(tbl, dev->of_node ? - dev->of_node->full_name : dev_name(dev)); + iommu_free_table(tbl, of_node_full_name(dev->of_node)); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } @@ -1509,7 +1508,7 @@ static ssize_t devspec_show(struct device *dev, { struct device_node *of_node = dev->of_node; - return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); + return sprintf(buf, "%s\n", of_node_full_name(of_node)); } static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index b9f509a34c01..b6732004c882 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -552,8 +552,7 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev) iommu = cell_iommu_for_node(dev_to_node(dev)); if (iommu == NULL || list_empty(&iommu->windows)) { printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n", - dev->of_node ? dev->of_node->full_name : "?", - dev_to_node(dev)); + of_node_full_name(dev->of_node), dev_to_node(dev)); return NULL; } window = list_entry(iommu->windows.next, struct iommu_window, list); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0915b1ad66ce..aab5fbc924e6 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1051,7 +1051,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%s\n", - pci_name(dev), dn? dn->full_name : ""); + pci_name(dev), of_node_full_name(dn)); return; } pr_debug(" parent is %s\n", pdn->full_name); diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 7a3be6f6737a..7bbdc26d9512 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -580,7 +580,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, printk("%s: Apply [%s:%x] imap --> [%s:%x]\n", op->dev.of_node->full_name, pp->full_name, this_orig_irq, - (iret ? iret->full_name : "NULL"), irq); + of_node_full_name(iret), irq); if (!iret) break; -- cgit v1.2.2 From 7d7136cabcad632a81cd568a9c1135db276fe0f2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:35:15 -0700 Subject: ARM: shmobile: r8a7740: add HDMI interrupt support It is required from sh_mobile_hdmi driver. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/intc-r8a7740.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/intc-r8a7740.c b/arch/arm/mach-shmobile/intc-r8a7740.c index 09c42afcb22d..9a69a31918ba 100644 --- a/arch/arm/mach-shmobile/intc-r8a7740.c +++ b/arch/arm/mach-shmobile/intc-r8a7740.c @@ -71,10 +71,12 @@ enum { DMAC3_1_DEI0, DMAC3_1_DEI1, DMAC3_1_DEI2, DMAC3_1_DEI3, DMAC3_2_DEI4, DMAC3_2_DEI5, DMAC3_2_DADERR, SHWYSTAT_RT, SHWYSTAT_HS, SHWYSTAT_COM, + HDMI, USBH_INT, USBH_OHCI, USBH_EHCI, USBH_PME, USBH_BIND, RSPI_OVRF, RSPI_SPTEF, RSPI_SPRF, SPU2_0, SPU2_1, FSI, FMSI, + HDMI_SSS, HDMI_KEY, IPMMU, AP_ARM_CTIIRQ, AP_ARM_PMURQ, MFIS2, @@ -182,6 +184,7 @@ static struct intc_vect intca_vectors[] __initdata = { INTC_VECT(USBH_EHCI, 0x1580), INTC_VECT(USBH_PME, 0x15A0), INTC_VECT(USBH_BIND, 0x15C0), + INTC_VECT(HDMI, 0x1700), INTC_VECT(RSPI_OVRF, 0x1780), INTC_VECT(RSPI_SPTEF, 0x17A0), INTC_VECT(RSPI_SPRF, 0x17C0), @@ -189,6 +192,8 @@ static struct intc_vect intca_vectors[] __initdata = { INTC_VECT(SPU2_1, 0x1820), INTC_VECT(FSI, 0x1840), INTC_VECT(FMSI, 0x1860), + INTC_VECT(HDMI_SSS, 0x18A0), + INTC_VECT(HDMI_KEY, 0x18C0), INTC_VECT(IPMMU, 0x1920), INTC_VECT(AP_ARM_CTIIRQ, 0x1980), INTC_VECT(AP_ARM_PMURQ, 0x19A0), @@ -304,11 +309,11 @@ static struct intc_mask_reg intca_mask_registers[] __initdata = { USBH_EHCI, USBH_PME, USBH_BIND, 0 } }, /* IMR3A3 / IMCR3A3 */ { /* IMR4A3 / IMCR4A3 */ 0xe6950090, 0xe69500d0, 8, - { 0, 0, 0, 0, + { HDMI, 0, 0, 0, RSPI_OVRF, RSPI_SPTEF, RSPI_SPRF, 0 } }, { /* IMR5A3 / IMCR5A3 */ 0xe6950094, 0xe69500d4, 8, { SPU2_0, SPU2_1, FSI, FMSI, - 0, 0, 0, 0 } }, + 0, HDMI_SSS, HDMI_KEY, 0 } }, { /* IMR6A3 / IMCR6A3 */ 0xe6950098, 0xe69500d8, 8, { 0, IPMMU, 0, 0, AP_ARM_CTIIRQ, AP_ARM_PMURQ, 0, 0 } }, @@ -353,10 +358,10 @@ static struct intc_prio_reg intca_prio_registers[] __initdata = { { 0xe6950014, 0, 16, 4, /* IPRFA3 */ { USBH2, 0, 0, 0 } }, /* IPRGA3 */ /* IPRHA3 */ - /* IPRIA3 */ + { 0xe6950020, 0, 16, 4, /* IPRIA3 */ { HDMI, 0, 0, 0 } }, { 0xe6950024, 0, 16, 4, /* IPRJA3 */ { RSPI, 0, 0, 0 } }, { 0xe6950028, 0, 16, 4, /* IPRKA3 */ { SPU2, 0, FSI, FMSI } }, - /* IPRLA3 */ + { 0xe695002c, 0, 16, 4, /* IPRLA3 */ { 0, HDMI_SSS, HDMI_KEY, 0 } }, { 0xe6950030, 0, 16, 4, /* IPRMA3 */ { IPMMU, 0, 0, 0 } }, { 0xe6950034, 0, 16, 4, /* IPRNA3 */ { AP_ARM2, 0, 0, 0 } }, { 0xe6950038, 0, 16, 4, /* IPROA3 */ { MFIS2, CPORTR2S, -- cgit v1.2.2 From c6750acb3b54c77c011045467770d5143be749ee Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:35:36 -0700 Subject: ARM: shmobile: r8a7740: add HDMI clock support It is required from sh_mobile_hdmi driver. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/clock-r8a7740.c | 84 ++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 26eea5f21054..b09534352f97 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -54,6 +54,7 @@ #define MSTPSR2 0xe6150040 #define MSTPSR3 0xe6150048 #define MSTPSR4 0xe615004c +#define HDMICKCR 0xe6150094 #define SMSTPCR0 0xe6150130 #define SMSTPCR1 0xe6150134 #define SMSTPCR2 0xe6150138 @@ -313,6 +314,79 @@ static struct clk_div4_table div4_table = { .kick = div4_kick, }; +/* DIV6 reparent */ +enum { + DIV6_HDMI, + DIV6_REPARENT_NR, +}; + +static struct clk *hdmi_parent[] = { + [0] = &pllc1_div2_clk, + [1] = &system_clk, + [2] = &dv_clk +}; + +static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { + [DIV6_HDMI] = SH_CLK_DIV6_EXT(HDMICKCR, 0, + hdmi_parent, ARRAY_SIZE(hdmi_parent), 6, 2), +}; + +/* HDMI1/2 clock */ +static unsigned long hdmi12_recalc(struct clk *clk) +{ + u32 val = __raw_readl(HDMICKCR); + int shift = (int)clk->priv; + + val >>= shift; + val &= 0x3; + + return clk->parent->rate / (1 << val); +}; + +static int hdmi12_set_rate(struct clk *clk, unsigned long rate) +{ + u32 val, mask; + int i, shift; + + for (i = 0; i < 3; i++) + if (rate == clk->parent->rate / (1 << i)) + goto find; + return -ENODEV; + +find: + shift = (int)clk->priv; + + val = __raw_readl(HDMICKCR); + mask = ~(0x3 << shift); + val = (val & mask) | i << shift; + __raw_writel(val, HDMICKCR); + + return 0; +}; + +static struct sh_clk_ops hdmi12_clk_ops = { + .recalc = hdmi12_recalc, + .set_rate = hdmi12_set_rate, +}; + +static struct clk hdmi1_clk = { + .ops = &hdmi12_clk_ops, + .priv = (void *)9, + .parent = &div6_reparent_clks[DIV6_HDMI], /* late install */ +}; + +static struct clk hdmi2_clk = { + .ops = &hdmi12_clk_ops, + .priv = (void *)11, + .parent = &div6_reparent_clks[DIV6_HDMI], /* late install */ +}; + +static struct clk *late_main_clks[] = { + &hdmi1_clk, + &hdmi2_clk, +}; + +/* MSTP */ enum { DIV4_I, DIV4_ZG, DIV4_B, DIV4_M1, DIV4_HP, DIV4_HPP, DIV4_USBP, DIV4_S, DIV4_ZB, DIV4_M3, DIV4_CP, @@ -408,6 +482,8 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("pllc1_clk", &pllc1_clk), CLKDEV_CON_ID("pllc1_div2_clk", &pllc1_div2_clk), CLKDEV_CON_ID("usb24s", &usb24s_clk), + CLKDEV_CON_ID("hdmi1", &hdmi1_clk), + CLKDEV_CON_ID("hdmi2", &hdmi2_clk), /* DIV4 clocks */ CLKDEV_CON_ID("i_clk", &div4_clks[DIV4_I]), @@ -459,6 +535,7 @@ static struct clk_lookup lookups[] = { CLKDEV_ICK_ID("phy", "renesas_usbhs", &mstp_clks[MSTP406]), CLKDEV_ICK_ID("pci", "renesas_usbhs", &div4_clks[DIV4_USBP]), CLKDEV_ICK_ID("usb24", "renesas_usbhs", &usb24_clk), + CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), }; void __init r8a7740_clock_init(u8 md_ck) @@ -494,9 +571,16 @@ void __init r8a7740_clock_init(u8 md_ck) if (!ret) ret = sh_clk_div6_register(div6_clks, DIV6_NR); + if (!ret) + ret = sh_clk_div6_reparent_register(div6_reparent_clks, + DIV6_REPARENT_NR); + if (!ret) ret = sh_clk_mstp32_register(mstp_clks, MSTP_NR); + for (k = 0; !ret && (k < ARRAY_SIZE(late_main_clks)); k++) + ret = clk_register(late_main_clks[k]); + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); if (!ret) -- cgit v1.2.2 From e2dcd461a7ba0c3deb44336136ea784c8b972292 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:35:58 -0700 Subject: ARM: shmobile: r8a7740: add HDMI GPIO support In order to enable HDMI GPIO selection from platform board, this patch adds its interface to GPIO framework. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/include/mach/r8a7740.h | 4 ++++ arch/arm/mach-shmobile/pfc-r8a7740.c | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h index 9d447abb969c..276484629fb2 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7740.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h @@ -565,6 +565,10 @@ enum { GPIO_FN_RESETP_PULLUP, GPIO_FN_RESETP_PLAIN, + /* HDMI */ + GPIO_FN_HDMI_HPD, + GPIO_FN_HDMI_CEC, + /* SDENC */ GPIO_FN_SDENC_CPG, GPIO_FN_SDENC_DV_CLKI, diff --git a/arch/arm/mach-shmobile/pfc-r8a7740.c b/arch/arm/mach-shmobile/pfc-r8a7740.c index 670fe1869dbc..0dda816feff8 100644 --- a/arch/arm/mach-shmobile/pfc-r8a7740.c +++ b/arch/arm/mach-shmobile/pfc-r8a7740.c @@ -560,6 +560,9 @@ enum { /* SDENC */ SDENC_CPG_MARK, SDENC_DV_CLKI_MARK, + /* HDMI */ + HDMI_HPD_MARK, HDMI_CEC_MARK, + /* DEBUG */ EDEBGREQ_PULLUP_MARK, /* for JTAG */ EDEBGREQ_PULLDOWN_MARK, @@ -1620,9 +1623,11 @@ static pinmux_enum_t pinmux_data[] = { /* Port210 */ PINMUX_DATA(IRQ9_PORT210_MARK, PORT210_FN0, MSEL1CR_9_1), + PINMUX_DATA(HDMI_HPD_MARK, PORT210_FN1), /* Port211 */ PINMUX_DATA(IRQ16_PORT211_MARK, PORT211_FN0, MSEL1CR_16_1), + PINMUX_DATA(HDMI_CEC_MARK, PORT211_FN1), /* LCDC select */ PINMUX_DATA(LCDC0_SELECT_MARK, MSEL3CR_6_0), @@ -2097,6 +2102,10 @@ static struct pinmux_gpio pinmux_gpios[] = { GPIO_FN(SDENC_CPG), GPIO_FN(SDENC_DV_CLKI), + /* HDMI */ + GPIO_FN(HDMI_HPD), + GPIO_FN(HDMI_CEC), + /* SYSC */ GPIO_FN(RESETP_PULLUP), GPIO_FN(RESETP_PLAIN), -- cgit v1.2.2 From d49679e5928709bce8937dce396458b139c4b34d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:36:21 -0700 Subject: ARM: shmobile: r8a7740: add MERAM work-around r8a7740 chip has lasting errata on MERAM buffer, and this patch adds its work-around on setup-r8a7740.c It solved CEU/VIO6C/2D-DMAC/VCP1/VPU5F/JPU/DISP memroy access error. But MERAM driver can't control this issue, since this work-around requires access to non-MERAM register address. So, This it will be called as board specific code at this point. Signed-off-by: Kuninori Morimoto Tested-by: Simon Horman Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/board-armadillo800eva.c | 1 + arch/arm/mach-shmobile/include/mach/common.h | 1 + arch/arm/mach-shmobile/setup-r8a7740.c | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 9e37026ef9dd..6e6839d0a2dc 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -587,6 +587,7 @@ static void __init eva_init(void) eva_clock_init(); r8a7740_pinmux_init(); + r8a7740_meram_workaround(); /* SCIFA1 */ gpio_request(GPIO_FN_SCIFA1_RXD, NULL); diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 01e2bc014f15..45e61dada030 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -77,6 +77,7 @@ extern void r8a7779_add_standard_devices(void); extern void r8a7779_clock_init(void); extern void r8a7779_pinmux_init(void); extern void r8a7779_pm_init(void); +extern void r8a7740_meram_workaround(void); extern unsigned int r8a7779_get_core_count(void); extern int r8a7779_platform_cpu_kill(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index ec4eb49c1693..366311b3dc73 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -324,6 +324,24 @@ static struct platform_device *r8a7740_late_devices[] __initdata = { &i2c1_device, }; +/* + * r8a7740 chip has lasting errata on MERAM buffer. + * this is work-around for it. + * see + * "Media RAM (MERAM)" on r8a7740 documentation + */ +#define MEBUFCNTR 0xFE950098 +void r8a7740_meram_workaround(void) +{ + void __iomem *reg; + + reg = ioremap_nocache(MEBUFCNTR, 4); + if (reg) { + iowrite32(0x01600164, reg); + iounmap(reg); + } +} + #define ICCR 0x0004 #define ICSTART 0x0070 -- cgit v1.2.2 From ad9f1721c15b84f1a2e45a8a03f1ff7c86c2829b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:36:39 -0700 Subject: ARM: shmobile: r8a7740: add CEU clock support It is required from sh_mobile_ceu_camera driver. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Tested-by: Simon Horman Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/clock-r8a7740.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index b09534352f97..ce0930a415e8 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -43,6 +43,8 @@ /* CPG registers */ #define FRQCRA 0xe6150000 #define FRQCRB 0xe6150004 +#define VCLKCR1 0xE6150008 +#define VCLKCR2 0xE615000c #define FRQCRC 0xe61500e0 #define PLLC01CR 0xe6150028 @@ -317,6 +319,7 @@ static struct clk_div4_table div4_table = { /* DIV6 reparent */ enum { DIV6_HDMI, + DIV6_VCLK1, DIV6_VCLK2, DIV6_REPARENT_NR, }; @@ -326,9 +329,21 @@ static struct clk *hdmi_parent[] = { [2] = &dv_clk }; +static struct clk *vclk_parents[8] = { + [0] = &pllc1_div2_clk, + [2] = &dv_clk, + [3] = &usb24s_clk, + [4] = &extal1_div2_clk, + [5] = &extalr_clk, +}; + static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { [DIV6_HDMI] = SH_CLK_DIV6_EXT(HDMICKCR, 0, hdmi_parent, ARRAY_SIZE(hdmi_parent), 6, 2), + [DIV6_VCLK1] = SH_CLK_DIV6_EXT(VCLKCR1, 0, + vclk_parents, ARRAY_SIZE(vclk_parents), 12, 3), + [DIV6_VCLK2] = SH_CLK_DIV6_EXT(VCLKCR2, 0, + vclk_parents, ARRAY_SIZE(vclk_parents), 12, 3), }; /* HDMI1/2 clock */ @@ -417,7 +432,7 @@ static struct clk div6_clks[DIV6_NR] = { }; enum { - MSTP125, + MSTP128, MSTP127, MSTP125, MSTP116, MSTP111, MSTP100, MSTP117, MSTP230, @@ -434,6 +449,8 @@ enum { }; static struct clk mstp_clks[MSTP_NR] = { + [MSTP128] = SH_CLK_MSTP32(&div4_clks[DIV4_S], SMSTPCR1, 28, 0), /* CEU21 */ + [MSTP127] = SH_CLK_MSTP32(&div4_clks[DIV4_S], SMSTPCR1, 27, 0), /* CEU20 */ [MSTP125] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */ [MSTP117] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 17, 0), /* LCDC1 */ [MSTP116] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 16, 0), /* IIC0 */ @@ -484,6 +501,8 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("usb24s", &usb24s_clk), CLKDEV_CON_ID("hdmi1", &hdmi1_clk), CLKDEV_CON_ID("hdmi2", &hdmi2_clk), + CLKDEV_CON_ID("video1", &div6_reparent_clks[DIV6_VCLK1]), + CLKDEV_CON_ID("video2", &div6_reparent_clks[DIV6_VCLK2]), /* DIV4 clocks */ CLKDEV_CON_ID("i_clk", &div4_clks[DIV4_I]), @@ -506,6 +525,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), CLKDEV_DEV_ID("sh_mobile_lcdc_fb.1", &mstp_clks[MSTP117]), CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), + CLKDEV_DEV_ID("sh_mobile_ceu.0", &mstp_clks[MSTP127]), + CLKDEV_DEV_ID("sh_mobile_ceu.1", &mstp_clks[MSTP128]), CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), -- cgit v1.2.2 From 69efac9a8bc6d479bc4c339ae4ac4d353460def6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:36:58 -0700 Subject: ARM: shmobile: r8a7740: add FSI parent clock support r8a7740 FSI can select its parent clock, and its selection is dependent on platform board. In order to enable FSI parent selection from platform board, this patch adds its interface to clock framework. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Tested-by: Simon Horman Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/clock-r8a7740.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index ce0930a415e8..7b9e4ab34fa2 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -46,6 +46,7 @@ #define VCLKCR1 0xE6150008 #define VCLKCR2 0xE615000c #define FRQCRC 0xe61500e0 +#define FSIACKCR 0xe6150018 #define PLLC01CR 0xe6150028 #define SUBCKCR 0xe6150080 @@ -56,6 +57,7 @@ #define MSTPSR2 0xe6150040 #define MSTPSR3 0xe6150048 #define MSTPSR4 0xe615004c +#define FSIBCKCR 0xe6150090 #define HDMICKCR 0xe6150094 #define SMSTPCR0 0xe6150130 #define SMSTPCR1 0xe6150134 @@ -274,6 +276,13 @@ static struct clk usb24_clk = { .parent = &usb24s_clk, }; +/* External FSIACK/FSIBCK clock */ +static struct clk fsiack_clk = { +}; + +static struct clk fsibck_clk = { +}; + struct clk *main_clks[] = { &extalr_clk, &extal1_clk, @@ -291,6 +300,8 @@ struct clk *main_clks[] = { &pllc1_div2_clk, &usb24s_clk, &usb24_clk, + &fsiack_clk, + &fsibck_clk, }; static void div4_kick(struct clk *clk) @@ -320,6 +331,7 @@ static struct clk_div4_table div4_table = { enum { DIV6_HDMI, DIV6_VCLK1, DIV6_VCLK2, + DIV6_FSIA, DIV6_FSIB, DIV6_REPARENT_NR, }; @@ -337,6 +349,16 @@ static struct clk *vclk_parents[8] = { [5] = &extalr_clk, }; +static struct clk *fsia_parents[] = { + [0] = &pllc1_div2_clk, + [1] = &fsiack_clk, /* external clock */ +}; + +static struct clk *fsib_parents[] = { + [0] = &pllc1_div2_clk, + [1] = &fsibck_clk, /* external clock */ +}; + static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { [DIV6_HDMI] = SH_CLK_DIV6_EXT(HDMICKCR, 0, hdmi_parent, ARRAY_SIZE(hdmi_parent), 6, 2), @@ -344,6 +366,10 @@ static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { vclk_parents, ARRAY_SIZE(vclk_parents), 12, 3), [DIV6_VCLK2] = SH_CLK_DIV6_EXT(VCLKCR2, 0, vclk_parents, ARRAY_SIZE(vclk_parents), 12, 3), + [DIV6_FSIA] = SH_CLK_DIV6_EXT(FSIACKCR, 0, + fsia_parents, ARRAY_SIZE(fsia_parents), 6, 2), + [DIV6_FSIB] = SH_CLK_DIV6_EXT(FSIBCKCR, 0, + fsib_parents, ARRAY_SIZE(fsib_parents), 6, 2), }; /* HDMI1/2 clock */ @@ -503,6 +529,8 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("hdmi2", &hdmi2_clk), CLKDEV_CON_ID("video1", &div6_reparent_clks[DIV6_VCLK1]), CLKDEV_CON_ID("video2", &div6_reparent_clks[DIV6_VCLK2]), + CLKDEV_CON_ID("fsiack", &fsiack_clk), + CLKDEV_CON_ID("fsibck", &fsibck_clk), /* DIV4 clocks */ CLKDEV_CON_ID("i_clk", &div4_clks[DIV4_I]), @@ -557,6 +585,9 @@ static struct clk_lookup lookups[] = { CLKDEV_ICK_ID("pci", "renesas_usbhs", &div4_clks[DIV4_USBP]), CLKDEV_ICK_ID("usb24", "renesas_usbhs", &usb24_clk), CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), + + CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]), + CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]), }; void __init r8a7740_clock_init(u8 md_ck) -- cgit v1.2.2 From 147d1ffdc21d067f0084f0911dbf1eee57e3d76b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:37:16 -0700 Subject: ARM: shmobile: r8a7740: add FSI-B (for HDMI) GPIO support In order to enable FSI-B selection from platform board, this patch adds its interface to GPIO framework. This patch is based on v1.0 manual Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/include/mach/r8a7740.h | 5 ++++- arch/arm/mach-shmobile/pfc-r8a7740.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h index 276484629fb2..6468fcc5ee49 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7740.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h @@ -139,7 +139,7 @@ enum { GPIO_FN_DBGMD10, GPIO_FN_DBGMD11, GPIO_FN_DBGMD20, GPIO_FN_DBGMD21, - /* FSI */ + /* FSI-A */ GPIO_FN_FSIAISLD_PORT0, /* FSIAISLD Port 0/5 */ GPIO_FN_FSIAISLD_PORT5, GPIO_FN_FSIASPDIF_PORT9, /* FSIASPDIF Port 9/18 */ @@ -150,6 +150,9 @@ enum { GPIO_FN_FSIACK, GPIO_FN_FSIAILR, GPIO_FN_FSIAIBT, + /* FSI-B */ + GPIO_FN_FSIBCK, + /* FMSI */ GPIO_FN_FMSISLD_PORT1, /* FMSISLD Port 1/6 */ GPIO_FN_FMSISLD_PORT6, diff --git a/arch/arm/mach-shmobile/pfc-r8a7740.c b/arch/arm/mach-shmobile/pfc-r8a7740.c index 0dda816feff8..03def0fd7a05 100644 --- a/arch/arm/mach-shmobile/pfc-r8a7740.c +++ b/arch/arm/mach-shmobile/pfc-r8a7740.c @@ -169,7 +169,7 @@ enum { DBGMD10_MARK, DBGMD11_MARK, DBGMD20_MARK, DBGMD21_MARK, - /* FSI */ + /* FSI-A */ FSIAISLD_PORT0_MARK, /* FSIAISLD Port 0/5 */ FSIAISLD_PORT5_MARK, FSIASPDIF_PORT9_MARK, /* FSIASPDIF Port 9/18 */ @@ -178,6 +178,9 @@ enum { FSIAOBT_MARK, FSIAOSLD_MARK, FSIAOMC_MARK, FSIACK_MARK, FSIAILR_MARK, FSIAIBT_MARK, + /* FSI-B */ + FSIBCK_MARK, + /* FMSI */ FMSISLD_PORT1_MARK, /* FMSISLD Port 1/6 */ FMSISLD_PORT6_MARK, @@ -774,6 +777,7 @@ static pinmux_enum_t pinmux_data[] = { /* Port11 */ PINMUX_DATA(FSIACK_MARK, PORT11_FN1), + PINMUX_DATA(FSIBCK_MARK, PORT11_FN2), PINMUX_DATA(IRQ2_PORT11_MARK, PORT11_FN0, MSEL1CR_2_0), /* Port12 */ @@ -1696,7 +1700,7 @@ static struct pinmux_gpio pinmux_gpios[] = { GPIO_FN(DBGMD10), GPIO_FN(DBGMD11), GPIO_FN(DBGMD20), GPIO_FN(DBGMD21), - /* FSI */ + /* FSI-A */ GPIO_FN(FSIAISLD_PORT0), /* FSIAISLD Port 0/5 */ GPIO_FN(FSIAISLD_PORT5), GPIO_FN(FSIASPDIF_PORT9), /* FSIASPDIF Port 9/18 */ @@ -1705,6 +1709,9 @@ static struct pinmux_gpio pinmux_gpios[] = { GPIO_FN(FSIAOBT), GPIO_FN(FSIAOSLD), GPIO_FN(FSIAOMC), GPIO_FN(FSIACK), GPIO_FN(FSIAILR), GPIO_FN(FSIAIBT), + /* FSI-B */ + GPIO_FN(FSIBCK), + /* FMSI */ GPIO_FN(FMSISLD_PORT1), /* FMSISLD Port 1/6 */ GPIO_FN(FMSISLD_PORT6), -- cgit v1.2.2 From 910c14d0b8f121df420a878cbd973ffa7d549393 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 12 Jun 2012 02:39:11 -0700 Subject: ARM: shmobile: armadillo800eva: enable HDMI This patch enable HDMI support on CON3. And removed unnecessary CONFIG_SYSFS_DEPRECATED_xxx config. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/configs/armadillo800eva_defconfig | 10 +-- arch/arm/mach-shmobile/board-armadillo800eva.c | 100 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig index ddc9fe6a78ac..f6ebdde380ee 100644 --- a/arch/arm/configs/armadillo800eva_defconfig +++ b/arch/arm/configs/armadillo800eva_defconfig @@ -5,10 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set -# CONFIG_USER_NS is not set # CONFIG_PID_NS is not set -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -90,25 +87,21 @@ CONFIG_I2C=y CONFIG_I2C_SH_MOBILE=y # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_FB_MODE_HELPERS=y CONFIG_FB_SH_MOBILE_LCDC=y +CONFIG_FB_SH_MOBILE_HDMI=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -CONFIG_SOUND=y -CONFIG_SND=y # CONFIG_SND_SUPPORT_OLD_API is not set # CONFIG_SND_VERBOSE_PROCFS is not set # CONFIG_SND_DRIVERS is not set # CONFIG_SND_ARM is not set -CONFIG_SND_SOC=y CONFIG_SND_SOC_SH4_FSI=y # CONFIG_HID_SUPPORT is not set CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_RENESAS_USBHS=y CONFIG_USB_GADGET=y CONFIG_USB_RENESAS_USBHS_UDC=y @@ -124,7 +117,6 @@ CONFIG_VFAT_FS=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 6e6839d0a2dc..4e6893f9e0da 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -45,6 +45,7 @@ #include #include #include