From 11a5aa32562e1e90ab2d0278748fe0fbecda1cbe Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 25 Nov 2013 21:52:25 +0000
Subject: ARM: dma-mapping: check DMA mask against available memory

Some buses have negative offsets, which causes the DMA mask checks to
falsely fail.  Fix this by using the actual amount of memory fitted in
the system.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 79f8b39801a8..f6b6bfa88ecf 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -9,6 +9,7 @@
  *
  *  DMA uncached mapping support.
  */
+#include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
@@ -162,6 +163,8 @@ static u64 get_coherent_dma_mask(struct device *dev)
 	u64 mask = (u64)DMA_BIT_MASK(32);
 
 	if (dev) {
+		unsigned long max_dma_pfn;
+
 		mask = dev->coherent_dma_mask;
 
 		/*
@@ -173,6 +176,8 @@ static u64 get_coherent_dma_mask(struct device *dev)
 			return 0;
 		}
 
+		max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+
 		/*
 		 * If the mask allows for more memory than we can address,
 		 * and we actually have that much memory, then fail the
@@ -180,7 +185,7 @@ static u64 get_coherent_dma_mask(struct device *dev)
 		 */
 		if (sizeof(mask) != sizeof(dma_addr_t) &&
 		    mask > (dma_addr_t)~0 &&
-		    dma_to_pfn(dev, ~0) > arm_dma_pfn_limit) {
+		    dma_to_pfn(dev, ~0) > max_dma_pfn) {
 			dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
 				 mask);
 			dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
@@ -192,7 +197,7 @@ static u64 get_coherent_dma_mask(struct device *dev)
 		 * fits within the allowable addresses which we can
 		 * allocate.
 		 */
-		if (dma_to_pfn(dev, mask) < arm_dma_pfn_limit) {
+		if (dma_to_pfn(dev, mask) < max_dma_pfn) {
 			dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n",
 				 mask,
 				 dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1,
-- 
cgit v1.2.2


From d8aa712c30148ba26fd89a5dc14de95d4c375184 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 28 Nov 2013 21:43:40 +0000
Subject: ARM: fix booting low-vectors machines

Commit f6f91b0d9fd9 (ARM: allow kuser helpers to be removed from the
vector page) required two pages for the vectors code.  Although the
code setting up the initial page tables was updated, the code which
allocates page tables for new processes wasn't, neither was the code
which tears down the mappings.  Fix this.

Fixes: f6f91b0d9fd9 ("ARM: allow kuser helpers to be removed from the vector page")
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: <stable@vger.kernel.org>
---
 arch/arm/include/asm/pgtable.h | 2 +-
 arch/arm/mm/mmap.c             | 2 +-
 arch/arm/mm/pgd.c              | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index be956dbf6bae..1571d126e9dd 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -61,7 +61,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
  * mapping to be mapped at.  This is particularly important for
  * non-high vector CPUs.
  */
-#define FIRST_USER_ADDRESS	PAGE_SIZE
+#define FIRST_USER_ADDRESS	(PAGE_SIZE * 2)
 
 /*
  * Use TASK_SIZE as the ceiling argument for free_pgtables() and
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index d27158c38eb0..5e85ed371364 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
-	info.low_limit = PAGE_SIZE;
+	info.low_limit = FIRST_USER_ADDRESS;
 	info.high_limit = mm->mmap_base;
 	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 0acb089d0f70..1046b373d1ae 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 		init_pud = pud_offset(init_pgd, 0);
 		init_pmd = pmd_offset(init_pud, 0);
 		init_pte = pte_offset_map(init_pmd, 0);
-		set_pte_ext(new_pte, *init_pte, 0);
+		set_pte_ext(new_pte + 0, init_pte[0], 0);
+		set_pte_ext(new_pte + 1, init_pte[1], 0);
 		pte_unmap(init_pte);
 		pte_unmap(new_pte);
 	}
-- 
cgit v1.2.2


From 43659222e7a0113912ed02f6b2231550b3e471ac Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 28 Nov 2013 21:55:41 +0000
Subject: ARM: footbridge: fix VGA initialisation

It's no good setting vga_base after the VGA console has been
initialised, because if we do that we get this:

Unable to handle kernel paging request at virtual address 000b8000
pgd = c0004000
[000b8000] *pgd=07ffc831, *pte=00000000, *ppte=00000000
0Internal error: Oops: 5017 [#1] ARM
Modules linked in:
CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.0+ #49
task: c03e2974 ti: c03d8000 task.ti: c03d8000
PC is at vgacon_startup+0x258/0x39c
LR is at request_resource+0x10/0x1c
pc : [<c01725d0>]    lr : [<c0022b50>]    psr: 60000053
sp : c03d9f68  ip : 000b8000  fp : c03d9f8c
r10: 000055aa  r9 : 4401a103  r8 : ffffaa55
r7 : c03e357c  r6 : c051b460  r5 : 000000ff  r4 : 000c0000
r3 : 000b8000  r2 : c03e0514  r1 : 00000000  r0 : c0304971
Flags: nZCv  IRQs on  FIQs off  Mode SVC_32  ISA ARM  Segment kernel

which is an access to the 0xb8000 without the PCI offset required to
make it work.

Fixes: cc22b4c18540 ("ARM: set vga memory base at run-time")
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: <stable@vger.kernel.org>
---
 arch/arm/mach-footbridge/common.c  | 3 +++
 arch/arm/mach-footbridge/dc21285.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c
index 2739ca2c1334..e0091685fd48 100644
--- a/arch/arm/mach-footbridge/common.c
+++ b/arch/arm/mach-footbridge/common.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <video/vga.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -196,6 +197,8 @@ void __init footbridge_map_io(void)
 		iotable_init(ebsa285_host_io_desc, ARRAY_SIZE(ebsa285_host_io_desc));
 		pci_map_io_early(__phys_to_pfn(DC21285_PCI_IO));
 	}
+
+	vga_base = PCIMEM_BASE;
 }
 
 void footbridge_restart(enum reboot_mode mode, const char *cmd)
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 3490a24f969e..7c2fdae9a38b 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -18,7 +18,6 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
-#include <video/vga.h>
 
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
@@ -291,7 +290,6 @@ void __init dc21285_preinit(void)
 	int cfn_mode;
 
 	pcibios_min_mem = 0x81000000;
-	vga_base = PCIMEM_BASE;
 
 	mem_size = (unsigned int)high_memory - PAGE_OFFSET;
 	for (mem_mask = 0x00100000; mem_mask < 0x10000000; mem_mask <<= 1)
-- 
cgit v1.2.2


From 67130c5464f50428aea0b4526a6729d61f9a1d53 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 29 Nov 2013 00:54:38 +0000
Subject: ARM: footbridge: fix EBSA285 LEDs

- The LEDs register is write-only: it can't be read-modify-written.
- The LEDs are write-1-for-off not 0.
- The check for the platform was inverted.

Fixes: cf6856d693dd ("ARM: mach-footbridge: retire custom LED code")
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: stable@vger.kernel.org
---
 arch/arm/mach-footbridge/ebsa285.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-footbridge/ebsa285.c b/arch/arm/mach-footbridge/ebsa285.c
index b08243500e2e..1a7235fb52ac 100644
--- a/arch/arm/mach-footbridge/ebsa285.c
+++ b/arch/arm/mach-footbridge/ebsa285.c
@@ -30,21 +30,24 @@ static const struct {
 	const char *name;
 	const char *trigger;
 } ebsa285_leds[] = {
-	{ "ebsa285:amber", "heartbeat", },
-	{ "ebsa285:green", "cpu0", },
+	{ "ebsa285:amber", "cpu0", },
+	{ "ebsa285:green", "heartbeat", },
 	{ "ebsa285:red",},
 };
 
+static unsigned char hw_led_state;
+
 static void ebsa285_led_set(struct led_classdev *cdev,
 		enum led_brightness b)
 {
 	struct ebsa285_led *led = container_of(cdev,
 			struct ebsa285_led, cdev);
 
-	if (b != LED_OFF)
-		*XBUS_LEDS |= led->mask;
+	if (b == LED_OFF)
+		hw_led_state |= led->mask;
 	else
-		*XBUS_LEDS &= ~led->mask;
+		hw_led_state &= ~led->mask;
+	*XBUS_LEDS = hw_led_state;
 }
 
 static enum led_brightness ebsa285_led_get(struct led_classdev *cdev)
@@ -52,18 +55,19 @@ static enum led_brightness ebsa285_led_get(struct led_classdev *cdev)
 	struct ebsa285_led *led = container_of(cdev,
 			struct ebsa285_led, cdev);
 
-	return (*XBUS_LEDS & led->mask) ? LED_FULL : LED_OFF;
+	return hw_led_state & led->mask ? LED_OFF : LED_FULL;
 }
 
 static int __init ebsa285_leds_init(void)
 {
 	int i;
 
-	if (machine_is_ebsa285())
+	if (!machine_is_ebsa285())
 		return -ENODEV;
 
-	/* 3 LEDS All ON */
-	*XBUS_LEDS |= XBUS_LED_AMBER | XBUS_LED_GREEN | XBUS_LED_RED;
+	/* 3 LEDS all off */
+	hw_led_state = XBUS_LED_AMBER | XBUS_LED_GREEN | XBUS_LED_RED;
+	*XBUS_LEDS = hw_led_state;
 
 	for (i = 0; i < ARRAY_SIZE(ebsa285_leds); i++) {
 		struct ebsa285_led *led;
-- 
cgit v1.2.2


From 50913336ef3c9270b5e2b44a5d81230d7db42fc5 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 21 Nov 2013 07:17:03 +0100
Subject: ARM: 7895/1: signal: fix armv7-m build issue in sigreturn_codes.S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After "ARM: signal: sigreturn_codes should be endian neutral to
work in BE8" commit, thumb only platforms, like armv7m, fails to
compile sigreturn_codes.S. The reason is that for such arch
values '.arm' directive and arm opcodes are not allowed.

Fix conditionally enables arm opcodes only if no CONFIG_CPU_THUMBONLY
defined and it uses .org instructions to keep sigreturn_codes
layout.

Suggested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/sigreturn_codes.S | 40 ++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
index 3c5d0f2170fd..b84d0cb13682 100644
--- a/arch/arm/kernel/sigreturn_codes.S
+++ b/arch/arm/kernel/sigreturn_codes.S
@@ -30,6 +30,27 @@
  * snippets.
  */
 
+/*
+ * In CPU_THUMBONLY case kernel arm opcodes are not allowed.
+ * Note in this case codes skips those instructions but it uses .org
+ * directive to keep correct layout of sigreturn_codes array.
+ */
+#ifndef CONFIG_CPU_THUMBONLY
+#define ARM_OK(code...)	code
+#else
+#define ARM_OK(code...)
+#endif
+
+	.macro arm_slot n
+	.org	sigreturn_codes + 12 * (\n)
+ARM_OK(	.arm	)
+	.endm
+
+	.macro thumb_slot n
+	.org	sigreturn_codes + 12 * (\n) + 8
+	.thumb
+	.endm
+
 #if __LINUX_ARM_ARCH__ <= 4
 	/*
 	 * Note we manually set minimally required arch that supports
@@ -45,26 +66,27 @@
 	.global sigreturn_codes
 	.type	sigreturn_codes, #object
 
-	.arm
+	.align
 
 sigreturn_codes:
 
 	/* ARM sigreturn syscall code snippet */
-	mov	r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)
-	swi	#(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)
+	arm_slot 0
+ARM_OK(	mov	r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)	)
+ARM_OK(	swi	#(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)	)
 
 	/* Thumb sigreturn syscall code snippet */
-	.thumb
+	thumb_slot 0
 	movs	r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)
 	swi	#0
 
 	/* ARM sigreturn_rt syscall code snippet */
-	.arm
-	mov	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
-	swi	#(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)
+	arm_slot 1
+ARM_OK(	mov	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)	)
+ARM_OK(	swi	#(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)	)
 
 	/* Thumb sigreturn_rt syscall code snippet */
-	.thumb
+	thumb_slot 1
 	movs	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
 	swi	#0
 
@@ -74,7 +96,7 @@ sigreturn_codes:
 	 * it is thumb case or not, so we need additional
 	 * word after real last entry.
 	 */
-	.arm
+	arm_slot 2
 	.space	4
 
 	.size	sigreturn_codes, . - sigreturn_codes
-- 
cgit v1.2.2


From e2ccba49085ab5d71b092de2a5176eb9b19cc876 Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Mon, 25 Nov 2013 14:54:47 +0100
Subject: ARM: 7897/1: kexec: Use the right ISA for relocate_new_kernel

Copying a function with memcpy() and then trying to execute the
result isn't trivially portable to Thumb.

This patch modifies the kexec soft restart code to copy its
assembler trampoline relocate_new_kernel() using fncpy() instead,
so that relocate_new_kernel can be in the same ISA as the rest of
the kernel without problems.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reported-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Tested-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/machine_kexec.c   | 17 ++++++++++-------
 arch/arm/kernel/relocate_kernel.S |  8 ++++++--
 2 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 57221e349a7c..f0d180d8b29f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -14,11 +14,12 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
+#include <asm/fncpy.h>
 #include <asm/mach-types.h>
 #include <asm/smp_plat.h>
 #include <asm/system_misc.h>
 
-extern const unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel(void);
 extern const unsigned int relocate_new_kernel_size;
 
 extern unsigned long kexec_start_address;
@@ -142,6 +143,8 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list;
 	unsigned long reboot_code_buffer_phys;
+	unsigned long reboot_entry = (unsigned long)relocate_new_kernel;
+	unsigned long reboot_entry_phys;
 	void *reboot_code_buffer;
 
 	/*
@@ -168,16 +171,16 @@ void machine_kexec(struct kimage *image)
 
 
 	/* copy our kernel relocation code to the control code page */
-	memcpy(reboot_code_buffer,
-	       relocate_new_kernel, relocate_new_kernel_size);
+	reboot_entry = fncpy(reboot_code_buffer,
+			     reboot_entry,
+			     relocate_new_kernel_size);
+	reboot_entry_phys = (unsigned long)reboot_entry +
+		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
 
-
-	flush_icache_range((unsigned long) reboot_code_buffer,
-			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
 	if (kexec_reinit)
 		kexec_reinit();
 
-	soft_restart(reboot_code_buffer_phys);
+	soft_restart(reboot_entry_phys);
 }
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index d0cdedf4864d..95858966d84e 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -2,10 +2,12 @@
  * relocate_kernel.S - put the kernel image in place to boot
  */
 
+#include <linux/linkage.h>
 #include <asm/kexec.h>
 
-	.globl relocate_new_kernel
-relocate_new_kernel:
+	.align	3	/* not needed for this code, but keeps fncpy() happy */
+
+ENTRY(relocate_new_kernel)
 
 	ldr	r0,kexec_indirection_page
 	ldr	r1,kexec_start_address
@@ -79,6 +81,8 @@ kexec_mach_type:
 kexec_boot_atags:
 	.long	0x0
 
+ENDPROC(relocate_new_kernel)
+
 relocate_new_kernel_end:
 
 	.globl relocate_new_kernel_size
-- 
cgit v1.2.2


From 11d4bb1bd067f9d01d18f620ccfad516dc579593 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sat, 30 Nov 2013 15:24:42 +0100
Subject: ARM: 7907/1: lib: delay-loop: Add align directive to fix BogoMIPS
 calculation

Currently mx53 (CortexA8) running at 1GHz reports:
Calibrating delay loop... 663.55 BogoMIPS (lpj=3317760)

Tom Evans verified that alignments of 0x0 and 0x8 run the two instructions of __loop_delay in one clock cycle (1 clock/loop), while alignments of 0x4 and 0xc take 3 clocks to run the loop twice. (1.5 clock/loop)

The original object code looks like this:

00000010 <__loop_const_udelay>:
  10:	e3e01000 	mvn	r1, #0
  14:	e51f201c 	ldr	r2, [pc, #-28]	; 0 <__loop_udelay-0x8>
  18:	e5922000 	ldr	r2, [r2]
  1c:	e0800921 	add	r0, r0, r1, lsr #18
  20:	e1a00720 	lsr	r0, r0, #14
  24:	e0822b21 	add	r2, r2, r1, lsr #22
  28:	e1a02522 	lsr	r2, r2, #10
  2c:	e0000092 	mul	r0, r2, r0
  30:	e0800d21 	add	r0, r0, r1, lsr #26
  34:	e1b00320 	lsrs	r0, r0, #6
  38:	01a0f00e 	moveq	pc, lr

0000003c <__loop_delay>:
  3c:	e2500001 	subs	r0, r0, #1
  40:	8afffffe 	bhi	3c <__loop_delay>
  44:	e1a0f00e 	mov	pc, lr

After adding the 'align 3' directive to __loop_delay (align to 8 bytes):

00000010 <__loop_const_udelay>:
  10:	e3e01000 	mvn	r1, #0
  14:	e51f201c 	ldr	r2, [pc, #-28]	; 0 <__loop_udelay-0x8>
  18:	e5922000 	ldr	r2, [r2]
  1c:	e0800921 	add	r0, r0, r1, lsr #18
  20:	e1a00720 	lsr	r0, r0, #14
  24:	e0822b21 	add	r2, r2, r1, lsr #22
  28:	e1a02522 	lsr	r2, r2, #10
  2c:	e0000092 	mul	r0, r2, r0
  30:	e0800d21 	add	r0, r0, r1, lsr #26
  34:	e1b00320 	lsrs	r0, r0, #6
  38:	01a0f00e 	moveq	pc, lr
  3c:	e320f000 	nop	{0}

00000040 <__loop_delay>:
  40:	e2500001 	subs	r0, r0, #1
  44:	8afffffe 	bhi	40 <__loop_delay>
  48:	e1a0f00e 	mov	pc, lr
  4c:	e320f000 	nop	{0}

, which now reports:
Calibrating delay loop... 996.14 BogoMIPS (lpj=4980736)

Some more test results:

On mx31 (ARM1136) running at 532 MHz, before the patch:
Calibrating delay loop... 351.43 BogoMIPS (lpj=1757184)

On mx31 (ARM1136) running at 532 MHz after the patch:
Calibrating delay loop... 528.79 BogoMIPS (lpj=2643968)

Also tested on mx6 (CortexA9) and on mx27 (ARM926), which shows the same
BogoMIPS value before and after this patch.

Reported-by: Tom Evans <tom_usenet@optusnet.com.au>
Suggested-by: Tom Evans <tom_usenet@optusnet.com.au>
Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/lib/delay-loop.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm')

diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index 36b668d8e121..bc1033b897b4 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -40,6 +40,7 @@ ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 /*
  * loops = r0 * HZ * loops_per_jiffy / 1000000
  */
+		.align 3
 
 @ Delay routine
 ENTRY(__loop_delay)
-- 
cgit v1.2.2