author	Chris Metcalf <cmetcalf@tilera.com>	2010-05-28 23:09:12 -0400
committer	Chris Metcalf <cmetcalf@tilera.com>	2010-06-04 17:11:18 -0400
commit	867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
tree	c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/lib/atomic_32.c
parent	5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
arch/tile: core support for Tilera 32-bit chips.
This change is the core kernel support for TILEPro and TILE64 chips.
No driver support (except the console driver) is included yet.

This includes the relevant Linux headers in asm/; the low-level
"Tile architecture" headers in arch/, which are shared with the
hypervisor, etc., and are build-system agnostic; and the relevant
hypervisor headers in hv/.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/tile/lib/atomic_32.c')
-rw-r--r--	arch/tile/lib/atomic_32.c	347
1 file changed, 347 insertions, 0 deletions
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
new file mode 100644
index 000000000000..be1e8acd105d
--- /dev/null
+++ b/arch/tile/lib/atomic_32.c
@@ -0,0 +1,347 @@
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/atomic.h>
#include <arch/chip.h>

/* The routines in atomic_asm.S are private, so we only declare them here. */
extern struct __get_user __atomic_cmpxchg(volatile int *p,
                                          int *lock, int o, int n);
extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
                                                  int *lock, int o, int n);
extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);

extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
                                      int *lock, u64 o, u64 n);


/* See <asm/atomic.h> */
#if ATOMIC_LOCKS_FOUND_VIA_TABLE()

/*
 * A block of memory containing locks for atomic ops. Each instance of this
 * struct will be homed on a different CPU.
 */
struct atomic_locks_on_cpu {
        int lock[ATOMIC_HASH_L2_SIZE];
} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));

static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);

/* The locks we'll use until __init_atomic_per_cpu is called. */
static struct atomic_locks_on_cpu __initdata initial_atomic_locks;

/* Hash into this vector to get a pointer to lock for the given atomic. */
struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
        __write_once = {
        [0 ... ATOMIC_HASH_L1_SIZE-1] = &initial_atomic_locks
};

#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */

/* This page is remapped on startup to be hash-for-home. */
int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */]
        __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned")));

#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */

static inline int *__atomic_hashed_lock(volatile void *v)
{
        /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */
#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
        unsigned long i =
                (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
        unsigned long n = __insn_crc32_32(0, i);

        /* Grab high bits for L1 index. */
        unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
        /* Grab low bits for L2 index. */
        unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);

        return &atomic_lock_ptr[l1_index]->lock[l2_index];
#else
        /*
         * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
         * Using mm works here because atomic_locks is page aligned.
         */
        unsigned long ptr = __insn_mm((unsigned long)v >> 1,
                                      (unsigned long)atomic_locks,
                                      2, (ATOMIC_HASH_SHIFT + 2) - 1);
        return (int *)ptr;
#endif
}

#ifdef CONFIG_SMP
/* Return whether the passed pointer is a valid atomic lock pointer. */
static int is_atomic_lock(int *p)
{
#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
        int i;
        for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {

                if (p >= &atomic_lock_ptr[i]->lock[0] &&
                    p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
                        return 1;
                }
        }
        return 0;
#else
        return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
#endif
}

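/*
 * Release a hashed atomic lock word.  This path is used when a fault
 * occurs while the lock is held: we sanity-check that the pointer
 * really is one of our lock words and that it is currently locked
 * (set to 1) before clearing it.
 */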
void __atomic_fault_unlock(int *irqlock_word)
{
        BUG_ON(!is_atomic_lock(irqlock_word));
        BUG_ON(*irqlock_word != 1);
        *irqlock_word = 0;
}

#endif /* CONFIG_SMP */

static inline int *__atomic_setup(volatile void *v)
{
        /* Issue a load to the target to bring it into cache. */
        *(volatile int *)v;
        return __atomic_hashed_lock(v);
}

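/*
 * Out-of-line 32-bit atomic routines.  Each one primes the cache with
 * the target word via __atomic_setup(), hashes the address to one of
 * the lock words above, and passes both to the corresponding assembly
 * helper declared at the top of this file.
 */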
int _atomic_xchg(atomic_t *v, int n)
{
        return __atomic_xchg(&v->counter, __atomic_setup(v), n).val;
}
EXPORT_SYMBOL(_atomic_xchg);

int _atomic_xchg_add(atomic_t *v, int i)
{
        return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val;
}
EXPORT_SYMBOL(_atomic_xchg_add);

int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
{
        /*
         * Note: argument order is switched here since it is easier
         * to use the first argument consistently as the "old value"
         * in the assembly, as is done for _atomic_cmpxchg().
         */
        return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a)
                .val;
}
EXPORT_SYMBOL(_atomic_xchg_add_unless);

int _atomic_cmpxchg(atomic_t *v, int o, int n)
{
        return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val;
}
EXPORT_SYMBOL(_atomic_cmpxchg);

unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
{
        return __atomic_or((int *)p, __atomic_setup(p), mask).val;
}
EXPORT_SYMBOL(_atomic_or);

unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
{
        return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
}
EXPORT_SYMBOL(_atomic_andn);

unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
{
        return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
}
EXPORT_SYMBOL(_atomic_xor);


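/*
 * The 64-bit variants work the same way: the 32-bit hashed lock word
 * protects the full 64-bit value while the assembly helper updates it.
 */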
u64 _atomic64_xchg(atomic64_t *v, u64 n)
{
        return __atomic64_xchg(&v->counter, __atomic_setup(v), n);
}
EXPORT_SYMBOL(_atomic64_xchg);

u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
{
        return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i);
}
EXPORT_SYMBOL(_atomic64_xchg_add);

u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
{
        /*
         * Note: argument order is switched here since it is easier
         * to use the first argument consistently as the "old value"
         * in the assembly, as is done for _atomic_cmpxchg().
         */
        return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v),
                                          u, a);
}
EXPORT_SYMBOL(_atomic64_xchg_add_unless);

u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
{
        return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n);
}
EXPORT_SYMBOL(_atomic64_cmpxchg);


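/*
 * Futex support.  The target word lives in user space, so unlike
 * __atomic_setup() we only prefetch it into the L2 rather than loading
 * it (the load could fault); beyond that, the futex operations reuse
 * the same hashed locks and assembly helpers as the kernel atomics.
 */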
static inline int *__futex_setup(int __user *v)
{
        /*
         * Issue a prefetch to the counter to bring it into cache.
         * As for __atomic_setup, but we can't do a read into the L1
         * since it might fault; instead we do a prefetch into the L2.
         */
        __insn_prefetch(v);
        return __atomic_hashed_lock(v);
}

struct __get_user futex_set(int *v, int i)
{
        return __atomic_xchg(v, __futex_setup(v), i);
}

struct __get_user futex_add(int *v, int n)
{
        return __atomic_xchg_add(v, __futex_setup(v), n);
}

struct __get_user futex_or(int *v, int n)
{
        return __atomic_or(v, __futex_setup(v), n);
}

struct __get_user futex_andn(int *v, int n)
{
        return __atomic_andn(v, __futex_setup(v), n);
}

struct __get_user futex_xor(int *v, int n)
{
        return __atomic_xor(v, __futex_setup(v), n);
}

struct __get_user futex_cmpxchg(int *v, int o, int n)
{
        return __atomic_cmpxchg(v, __futex_setup(v), o, n);
}

/*
 * If any of the atomic or futex routines hit a bad address (not in
 * the page tables at kernel PL) this routine is called. The futex
 * routines are never used on kernel space, and the normal atomics and
 * bitops are never used on user space. So a fault on kernel space
 * must be fatal, but a fault on userspace is a futex fault and we
 * need to return -EFAULT. Note that the context this routine is
 * invoked in is the context of the "_atomic_xxx()" routines called
 * by the functions in this file.
 */
struct __get_user __atomic_bad_address(int *addr)
{
        if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int))))
                panic("Bad address used for kernel atomic op: %p\n", addr);
        return (struct __get_user) { .err = -EFAULT };
}


#if CHIP_HAS_CBOX_HOME_MAP()
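/* "noatomichash" is still accepted on the command line but is now a no-op. */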
static int __init noatomichash(char *str)
{
        printk("noatomichash is deprecated.\n");
        return 1;
}
__setup("noatomichash", noatomichash);
#endif

void __init __init_atomic_per_cpu(void)
{
#if ATOMIC_LOCKS_FOUND_VIA_TABLE()

        unsigned int i;
        int actual_cpu;

        /*
         * Before this is called from setup, we just have one lock for
         * all atomic objects/operations. Here we replace the
         * elements of atomic_lock_ptr so that they point at per_cpu
         * integers. This seemingly over-complex approach stems from
         * the fact that DEFINE_PER_CPU defines an entry for each cpu
         * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
         * for efficient hashing of atomics to their locks we want a
         * compile time constant power of 2 for the size of this
         * table, so we use ATOMIC_HASH_SIZE.
         *
         * Here we populate atomic_lock_ptr from the per cpu
         * atomic_lock_pool, interspersing by actual cpu so that
         * subsequent elements are homed on consecutive cpus.
         */

        actual_cpu = cpumask_first(cpu_possible_mask);

        for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
                /*
                 * Preincrement to slightly bias against using cpu 0,
                 * which has plenty of stuff homed on it already.
                 */
                actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
                if (actual_cpu >= nr_cpu_ids)
                        actual_cpu = cpumask_first(cpu_possible_mask);

                atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
        }

#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */

        /* Validate power-of-two and "bigger than cpus" assumption */
        BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
        BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);

        /*
         * On TILEPro we prefer to use a single hash-for-home
         * page, since this means atomic operations are less
         * likely to encounter a TLB fault and thus should
         * in general perform faster. You may wish to disable
         * this in situations where few hash-for-home tiles
         * are configured.
         */
        BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0);

        /* The locks must all fit on one page. */
        BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE);

        /*
         * We use the page offset of the atomic value's address as
         * an index into atomic_locks, excluding the low 3 bits.
         * That should not produce more indices than ATOMIC_HASH_SIZE.
         */
        BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);

#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */

        /* The futex code makes this assumption, so we validate it here. */
        BUG_ON(sizeof(atomic_t) != sizeof(int));
}