diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-04-06 20:56:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-04-06 20:56:20 -0400 |
commit | 4157368edbc3d69b05e9294a73c84fc9c96bdec4 (patch) | |
tree | 60072b082dad509d270a9c53427cfedb1c997b3b /arch/tile/lib | |
parent | 9479f0f8018a0317b0b5e0c2b338bec6e26fdf2d (diff) | |
parent | 00a62d4bc9b9a0388abee5c5ea946b9631b149d5 (diff) |
Merge branch 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull arch/tile bug fixes from Chris Metcalf:
"This includes Paul Gortmaker's change to fix the <asm/system.h>
disintegration issues on tile, a fix to unbreak the tilepro ethernet
driver, and a backlog of bugfix-only changes from internal Tilera
development over the last few months.
They have all been to LKML and on linux-next for the last few days.
The EDAC change to MAINTAINERS is an oddity but discussion on the
linux-edac list suggested I ask you to pull that change through my
tree since they don't have a tree to pull edac changes from at the
moment."
* 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile: (39 commits)
drivers/net/ethernet/tile: fix netdev_alloc_skb() bombing
MAINTAINERS: update EDAC information
tilepro ethernet driver: fix a few minor issues
tile-srom.c driver: minor code cleanup
edac: say "TILEGx" not "TILEPro" for the tilegx edac driver
arch/tile: avoid accidentally unmasking NMI-type interrupt accidentally
arch/tile: remove bogus performance optimization
arch/tile: return SIGBUS for addresses that are unaligned AND invalid
arch/tile: fix finv_buffer_remote() for tilegx
arch/tile: use atomic exchange in arch_write_unlock()
arch/tile: stop mentioning the "kvm" subdirectory
arch/tile: export the page_home() function.
arch/tile: fix pointer cast in cacheflush.c
arch/tile: fix single-stepping over swint1 instructions on tilegx
arch/tile: implement panic_smp_self_stop()
arch/tile: add "nop" after "nap" to help GX idle power draw
arch/tile: use proper memparse() for "maxmem" options
arch/tile: fix up locking in pgtable.c slightly
arch/tile: don't leak kernel memory when we unload modules
arch/tile: fix bug in delay_backoff()
...
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/tile/lib/cacheflush.c | 30 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_user_64.c | 8 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_common.h | 2 |
4 files changed, 36 insertions, 5 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086ecbe..985f5985823 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ | |||
7 | strchr_$(BITS).o strlen_$(BITS).o | 7 | strchr_$(BITS).o strlen_$(BITS).o |
8 | 8 | ||
9 | ifeq ($(CONFIG_TILEGX),y) | 9 | ifeq ($(CONFIG_TILEGX),y) |
10 | CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer | ||
10 | lib-y += memcpy_user_64.o | 11 | lib-y += memcpy_user_64.o |
11 | else | 12 | else |
12 | lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o | 13 | lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o |
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8928aace7a6..db4fb89e12d 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c | |||
@@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
39 | { | 39 | { |
40 | char *p, *base; | 40 | char *p, *base; |
41 | size_t step_size, load_count; | 41 | size_t step_size, load_count; |
42 | |||
43 | /* | ||
44 | * On TILEPro the striping granularity is a fixed 8KB; on | ||
45 | * TILE-Gx it is configurable, and we rely on the fact that | ||
46 | * the hypervisor always configures maximum striping, so that | ||
47 | * bits 9 and 10 of the PA are part of the stripe function, so | ||
48 | * every 512 bytes we hit a striping boundary. | ||
49 | * | ||
50 | */ | ||
51 | #ifdef __tilegx__ | ||
52 | const unsigned long STRIPE_WIDTH = 512; | ||
53 | #else | ||
42 | const unsigned long STRIPE_WIDTH = 8192; | 54 | const unsigned long STRIPE_WIDTH = 8192; |
55 | #endif | ||
56 | |||
43 | #ifdef __tilegx__ | 57 | #ifdef __tilegx__ |
44 | /* | 58 | /* |
45 | * On TILE-Gx, we must disable the dstream prefetcher before doing | 59 | * On TILE-Gx, we must disable the dstream prefetcher before doing |
@@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
74 | * memory, that one load would be sufficient, but since we may | 88 | * memory, that one load would be sufficient, but since we may |
75 | * be, we also need to back up to the last load issued to | 89 | * be, we also need to back up to the last load issued to |
76 | * another memory controller, which would be the point where | 90 | * another memory controller, which would be the point where |
77 | * we crossed an 8KB boundary (the granularity of striping | 91 | * we crossed a "striping" boundary (the granularity of striping |
78 | * across memory controllers). Keep backing up and doing this | 92 | * across memory controllers). Keep backing up and doing this |
79 | * until we are before the beginning of the buffer, or have | 93 | * until we are before the beginning of the buffer, or have |
80 | * hit all the controllers. | 94 | * hit all the controllers. |
@@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
88 | * every cache line on a full memory stripe on each | 102 | * every cache line on a full memory stripe on each |
89 | * controller" that we simply do that, to simplify the logic. | 103 | * controller" that we simply do that, to simplify the logic. |
90 | * | 104 | * |
91 | * FIXME: See bug 9535 for some issues with this code. | 105 | * On TILE-Gx the hash-for-home function is much more complex, |
106 | * with the upshot being we can't readily guarantee we have | ||
107 | * hit both entries in the 128-entry AMT that were hit by any | ||
108 | * load in the entire range, so we just re-load them all. | ||
109 | * With larger buffers, we may want to consider using a hypervisor | ||
110 | * trap to issue loads directly to each hash-for-home tile for | ||
111 | * each controller (doing it from Linux would trash the TLB). | ||
92 | */ | 112 | */ |
93 | if (hfh) { | 113 | if (hfh) { |
94 | step_size = L2_CACHE_BYTES; | 114 | step_size = L2_CACHE_BYTES; |
115 | #ifdef __tilegx__ | ||
116 | load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; | ||
117 | #else | ||
95 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * | 118 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * |
96 | (1 << CHIP_LOG_NUM_MSHIMS()); | 119 | (1 << CHIP_LOG_NUM_MSHIMS()); |
120 | #endif | ||
97 | } else { | 121 | } else { |
98 | step_size = STRIPE_WIDTH; | 122 | step_size = STRIPE_WIDTH; |
99 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); | 123 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); |
@@ -109,7 +133,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
109 | 133 | ||
110 | /* Figure out how far back we need to go. */ | 134 | /* Figure out how far back we need to go. */ |
111 | base = p - (step_size * (load_count - 2)); | 135 | base = p - (step_size * (load_count - 2)); |
112 | if ((long)base < (long)buffer) | 136 | if ((unsigned long)base < (unsigned long)buffer) |
113 | base = buffer; | 137 | base = buffer; |
114 | 138 | ||
115 | /* | 139 | /* |
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 4763b3aff1c..37440caa737 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c | |||
@@ -14,7 +14,13 @@ | |||
14 | * Do memcpy(), but trap and return "n" when a load or store faults. | 14 | * Do memcpy(), but trap and return "n" when a load or store faults. |
15 | * | 15 | * |
16 | * Note: this idiom only works when memcpy() compiles to a leaf function. | 16 | * Note: this idiom only works when memcpy() compiles to a leaf function. |
17 | * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. | 17 | * Here leaf function not only means it does not have calls, but also |
18 | * requires no stack operations (sp, stack frame pointer) and no | ||
19 | * use of callee-saved registers, else "jrp lr" will be incorrect since | ||
20 | * unwinding stack frame is bypassed. Since memcpy() is not complex so | ||
21 | * these conditions are satisfied here, but we need to be careful when | ||
22 | * modifying this file. This is not a clean solution but is the best | ||
23 | * one so far. | ||
18 | * | 24 | * |
19 | * Also note that we are capturing "n" from the containing scope here. | 25 | * Also note that we are capturing "n" from the containing scope here. |
20 | */ | 26 | */ |
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c1010980913..6ac37509fac 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h | |||
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations) | |||
60 | loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & | 60 | loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & |
61 | (loops - 1); | 61 | (loops - 1); |
62 | 62 | ||
63 | relax(1 << exponent); | 63 | relax(loops); |
64 | } | 64 | } |