diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Kconfig | 9 | ||||
| -rw-r--r-- | lib/Kconfig.debug | 1 | ||||
| -rw-r--r-- | lib/Kconfig.kasan | 8 | ||||
| -rw-r--r-- | lib/Makefile | 4 | ||||
| -rw-r--r-- | lib/bitmap.c | 30 | ||||
| -rw-r--r-- | lib/cpumask.c | 37 | ||||
| -rw-r--r-- | lib/devres.c | 28 | ||||
| -rw-r--r-- | lib/dma-debug.c | 2 | ||||
| -rw-r--r-- | lib/find_bit.c | 193 | ||||
| -rw-r--r-- | lib/find_last_bit.c | 49 | ||||
| -rw-r--r-- | lib/find_next_bit.c | 285 | ||||
| -rw-r--r-- | lib/iommu-common.c | 270 | ||||
| -rw-r--r-- | lib/raid6/algos.c | 41 | ||||
| -rw-r--r-- | lib/raid6/altivec.uc | 1 | ||||
| -rw-r--r-- | lib/raid6/avx2.c | 3 | ||||
| -rw-r--r-- | lib/raid6/int.uc | 41 | ||||
| -rw-r--r-- | lib/raid6/mmx.c | 2 | ||||
| -rw-r--r-- | lib/raid6/neon.c | 1 | ||||
| -rw-r--r-- | lib/raid6/sse1.c | 2 | ||||
| -rw-r--r-- | lib/raid6/sse2.c | 227 | ||||
| -rw-r--r-- | lib/raid6/test/test.c | 51 | ||||
| -rw-r--r-- | lib/raid6/tilegx.uc | 1 | ||||
| -rw-r--r-- | lib/rhashtable.c | 11 | ||||
| -rw-r--r-- | lib/string_helpers.c | 68 | ||||
| -rw-r--r-- | lib/test-hexdump.c | 2 | ||||
| -rw-r--r-- | lib/vsprintf.c | 244 |
26 files changed, 1038 insertions, 573 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 87da53bb1fef..601965a948e8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
| @@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE | |||
| 18 | default n | 18 | default n |
| 19 | depends on BITREVERSE | 19 | depends on BITREVERSE |
| 20 | help | 20 | help |
| 21 | This option provides an config for the architecture which have instruction | 21 | This option enables the use of hardware bit-reversal instructions on |
| 22 | can do bitreverse operation, we use the hardware instruction if the architecture | 22 | architectures which support such operations. |
| 23 | have this capability. | ||
| 24 | 23 | ||
| 25 | config RATIONAL | 24 | config RATIONAL |
| 26 | bool | 25 | bool |
| @@ -397,10 +396,6 @@ config CPUMASK_OFFSTACK | |||
| 397 | them on the stack. This is a bit more expensive, but avoids | 396 | them on the stack. This is a bit more expensive, but avoids |
| 398 | stack overflow. | 397 | stack overflow. |
| 399 | 398 | ||
| 400 | config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS | ||
| 401 | bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS | ||
| 402 | depends on BROKEN | ||
| 403 | |||
| 404 | config CPU_RMAP | 399 | config CPU_RMAP |
| 405 | bool | 400 | bool |
| 406 | depends on SMP | 401 | depends on SMP |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 17670573dda8..ba2b0c87e65b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -1281,6 +1281,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY | |||
| 1281 | int "How much to slow down RCU grace-period initialization" | 1281 | int "How much to slow down RCU grace-period initialization" |
| 1282 | range 0 5 | 1282 | range 0 5 |
| 1283 | default 3 | 1283 | default 3 |
| 1284 | depends on RCU_TORTURE_TEST_SLOW_INIT | ||
| 1284 | help | 1285 | help |
| 1285 | This option specifies the number of jiffies to wait between | 1286 | This option specifies the number of jiffies to wait between |
| 1286 | each rcu_node structure initialization. | 1287 | each rcu_node structure initialization. |
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fecaedc80a2..777eda7d1ab4 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan | |||
| @@ -10,8 +10,11 @@ config KASAN | |||
| 10 | help | 10 | help |
| 11 | Enables kernel address sanitizer - runtime memory debugger, | 11 | Enables kernel address sanitizer - runtime memory debugger, |
| 12 | designed to find out-of-bounds accesses and use-after-free bugs. | 12 | designed to find out-of-bounds accesses and use-after-free bugs. |
| 13 | This is strictly debugging feature. It consumes about 1/8 | 13 | This is strictly a debugging feature and it requires a gcc version |
| 14 | of available memory and brings about ~x3 performance slowdown. | 14 | of 4.9.2 or later. Detection of out of bounds accesses to stack or |
| 15 | global variables requires gcc 5.0 or later. | ||
| 16 | This feature consumes about 1/8 of available memory and brings about | ||
| 17 | ~x3 performance slowdown. | ||
| 15 | For better error detection enable CONFIG_STACKTRACE, | 18 | For better error detection enable CONFIG_STACKTRACE, |
| 16 | and add slub_debug=U to boot cmdline. | 19 | and add slub_debug=U to boot cmdline. |
| 17 | 20 | ||
| @@ -40,6 +43,7 @@ config KASAN_INLINE | |||
| 40 | memory accesses. This is faster than outline (in some workloads | 43 | memory accesses. This is faster than outline (in some workloads |
| 41 | it gives about x2 boost over outline instrumentation), but | 44 | it gives about x2 boost over outline instrumentation), but |
| 42 | make kernel's .text size much bigger. | 45 | make kernel's .text size much bigger. |
| 46 | This requires a gcc version of 5.0 or later. | ||
| 43 | 47 | ||
| 44 | endchoice | 48 | endchoice |
| 45 | 49 | ||
diff --git a/lib/Makefile b/lib/Makefile index 58f74d2dd396..6c37933336a0 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -25,7 +25,7 @@ obj-y += lockref.o | |||
| 25 | obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ | 25 | obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ |
| 26 | bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ | 26 | bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ |
| 27 | gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ | 27 | gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ |
| 28 | bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ | 28 | bsearch.o find_bit.o llist.o memweight.o kfifo.o \ |
| 29 | percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o | 29 | percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o |
| 30 | obj-y += string_helpers.o | 30 | obj-y += string_helpers.o |
| 31 | obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o | 31 | obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o |
| @@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o | |||
| 106 | obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o | 106 | obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o |
| 107 | 107 | ||
| 108 | obj-$(CONFIG_SWIOTLB) += swiotlb.o | 108 | obj-$(CONFIG_SWIOTLB) += swiotlb.o |
| 109 | obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o | 109 | obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o |
| 110 | obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o | 110 | obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o |
| 111 | obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o | 111 | obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o |
| 112 | obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o | 112 | obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o |
diff --git a/lib/bitmap.c b/lib/bitmap.c index d456f4c15a9f..64c0926f5dd8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
| @@ -42,36 +42,6 @@ | |||
| 42 | * for the best explanations of this ordering. | 42 | * for the best explanations of this ordering. |
| 43 | */ | 43 | */ |
| 44 | 44 | ||
| 45 | int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) | ||
| 46 | { | ||
| 47 | unsigned int k, lim = bits/BITS_PER_LONG; | ||
| 48 | for (k = 0; k < lim; ++k) | ||
| 49 | if (bitmap[k]) | ||
| 50 | return 0; | ||
| 51 | |||
| 52 | if (bits % BITS_PER_LONG) | ||
| 53 | if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) | ||
| 54 | return 0; | ||
| 55 | |||
| 56 | return 1; | ||
| 57 | } | ||
| 58 | EXPORT_SYMBOL(__bitmap_empty); | ||
| 59 | |||
| 60 | int __bitmap_full(const unsigned long *bitmap, unsigned int bits) | ||
| 61 | { | ||
| 62 | unsigned int k, lim = bits/BITS_PER_LONG; | ||
| 63 | for (k = 0; k < lim; ++k) | ||
| 64 | if (~bitmap[k]) | ||
| 65 | return 0; | ||
| 66 | |||
| 67 | if (bits % BITS_PER_LONG) | ||
| 68 | if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) | ||
| 69 | return 0; | ||
| 70 | |||
| 71 | return 1; | ||
| 72 | } | ||
| 73 | EXPORT_SYMBOL(__bitmap_full); | ||
| 74 | |||
| 75 | int __bitmap_equal(const unsigned long *bitmap1, | 45 | int __bitmap_equal(const unsigned long *bitmap1, |
| 76 | const unsigned long *bitmap2, unsigned int bits) | 46 | const unsigned long *bitmap2, unsigned int bits) |
| 77 | { | 47 | { |
diff --git a/lib/cpumask.c b/lib/cpumask.c index b6513a9f2892..830dd5dec40f 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c | |||
| @@ -5,27 +5,6 @@ | |||
| 5 | #include <linux/export.h> | 5 | #include <linux/export.h> |
| 6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
| 7 | 7 | ||
| 8 | int __first_cpu(const cpumask_t *srcp) | ||
| 9 | { | ||
| 10 | return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS)); | ||
| 11 | } | ||
| 12 | EXPORT_SYMBOL(__first_cpu); | ||
| 13 | |||
| 14 | int __next_cpu(int n, const cpumask_t *srcp) | ||
| 15 | { | ||
| 16 | return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1)); | ||
| 17 | } | ||
| 18 | EXPORT_SYMBOL(__next_cpu); | ||
| 19 | |||
| 20 | #if NR_CPUS > 64 | ||
| 21 | int __next_cpu_nr(int n, const cpumask_t *srcp) | ||
| 22 | { | ||
| 23 | return min_t(int, nr_cpu_ids, | ||
| 24 | find_next_bit(srcp->bits, nr_cpu_ids, n+1)); | ||
| 25 | } | ||
| 26 | EXPORT_SYMBOL(__next_cpu_nr); | ||
| 27 | #endif | ||
| 28 | |||
| 29 | /** | 8 | /** |
| 30 | * cpumask_next_and - get the next cpu in *src1p & *src2p | 9 | * cpumask_next_and - get the next cpu in *src1p & *src2p |
| 31 | * @n: the cpu prior to the place to search (ie. return will be > @n) | 10 | * @n: the cpu prior to the place to search (ie. return will be > @n) |
| @@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr); | |||
| 37 | int cpumask_next_and(int n, const struct cpumask *src1p, | 16 | int cpumask_next_and(int n, const struct cpumask *src1p, |
| 38 | const struct cpumask *src2p) | 17 | const struct cpumask *src2p) |
| 39 | { | 18 | { |
| 40 | while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) | 19 | struct cpumask tmp; |
| 41 | if (cpumask_test_cpu(n, src2p)) | 20 | |
| 42 | break; | 21 | if (cpumask_and(&tmp, src1p, src2p)) |
| 43 | return n; | 22 | return cpumask_next(n, &tmp); |
| 23 | return nr_cpu_ids; | ||
| 44 | } | 24 | } |
| 45 | EXPORT_SYMBOL(cpumask_next_and); | 25 | EXPORT_SYMBOL(cpumask_next_and); |
| 46 | 26 | ||
| @@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) | |||
| 89 | dump_stack(); | 69 | dump_stack(); |
| 90 | } | 70 | } |
| 91 | #endif | 71 | #endif |
| 92 | /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ | ||
| 93 | if (*mask) { | ||
| 94 | unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); | ||
| 95 | unsigned int tail; | ||
| 96 | tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); | ||
| 97 | memset(ptr + cpumask_size() - tail, 0, tail); | ||
| 98 | } | ||
| 99 | 72 | ||
| 100 | return *mask != NULL; | 73 | return *mask != NULL; |
| 101 | } | 74 | } |
diff --git a/lib/devres.c b/lib/devres.c index 0f1dd2e9d2c1..fbe2aac522e6 100644 --- a/lib/devres.c +++ b/lib/devres.c | |||
| @@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, | |||
| 72 | EXPORT_SYMBOL(devm_ioremap_nocache); | 72 | EXPORT_SYMBOL(devm_ioremap_nocache); |
| 73 | 73 | ||
| 74 | /** | 74 | /** |
| 75 | * devm_ioremap_wc - Managed ioremap_wc() | ||
| 76 | * @dev: Generic device to remap IO address for | ||
| 77 | * @offset: BUS offset to map | ||
| 78 | * @size: Size of map | ||
| 79 | * | ||
| 80 | * Managed ioremap_wc(). Map is automatically unmapped on driver detach. | ||
| 81 | */ | ||
| 82 | void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, | ||
| 83 | resource_size_t size) | ||
| 84 | { | ||
| 85 | void __iomem **ptr, *addr; | ||
| 86 | |||
| 87 | ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); | ||
| 88 | if (!ptr) | ||
| 89 | return NULL; | ||
| 90 | |||
| 91 | addr = ioremap_wc(offset, size); | ||
| 92 | if (addr) { | ||
| 93 | *ptr = addr; | ||
| 94 | devres_add(dev, ptr); | ||
| 95 | } else | ||
| 96 | devres_free(ptr); | ||
| 97 | |||
| 98 | return addr; | ||
| 99 | } | ||
| 100 | EXPORT_SYMBOL(devm_ioremap_wc); | ||
| 101 | |||
| 102 | /** | ||
| 75 | * devm_iounmap - Managed iounmap() | 103 | * devm_iounmap - Managed iounmap() |
| 76 | * @dev: Generic device to unmap for | 104 | * @dev: Generic device to unmap for |
| 77 | * @addr: Address to unmap | 105 | * @addr: Address to unmap |
diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 9722bd2dbc9b..ae4b65e17e64 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c | |||
| @@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, | |||
| 361 | unsigned int range = 0; | 361 | unsigned int range = 0; |
| 362 | 362 | ||
| 363 | while (range <= max_range) { | 363 | while (range <= max_range) { |
| 364 | entry = __hash_bucket_find(*bucket, &index, containing_match); | 364 | entry = __hash_bucket_find(*bucket, ref, containing_match); |
| 365 | 365 | ||
| 366 | if (entry) | 366 | if (entry) |
| 367 | return entry; | 367 | return entry; |
diff --git a/lib/find_bit.c b/lib/find_bit.c new file mode 100644 index 000000000000..18072ea9c20e --- /dev/null +++ b/lib/find_bit.c | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | /* bit search implementation | ||
| 2 | * | ||
| 3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * Copyright (C) 2008 IBM Corporation | ||
| 7 | * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> | ||
| 8 | * (Inspired by David Howell's find_next_bit implementation) | ||
| 9 | * | ||
| 10 | * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease | ||
| 11 | * size and improve performance, 2015. | ||
| 12 | * | ||
| 13 | * This program is free software; you can redistribute it and/or | ||
| 14 | * modify it under the terms of the GNU General Public License | ||
| 15 | * as published by the Free Software Foundation; either version | ||
| 16 | * 2 of the License, or (at your option) any later version. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/bitops.h> | ||
| 20 | #include <linux/bitmap.h> | ||
| 21 | #include <linux/export.h> | ||
| 22 | #include <linux/kernel.h> | ||
| 23 | |||
| 24 | #if !defined(find_next_bit) || !defined(find_next_zero_bit) | ||
| 25 | |||
| 26 | /* | ||
| 27 | * This is a common helper function for find_next_bit and | ||
| 28 | * find_next_zero_bit. The difference is the "invert" argument, which | ||
| 29 | * is XORed with each fetched word before searching it for one bits. | ||
| 30 | */ | ||
| 31 | static unsigned long _find_next_bit(const unsigned long *addr, | ||
| 32 | unsigned long nbits, unsigned long start, unsigned long invert) | ||
| 33 | { | ||
| 34 | unsigned long tmp; | ||
| 35 | |||
| 36 | if (!nbits || start >= nbits) | ||
| 37 | return nbits; | ||
| 38 | |||
| 39 | tmp = addr[start / BITS_PER_LONG] ^ invert; | ||
| 40 | |||
| 41 | /* Handle 1st word. */ | ||
| 42 | tmp &= BITMAP_FIRST_WORD_MASK(start); | ||
| 43 | start = round_down(start, BITS_PER_LONG); | ||
| 44 | |||
| 45 | while (!tmp) { | ||
| 46 | start += BITS_PER_LONG; | ||
| 47 | if (start >= nbits) | ||
| 48 | return nbits; | ||
| 49 | |||
| 50 | tmp = addr[start / BITS_PER_LONG] ^ invert; | ||
| 51 | } | ||
| 52 | |||
| 53 | return min(start + __ffs(tmp), nbits); | ||
| 54 | } | ||
| 55 | #endif | ||
| 56 | |||
| 57 | #ifndef find_next_bit | ||
| 58 | /* | ||
| 59 | * Find the next set bit in a memory region. | ||
| 60 | */ | ||
| 61 | unsigned long find_next_bit(const unsigned long *addr, unsigned long size, | ||
| 62 | unsigned long offset) | ||
| 63 | { | ||
| 64 | return _find_next_bit(addr, size, offset, 0UL); | ||
| 65 | } | ||
| 66 | EXPORT_SYMBOL(find_next_bit); | ||
| 67 | #endif | ||
| 68 | |||
| 69 | #ifndef find_next_zero_bit | ||
| 70 | unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, | ||
| 71 | unsigned long offset) | ||
| 72 | { | ||
| 73 | return _find_next_bit(addr, size, offset, ~0UL); | ||
| 74 | } | ||
| 75 | EXPORT_SYMBOL(find_next_zero_bit); | ||
| 76 | #endif | ||
| 77 | |||
| 78 | #ifndef find_first_bit | ||
| 79 | /* | ||
| 80 | * Find the first set bit in a memory region. | ||
| 81 | */ | ||
| 82 | unsigned long find_first_bit(const unsigned long *addr, unsigned long size) | ||
| 83 | { | ||
| 84 | unsigned long idx; | ||
| 85 | |||
| 86 | for (idx = 0; idx * BITS_PER_LONG < size; idx++) { | ||
| 87 | if (addr[idx]) | ||
| 88 | return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); | ||
| 89 | } | ||
| 90 | |||
| 91 | return size; | ||
| 92 | } | ||
| 93 | EXPORT_SYMBOL(find_first_bit); | ||
| 94 | #endif | ||
| 95 | |||
| 96 | #ifndef find_first_zero_bit | ||
| 97 | /* | ||
| 98 | * Find the first cleared bit in a memory region. | ||
| 99 | */ | ||
| 100 | unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) | ||
| 101 | { | ||
| 102 | unsigned long idx; | ||
| 103 | |||
| 104 | for (idx = 0; idx * BITS_PER_LONG < size; idx++) { | ||
| 105 | if (addr[idx] != ~0UL) | ||
| 106 | return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); | ||
| 107 | } | ||
| 108 | |||
| 109 | return size; | ||
| 110 | } | ||
| 111 | EXPORT_SYMBOL(find_first_zero_bit); | ||
| 112 | #endif | ||
| 113 | |||
| 114 | #ifndef find_last_bit | ||
| 115 | unsigned long find_last_bit(const unsigned long *addr, unsigned long size) | ||
| 116 | { | ||
| 117 | if (size) { | ||
| 118 | unsigned long val = BITMAP_LAST_WORD_MASK(size); | ||
| 119 | unsigned long idx = (size-1) / BITS_PER_LONG; | ||
| 120 | |||
| 121 | do { | ||
| 122 | val &= addr[idx]; | ||
| 123 | if (val) | ||
| 124 | return idx * BITS_PER_LONG + __fls(val); | ||
| 125 | |||
| 126 | val = ~0ul; | ||
| 127 | } while (idx--); | ||
| 128 | } | ||
| 129 | return size; | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(find_last_bit); | ||
| 132 | #endif | ||
| 133 | |||
| 134 | #ifdef __BIG_ENDIAN | ||
| 135 | |||
| 136 | /* include/linux/byteorder does not support "unsigned long" type */ | ||
| 137 | static inline unsigned long ext2_swab(const unsigned long y) | ||
| 138 | { | ||
| 139 | #if BITS_PER_LONG == 64 | ||
| 140 | return (unsigned long) __swab64((u64) y); | ||
| 141 | #elif BITS_PER_LONG == 32 | ||
| 142 | return (unsigned long) __swab32((u32) y); | ||
| 143 | #else | ||
| 144 | #error BITS_PER_LONG not defined | ||
| 145 | #endif | ||
| 146 | } | ||
| 147 | |||
| 148 | #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) | ||
| 149 | static unsigned long _find_next_bit_le(const unsigned long *addr, | ||
| 150 | unsigned long nbits, unsigned long start, unsigned long invert) | ||
| 151 | { | ||
| 152 | unsigned long tmp; | ||
| 153 | |||
| 154 | if (!nbits || start >= nbits) | ||
| 155 | return nbits; | ||
| 156 | |||
| 157 | tmp = addr[start / BITS_PER_LONG] ^ invert; | ||
| 158 | |||
| 159 | /* Handle 1st word. */ | ||
| 160 | tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); | ||
| 161 | start = round_down(start, BITS_PER_LONG); | ||
| 162 | |||
| 163 | while (!tmp) { | ||
| 164 | start += BITS_PER_LONG; | ||
| 165 | if (start >= nbits) | ||
| 166 | return nbits; | ||
| 167 | |||
| 168 | tmp = addr[start / BITS_PER_LONG] ^ invert; | ||
| 169 | } | ||
| 170 | |||
| 171 | return min(start + __ffs(ext2_swab(tmp)), nbits); | ||
| 172 | } | ||
| 173 | #endif | ||
| 174 | |||
| 175 | #ifndef find_next_zero_bit_le | ||
| 176 | unsigned long find_next_zero_bit_le(const void *addr, unsigned | ||
| 177 | long size, unsigned long offset) | ||
| 178 | { | ||
| 179 | return _find_next_bit_le(addr, size, offset, ~0UL); | ||
| 180 | } | ||
| 181 | EXPORT_SYMBOL(find_next_zero_bit_le); | ||
| 182 | #endif | ||
| 183 | |||
| 184 | #ifndef find_next_bit_le | ||
| 185 | unsigned long find_next_bit_le(const void *addr, unsigned | ||
| 186 | long size, unsigned long offset) | ||
| 187 | { | ||
| 188 | return _find_next_bit_le(addr, size, offset, 0UL); | ||
| 189 | } | ||
| 190 | EXPORT_SYMBOL(find_next_bit_le); | ||
| 191 | #endif | ||
| 192 | |||
| 193 | #endif /* __BIG_ENDIAN */ | ||
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c deleted file mode 100644 index 91ca09fbf6f9..000000000000 --- a/lib/find_last_bit.c +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | /* find_last_bit.c: fallback find next bit implementation | ||
| 2 | * | ||
| 3 | * Copyright (C) 2008 IBM Corporation | ||
| 4 | * Written by Rusty Russell <rusty@rustcorp.com.au> | ||
| 5 | * (Inspired by David Howell's find_next_bit implementation) | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/bitops.h> | ||
| 14 | #include <linux/export.h> | ||
| 15 | #include <asm/types.h> | ||
| 16 | #include <asm/byteorder.h> | ||
| 17 | |||
| 18 | #ifndef find_last_bit | ||
| 19 | |||
| 20 | unsigned long find_last_bit(const unsigned long *addr, unsigned long size) | ||
| 21 | { | ||
| 22 | unsigned long words; | ||
| 23 | unsigned long tmp; | ||
| 24 | |||
| 25 | /* Start at final word. */ | ||
| 26 | words = size / BITS_PER_LONG; | ||
| 27 | |||
| 28 | /* Partial final word? */ | ||
| 29 | if (size & (BITS_PER_LONG-1)) { | ||
| 30 | tmp = (addr[words] & (~0UL >> (BITS_PER_LONG | ||
| 31 | - (size & (BITS_PER_LONG-1))))); | ||
| 32 | if (tmp) | ||
| 33 | goto found; | ||
| 34 | } | ||
| 35 | |||
| 36 | while (words) { | ||
| 37 | tmp = addr[--words]; | ||
| 38 | if (tmp) { | ||
| 39 | found: | ||
| 40 | return words * BITS_PER_LONG + __fls(tmp); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | /* Not found */ | ||
| 45 | return size; | ||
| 46 | } | ||
| 47 | EXPORT_SYMBOL(find_last_bit); | ||
| 48 | |||
| 49 | #endif | ||
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c deleted file mode 100644 index 0cbfc0b4398f..000000000000 --- a/lib/find_next_bit.c +++ /dev/null | |||
| @@ -1,285 +0,0 @@ | |||
| 1 | /* find_next_bit.c: fallback find next bit implementation | ||
| 2 | * | ||
| 3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/bitops.h> | ||
| 13 | #include <linux/export.h> | ||
| 14 | #include <asm/types.h> | ||
| 15 | #include <asm/byteorder.h> | ||
| 16 | |||
| 17 | #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) | ||
| 18 | |||
| 19 | #ifndef find_next_bit | ||
| 20 | /* | ||
| 21 | * Find the next set bit in a memory region. | ||
| 22 | */ | ||
| 23 | unsigned long find_next_bit(const unsigned long *addr, unsigned long size, | ||
| 24 | unsigned long offset) | ||
| 25 | { | ||
| 26 | const unsigned long *p = addr + BITOP_WORD(offset); | ||
| 27 | unsigned long result = offset & ~(BITS_PER_LONG-1); | ||
| 28 | unsigned long tmp; | ||
| 29 | |||
| 30 | if (offset >= size) | ||
| 31 | return size; | ||
| 32 | size -= result; | ||
| 33 | offset %= BITS_PER_LONG; | ||
| 34 | if (offset) { | ||
| 35 | tmp = *(p++); | ||
| 36 | tmp &= (~0UL << offset); | ||
| 37 | if (size < BITS_PER_LONG) | ||
| 38 | goto found_first; | ||
| 39 | if (tmp) | ||
| 40 | goto found_middle; | ||
| 41 | size -= BITS_PER_LONG; | ||
| 42 | result += BITS_PER_LONG; | ||
| 43 | } | ||
| 44 | while (size & ~(BITS_PER_LONG-1)) { | ||
| 45 | if ((tmp = *(p++))) | ||
| 46 | goto found_middle; | ||
| 47 | result += BITS_PER_LONG; | ||
| 48 | size -= BITS_PER_LONG; | ||
| 49 | } | ||
| 50 | if (!size) | ||
| 51 | return result; | ||
| 52 | tmp = *p; | ||
| 53 | |||
| 54 | found_first: | ||
| 55 | tmp &= (~0UL >> (BITS_PER_LONG - size)); | ||
| 56 | if (tmp == 0UL) /* Are any bits set? */ | ||
| 57 | return result + size; /* Nope. */ | ||
| 58 | found_middle: | ||
| 59 | return result + __ffs(tmp); | ||
| 60 | } | ||
| 61 | EXPORT_SYMBOL(find_next_bit); | ||
| 62 | #endif | ||
| 63 | |||
| 64 | #ifndef find_next_zero_bit | ||
| 65 | /* | ||
| 66 | * This implementation of find_{first,next}_zero_bit was stolen from | ||
| 67 | * Linus' asm-alpha/bitops.h. | ||
| 68 | */ | ||
| 69 | unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, | ||
| 70 | unsigned long offset) | ||
| 71 | { | ||
| 72 | const unsigned long *p = addr + BITOP_WORD(offset); | ||
| 73 | unsigned long result = offset & ~(BITS_PER_LONG-1); | ||
| 74 | unsigned long tmp; | ||
| 75 | |||
| 76 | if (offset >= size) | ||
| 77 | return size; | ||
| 78 | size -= result; | ||
| 79 | offset %= BITS_PER_LONG; | ||
| 80 | if (offset) { | ||
| 81 | tmp = *(p++); | ||
| 82 | tmp |= ~0UL >> (BITS_PER_LONG - offset); | ||
| 83 | if (size < BITS_PER_LONG) | ||
| 84 | goto found_first; | ||
| 85 | if (~tmp) | ||
| 86 | goto found_middle; | ||
| 87 | size -= BITS_PER_LONG; | ||
| 88 | result += BITS_PER_LONG; | ||
| 89 | } | ||
| 90 | while (size & ~(BITS_PER_LONG-1)) { | ||
| 91 | if (~(tmp = *(p++))) | ||
| 92 | goto found_middle; | ||
| 93 | result += BITS_PER_LONG; | ||
| 94 | size -= BITS_PER_LONG; | ||
| 95 | } | ||
| 96 | if (!size) | ||
| 97 | return result; | ||
| 98 | tmp = *p; | ||
| 99 | |||
| 100 | found_first: | ||
| 101 | tmp |= ~0UL << size; | ||
| 102 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
| 103 | return result + size; /* Nope. */ | ||
| 104 | found_middle: | ||
| 105 | return result + ffz(tmp); | ||
| 106 | } | ||
| 107 | EXPORT_SYMBOL(find_next_zero_bit); | ||
| 108 | #endif | ||
| 109 | |||
| 110 | #ifndef find_first_bit | ||
| 111 | /* | ||
| 112 | * Find the first set bit in a memory region. | ||
| 113 | */ | ||
| 114 | unsigned long find_first_bit(const unsigned long *addr, unsigned long size) | ||
| 115 | { | ||
| 116 | const unsigned long *p = addr; | ||
| 117 | unsigned long result = 0; | ||
| 118 | unsigned long tmp; | ||
| 119 | |||
| 120 | while (size & ~(BITS_PER_LONG-1)) { | ||
| 121 | if ((tmp = *(p++))) | ||
| 122 | goto found; | ||
| 123 | result += BITS_PER_LONG; | ||
| 124 | size -= BITS_PER_LONG; | ||
| 125 | } | ||
| 126 | if (!size) | ||
| 127 | return result; | ||
| 128 | |||
| 129 | tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); | ||
| 130 | if (tmp == 0UL) /* Are any bits set? */ | ||
| 131 | return result + size; /* Nope. */ | ||
| 132 | found: | ||
| 133 | return result + __ffs(tmp); | ||
| 134 | } | ||
| 135 | EXPORT_SYMBOL(find_first_bit); | ||
| 136 | #endif | ||
| 137 | |||
| 138 | #ifndef find_first_zero_bit | ||
| 139 | /* | ||
| 140 | * Find the first cleared bit in a memory region. | ||
| 141 | */ | ||
| 142 | unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) | ||
| 143 | { | ||
| 144 | const unsigned long *p = addr; | ||
| 145 | unsigned long result = 0; | ||
| 146 | unsigned long tmp; | ||
| 147 | |||
| 148 | while (size & ~(BITS_PER_LONG-1)) { | ||
| 149 | if (~(tmp = *(p++))) | ||
| 150 | goto found; | ||
| 151 | result += BITS_PER_LONG; | ||
| 152 | size -= BITS_PER_LONG; | ||
| 153 | } | ||
| 154 | if (!size) | ||
| 155 | return result; | ||
| 156 | |||
| 157 | tmp = (*p) | (~0UL << size); | ||
| 158 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
| 159 | return result + size; /* Nope. */ | ||
| 160 | found: | ||
| 161 | return result + ffz(tmp); | ||
| 162 | } | ||
| 163 | EXPORT_SYMBOL(find_first_zero_bit); | ||
| 164 | #endif | ||
| 165 | |||
| 166 | #ifdef __BIG_ENDIAN | ||
| 167 | |||
| 168 | /* include/linux/byteorder does not support "unsigned long" type */ | ||
| 169 | static inline unsigned long ext2_swabp(const unsigned long * x) | ||
| 170 | { | ||
| 171 | #if BITS_PER_LONG == 64 | ||
| 172 | return (unsigned long) __swab64p((u64 *) x); | ||
| 173 | #elif BITS_PER_LONG == 32 | ||
| 174 | return (unsigned long) __swab32p((u32 *) x); | ||
| 175 | #else | ||
| 176 | #error BITS_PER_LONG not defined | ||
| 177 | #endif | ||
| 178 | } | ||
| 179 | |||
| 180 | /* include/linux/byteorder doesn't support "unsigned long" type */ | ||
| 181 | static inline unsigned long ext2_swab(const unsigned long y) | ||
| 182 | { | ||
| 183 | #if BITS_PER_LONG == 64 | ||
| 184 | return (unsigned long) __swab64((u64) y); | ||
| 185 | #elif BITS_PER_LONG == 32 | ||
| 186 | return (unsigned long) __swab32((u32) y); | ||
| 187 | #else | ||
| 188 | #error BITS_PER_LONG not defined | ||
| 189 | #endif | ||
| 190 | } | ||
| 191 | |||
| 192 | #ifndef find_next_zero_bit_le | ||
| 193 | unsigned long find_next_zero_bit_le(const void *addr, unsigned | ||
| 194 | long size, unsigned long offset) | ||
| 195 | { | ||
| 196 | const unsigned long *p = addr; | ||
| 197 | unsigned long result = offset & ~(BITS_PER_LONG - 1); | ||
| 198 | unsigned long tmp; | ||
| 199 | |||
| 200 | if (offset >= size) | ||
| 201 | return size; | ||
| 202 | p += BITOP_WORD(offset); | ||
| 203 | size -= result; | ||
| 204 | offset &= (BITS_PER_LONG - 1UL); | ||
| 205 | if (offset) { | ||
| 206 | tmp = ext2_swabp(p++); | ||
| 207 | tmp |= (~0UL >> (BITS_PER_LONG - offset)); | ||
| 208 | if (size < BITS_PER_LONG) | ||
| 209 | goto found_first; | ||
| 210 | if (~tmp) | ||
| 211 | goto found_middle; | ||
| 212 | size -= BITS_PER_LONG; | ||
| 213 | result += BITS_PER_LONG; | ||
| 214 | } | ||
| 215 | |||
| 216 | while (size & ~(BITS_PER_LONG - 1)) { | ||
| 217 | if (~(tmp = *(p++))) | ||
| 218 | goto found_middle_swap; | ||
| 219 | result += BITS_PER_LONG; | ||
| 220 | size -= BITS_PER_LONG; | ||
| 221 | } | ||
| 222 | if (!size) | ||
| 223 | return result; | ||
| 224 | tmp = ext2_swabp(p); | ||
| 225 | found_first: | ||
| 226 | tmp |= ~0UL << size; | ||
| 227 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
| 228 | return result + size; /* Nope. Skip ffz */ | ||
| 229 | found_middle: | ||
| 230 | return result + ffz(tmp); | ||
| 231 | |||
| 232 | found_middle_swap: | ||
| 233 | return result + ffz(ext2_swab(tmp)); | ||
| 234 | } | ||
| 235 | EXPORT_SYMBOL(find_next_zero_bit_le); | ||
| 236 | #endif | ||
| 237 | |||
| 238 | #ifndef find_next_bit_le | ||
| 239 | unsigned long find_next_bit_le(const void *addr, unsigned | ||
| 240 | long size, unsigned long offset) | ||
| 241 | { | ||
| 242 | const unsigned long *p = addr; | ||
| 243 | unsigned long result = offset & ~(BITS_PER_LONG - 1); | ||
| 244 | unsigned long tmp; | ||
| 245 | |||
| 246 | if (offset >= size) | ||
| 247 | return size; | ||
| 248 | p += BITOP_WORD(offset); | ||
| 249 | size -= result; | ||
| 250 | offset &= (BITS_PER_LONG - 1UL); | ||
| 251 | if (offset) { | ||
| 252 | tmp = ext2_swabp(p++); | ||
| 253 | tmp &= (~0UL << offset); | ||
| 254 | if (size < BITS_PER_LONG) | ||
| 255 | goto found_first; | ||
| 256 | if (tmp) | ||
| 257 | goto found_middle; | ||
| 258 | size -= BITS_PER_LONG; | ||
| 259 | result += BITS_PER_LONG; | ||
| 260 | } | ||
| 261 | |||
| 262 | while (size & ~(BITS_PER_LONG - 1)) { | ||
| 263 | tmp = *(p++); | ||
| 264 | if (tmp) | ||
| 265 | goto found_middle_swap; | ||
| 266 | result += BITS_PER_LONG; | ||
| 267 | size -= BITS_PER_LONG; | ||
| 268 | } | ||
| 269 | if (!size) | ||
| 270 | return result; | ||
| 271 | tmp = ext2_swabp(p); | ||
| 272 | found_first: | ||
| 273 | tmp &= (~0UL >> (BITS_PER_LONG - size)); | ||
| 274 | if (tmp == 0UL) /* Are any bits set? */ | ||
| 275 | return result + size; /* Nope. */ | ||
| 276 | found_middle: | ||
| 277 | return result + __ffs(tmp); | ||
| 278 | |||
| 279 | found_middle_swap: | ||
| 280 | return result + __ffs(ext2_swab(tmp)); | ||
| 281 | } | ||
| 282 | EXPORT_SYMBOL(find_next_bit_le); | ||
| 283 | #endif | ||
| 284 | |||
| 285 | #endif /* __BIG_ENDIAN */ | ||
diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..df30632f0bef --- /dev/null +++ b/lib/iommu-common.c | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | /* | ||
| 2 | * IOMMU mmap management and range allocation functions. | ||
| 3 | * Based almost entirely upon the powerpc iommu allocator. | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <linux/export.h> | ||
| 7 | #include <linux/bitmap.h> | ||
| 8 | #include <linux/bug.h> | ||
| 9 | #include <linux/iommu-helper.h> | ||
| 10 | #include <linux/iommu-common.h> | ||
| 11 | #include <linux/dma-mapping.h> | ||
| 12 | #include <linux/hash.h> | ||
| 13 | |||
| 14 | #ifndef DMA_ERROR_CODE | ||
| 15 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) | ||
| 16 | #endif | ||
| 17 | |||
| 18 | static unsigned long iommu_large_alloc = 15; | ||
| 19 | |||
| 20 | static DEFINE_PER_CPU(unsigned int, iommu_hash_common); | ||
| 21 | |||
| 22 | static inline bool need_flush(struct iommu_map_table *iommu) | ||
| 23 | { | ||
| 24 | return (iommu->lazy_flush != NULL && | ||
| 25 | (iommu->flags & IOMMU_NEED_FLUSH) != 0); | ||
| 26 | } | ||
| 27 | |||
| 28 | static inline void set_flush(struct iommu_map_table *iommu) | ||
| 29 | { | ||
| 30 | iommu->flags |= IOMMU_NEED_FLUSH; | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline void clear_flush(struct iommu_map_table *iommu) | ||
| 34 | { | ||
| 35 | iommu->flags &= ~IOMMU_NEED_FLUSH; | ||
| 36 | } | ||
| 37 | |||
| 38 | static void setup_iommu_pool_hash(void) | ||
| 39 | { | ||
| 40 | unsigned int i; | ||
| 41 | static bool do_once; | ||
| 42 | |||
| 43 | if (do_once) | ||
| 44 | return; | ||
| 45 | do_once = true; | ||
| 46 | for_each_possible_cpu(i) | ||
| 47 | per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); | ||
| 48 | } | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Initialize iommu_pool entries for the iommu_map_table. `num_entries' | ||
| 52 | * is the number of table entries. If `large_pool' is set to true, | ||
| 53 | * the top 1/4 of the table will be set aside for pool allocations | ||
| 54 | * of more than iommu_large_alloc pages. | ||
| 55 | */ | ||
| 56 | void iommu_tbl_pool_init(struct iommu_map_table *iommu, | ||
| 57 | unsigned long num_entries, | ||
| 58 | u32 table_shift, | ||
| 59 | void (*lazy_flush)(struct iommu_map_table *), | ||
| 60 | bool large_pool, u32 npools, | ||
| 61 | bool skip_span_boundary_check) | ||
| 62 | { | ||
| 63 | unsigned int start, i; | ||
| 64 | struct iommu_pool *p = &(iommu->large_pool); | ||
| 65 | |||
| 66 | setup_iommu_pool_hash(); | ||
| 67 | if (npools == 0) | ||
| 68 | iommu->nr_pools = IOMMU_NR_POOLS; | ||
| 69 | else | ||
| 70 | iommu->nr_pools = npools; | ||
| 71 | BUG_ON(npools > IOMMU_NR_POOLS); | ||
| 72 | |||
| 73 | iommu->table_shift = table_shift; | ||
| 74 | iommu->lazy_flush = lazy_flush; | ||
| 75 | start = 0; | ||
| 76 | if (skip_span_boundary_check) | ||
| 77 | iommu->flags |= IOMMU_NO_SPAN_BOUND; | ||
| 78 | if (large_pool) | ||
| 79 | iommu->flags |= IOMMU_HAS_LARGE_POOL; | ||
| 80 | |||
| 81 | if (!large_pool) | ||
| 82 | iommu->poolsize = num_entries/iommu->nr_pools; | ||
| 83 | else | ||
| 84 | iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; | ||
| 85 | for (i = 0; i < iommu->nr_pools; i++) { | ||
| 86 | spin_lock_init(&(iommu->pools[i].lock)); | ||
| 87 | iommu->pools[i].start = start; | ||
| 88 | iommu->pools[i].hint = start; | ||
| 89 | start += iommu->poolsize; /* start for next pool */ | ||
| 90 | iommu->pools[i].end = start - 1; | ||
| 91 | } | ||
| 92 | if (!large_pool) | ||
| 93 | return; | ||
| 94 | /* initialize large_pool */ | ||
| 95 | spin_lock_init(&(p->lock)); | ||
| 96 | p->start = start; | ||
| 97 | p->hint = p->start; | ||
| 98 | p->end = num_entries; | ||
| 99 | } | ||
| 100 | EXPORT_SYMBOL(iommu_tbl_pool_init); | ||
| 101 | |||
| 102 | unsigned long iommu_tbl_range_alloc(struct device *dev, | ||
| 103 | struct iommu_map_table *iommu, | ||
| 104 | unsigned long npages, | ||
| 105 | unsigned long *handle, | ||
| 106 | unsigned long mask, | ||
| 107 | unsigned int align_order) | ||
| 108 | { | ||
| 109 | unsigned int pool_hash = __this_cpu_read(iommu_hash_common); | ||
| 110 | unsigned long n, end, start, limit, boundary_size; | ||
| 111 | struct iommu_pool *pool; | ||
| 112 | int pass = 0; | ||
| 113 | unsigned int pool_nr; | ||
| 114 | unsigned int npools = iommu->nr_pools; | ||
| 115 | unsigned long flags; | ||
| 116 | bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); | ||
| 117 | bool largealloc = (large_pool && npages > iommu_large_alloc); | ||
| 118 | unsigned long shift; | ||
| 119 | unsigned long align_mask = 0; | ||
| 120 | |||
| 121 | if (align_order > 0) | ||
| 122 | align_mask = 0xffffffffffffffffl >> (64 - align_order); | ||
| 123 | |||
| 124 | /* Sanity check */ | ||
| 125 | if (unlikely(npages == 0)) { | ||
| 126 | WARN_ON_ONCE(1); | ||
| 127 | return DMA_ERROR_CODE; | ||
| 128 | } | ||
| 129 | |||
| 130 | if (largealloc) { | ||
| 131 | pool = &(iommu->large_pool); | ||
| 132 | pool_nr = 0; /* to keep compiler happy */ | ||
| 133 | } else { | ||
| 134 | /* pick out pool_nr */ | ||
| 135 | pool_nr = pool_hash & (npools - 1); | ||
| 136 | pool = &(iommu->pools[pool_nr]); | ||
| 137 | } | ||
| 138 | spin_lock_irqsave(&pool->lock, flags); | ||
| 139 | |||
| 140 | again: | ||
| 141 | if (pass == 0 && handle && *handle && | ||
| 142 | (*handle >= pool->start) && (*handle < pool->end)) | ||
| 143 | start = *handle; | ||
| 144 | else | ||
| 145 | start = pool->hint; | ||
| 146 | |||
| 147 | limit = pool->end; | ||
| 148 | |||
| 149 | /* The case below can happen if we have a small segment appended | ||
| 150 | * to a large, or when the previous alloc was at the very end of | ||
| 151 | * the available space. If so, go back to the beginning. If a | ||
| 152 | * flush is needed, it will get done based on the return value | ||
| 153 | * from iommu_area_alloc() below. | ||
| 154 | */ | ||
| 155 | if (start >= limit) | ||
| 156 | start = pool->start; | ||
| 157 | shift = iommu->table_map_base >> iommu->table_shift; | ||
| 158 | if (limit + shift > mask) { | ||
| 159 | limit = mask - shift + 1; | ||
| 160 | /* If we're constrained on address range, first try | ||
| 161 | * at the masked hint to avoid O(n) search complexity, | ||
| 162 | * but on second pass, start at 0 in pool 0. | ||
| 163 | */ | ||
| 164 | if ((start & mask) >= limit || pass > 0) { | ||
| 165 | spin_unlock(&(pool->lock)); | ||
| 166 | pool = &(iommu->pools[0]); | ||
| 167 | spin_lock(&(pool->lock)); | ||
| 168 | start = pool->start; | ||
| 169 | } else { | ||
| 170 | start &= mask; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | if (dev) | ||
| 175 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | ||
| 176 | 1 << iommu->table_shift); | ||
| 177 | else | ||
| 178 | boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); | ||
| 179 | |||
| 180 | boundary_size = boundary_size >> iommu->table_shift; | ||
| 181 | /* | ||
| 182 | * if the skip_span_boundary_check had been set during init, we set | ||
| 183 | * things up so that iommu_is_span_boundary() merely checks if the | ||
| 184 | * (index + npages) < num_tsb_entries | ||
| 185 | */ | ||
| 186 | if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { | ||
| 187 | shift = 0; | ||
| 188 | boundary_size = iommu->poolsize * iommu->nr_pools; | ||
| 189 | } | ||
| 190 | n = iommu_area_alloc(iommu->map, limit, start, npages, shift, | ||
| 191 | boundary_size, align_mask); | ||
| 192 | if (n == -1) { | ||
| 193 | if (likely(pass == 0)) { | ||
| 194 | /* First failure, rescan from the beginning. */ | ||
| 195 | pool->hint = pool->start; | ||
| 196 | set_flush(iommu); | ||
| 197 | pass++; | ||
| 198 | goto again; | ||
| 199 | } else if (!largealloc && pass <= iommu->nr_pools) { | ||
| 200 | spin_unlock(&(pool->lock)); | ||
| 201 | pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); | ||
| 202 | pool = &(iommu->pools[pool_nr]); | ||
| 203 | spin_lock(&(pool->lock)); | ||
| 204 | pool->hint = pool->start; | ||
| 205 | set_flush(iommu); | ||
| 206 | pass++; | ||
| 207 | goto again; | ||
| 208 | } else { | ||
| 209 | /* give up */ | ||
| 210 | n = DMA_ERROR_CODE; | ||
| 211 | goto bail; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | if (n < pool->hint || need_flush(iommu)) { | ||
| 215 | clear_flush(iommu); | ||
| 216 | iommu->lazy_flush(iommu); | ||
| 217 | } | ||
| 218 | |||
| 219 | end = n + npages; | ||
| 220 | pool->hint = end; | ||
| 221 | |||
| 222 | /* Update handle for SG allocations */ | ||
| 223 | if (handle) | ||
| 224 | *handle = end; | ||
| 225 | bail: | ||
| 226 | spin_unlock_irqrestore(&(pool->lock), flags); | ||
| 227 | |||
| 228 | return n; | ||
| 229 | } | ||
| 230 | EXPORT_SYMBOL(iommu_tbl_range_alloc); | ||
| 231 | |||
| 232 | static struct iommu_pool *get_pool(struct iommu_map_table *tbl, | ||
| 233 | unsigned long entry) | ||
| 234 | { | ||
| 235 | struct iommu_pool *p; | ||
| 236 | unsigned long largepool_start = tbl->large_pool.start; | ||
| 237 | bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); | ||
| 238 | |||
| 239 | /* The large pool is the last pool at the top of the table */ | ||
| 240 | if (large_pool && entry >= largepool_start) { | ||
| 241 | p = &tbl->large_pool; | ||
| 242 | } else { | ||
| 243 | unsigned int pool_nr = entry / tbl->poolsize; | ||
| 244 | |||
| 245 | BUG_ON(pool_nr >= tbl->nr_pools); | ||
| 246 | p = &tbl->pools[pool_nr]; | ||
| 247 | } | ||
| 248 | return p; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* Caller supplies the index of the entry into the iommu map table | ||
| 252 | * itself when the mapping from dma_addr to the entry is not the | ||
| 253 | * default addr->entry mapping below. | ||
| 254 | */ | ||
| 255 | void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, | ||
| 256 | unsigned long npages, unsigned long entry) | ||
| 257 | { | ||
| 258 | struct iommu_pool *pool; | ||
| 259 | unsigned long flags; | ||
| 260 | unsigned long shift = iommu->table_shift; | ||
| 261 | |||
| 262 | if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ | ||
| 263 | entry = (dma_addr - iommu->table_map_base) >> shift; | ||
| 264 | pool = get_pool(iommu, entry); | ||
| 265 | |||
| 266 | spin_lock_irqsave(&(pool->lock), flags); | ||
| 267 | bitmap_clear(iommu->map, entry, npages); | ||
| 268 | spin_unlock_irqrestore(&(pool->lock), flags); | ||
| 269 | } | ||
| 270 | EXPORT_SYMBOL(iommu_tbl_range_free); | ||
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index dbef2314901e..975c6e0434bd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
| @@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) | |||
| 131 | static inline const struct raid6_calls *raid6_choose_gen( | 131 | static inline const struct raid6_calls *raid6_choose_gen( |
| 132 | void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) | 132 | void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) |
| 133 | { | 133 | { |
| 134 | unsigned long perf, bestperf, j0, j1; | 134 | unsigned long perf, bestgenperf, bestxorperf, j0, j1; |
| 135 | int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ | ||
| 135 | const struct raid6_calls *const *algo; | 136 | const struct raid6_calls *const *algo; |
| 136 | const struct raid6_calls *best; | 137 | const struct raid6_calls *best; |
| 137 | 138 | ||
| 138 | for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { | 139 | for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { |
| 139 | if (!best || (*algo)->prefer >= best->prefer) { | 140 | if (!best || (*algo)->prefer >= best->prefer) { |
| 140 | if ((*algo)->valid && !(*algo)->valid()) | 141 | if ((*algo)->valid && !(*algo)->valid()) |
| 141 | continue; | 142 | continue; |
| @@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen( | |||
| 153 | } | 154 | } |
| 154 | preempt_enable(); | 155 | preempt_enable(); |
| 155 | 156 | ||
| 156 | if (perf > bestperf) { | 157 | if (perf > bestgenperf) { |
| 157 | bestperf = perf; | 158 | bestgenperf = perf; |
| 158 | best = *algo; | 159 | best = *algo; |
| 159 | } | 160 | } |
| 160 | pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, | 161 | pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, |
| 161 | (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | 162 | (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); |
| 163 | |||
| 164 | if (!(*algo)->xor_syndrome) | ||
| 165 | continue; | ||
| 166 | |||
| 167 | perf = 0; | ||
| 168 | |||
| 169 | preempt_disable(); | ||
| 170 | j0 = jiffies; | ||
| 171 | while ((j1 = jiffies) == j0) | ||
| 172 | cpu_relax(); | ||
| 173 | while (time_before(jiffies, | ||
| 174 | j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | ||
| 175 | (*algo)->xor_syndrome(disks, start, stop, | ||
| 176 | PAGE_SIZE, *dptrs); | ||
| 177 | perf++; | ||
| 178 | } | ||
| 179 | preempt_enable(); | ||
| 180 | |||
| 181 | if (best == *algo) | ||
| 182 | bestxorperf = perf; | ||
| 183 | |||
| 184 | pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, | ||
| 185 | (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); | ||
| 162 | } | 186 | } |
| 163 | } | 187 | } |
| 164 | 188 | ||
| 165 | if (best) { | 189 | if (best) { |
| 166 | pr_info("raid6: using algorithm %s (%ld MB/s)\n", | 190 | pr_info("raid6: using algorithm %s gen() %ld MB/s\n", |
| 167 | best->name, | 191 | best->name, |
| 168 | (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | 192 | (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); |
| 193 | if (best->xor_syndrome) | ||
| 194 | pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", | ||
| 195 | (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); | ||
| 169 | raid6_call = *best; | 196 | raid6_call = *best; |
| 170 | } else | 197 | } else |
| 171 | pr_err("raid6: Yikes! No algorithm found!\n"); | 198 | pr_err("raid6: Yikes! No algorithm found!\n"); |
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 7cc12b532e95..bec27fce7501 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc | |||
| @@ -119,6 +119,7 @@ int raid6_have_altivec(void) | |||
| 119 | 119 | ||
| 120 | const struct raid6_calls raid6_altivec$# = { | 120 | const struct raid6_calls raid6_altivec$# = { |
| 121 | raid6_altivec$#_gen_syndrome, | 121 | raid6_altivec$#_gen_syndrome, |
| 122 | NULL, /* XOR not yet implemented */ | ||
| 122 | raid6_have_altivec, | 123 | raid6_have_altivec, |
| 123 | "altivecx$#", | 124 | "altivecx$#", |
| 124 | 0 | 125 | 0 |
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index bc3b1dd436eb..76734004358d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c | |||
| @@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 89 | 89 | ||
| 90 | const struct raid6_calls raid6_avx2x1 = { | 90 | const struct raid6_calls raid6_avx2x1 = { |
| 91 | raid6_avx21_gen_syndrome, | 91 | raid6_avx21_gen_syndrome, |
| 92 | NULL, /* XOR not yet implemented */ | ||
| 92 | raid6_have_avx2, | 93 | raid6_have_avx2, |
| 93 | "avx2x1", | 94 | "avx2x1", |
| 94 | 1 /* Has cache hints */ | 95 | 1 /* Has cache hints */ |
| @@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 150 | 151 | ||
| 151 | const struct raid6_calls raid6_avx2x2 = { | 152 | const struct raid6_calls raid6_avx2x2 = { |
| 152 | raid6_avx22_gen_syndrome, | 153 | raid6_avx22_gen_syndrome, |
| 154 | NULL, /* XOR not yet implemented */ | ||
| 153 | raid6_have_avx2, | 155 | raid6_have_avx2, |
| 154 | "avx2x2", | 156 | "avx2x2", |
| 155 | 1 /* Has cache hints */ | 157 | 1 /* Has cache hints */ |
| @@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 242 | 244 | ||
| 243 | const struct raid6_calls raid6_avx2x4 = { | 245 | const struct raid6_calls raid6_avx2x4 = { |
| 244 | raid6_avx24_gen_syndrome, | 246 | raid6_avx24_gen_syndrome, |
| 247 | NULL, /* XOR not yet implemented */ | ||
| 245 | raid6_have_avx2, | 248 | raid6_have_avx2, |
| 246 | "avx2x4", | 249 | "avx2x4", |
| 247 | 1 /* Has cache hints */ | 250 | 1 /* Has cache hints */ |
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5b50f8dfc5d2..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc | |||
| @@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 107 | } | 107 | } |
| 108 | } | 108 | } |
| 109 | 109 | ||
/*
 * XOR the P/Q contribution of data blocks start..stop into the existing
 * P (ptrs[disks-2]) and Q (ptrs[disks-1]) buffers.
 *
 * This is a template: `$#' and `$$' are presumably substituted by the
 * .uc unrolling step (confirm against the raid6 build scripts), so the
 * one-statement-per-line layout below should be kept as is.
 *
 * @disks: total number of pointers in ptrs, including P and Q
 * @start: lowest data block index that contributes data
 * @stop:  highest data block index that contributes data
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  data block pointers followed by P and Q
 */
static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
		/* P/Q data pages */
		/* Seed both accumulators with the highest block (z0). */
		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
		for ( z = z0-1 ; z >= start ; z-- ) {
			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			wp$$ ^= wd$$;
			/*
			 * MASK/SHLBYTE/0x1d step on wq, then fold in the data;
			 * presumably the GF(2^8) multiply-by-2 also used by
			 * gen_syndrome - confirm.
			 */
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ &= NBYTES(0x1d);
			w1$$ ^= w2$$;
			wq$$ = w1$$ ^ wd$$;
		}
		/* P/Q left side optimization */
		/* Blocks below `start' are not read: only the wq step runs. */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ &= NBYTES(0x1d);
			wq$$ = w1$$ ^ w2$$;
		}
		/* Fold the computed deltas into the stored P and Q. */
		*(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		*(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
	}

}
| 147 | |||
| 110 | const struct raid6_calls raid6_intx$# = { | 148 | const struct raid6_calls raid6_intx$# = { |
| 111 | raid6_int$#_gen_syndrome, | 149 | raid6_int$#_gen_syndrome, |
| 112 | NULL, /* always valid */ | 150 | raid6_int$#_xor_syndrome, |
| 151 | NULL, /* always valid */ | ||
| 113 | "int" NSTRING "x$#", | 152 | "int" NSTRING "x$#", |
| 114 | 0 | 153 | 0 |
| 115 | }; | 154 | }; |
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 590c71c9e200..b3b0e1fcd3af 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c | |||
| @@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 76 | 76 | ||
| 77 | const struct raid6_calls raid6_mmxx1 = { | 77 | const struct raid6_calls raid6_mmxx1 = { |
| 78 | raid6_mmx1_gen_syndrome, | 78 | raid6_mmx1_gen_syndrome, |
| 79 | NULL, /* XOR not yet implemented */ | ||
| 79 | raid6_have_mmx, | 80 | raid6_have_mmx, |
| 80 | "mmxx1", | 81 | "mmxx1", |
| 81 | 0 | 82 | 0 |
| @@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 134 | 135 | ||
| 135 | const struct raid6_calls raid6_mmxx2 = { | 136 | const struct raid6_calls raid6_mmxx2 = { |
| 136 | raid6_mmx2_gen_syndrome, | 137 | raid6_mmx2_gen_syndrome, |
| 138 | NULL, /* XOR not yet implemented */ | ||
| 137 | raid6_have_mmx, | 139 | raid6_have_mmx, |
| 138 | "mmxx2", | 140 | "mmxx2", |
| 139 | 0 | 141 | 0 |
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 36ad4705df1a..d9ad6ee284f4 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c | |||
| @@ -42,6 +42,7 @@ | |||
| 42 | } \ | 42 | } \ |
| 43 | struct raid6_calls const raid6_neonx ## _n = { \ | 43 | struct raid6_calls const raid6_neonx ## _n = { \ |
| 44 | raid6_neon ## _n ## _gen_syndrome, \ | 44 | raid6_neon ## _n ## _gen_syndrome, \ |
| 45 | NULL, /* XOR not yet implemented */ \ | ||
| 45 | raid6_have_neon, \ | 46 | raid6_have_neon, \ |
| 46 | "neonx" #_n, \ | 47 | "neonx" #_n, \ |
| 47 | 0 \ | 48 | 0 \ |
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index f76297139445..9025b8ca9aa3 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c | |||
| @@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 92 | 92 | ||
| 93 | const struct raid6_calls raid6_sse1x1 = { | 93 | const struct raid6_calls raid6_sse1x1 = { |
| 94 | raid6_sse11_gen_syndrome, | 94 | raid6_sse11_gen_syndrome, |
| 95 | NULL, /* XOR not yet implemented */ | ||
| 95 | raid6_have_sse1_or_mmxext, | 96 | raid6_have_sse1_or_mmxext, |
| 96 | "sse1x1", | 97 | "sse1x1", |
| 97 | 1 /* Has cache hints */ | 98 | 1 /* Has cache hints */ |
| @@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 154 | 155 | ||
| 155 | const struct raid6_calls raid6_sse1x2 = { | 156 | const struct raid6_calls raid6_sse1x2 = { |
| 156 | raid6_sse12_gen_syndrome, | 157 | raid6_sse12_gen_syndrome, |
| 158 | NULL, /* XOR not yet implemented */ | ||
| 157 | raid6_have_sse1_or_mmxext, | 159 | raid6_have_sse1_or_mmxext, |
| 158 | "sse1x2", | 160 | "sse1x2", |
| 159 | 1 /* Has cache hints */ | 161 | 1 /* Has cache hints */ |
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 85b82c85f28e..1d2276b007ee 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c | |||
| @@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 88 | kernel_fpu_end(); | 88 | kernel_fpu_end(); |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | |||
/*
 * XOR the P/Q contribution of data blocks start..stop into the existing
 * P (ptrs[disks-2]) and Q (ptrs[disks-1]) buffers, 16 bytes per loop
 * iteration.
 *
 * Register use across the separate asm statements:
 *   xmm0 - raid6_sse_constants.x1d, loaded once
 *   xmm2 - running P value
 *   xmm4 - running Q value
 *   xmm5 - scratch (sign mask, then the next data block)
 *
 * The statements depend on each other through these registers, so their
 * order must not change.  All loads and stores use movdqa; buffers are
 * presumably 16-byte aligned - confirm against callers.
 */
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
 {
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		/* Q starts as block z0; P starts as old P ^ block z0. */
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			/* xmm5 = 0xff in every byte of Q whose top bit is set */
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			/* Q = (Q << 1 per byte) ^ (mask & x1d) */
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			/* fold data block z into both P and Q */
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* P/Q left side optimization */
		/* Blocks below `start' are not read: only the Q step runs. */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* XOR the computed Q delta into the stored Q. */
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
| 139 | |||
| 91 | const struct raid6_calls raid6_sse2x1 = { | 140 | const struct raid6_calls raid6_sse2x1 = { |
| 92 | raid6_sse21_gen_syndrome, | 141 | raid6_sse21_gen_syndrome, |
| 142 | raid6_sse21_xor_syndrome, | ||
| 93 | raid6_have_sse2, | 143 | raid6_have_sse2, |
| 94 | "sse2x1", | 144 | "sse2x1", |
| 95 | 1 /* Has cache hints */ | 145 | 1 /* Has cache hints */ |
| @@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 150 | kernel_fpu_end(); | 200 | kernel_fpu_end(); |
| 151 | } | 201 | } |
| 152 | 202 | ||
/*
 * Two-lane variant of the SSE2 xor_syndrome: XOR the P/Q contribution of
 * data blocks start..stop into the existing P (ptrs[disks-2]) and
 * Q (ptrs[disks-1]) buffers, 32 bytes per loop iteration.
 *
 * Register use across the separate asm statements:
 *   xmm0       - raid6_sse_constants.x1d, loaded once
 *   xmm2, xmm3 - running P for bytes d and d+16
 *   xmm4, xmm6 - running Q for bytes d and d+16
 *   xmm5, xmm7 - scratch (sign mask, then the next data block)
 *
 * The statements depend on each other through these registers, so their
 * order must not change.
 */
static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
 {
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		/* Q starts as block z0; P starts as old P ^ block z0. */
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			/* scratch = 0xff in every Q byte whose top bit is set */
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			/* Q = (Q << 1 per byte) ^ (mask & x1d) */
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			/* fold data block z into both P and Q */
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		/* Blocks below `start' are not read: only the Q step runs. */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* XOR the computed Q delta into the stored Q. */
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
 }
| 269 | |||
| 153 | const struct raid6_calls raid6_sse2x2 = { | 270 | const struct raid6_calls raid6_sse2x2 = { |
| 154 | raid6_sse22_gen_syndrome, | 271 | raid6_sse22_gen_syndrome, |
| 272 | raid6_sse22_xor_syndrome, | ||
| 155 | raid6_have_sse2, | 273 | raid6_have_sse2, |
| 156 | "sse2x2", | 274 | "sse2x2", |
| 157 | 1 /* Has cache hints */ | 275 | 1 /* Has cache hints */ |
| @@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 248 | kernel_fpu_end(); | 366 | kernel_fpu_end(); |
| 249 | } | 367 | } |
| 250 | 368 | ||
/*
 * Four-lane variant of the SSE2 xor_syndrome: XOR the P/Q contribution of
 * data blocks start..stop into the existing P (ptrs[disks-2]) and
 * Q (ptrs[disks-1]) buffers, 64 bytes per loop iteration.
 *
 * Register use across the separate asm statements:
 *   xmm0                     - raid6_sse_constants.x1d, loaded once
 *   xmm2, xmm3, xmm10, xmm11 - running P for bytes d, d+16, d+32, d+48
 *   xmm4, xmm6, xmm12, xmm14 - running Q for the same four lanes
 *   xmm5, xmm7, xmm13, xmm15 - scratch (sign mask, then next data block)
 *
 * The statements depend on each other through these registers, so their
 * order must not change.  Unlike the x1/x2 variants, the results are
 * written back with movntdq.
 */
static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
 {
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		/* Q starts as block z0; P starts as old P ^ block z0. */
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			/* scratch = 0xff in every Q byte whose top bit is set */
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			/* Q = (Q << 1 per byte) ^ (mask & x1d) */
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			/* fold data block z into both P and Q */
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		/* Blocks below `start' are not read: only the Q step runs. */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		/* Write back the updated P lanes. */
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		/* XOR the computed Q delta into the stored Q, then write back. */
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
 }
| 474 | } | ||
| 475 | |||
| 476 | |||
| 251 | const struct raid6_calls raid6_sse2x4 = { | 477 | const struct raid6_calls raid6_sse2x4 = { |
| 252 | raid6_sse24_gen_syndrome, | 478 | raid6_sse24_gen_syndrome, |
| 479 | raid6_sse24_xor_syndrome, | ||
| 253 | raid6_have_sse2, | 480 | raid6_have_sse2, |
| 254 | "sse2x4", | 481 | "sse2x4", |
| 255 | 1 /* Has cache hints */ | 482 | 1 /* Has cache hints */ |
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 5a485b7a7d3c..3bebbabdb510 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c | |||
| @@ -28,11 +28,11 @@ char *dataptrs[NDISKS]; | |||
| 28 | char data[NDISKS][PAGE_SIZE]; | 28 | char data[NDISKS][PAGE_SIZE]; |
| 29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | 29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; |
| 30 | 30 | ||
| 31 | static void makedata(void) | 31 | static void makedata(int start, int stop) |
| 32 | { | 32 | { |
| 33 | int i, j; | 33 | int i, j; |
| 34 | 34 | ||
| 35 | for (i = 0; i < NDISKS; i++) { | 35 | for (i = start; i <= stop; i++) { |
| 36 | for (j = 0; j < PAGE_SIZE; j++) | 36 | for (j = 0; j < PAGE_SIZE; j++) |
| 37 | data[i][j] = rand(); | 37 | data[i][j] = rand(); |
| 38 | 38 | ||
| @@ -91,34 +91,55 @@ int main(int argc, char *argv[]) | |||
| 91 | { | 91 | { |
| 92 | const struct raid6_calls *const *algo; | 92 | const struct raid6_calls *const *algo; |
| 93 | const struct raid6_recov_calls *const *ra; | 93 | const struct raid6_recov_calls *const *ra; |
| 94 | int i, j; | 94 | int i, j, p1, p2; |
| 95 | int err = 0; | 95 | int err = 0; |
| 96 | 96 | ||
| 97 | makedata(); | 97 | makedata(0, NDISKS-1); |
| 98 | 98 | ||
| 99 | for (ra = raid6_recov_algos; *ra; ra++) { | 99 | for (ra = raid6_recov_algos; *ra; ra++) { |
| 100 | if ((*ra)->valid && !(*ra)->valid()) | 100 | if ((*ra)->valid && !(*ra)->valid()) |
| 101 | continue; | 101 | continue; |
| 102 | |||
| 102 | raid6_2data_recov = (*ra)->data2; | 103 | raid6_2data_recov = (*ra)->data2; |
| 103 | raid6_datap_recov = (*ra)->datap; | 104 | raid6_datap_recov = (*ra)->datap; |
| 104 | 105 | ||
| 105 | printf("using recovery %s\n", (*ra)->name); | 106 | printf("using recovery %s\n", (*ra)->name); |
| 106 | 107 | ||
| 107 | for (algo = raid6_algos; *algo; algo++) { | 108 | for (algo = raid6_algos; *algo; algo++) { |
| 108 | if (!(*algo)->valid || (*algo)->valid()) { | 109 | if ((*algo)->valid && !(*algo)->valid()) |
| 109 | raid6_call = **algo; | 110 | continue; |
| 111 | |||
| 112 | raid6_call = **algo; | ||
| 113 | |||
| 114 | /* Nuke syndromes */ | ||
| 115 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | ||
| 116 | |||
| 117 | /* Generate assumed good syndrome */ | ||
| 118 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | ||
| 119 | (void **)&dataptrs); | ||
| 120 | |||
| 121 | for (i = 0; i < NDISKS-1; i++) | ||
| 122 | for (j = i+1; j < NDISKS; j++) | ||
| 123 | err += test_disks(i, j); | ||
| 124 | |||
| 125 | if (!raid6_call.xor_syndrome) | ||
| 126 | continue; | ||
| 127 | |||
| 128 | for (p1 = 0; p1 < NDISKS-2; p1++) | ||
| 129 | for (p2 = p1; p2 < NDISKS-2; p2++) { | ||
| 110 | 130 | ||
| 111 | /* Nuke syndromes */ | 131 | /* Simulate rmw run */ |
| 112 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | 132 | raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, |
| 133 | (void **)&dataptrs); | ||
| 134 | makedata(p1, p2); | ||
| 135 | raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, | ||
| 136 | (void **)&dataptrs); | ||
| 113 | 137 | ||
| 114 | /* Generate assumed good syndrome */ | 138 | for (i = 0; i < NDISKS-1; i++) |
| 115 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | 139 | for (j = i+1; j < NDISKS; j++) |
| 116 | (void **)&dataptrs); | 140 | err += test_disks(i, j); |
| 141 | } | ||
| 117 | 142 | ||
| 118 | for (i = 0; i < NDISKS-1; i++) | ||
| 119 | for (j = i+1; j < NDISKS; j++) | ||
| 120 | err += test_disks(i, j); | ||
| 121 | } | ||
| 122 | } | 143 | } |
| 123 | printf("\n"); | 144 | printf("\n"); |
| 124 | } | 145 | } |
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc index e7c29459cbcd..2dd291a11264 100644 --- a/lib/raid6/tilegx.uc +++ b/lib/raid6/tilegx.uc | |||
| @@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 80 | 80 | ||
| 81 | const struct raid6_calls raid6_tilegx$# = { | 81 | const struct raid6_calls raid6_tilegx$# = { |
| 82 | raid6_tilegx$#_gen_syndrome, | 82 | raid6_tilegx$#_gen_syndrome, |
| 83 | NULL, /* XOR not yet implemented */ | ||
| 83 | NULL, | 84 | NULL, |
| 84 | "tilegx$#", | 85 | "tilegx$#", |
| 85 | 0 | 86 | 0 |
diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4898442b837f..b28df4019ade 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c | |||
| @@ -405,13 +405,18 @@ int rhashtable_insert_rehash(struct rhashtable *ht) | |||
| 405 | 405 | ||
| 406 | if (rht_grow_above_75(ht, tbl)) | 406 | if (rht_grow_above_75(ht, tbl)) |
| 407 | size *= 2; | 407 | size *= 2; |
| 408 | /* More than two rehashes (not resizes) detected. */ | 408 | /* Do not schedule more than one rehash */ |
| 409 | else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) | 409 | else if (old_tbl != tbl) |
| 410 | return -EBUSY; | 410 | return -EBUSY; |
| 411 | 411 | ||
| 412 | new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); | 412 | new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); |
| 413 | if (new_tbl == NULL) | 413 | if (new_tbl == NULL) { |
| 414 | /* Schedule async resize/rehash to try allocation | ||
| 415 | * non-atomic context. | ||
| 416 | */ | ||
| 417 | schedule_work(&ht->run_work); | ||
| 414 | return -ENOMEM; | 418 | return -ENOMEM; |
| 419 | } | ||
| 415 | 420 | ||
| 416 | err = rhashtable_rehash_attach(ht, tbl, new_tbl); | 421 | err = rhashtable_rehash_attach(ht, tbl, new_tbl); |
| 417 | if (err) { | 422 | if (err) { |
diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 1826c7407258..c98ae818eb4e 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | * Copyright 31 August 2008 James Bottomley | 4 | * Copyright 31 August 2008 James Bottomley |
| 5 | * Copyright (C) 2013, Intel Corporation | 5 | * Copyright (C) 2013, Intel Corporation |
| 6 | */ | 6 | */ |
| 7 | #include <linux/bug.h> | ||
| 7 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
| 8 | #include <linux/math64.h> | 9 | #include <linux/math64.h> |
| 9 | #include <linux/export.h> | 10 | #include <linux/export.h> |
| @@ -14,7 +15,8 @@ | |||
| 14 | 15 | ||
| 15 | /** | 16 | /** |
| 16 | * string_get_size - get the size in the specified units | 17 | * string_get_size - get the size in the specified units |
| 17 | * @size: The size to be converted | 18 | * @size: The size to be converted in blocks |
| 19 | * @blk_size: Size of the block (use 1 for size in bytes) | ||
| 18 | * @units: units to use (powers of 1000 or 1024) | 20 | * @units: units to use (powers of 1000 or 1024) |
| 19 | * @buf: buffer to format to | 21 | * @buf: buffer to format to |
| 20 | * @len: length of buffer | 22 | * @len: length of buffer |
| @@ -24,14 +26,14 @@ | |||
| 24 | * at least 9 bytes and will always be zero terminated. | 26 | * at least 9 bytes and will always be zero terminated. |
| 25 | * | 27 | * |
| 26 | */ | 28 | */ |
| 27 | void string_get_size(u64 size, const enum string_size_units units, | 29 | void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, |
| 28 | char *buf, int len) | 30 | char *buf, int len) |
| 29 | { | 31 | { |
| 30 | static const char *const units_10[] = { | 32 | static const char *const units_10[] = { |
| 31 | "B", "kB", "MB", "GB", "TB", "PB", "EB" | 33 | "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" |
| 32 | }; | 34 | }; |
| 33 | static const char *const units_2[] = { | 35 | static const char *const units_2[] = { |
| 34 | "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" | 36 | "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" |
| 35 | }; | 37 | }; |
| 36 | static const char *const *const units_str[] = { | 38 | static const char *const *const units_str[] = { |
| 37 | [STRING_UNITS_10] = units_10, | 39 | [STRING_UNITS_10] = units_10, |
| @@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units, | |||
| 42 | [STRING_UNITS_2] = 1024, | 44 | [STRING_UNITS_2] = 1024, |
| 43 | }; | 45 | }; |
| 44 | int i, j; | 46 | int i, j; |
| 45 | u32 remainder = 0, sf_cap; | 47 | u32 remainder = 0, sf_cap, exp; |
| 46 | char tmp[8]; | 48 | char tmp[8]; |
| 49 | const char *unit; | ||
| 47 | 50 | ||
| 48 | tmp[0] = '\0'; | 51 | tmp[0] = '\0'; |
| 49 | i = 0; | 52 | i = 0; |
| 50 | if (size >= divisor[units]) { | 53 | if (!size) |
| 51 | while (size >= divisor[units]) { | 54 | goto out; |
| 52 | remainder = do_div(size, divisor[units]); | ||
| 53 | i++; | ||
| 54 | } | ||
| 55 | 55 | ||
| 56 | sf_cap = size; | 56 | while (blk_size >= divisor[units]) { |
| 57 | for (j = 0; sf_cap*10 < 1000; j++) | 57 | remainder = do_div(blk_size, divisor[units]); |
| 58 | sf_cap *= 10; | 58 | i++; |
| 59 | } | ||
| 59 | 60 | ||
| 60 | if (j) { | 61 | exp = divisor[units] / (u32)blk_size; |
| 61 | remainder *= 1000; | 62 | if (size >= exp) { |
| 62 | remainder /= divisor[units]; | 63 | remainder = do_div(size, divisor[units]); |
| 63 | snprintf(tmp, sizeof(tmp), ".%03u", remainder); | 64 | remainder *= blk_size; |
| 64 | tmp[j+1] = '\0'; | 65 | i++; |
| 65 | } | 66 | } else { |
| 67 | remainder *= size; | ||
| 68 | } | ||
| 69 | |||
| 70 | size *= blk_size; | ||
| 71 | size += remainder / divisor[units]; | ||
| 72 | remainder %= divisor[units]; | ||
| 73 | |||
| 74 | while (size >= divisor[units]) { | ||
| 75 | remainder = do_div(size, divisor[units]); | ||
| 76 | i++; | ||
| 66 | } | 77 | } |
| 67 | 78 | ||
| 79 | sf_cap = size; | ||
| 80 | for (j = 0; sf_cap*10 < 1000; j++) | ||
| 81 | sf_cap *= 10; | ||
| 82 | |||
| 83 | if (j) { | ||
| 84 | remainder *= 1000; | ||
| 85 | remainder /= divisor[units]; | ||
| 86 | snprintf(tmp, sizeof(tmp), ".%03u", remainder); | ||
| 87 | tmp[j+1] = '\0'; | ||
| 88 | } | ||
| 89 | |||
| 90 | out: | ||
| 91 | if (i >= ARRAY_SIZE(units_2)) | ||
| 92 | unit = "UNK"; | ||
| 93 | else | ||
| 94 | unit = units_str[units][i]; | ||
| 95 | |||
| 68 | snprintf(buf, len, "%u%s %s", (u32)size, | 96 | snprintf(buf, len, "%u%s %s", (u32)size, |
| 69 | tmp, units_str[units][i]); | 97 | tmp, unit); |
| 70 | } | 98 | } |
| 71 | EXPORT_SYMBOL(string_get_size); | 99 | EXPORT_SYMBOL(string_get_size); |
| 72 | 100 | ||
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c index 9846ff7428b3..c227cc43ec0a 100644 --- a/lib/test-hexdump.c +++ b/lib/test-hexdump.c | |||
| @@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, | |||
| 48 | char test[32 * 3 + 2 + 32 + 1]; | 48 | char test[32 * 3 + 2 + 32 + 1]; |
| 49 | char real[32 * 3 + 2 + 32 + 1]; | 49 | char real[32 * 3 + 2 + 32 + 1]; |
| 50 | char *p; | 50 | char *p; |
| 51 | const char **result; | 51 | const char * const *result; |
| 52 | size_t l = len; | 52 | size_t l = len; |
| 53 | int gs = groupsize, rs = rowsize; | 53 | int gs = groupsize, rs = rowsize; |
| 54 | unsigned int i; | 54 | unsigned int i; |
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3a1e0843f9a2..da39c608a28c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | 33 | ||
| 34 | #include <asm/page.h> /* for PAGE_SIZE */ | 34 | #include <asm/page.h> /* for PAGE_SIZE */ |
| 35 | #include <asm/sections.h> /* for dereference_function_descriptor() */ | 35 | #include <asm/sections.h> /* for dereference_function_descriptor() */ |
| 36 | #include <asm/byteorder.h> /* cpu_to_le16 */ | ||
| 36 | 37 | ||
| 37 | #include <linux/string_helpers.h> | 38 | #include <linux/string_helpers.h> |
| 38 | #include "kstrtox.h" | 39 | #include "kstrtox.h" |
| @@ -122,142 +123,145 @@ int skip_atoi(const char **s) | |||
| 122 | return i; | 123 | return i; |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 125 | /* Decimal conversion is by far the most typical, and is used | 126 | /* |
| 126 | * for /proc and /sys data. This directly impacts e.g. top performance | 127 | * Decimal conversion is by far the most typical, and is used for |
| 127 | * with many processes running. We optimize it for speed | 128 | * /proc and /sys data. This directly impacts e.g. top performance |
| 128 | * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> | 129 | * with many processes running. We optimize it for speed by emitting |
| 129 | * (with permission from the author, Douglas W. Jones). | 130 | * two characters at a time, using a 200 byte lookup table. This |
| 131 | * roughly halves the number of multiplications compared to computing | ||
| 132 | * the digits one at a time. Implementation strongly inspired by the | ||
| 133 | * previous version, which in turn used ideas described at | ||
| 134 | * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission | ||
| 135 | * from the author, Douglas W. Jones). | ||
| 136 | * | ||
| 137 | * It turns out there is precisely one 26 bit fixed-point | ||
| 138 | * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32 | ||
| 139 | * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual | ||
| 140 | * range happens to be somewhat larger (x <= 1073741898), but that's | ||
| 141 | * irrelevant for our purpose. | ||
| 142 | * | ||
| 143 | * For dividing a number in the range [10^4, 10^6-1] by 100, we still | ||
| 144 | * need a 32x32->64 bit multiply, so we simply use the same constant. | ||
| 145 | * | ||
| 146 | * For dividing a number in the range [100, 10^4-1] by 100, there are | ||
| 147 | * several options. The simplest is (x * 0x147b) >> 19, which is valid | ||
| 148 | * for all x <= 43698. | ||
| 130 | */ | 149 | */ |
| 131 | 150 | ||
| 132 | #if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 | 151 | static const u16 decpair[100] = { |
| 133 | /* Formats correctly any integer in [0, 999999999] */ | 152 | #define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030) |
| 153 | _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9), | ||
| 154 | _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19), | ||
| 155 | _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29), | ||
| 156 | _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39), | ||
| 157 | _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49), | ||
| 158 | _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59), | ||
| 159 | _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69), | ||
| 160 | _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79), | ||
| 161 | _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89), | ||
| 162 | _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99), | ||
| 163 | #undef _ | ||
| 164 | }; | ||
| 165 | |||
| 166 | /* | ||
| 167 | * This will print a single '0' even if r == 0, since we would | ||
| 168 | * immediately jump to out_r where two 0s would be written but only | ||
| 169 | * one of them accounted for in buf. This is needed by ip4_string | ||
| 170 | * below. All other callers pass a non-zero value of r. | ||
| 171 | */ | ||
| 134 | static noinline_for_stack | 172 | static noinline_for_stack |
| 135 | char *put_dec_full9(char *buf, unsigned q) | 173 | char *put_dec_trunc8(char *buf, unsigned r) |
| 136 | { | 174 | { |
| 137 | unsigned r; | 175 | unsigned q; |
| 138 | 176 | ||
| 139 | /* | 177 | /* 1 <= r < 10^8 */ |
| 140 | * Possible ways to approx. divide by 10 | 178 | if (r < 100) |
| 141 | * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) | 179 | goto out_r; |
| 142 | * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) | 180 | |
| 143 | * (x * 0x6667) >> 18 x < 43699 | 181 | /* 100 <= r < 10^8 */ |
| 144 | * (x * 0x3334) >> 17 x < 16389 | 182 | q = (r * (u64)0x28f5c29) >> 32; |
| 145 | * (x * 0x199a) >> 16 x < 16389 | 183 | *((u16 *)buf) = decpair[r - 100*q]; |
| 146 | * (x * 0x0ccd) >> 15 x < 16389 | 184 | buf += 2; |
| 147 | * (x * 0x0667) >> 14 x < 2739 | 185 | |
| 148 | * (x * 0x0334) >> 13 x < 1029 | 186 | /* 1 <= q < 10^6 */ |
| 149 | * (x * 0x019a) >> 12 x < 1029 | 187 | if (q < 100) |
| 150 | * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) | 188 | goto out_q; |
| 151 | * (x * 0x0067) >> 10 x < 179 | 189 | |
| 152 | * (x * 0x0034) >> 9 x < 69 same | 190 | /* 100 <= q < 10^6 */ |
| 153 | * (x * 0x001a) >> 8 x < 69 same | 191 | r = (q * (u64)0x28f5c29) >> 32; |
| 154 | * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) | 192 | *((u16 *)buf) = decpair[q - 100*r]; |
| 155 | * (x * 0x0007) >> 6 x < 19 | 193 | buf += 2; |
| 156 | * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> | 194 | |
| 157 | */ | 195 | /* 1 <= r < 10^4 */ |
| 158 | r = (q * (uint64_t)0x1999999a) >> 32; | 196 | if (r < 100) |
| 159 | *buf++ = (q - 10 * r) + '0'; /* 1 */ | 197 | goto out_r; |
| 160 | q = (r * (uint64_t)0x1999999a) >> 32; | 198 | |
| 161 | *buf++ = (r - 10 * q) + '0'; /* 2 */ | 199 | /* 100 <= r < 10^4 */ |
| 162 | r = (q * (uint64_t)0x1999999a) >> 32; | 200 | q = (r * 0x147b) >> 19; |
| 163 | *buf++ = (q - 10 * r) + '0'; /* 3 */ | 201 | *((u16 *)buf) = decpair[r - 100*q]; |
| 164 | q = (r * (uint64_t)0x1999999a) >> 32; | 202 | buf += 2; |
| 165 | *buf++ = (r - 10 * q) + '0'; /* 4 */ | 203 | out_q: |
| 166 | r = (q * (uint64_t)0x1999999a) >> 32; | 204 | /* 1 <= q < 100 */ |
| 167 | *buf++ = (q - 10 * r) + '0'; /* 5 */ | 205 | r = q; |
| 168 | /* Now value is under 10000, can avoid 64-bit multiply */ | 206 | out_r: |
| 169 | q = (r * 0x199a) >> 16; | 207 | /* 1 <= r < 100 */ |
| 170 | *buf++ = (r - 10 * q) + '0'; /* 6 */ | 208 | *((u16 *)buf) = decpair[r]; |
| 171 | r = (q * 0xcd) >> 11; | 209 | buf += r < 10 ? 1 : 2; |
| 172 | *buf++ = (q - 10 * r) + '0'; /* 7 */ | ||
| 173 | q = (r * 0xcd) >> 11; | ||
| 174 | *buf++ = (r - 10 * q) + '0'; /* 8 */ | ||
| 175 | *buf++ = q + '0'; /* 9 */ | ||
| 176 | return buf; | 210 | return buf; |
| 177 | } | 211 | } |
| 178 | #endif | ||
| 179 | 212 | ||
| 180 | /* Similar to above but do not pad with zeros. | 213 | #if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64 |
| 181 | * Code can be easily arranged to print 9 digits too, but our callers | ||
| 182 | * always call put_dec_full9() instead when the number has 9 decimal digits. | ||
| 183 | */ | ||
| 184 | static noinline_for_stack | 214 | static noinline_for_stack |
| 185 | char *put_dec_trunc8(char *buf, unsigned r) | 215 | char *put_dec_full8(char *buf, unsigned r) |
| 186 | { | 216 | { |
| 187 | unsigned q; | 217 | unsigned q; |
| 188 | 218 | ||
| 189 | /* Copy of previous function's body with added early returns */ | 219 | /* 0 <= r < 10^8 */ |
| 190 | while (r >= 10000) { | 220 | q = (r * (u64)0x28f5c29) >> 32; |
| 191 | q = r + '0'; | 221 | *((u16 *)buf) = decpair[r - 100*q]; |
| 192 | r = (r * (uint64_t)0x1999999a) >> 32; | 222 | buf += 2; |
| 193 | *buf++ = q - 10*r; | ||
| 194 | } | ||
| 195 | 223 | ||
| 196 | q = (r * 0x199a) >> 16; /* r <= 9999 */ | 224 | /* 0 <= q < 10^6 */ |
| 197 | *buf++ = (r - 10 * q) + '0'; | 225 | r = (q * (u64)0x28f5c29) >> 32; |
| 198 | if (q == 0) | 226 | *((u16 *)buf) = decpair[q - 100*r]; |
| 199 | return buf; | 227 | buf += 2; |
| 200 | r = (q * 0xcd) >> 11; /* q <= 999 */ | ||
| 201 | *buf++ = (q - 10 * r) + '0'; | ||
| 202 | if (r == 0) | ||
| 203 | return buf; | ||
| 204 | q = (r * 0xcd) >> 11; /* r <= 99 */ | ||
| 205 | *buf++ = (r - 10 * q) + '0'; | ||
| 206 | if (q == 0) | ||
| 207 | return buf; | ||
| 208 | *buf++ = q + '0'; /* q <= 9 */ | ||
| 209 | return buf; | ||
| 210 | } | ||
| 211 | 228 | ||
| 212 | /* There are two algorithms to print larger numbers. | 229 | /* 0 <= r < 10^4 */ |
| 213 | * One is generic: divide by 1000000000 and repeatedly print | 230 | q = (r * 0x147b) >> 19; |
| 214 | * groups of (up to) 9 digits. It's conceptually simple, | 231 | *((u16 *)buf) = decpair[r - 100*q]; |
| 215 | * but requires a (unsigned long long) / 1000000000 division. | 232 | buf += 2; |
| 216 | * | ||
| 217 | * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, | ||
| 218 | * manipulates them cleverly and generates groups of 4 decimal digits. | ||
| 219 | * It so happens that it does NOT require long long division. | ||
| 220 | * | ||
| 221 | * If long is > 32 bits, division of 64-bit values is relatively easy, | ||
| 222 | * and we will use the first algorithm. | ||
| 223 | * If long long is > 64 bits (strange architecture with VERY large long long), | ||
| 224 | * second algorithm can't be used, and we again use the first one. | ||
| 225 | * | ||
| 226 | * Else (if long is 32 bits and long long is 64 bits) we use second one. | ||
| 227 | */ | ||
| 228 | 233 | ||
| 229 | #if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 | 234 | /* 0 <= q < 100 */ |
| 230 | 235 | *((u16 *)buf) = decpair[q]; | |
| 231 | /* First algorithm: generic */ | 236 | buf += 2; |
| 237 | return buf; | ||
| 238 | } | ||
| 232 | 239 | ||
| 233 | static | 240 | static noinline_for_stack |
| 234 | char *put_dec(char *buf, unsigned long long n) | 241 | char *put_dec(char *buf, unsigned long long n) |
| 235 | { | 242 | { |
| 236 | if (n >= 100*1000*1000) { | 243 | if (n >= 100*1000*1000) |
| 237 | while (n >= 1000*1000*1000) | 244 | buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); |
| 238 | buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); | 245 | /* 1 <= n <= 1.6e11 */ |
| 239 | if (n >= 100*1000*1000) | 246 | if (n >= 100*1000*1000) |
| 240 | return put_dec_full9(buf, n); | 247 | buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); |
| 241 | } | 248 | /* 1 <= n < 1e8 */ |
| 242 | return put_dec_trunc8(buf, n); | 249 | return put_dec_trunc8(buf, n); |
| 243 | } | 250 | } |
| 244 | 251 | ||
| 245 | #else | 252 | #elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64 |
| 246 | 253 | ||
| 247 | /* Second algorithm: valid only for 64-bit long longs */ | 254 | static void |
| 248 | 255 | put_dec_full4(char *buf, unsigned r) | |
| 249 | /* See comment in put_dec_full9 for choice of constants */ | ||
| 250 | static noinline_for_stack | ||
| 251 | void put_dec_full4(char *buf, unsigned q) | ||
| 252 | { | 256 | { |
| 253 | unsigned r; | 257 | unsigned q; |
| 254 | r = (q * 0xccd) >> 15; | 258 | |
| 255 | buf[0] = (q - 10 * r) + '0'; | 259 | /* 0 <= r < 10^4 */ |
| 256 | q = (r * 0xcd) >> 11; | 260 | q = (r * 0x147b) >> 19; |
| 257 | buf[1] = (r - 10 * q) + '0'; | 261 | *((u16 *)buf) = decpair[r - 100*q]; |
| 258 | r = (q * 0xcd) >> 11; | 262 | buf += 2; |
| 259 | buf[2] = (q - 10 * r) + '0'; | 263 | /* 0 <= q < 100 */ |
| 260 | buf[3] = r + '0'; | 264 | *((u16 *)buf) = decpair[q]; |
| 261 | } | 265 | } |
| 262 | 266 | ||
| 263 | /* | 267 | /* |
| @@ -265,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q) | |||
| 265 | * The approximation x/10000 == (x * 0x346DC5D7) >> 43 | 269 | * The approximation x/10000 == (x * 0x346DC5D7) >> 43 |
| 266 | * holds for all x < 1,128,869,999. The largest value this | 270 | * holds for all x < 1,128,869,999. The largest value this |
| 267 | * helper will ever be asked to convert is 1,125,520,955. | 271 | * helper will ever be asked to convert is 1,125,520,955. |
| 268 | * (d1 in the put_dec code, assuming n is all-ones). | 272 | * (second call in the put_dec code, assuming n is all-ones). |
| 269 | */ | 273 | */ |
| 270 | static | 274 | static noinline_for_stack |
| 271 | unsigned put_dec_helper4(char *buf, unsigned x) | 275 | unsigned put_dec_helper4(char *buf, unsigned x) |
| 272 | { | 276 | { |
| 273 | uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; | 277 | uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; |
| @@ -294,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n) | |||
| 294 | d2 = (h ) & 0xffff; | 298 | d2 = (h ) & 0xffff; |
| 295 | d3 = (h >> 16); /* implicit "& 0xffff" */ | 299 | d3 = (h >> 16); /* implicit "& 0xffff" */ |
| 296 | 300 | ||
| 301 | /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0 | ||
| 302 | = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */ | ||
| 297 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); | 303 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); |
| 298 | q = put_dec_helper4(buf, q); | 304 | q = put_dec_helper4(buf, q); |
| 299 | 305 | ||
| @@ -323,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n) | |||
| 323 | */ | 329 | */ |
| 324 | int num_to_str(char *buf, int size, unsigned long long num) | 330 | int num_to_str(char *buf, int size, unsigned long long num) |
| 325 | { | 331 | { |
| 326 | char tmp[sizeof(num) * 3]; | 332 | /* put_dec requires 2-byte alignment of the buffer. */ |
| 333 | char tmp[sizeof(num) * 3] __aligned(2); | ||
| 327 | int idx, len; | 334 | int idx, len; |
| 328 | 335 | ||
| 329 | /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ | 336 | /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ |
| @@ -384,7 +391,8 @@ static noinline_for_stack | |||
| 384 | char *number(char *buf, char *end, unsigned long long num, | 391 | char *number(char *buf, char *end, unsigned long long num, |
| 385 | struct printf_spec spec) | 392 | struct printf_spec spec) |
| 386 | { | 393 | { |
| 387 | char tmp[3 * sizeof(num)]; | 394 | /* put_dec requires 2-byte alignment of the buffer. */ |
| 395 | char tmp[3 * sizeof(num)] __aligned(2); | ||
| 388 | char sign; | 396 | char sign; |
| 389 | char locase; | 397 | char locase; |
| 390 | int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); | 398 | int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); |
| @@ -944,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt) | |||
| 944 | break; | 952 | break; |
| 945 | } | 953 | } |
| 946 | for (i = 0; i < 4; i++) { | 954 | for (i = 0; i < 4; i++) { |
| 947 | char temp[3]; /* hold each IP quad in reverse order */ | 955 | char temp[4] __aligned(2); /* hold each IP quad in reverse order */ |
| 948 | int digits = put_dec_trunc8(temp, addr[index]) - temp; | 956 | int digits = put_dec_trunc8(temp, addr[index]) - temp; |
| 949 | if (leading_zeros) { | 957 | if (leading_zeros) { |
| 950 | if (digits < 3) | 958 | if (digits < 3) |
