author     Jiri Kosina <jkosina@suse.cz>   2013-01-29 04:48:30 -0500
committer  Jiri Kosina <jkosina@suse.cz>   2013-01-29 04:48:30 -0500
commit     617677295b53a40d0e54aac4cbbc216ffbc755dd (patch)
tree       51b9e87213243ed5efff252c8e8d8fec4eebc588 /lib
parent     5c8d1b68e01a144813e38795fe6dbe7ebb506131 (diff)
parent     6abb7c25775b7fb2225ad0508236d63ca710e65f (diff)
Merge branch 'master' into for-next
Conflicts:
drivers/devfreq/exynos4_bus.c
Sync with Linus' tree to be able to apply patches that are
against newer code (mvneta).
Diffstat (limited to 'lib')
34 files changed, 1697 insertions, 434 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 4b31a46fb307..75cdb77fa49d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -42,6 +42,9 @@ config GENERIC_IO | |||
42 | config STMP_DEVICE | 42 | config STMP_DEVICE |
43 | bool | 43 | bool |
44 | 44 | ||
45 | config PERCPU_RWSEM | ||
46 | boolean | ||
47 | |||
45 | config CRC_CCITT | 48 | config CRC_CCITT |
46 | tristate "CRC-CCITT functions" | 49 | tristate "CRC-CCITT functions" |
47 | help | 50 | help |
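The new PERCPU_RWSEM symbol has no prompt ("boolean" is Kconfig's older synonym for "bool"), so other options are expected to select it; the Makefile hunk further down then links lib/percpu-rwsem.o. For orientation, a kernel-style sketch of the per-CPU rwsem API this enables; the percpu_* names match include/linux/percpu-rwsem.h, everything around them is illustrative.

/*
 * Illustrative, not buildable standalone: per-CPU read counters make the
 * read side cheap, while writers pay to drain all readers. Assumes the
 * semaphore was set up earlier with percpu_init_rwsem(&sem).
 */
#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore sem;

static void reader_path(void)
{
	percpu_down_read(&sem);		/* fast path, no cross-CPU cacheline traffic */
	/* ... read-side critical section ... */
	percpu_up_read(&sem);
}

static void writer_path(void)
{
	percpu_down_write(&sem);	/* waits for all readers to drain */
	/* ... exclusive critical section ... */
	percpu_up_write(&sem);
}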
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cba01d71db86..67604e599384 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -134,7 +134,7 @@ config DEBUG_SECTION_MISMATCH | |||
134 | any use of code/data previously in these sections would | 134 | any use of code/data previously in these sections would |
135 | most likely result in an oops. | 135 | most likely result in an oops. |
136 | In the code, functions and variables are annotated with | 136 | In the code, functions and variables are annotated with |
137 | __init, __devinit, etc. (see the full list in include/linux/init.h), | 137 | __init, __cpuinit, etc. (see the full list in include/linux/init.h), |
138 | which results in the code/data being placed in specific sections. | 138 | which results in the code/data being placed in specific sections. |
139 | The section mismatch analysis is always performed after a full | 139 | The section mismatch analysis is always performed after a full |
140 | kernel build, and enabling this option causes the following | 140 | kernel build, and enabling this option causes the following |
@@ -972,7 +972,7 @@ config RCU_CPU_STALL_TIMEOUT | |||
972 | int "RCU CPU stall timeout in seconds" | 972 | int "RCU CPU stall timeout in seconds" |
973 | depends on TREE_RCU || TREE_PREEMPT_RCU | 973 | depends on TREE_RCU || TREE_PREEMPT_RCU |
974 | range 3 300 | 974 | range 3 300 |
975 | default 60 | 975 | default 21 |
976 | help | 976 | help |
977 | If a given RCU grace period extends more than the specified | 977 | If a given RCU grace period extends more than the specified |
978 | number of seconds, a CPU stall warning is printed. If the | 978 | number of seconds, a CPU stall warning is printed. If the |
@@ -1192,14 +1192,14 @@ config MEMORY_NOTIFIER_ERROR_INJECT | |||
1192 | 1192 | ||
1193 | If unsure, say N. | 1193 | If unsure, say N. |
1194 | 1194 | ||
1195 | config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT | 1195 | config OF_RECONFIG_NOTIFIER_ERROR_INJECT |
1196 | tristate "pSeries reconfig notifier error injection module" | 1196 | tristate "OF reconfig notifier error injection module" |
1197 | depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION | 1197 | depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION |
1198 | help | 1198 | help |
1199 | This option provides the ability to inject artificial errors to | 1199 | This option provides the ability to inject artificial errors to |
1200 | pSeries reconfig notifier chain callbacks. It is controlled | 1200 | OF reconfig notifier chain callbacks. It is controlled |
1201 | through debugfs interface under | 1201 | through debugfs interface under |
1202 | /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/ | 1202 | /sys/kernel/debug/notifier-error-inject/OF-reconfig/ |
1203 | 1203 | ||
1204 | If the notifier call chain should be failed with some events | 1204 | If the notifier call chain should be failed with some events |
1205 | notified, write the error code to "actions/<notifier event>/error". | 1205 | notified, write the error code to "actions/<notifier event>/error". |
diff --git a/lib/Makefile b/lib/Makefile
index 821a16229111..02ed6c04cd7d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ | |||
12 | idr.o int_sqrt.o extable.o \ | 12 | idr.o int_sqrt.o extable.o \ |
13 | sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ | 13 | sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ |
14 | proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ | 14 | proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ |
15 | is_single_threaded.o plist.o decompress.o | 15 | is_single_threaded.o plist.o decompress.o kobject_uevent.o \ |
16 | earlycpio.o | ||
16 | 17 | ||
17 | lib-$(CONFIG_MMU) += ioremap.o | 18 | lib-$(CONFIG_MMU) += ioremap.o |
18 | lib-$(CONFIG_SMP) += cpumask.o | 19 | lib-$(CONFIG_SMP) += cpumask.o |
@@ -31,7 +32,6 @@ CFLAGS_kobject.o += -DDEBUG | |||
31 | CFLAGS_kobject_uevent.o += -DDEBUG | 32 | CFLAGS_kobject_uevent.o += -DDEBUG |
32 | endif | 33 | endif |
33 | 34 | ||
34 | lib-$(CONFIG_HOTPLUG) += kobject_uevent.o | ||
35 | obj-$(CONFIG_GENERIC_IOMAP) += iomap.o | 35 | obj-$(CONFIG_GENERIC_IOMAP) += iomap.o |
36 | obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o | 36 | obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o |
37 | obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o | 37 | obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o |
@@ -40,6 +40,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o | |||
40 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | 40 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o |
41 | lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 41 | lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
42 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | 42 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
43 | lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | ||
43 | 44 | ||
44 | CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) | 45 | CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) |
45 | obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o | 46 | obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o |
@@ -94,8 +95,8 @@ obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o | |||
94 | obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o | 95 | obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o |
95 | obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o | 96 | obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o |
96 | obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o | 97 | obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o |
97 | obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \ | 98 | obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ |
98 | pSeries-reconfig-notifier-error-inject.o | 99 | of-reconfig-notifier-error-inject.o |
99 | 100 | ||
100 | lib-$(CONFIG_GENERIC_BUG) += bug.o | 101 | lib-$(CONFIG_GENERIC_BUG) += bug.o |
101 | 102 | ||
@@ -163,7 +164,7 @@ $(obj)/crc32table.h: $(obj)/gen_crc32table | |||
163 | # | 164 | # |
164 | obj-$(CONFIG_OID_REGISTRY) += oid_registry.o | 165 | obj-$(CONFIG_OID_REGISTRY) += oid_registry.o |
165 | 166 | ||
166 | $(obj)/oid_registry.c: $(obj)/oid_registry_data.c | 167 | $(obj)/oid_registry.o: $(obj)/oid_registry_data.c |
167 | 168 | ||
168 | $(obj)/oid_registry_data.c: $(srctree)/include/linux/oid_registry.h \ | 169 | $(obj)/oid_registry_data.c: $(srctree)/include/linux/oid_registry.h \ |
169 | $(src)/build_OID_registry | 170 | $(src)/build_OID_registry |
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index de2c8b5a715b..11b9b01fda6b 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -81,7 +81,7 @@ next_tag: | |||
81 | goto next_tag; | 81 | goto next_tag; |
82 | } | 82 | } |
83 | 83 | ||
84 | if (unlikely((tag & 0x1f) == 0x1f)) { | 84 | if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) { |
85 | do { | 85 | do { |
86 | if (unlikely(datalen - dp < 2)) | 86 | if (unlikely(datalen - dp < 2)) |
87 | goto data_overrun_error; | 87 | goto data_overrun_error; |
@@ -91,12 +91,12 @@ next_tag: | |||
91 | 91 | ||
92 | /* Extract the length */ | 92 | /* Extract the length */ |
93 | len = data[dp++]; | 93 | len = data[dp++]; |
94 | if (len < 0x7f) { | 94 | if (len <= 0x7f) { |
95 | dp += len; | 95 | dp += len; |
96 | goto next_tag; | 96 | goto next_tag; |
97 | } | 97 | } |
98 | 98 | ||
99 | if (unlikely(len == 0x80)) { | 99 | if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { |
100 | /* Indefinite length */ | 100 | /* Indefinite length */ |
101 | if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) | 101 | if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) |
102 | goto indefinite_len_primitive; | 102 | goto indefinite_len_primitive; |
@@ -222,7 +222,7 @@ next_op: | |||
222 | if (unlikely(dp >= datalen - 1)) | 222 | if (unlikely(dp >= datalen - 1)) |
223 | goto data_overrun_error; | 223 | goto data_overrun_error; |
224 | tag = data[dp++]; | 224 | tag = data[dp++]; |
225 | if (unlikely((tag & 0x1f) == 0x1f)) | 225 | if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) |
226 | goto long_tag_not_supported; | 226 | goto long_tag_not_supported; |
227 | 227 | ||
228 | if (op & ASN1_OP_MATCH__ANY) { | 228 | if (op & ASN1_OP_MATCH__ANY) { |
@@ -254,7 +254,7 @@ next_op: | |||
254 | 254 | ||
255 | len = data[dp++]; | 255 | len = data[dp++]; |
256 | if (len > 0x7f) { | 256 | if (len > 0x7f) { |
257 | if (unlikely(len == 0x80)) { | 257 | if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { |
258 | /* Indefinite length */ | 258 | /* Indefinite length */ |
259 | if (unlikely(!(tag & ASN1_CONS_BIT))) | 259 | if (unlikely(!(tag & ASN1_CONS_BIT))) |
260 | goto indefinite_len_primitive; | 260 | goto indefinite_len_primitive; |
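The asn1_decoder changes replace the magic numbers 0x1f and 0x80 with the named constants ASN1_LONG_TAG and ASN1_INDEFINITE_LENGTH, and relax "len < 0x7f" to "len <= 0x7f" so a short-form length of exactly 0x7f takes the fast path instead of being misread as long form. A standalone sketch of the same BER length-octet decoding; the two constants mirror the kernel's, all other names are illustrative.

/*
 * Returns the content length, -2 for the indefinite form (the caller
 * must then scan for an end-of-contents marker), or -1 on error.
 */
#include <stddef.h>
#include <stdint.h>

#define ASN1_LONG_TAG		0x1f	/* low 5 tag bits all set: long tag form */
#define ASN1_INDEFINITE_LENGTH	0x80	/* length octet 0x80: indefinite form */

static long ber_decode_length(const uint8_t *data, size_t datalen, size_t *dp)
{
	uint8_t len;
	long n = 0;
	unsigned int i;

	if (*dp >= datalen)
		return -1;
	len = data[(*dp)++];
	if (len <= 0x7f)			/* short form: note <=, not < */
		return len;
	if (len == ASN1_INDEFINITE_LENGTH)
		return -2;
	len -= 0x80;				/* long form: next 'len' octets hold the length */
	if (len > sizeof(long) - 1 || datalen - *dp < len)
		return -1;
	for (i = 0; i < len; i++)
		n = (n << 8) | data[(*dp)++];
	return n;
}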
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 978537809d84..08a4f068e61e 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -31,7 +31,11 @@ | |||
31 | static union { | 31 | static union { |
32 | raw_spinlock_t lock; | 32 | raw_spinlock_t lock; |
33 | char pad[L1_CACHE_BYTES]; | 33 | char pad[L1_CACHE_BYTES]; |
34 | } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp; | 34 | } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { |
35 | [0 ... (NR_LOCKS - 1)] = { | ||
36 | .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), | ||
37 | }, | ||
38 | }; | ||
35 | 39 | ||
36 | static inline raw_spinlock_t *lock_addr(const atomic64_t *v) | 40 | static inline raw_spinlock_t *lock_addr(const atomic64_t *v) |
37 | { | 41 | { |
@@ -173,14 +177,3 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) | |||
173 | return ret; | 177 | return ret; |
174 | } | 178 | } |
175 | EXPORT_SYMBOL(atomic64_add_unless); | 179 | EXPORT_SYMBOL(atomic64_add_unless); |
176 | |||
177 | static int init_atomic64_lock(void) | ||
178 | { | ||
179 | int i; | ||
180 | |||
181 | for (i = 0; i < NR_LOCKS; ++i) | ||
182 | raw_spin_lock_init(&atomic64_lock[i].lock); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | pure_initcall(init_atomic64_lock); | ||
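Initializing the lock array statically with a GNU C range designator is what lets the commit delete init_atomic64_lock() and its pure_initcall(): the spinlocks are valid from the very first atomic64_*() call, even before any initcall runs. A minimal userspace analogue of the [0 ... N-1] idiom, with pthread mutexes standing in for raw spinlocks.

#include <pthread.h>
#include <stdio.h>

#define NR_LOCKS 16

static struct {
	pthread_mutex_t lock;
	char pad[64];		/* crude stand-in for the L1-cacheline padding */
} lock_array[NR_LOCKS] = {
	/* GNU C range designator: every slot initialized at compile time */
	[0 ... (NR_LOCKS - 1)] = { .lock = PTHREAD_MUTEX_INITIALIZER },
};

int main(void)
{
	/* Usable immediately; no runtime initialization pass required. */
	pthread_mutex_lock(&lock_array[3].lock);
	puts("lock 3 taken");
	pthread_mutex_unlock(&lock_array[3].lock);
	return 0;
}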
diff --git a/lib/bug.c b/lib/bug.c
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -55,6 +55,7 @@ static inline unsigned long bug_addr(const struct bug_entry *bug) | |||
55 | } | 55 | } |
56 | 56 | ||
57 | #ifdef CONFIG_MODULES | 57 | #ifdef CONFIG_MODULES |
58 | /* Updates are protected by module mutex */ | ||
58 | static LIST_HEAD(module_bug_list); | 59 | static LIST_HEAD(module_bug_list); |
59 | 60 | ||
60 | static const struct bug_entry *module_find_bug(unsigned long bugaddr) | 61 | static const struct bug_entry *module_find_bug(unsigned long bugaddr) |
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5267c9..5fbed5caba6e 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) | |||
45 | if (!rmap) | 45 | if (!rmap) |
46 | return NULL; | 46 | return NULL; |
47 | 47 | ||
48 | kref_init(&rmap->refcount); | ||
48 | rmap->obj = (void **)((char *)rmap + obj_offset); | 49 | rmap->obj = (void **)((char *)rmap + obj_offset); |
49 | 50 | ||
50 | /* Initially assign CPUs to objects on a rota, since we have | 51 | /* Initially assign CPUs to objects on a rota, since we have |
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) | |||
63 | } | 64 | } |
64 | EXPORT_SYMBOL(alloc_cpu_rmap); | 65 | EXPORT_SYMBOL(alloc_cpu_rmap); |
65 | 66 | ||
67 | /** | ||
68 | * cpu_rmap_release - internal reclaiming helper called from kref_put | ||
69 | * @ref: kref to struct cpu_rmap | ||
70 | */ | ||
71 | static void cpu_rmap_release(struct kref *ref) | ||
72 | { | ||
73 | struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount); | ||
74 | kfree(rmap); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * cpu_rmap_get - internal helper to get new ref on a cpu_rmap | ||
79 | * @rmap: reverse-map allocated with alloc_cpu_rmap() | ||
80 | */ | ||
81 | static inline void cpu_rmap_get(struct cpu_rmap *rmap) | ||
82 | { | ||
83 | kref_get(&rmap->refcount); | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * cpu_rmap_put - release ref on a cpu_rmap | ||
88 | * @rmap: reverse-map allocated with alloc_cpu_rmap() | ||
89 | */ | ||
90 | int cpu_rmap_put(struct cpu_rmap *rmap) | ||
91 | { | ||
92 | return kref_put(&rmap->refcount, cpu_rmap_release); | ||
93 | } | ||
94 | EXPORT_SYMBOL(cpu_rmap_put); | ||
95 | |||
66 | /* Reevaluate nearest object for given CPU, comparing with the given | 96 | /* Reevaluate nearest object for given CPU, comparing with the given |
67 | * neighbours at the given distance. | 97 | * neighbours at the given distance. |
68 | */ | 98 | */ |
@@ -197,8 +227,7 @@ struct irq_glue { | |||
197 | * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs | 227 | * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs |
198 | * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL | 228 | * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL |
199 | * | 229 | * |
200 | * Must be called in process context, before freeing the IRQs, and | 230 | * Must be called in process context, before freeing the IRQs. |
201 | * without holding any locks required by global workqueue items. | ||
202 | */ | 231 | */ |
203 | void free_irq_cpu_rmap(struct cpu_rmap *rmap) | 232 | void free_irq_cpu_rmap(struct cpu_rmap *rmap) |
204 | { | 233 | { |
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap) | |||
212 | glue = rmap->obj[index]; | 241 | glue = rmap->obj[index]; |
213 | irq_set_affinity_notifier(glue->notify.irq, NULL); | 242 | irq_set_affinity_notifier(glue->notify.irq, NULL); |
214 | } | 243 | } |
215 | irq_run_affinity_notifiers(); | ||
216 | 244 | ||
217 | kfree(rmap); | 245 | cpu_rmap_put(rmap); |
218 | } | 246 | } |
219 | EXPORT_SYMBOL(free_irq_cpu_rmap); | 247 | EXPORT_SYMBOL(free_irq_cpu_rmap); |
220 | 248 | ||
249 | /** | ||
250 | * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated | ||
251 | * @notify: struct irq_affinity_notify passed by irq/manage.c | ||
252 | * @mask: cpu mask for new SMP affinity | ||
253 | * | ||
254 | * This is executed in workqueue context. | ||
255 | */ | ||
221 | static void | 256 | static void |
222 | irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) | 257 | irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) |
223 | { | 258 | { |
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) | |||
230 | pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); | 265 | pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); |
231 | } | 266 | } |
232 | 267 | ||
268 | /** | ||
269 | * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem | ||
270 | * @ref: kref to struct irq_affinity_notify passed by irq/manage.c | ||
271 | */ | ||
233 | static void irq_cpu_rmap_release(struct kref *ref) | 272 | static void irq_cpu_rmap_release(struct kref *ref) |
234 | { | 273 | { |
235 | struct irq_glue *glue = | 274 | struct irq_glue *glue = |
236 | container_of(ref, struct irq_glue, notify.kref); | 275 | container_of(ref, struct irq_glue, notify.kref); |
276 | |||
277 | cpu_rmap_put(glue->rmap); | ||
237 | kfree(glue); | 278 | kfree(glue); |
238 | } | 279 | } |
239 | 280 | ||
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) | |||
258 | glue->notify.notify = irq_cpu_rmap_notify; | 299 | glue->notify.notify = irq_cpu_rmap_notify; |
259 | glue->notify.release = irq_cpu_rmap_release; | 300 | glue->notify.release = irq_cpu_rmap_release; |
260 | glue->rmap = rmap; | 301 | glue->rmap = rmap; |
302 | cpu_rmap_get(rmap); | ||
261 | glue->index = cpu_rmap_add(rmap, glue); | 303 | glue->index = cpu_rmap_add(rmap, glue); |
262 | rc = irq_set_affinity_notifier(irq, &glue->notify); | 304 | rc = irq_set_affinity_notifier(irq, &glue->notify); |
263 | if (rc) | 305 | if (rc) { |
306 | cpu_rmap_put(glue->rmap); | ||
264 | kfree(glue); | 307 | kfree(glue); |
308 | } | ||
265 | return rc; | 309 | return rc; |
266 | } | 310 | } |
267 | EXPORT_SYMBOL(irq_cpu_rmap_add); | 311 | EXPORT_SYMBOL(irq_cpu_rmap_add); |
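The cpu_rmap rework moves the object's lifetime onto a kref: alloc_cpu_rmap() starts at one reference, irq_cpu_rmap_add() takes another per glue, irq_cpu_rmap_release() drops it, and free_irq_cpu_rmap() now merely puts the caller's reference. The rmap therefore survives any affinity notifier still running in a workqueue, which is why the irq_run_affinity_notifiers() flush and the locking caveat in the kerneldoc could go. A self-contained userspace sketch of the get/put/release-via-container_of pattern; every name here is illustrative.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kref { atomic_int refcount; };

static void kref_init(struct kref *k) { atomic_store(&k->refcount, 1); }
static void kref_get(struct kref *k)  { atomic_fetch_add(&k->refcount, 1); }

static int kref_put(struct kref *k, void (*release)(struct kref *))
{
	if (atomic_fetch_sub(&k->refcount, 1) == 1) {	/* we dropped the last ref */
		release(k);
		return 1;
	}
	return 0;
}

struct rmap {
	struct kref refcount;	/* embedded, recovered via container_of */
	int data;
};

static void rmap_release(struct kref *k)
{
	struct rmap *r = container_of(k, struct rmap, refcount);
	printf("last ref gone, freeing rmap (data=%d)\n", r->data);
	free(r);
}

int main(void)
{
	struct rmap *r = malloc(sizeof(*r));

	kref_init(&r->refcount);		/* the allocator's reference */
	r->data = 42;
	kref_get(&r->refcount);			/* e.g. a notifier takes its own */
	kref_put(&r->refcount, rmap_release);	/* allocator drops: still alive */
	kref_put(&r->refcount, rmap_release);	/* notifier drops: now freed */
	return 0;
}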
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 402a54ac35cb..d327b87c99b7 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var); | |||
161 | */ | 161 | */ |
162 | void __init free_bootmem_cpumask_var(cpumask_var_t mask) | 162 | void __init free_bootmem_cpumask_var(cpumask_var_t mask) |
163 | { | 163 | { |
164 | free_bootmem((unsigned long)mask, cpumask_size()); | 164 | free_bootmem(__pa(mask), cpumask_size()); |
165 | } | 165 | } |
166 | #endif | 166 | #endif |
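The cpumask one-liner is a type fix: free_bootmem() takes a physical address, so casting the virtual mask pointer was only accidentally correct on configurations where the two coincide; __pa() performs the real translation. A kernel-style sketch of the corrected pairing (illustrative, not buildable standalone):

#include <linux/bootmem.h>
#include <linux/cpumask.h>

static void __init bootmem_mask_demo(void)
{
	/* alloc_bootmem() hands back a virtual address... */
	unsigned long *mask = alloc_bootmem(cpumask_size());

	/* ... early-boot use of the mask ... */

	/* ...but free_bootmem() wants the physical one. */
	free_bootmem(__pa(mask), cpumask_size());
}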
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb994f36..5e396accd3d0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@ enum { | |||
45 | dma_debug_coherent, | 45 | dma_debug_coherent, |
46 | }; | 46 | }; |
47 | 47 | ||
48 | enum map_err_types { | ||
49 | MAP_ERR_CHECK_NOT_APPLICABLE, | ||
50 | MAP_ERR_NOT_CHECKED, | ||
51 | MAP_ERR_CHECKED, | ||
52 | }; | ||
53 | |||
48 | #define DMA_DEBUG_STACKTRACE_ENTRIES 5 | 54 | #define DMA_DEBUG_STACKTRACE_ENTRIES 5 |
49 | 55 | ||
50 | struct dma_debug_entry { | 56 | struct dma_debug_entry { |
@@ -57,6 +63,7 @@ struct dma_debug_entry { | |||
57 | int direction; | 63 | int direction; |
58 | int sg_call_ents; | 64 | int sg_call_ents; |
59 | int sg_mapped_ents; | 65 | int sg_mapped_ents; |
66 | enum map_err_types map_err_type; | ||
60 | #ifdef CONFIG_STACKTRACE | 67 | #ifdef CONFIG_STACKTRACE |
61 | struct stack_trace stacktrace; | 68 | struct stack_trace stacktrace; |
62 | unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; | 69 | unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; |
@@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly; | |||
114 | 121 | ||
115 | static DEFINE_RWLOCK(driver_name_lock); | 122 | static DEFINE_RWLOCK(driver_name_lock); |
116 | 123 | ||
124 | static const char *const maperr2str[] = { | ||
125 | [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", | ||
126 | [MAP_ERR_NOT_CHECKED] = "dma map error not checked", | ||
127 | [MAP_ERR_CHECKED] = "dma map error checked", | ||
128 | }; | ||
129 | |||
117 | static const char *type2name[4] = { "single", "page", | 130 | static const char *type2name[4] = { "single", "page", |
118 | "scather-gather", "coherent" }; | 131 | "scather-gather", "coherent" }; |
119 | 132 | ||
@@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev) | |||
376 | list_for_each_entry(entry, &bucket->list, list) { | 389 | list_for_each_entry(entry, &bucket->list, list) { |
377 | if (!dev || dev == entry->dev) { | 390 | if (!dev || dev == entry->dev) { |
378 | dev_info(entry->dev, | 391 | dev_info(entry->dev, |
379 | "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", | 392 | "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", |
380 | type2name[entry->type], idx, | 393 | type2name[entry->type], idx, |
381 | (unsigned long long)entry->paddr, | 394 | (unsigned long long)entry->paddr, |
382 | entry->dev_addr, entry->size, | 395 | entry->dev_addr, entry->size, |
383 | dir2name[entry->direction]); | 396 | dir2name[entry->direction], |
397 | maperr2str[entry->map_err_type]); | ||
384 | } | 398 | } |
385 | } | 399 | } |
386 | 400 | ||
@@ -844,16 +858,16 @@ static void check_unmap(struct dma_debug_entry *ref) | |||
844 | struct hash_bucket *bucket; | 858 | struct hash_bucket *bucket; |
845 | unsigned long flags; | 859 | unsigned long flags; |
846 | 860 | ||
847 | if (dma_mapping_error(ref->dev, ref->dev_addr)) { | ||
848 | err_printk(ref->dev, NULL, "DMA-API: device driver tries " | ||
849 | "to free an invalid DMA memory address\n"); | ||
850 | return; | ||
851 | } | ||
852 | |||
853 | bucket = get_hash_bucket(ref, &flags); | 861 | bucket = get_hash_bucket(ref, &flags); |
854 | entry = bucket_find_exact(bucket, ref); | 862 | entry = bucket_find_exact(bucket, ref); |
855 | 863 | ||
856 | if (!entry) { | 864 | if (!entry) { |
865 | if (dma_mapping_error(ref->dev, ref->dev_addr)) { | ||
866 | err_printk(ref->dev, NULL, | ||
867 | "DMA-API: device driver tries " | ||
868 | "to free an invalid DMA memory address\n"); | ||
869 | return; | ||
870 | } | ||
857 | err_printk(ref->dev, NULL, "DMA-API: device driver tries " | 871 | err_printk(ref->dev, NULL, "DMA-API: device driver tries " |
858 | "to free DMA memory it has not allocated " | 872 | "to free DMA memory it has not allocated " |
859 | "[device address=0x%016llx] [size=%llu bytes]\n", | 873 | "[device address=0x%016llx] [size=%llu bytes]\n", |
@@ -910,6 +924,15 @@ static void check_unmap(struct dma_debug_entry *ref) | |||
910 | dir2name[ref->direction]); | 924 | dir2name[ref->direction]); |
911 | } | 925 | } |
912 | 926 | ||
927 | if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { | ||
928 | err_printk(ref->dev, entry, | ||
929 | "DMA-API: device driver failed to check map error" | ||
930 | "[device address=0x%016llx] [size=%llu bytes] " | ||
931 | "[mapped as %s]", | ||
932 | ref->dev_addr, ref->size, | ||
933 | type2name[entry->type]); | ||
934 | } | ||
935 | |||
913 | hash_bucket_del(entry); | 936 | hash_bucket_del(entry); |
914 | dma_entry_free(entry); | 937 | dma_entry_free(entry); |
915 | 938 | ||
@@ -1017,7 +1040,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, | |||
1017 | if (unlikely(global_disable)) | 1040 | if (unlikely(global_disable)) |
1018 | return; | 1041 | return; |
1019 | 1042 | ||
1020 | if (unlikely(dma_mapping_error(dev, dma_addr))) | 1043 | if (dma_mapping_error(dev, dma_addr)) |
1021 | return; | 1044 | return; |
1022 | 1045 | ||
1023 | entry = dma_entry_alloc(); | 1046 | entry = dma_entry_alloc(); |
@@ -1030,6 +1053,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, | |||
1030 | entry->dev_addr = dma_addr; | 1053 | entry->dev_addr = dma_addr; |
1031 | entry->size = size; | 1054 | entry->size = size; |
1032 | entry->direction = direction; | 1055 | entry->direction = direction; |
1056 | entry->map_err_type = MAP_ERR_NOT_CHECKED; | ||
1033 | 1057 | ||
1034 | if (map_single) | 1058 | if (map_single) |
1035 | entry->type = dma_debug_single; | 1059 | entry->type = dma_debug_single; |
@@ -1045,6 +1069,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, | |||
1045 | } | 1069 | } |
1046 | EXPORT_SYMBOL(debug_dma_map_page); | 1070 | EXPORT_SYMBOL(debug_dma_map_page); |
1047 | 1071 | ||
1072 | void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
1073 | { | ||
1074 | struct dma_debug_entry ref; | ||
1075 | struct dma_debug_entry *entry; | ||
1076 | struct hash_bucket *bucket; | ||
1077 | unsigned long flags; | ||
1078 | |||
1079 | if (unlikely(global_disable)) | ||
1080 | return; | ||
1081 | |||
1082 | ref.dev = dev; | ||
1083 | ref.dev_addr = dma_addr; | ||
1084 | bucket = get_hash_bucket(&ref, &flags); | ||
1085 | entry = bucket_find_exact(bucket, &ref); | ||
1086 | |||
1087 | if (!entry) | ||
1088 | goto out; | ||
1089 | |||
1090 | entry->map_err_type = MAP_ERR_CHECKED; | ||
1091 | out: | ||
1092 | put_hash_bucket(bucket, &flags); | ||
1093 | } | ||
1094 | EXPORT_SYMBOL(debug_dma_mapping_error); | ||
1095 | |||
1048 | void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, | 1096 | void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, |
1049 | size_t size, int direction, bool map_single) | 1097 | size_t size, int direction, bool map_single) |
1050 | { | 1098 | { |
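Taken together, the dma-debug additions track whether a driver ever tested a mapping for failure: debug_dma_map_page() records new entries as MAP_ERR_NOT_CHECKED, the new debug_dma_mapping_error() hook flips a matching entry to MAP_ERR_CHECKED, and check_unmap() warns when a still-unchecked mapping is released; the dma_mapping_error() bail-out in check_unmap() now runs only when no matching entry is found. The driver-side discipline this enforces, as a kernel-style sketch (illustrative, not buildable standalone):

#include <linux/dma-mapping.h>

static int send_buffer(struct device *dev, void *buf, size_t len)
{
	dma_addr_t addr;

	addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))	/* marks the entry MAP_ERR_CHECKED */
		return -ENOMEM;

	/* ... hand 'addr' to the hardware and wait for completion ... */

	dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
	return 0;
}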
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e7f7d993357a..1db1fc660538 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -62,13 +62,6 @@ static LIST_HEAD(ddebug_tables); | |||
62 | static int verbose = 0; | 62 | static int verbose = 0; |
63 | module_param(verbose, int, 0644); | 63 | module_param(verbose, int, 0644); |
64 | 64 | ||
65 | /* Return the last part of a pathname */ | ||
66 | static inline const char *basename(const char *path) | ||
67 | { | ||
68 | const char *tail = strrchr(path, '/'); | ||
69 | return tail ? tail+1 : path; | ||
70 | } | ||
71 | |||
72 | /* Return the path relative to source root */ | 65 | /* Return the path relative to source root */ |
73 | static inline const char *trim_prefix(const char *path) | 66 | static inline const char *trim_prefix(const char *path) |
74 | { | 67 | { |
@@ -154,7 +147,7 @@ static int ddebug_change(const struct ddebug_query *query, | |||
154 | /* match against the source filename */ | 147 | /* match against the source filename */ |
155 | if (query->filename && | 148 | if (query->filename && |
156 | strcmp(query->filename, dp->filename) && | 149 | strcmp(query->filename, dp->filename) && |
157 | strcmp(query->filename, basename(dp->filename)) && | 150 | strcmp(query->filename, kbasename(dp->filename)) && |
158 | strcmp(query->filename, trim_prefix(dp->filename))) | 151 | strcmp(query->filename, trim_prefix(dp->filename))) |
159 | continue; | 152 | continue; |
160 | 153 | ||
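dynamic_debug drops its private basename() in favour of the shared kbasename() helper from <linux/string.h>, which has the same strrchr-based semantics. A standalone demo of those semantics; the helper body mirrors the code removed above.

#include <stdio.h>
#include <string.h>

/* Same logic as the removed helper / the kernel's kbasename(). */
static const char *kbasename(const char *path)
{
	const char *tail = strrchr(path, '/');
	return tail ? tail + 1 : path;
}

int main(void)
{
	printf("%s\n", kbasename("kernel/sched/core.c"));	/* prints "core.c" */
	printf("%s\n", kbasename("main.c"));			/* prints "main.c" */
	return 0;
}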
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
new file mode 100644
index 000000000000..8078ef49cb79
--- /dev/null
+++ b/lib/earlycpio.c
@@ -0,0 +1,145 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2012 Intel Corporation; author H. Peter Anvin | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available | ||
6 | * under the terms of the GNU General Public License version 2, as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * ----------------------------------------------------------------------- */ | ||
15 | |||
16 | /* | ||
17 | * earlycpio.c | ||
18 | * | ||
19 | * Find a specific cpio member; must precede any compressed content. | ||
20 | * This is used to locate data items in the initramfs used by the | ||
21 | * kernel itself during early boot (before the main initramfs is | ||
22 | * decompressed.) It is the responsibility of the initramfs creator | ||
23 | * to ensure that these items are uncompressed at the head of the | ||
24 | * blob. Depending on the boot loader or package tool that may be a | ||
25 | * separate file or part of the same file. | ||
26 | */ | ||
27 | |||
28 | #include <linux/earlycpio.h> | ||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/string.h> | ||
31 | |||
32 | enum cpio_fields { | ||
33 | C_MAGIC, | ||
34 | C_INO, | ||
35 | C_MODE, | ||
36 | C_UID, | ||
37 | C_GID, | ||
38 | C_NLINK, | ||
39 | C_MTIME, | ||
40 | C_FILESIZE, | ||
41 | C_MAJ, | ||
42 | C_MIN, | ||
43 | C_RMAJ, | ||
44 | C_RMIN, | ||
45 | C_NAMESIZE, | ||
46 | C_CHKSUM, | ||
47 | C_NFIELDS | ||
48 | }; | ||
49 | |||
50 | /** | ||
51 | * cpio_data find_cpio_data - Search for files in an uncompressed cpio | ||
52 | * @path: The directory to search for, including a slash at the end | ||
53 | * @data: Pointer to the the cpio archive or a header inside | ||
54 | * @len: Remaining length of the cpio based on data pointer | ||
55 | * @offset: When a matching file is found, this is the offset to the | ||
56 | * beginning of the cpio. It can be used to iterate through | ||
57 | * the cpio to find all files inside of a directory path | ||
58 | * | ||
59 | * @return: struct cpio_data containing the address, length and | ||
60 | * filename (with the directory path cut off) of the found file. | ||
61 | * If you search for a filename and not for files in a directory, | ||
62 | * pass the absolute path of the filename in the cpio and make sure | ||
63 | * the match returned an empty filename string. | ||
64 | */ | ||
65 | |||
66 | struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, | ||
67 | size_t len, long *offset) | ||
68 | { | ||
69 | const size_t cpio_header_len = 8*C_NFIELDS - 2; | ||
70 | struct cpio_data cd = { NULL, 0, "" }; | ||
71 | const char *p, *dptr, *nptr; | ||
72 | unsigned int ch[C_NFIELDS], *chp, v; | ||
73 | unsigned char c, x; | ||
74 | size_t mypathsize = strlen(path); | ||
75 | int i, j; | ||
76 | |||
77 | p = data; | ||
78 | |||
79 | while (len > cpio_header_len) { | ||
80 | if (!*p) { | ||
81 | /* All cpio headers need to be 4-byte aligned */ | ||
82 | p += 4; | ||
83 | len -= 4; | ||
84 | continue; | ||
85 | } | ||
86 | |||
87 | j = 6; /* The magic field is only 6 characters */ | ||
88 | chp = ch; | ||
89 | for (i = C_NFIELDS; i; i--) { | ||
90 | v = 0; | ||
91 | while (j--) { | ||
92 | v <<= 4; | ||
93 | c = *p++; | ||
94 | |||
95 | x = c - '0'; | ||
96 | if (x < 10) { | ||
97 | v += x; | ||
98 | continue; | ||
99 | } | ||
100 | |||
101 | x = (c | 0x20) - 'a'; | ||
102 | if (x < 6) { | ||
103 | v += x + 10; | ||
104 | continue; | ||
105 | } | ||
106 | |||
107 | goto quit; /* Invalid hexadecimal */ | ||
108 | } | ||
109 | *chp++ = v; | ||
110 | j = 8; /* All other fields are 8 characters */ | ||
111 | } | ||
112 | |||
113 | if ((ch[C_MAGIC] - 0x070701) > 1) | ||
114 | goto quit; /* Invalid magic */ | ||
115 | |||
116 | len -= cpio_header_len; | ||
117 | |||
118 | dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4); | ||
119 | nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4); | ||
120 | |||
121 | if (nptr > p + len || dptr < p || nptr < dptr) | ||
122 | goto quit; /* Buffer overrun */ | ||
123 | |||
124 | if ((ch[C_MODE] & 0170000) == 0100000 && | ||
125 | ch[C_NAMESIZE] >= mypathsize && | ||
126 | !memcmp(p, path, mypathsize)) { | ||
127 | *offset = (long)nptr - (long)data; | ||
128 | if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { | ||
129 | pr_warn( | ||
130 | "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", | ||
131 | p, MAX_CPIO_FILE_NAME); | ||
132 | } | ||
133 | strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); | ||
134 | |||
135 | cd.data = (void *)dptr; | ||
136 | cd.size = ch[C_FILESIZE]; | ||
137 | return cd; /* Found it! */ | ||
138 | } | ||
139 | len -= (nptr - p); | ||
140 | p = nptr; | ||
141 | } | ||
142 | |||
143 | quit: | ||
144 | return cd; | ||
145 | } | ||
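Per the kerneldoc above, find_cpio_data() walks an uncompressed newc-format cpio (magic 070701/070702), matches entries whose name begins with @path, and reports through @offset how far into the blob the match ended, so a caller can resume the scan. A kernel-style sketch of iterating over every file under one directory (illustrative; only find_cpio_data and struct cpio_data come from the patch, the rest is assumed):

#include <linux/earlycpio.h>
#include <linux/kernel.h>

static void __init list_early_files(void *blob, size_t blob_len)
{
	struct cpio_data cd;
	long offset = 0;

	while (blob_len > 0) {
		cd = find_cpio_data("kernel/fw/", blob, blob_len, &offset);
		if (!cd.data)
			break;			/* no further match */
		pr_info("early file: %s (%zu bytes)\n", cd.name, cd.size);
		blob += offset;			/* resume just past this member */
		blob_len -= offset;
	}
}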
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
index b25903987f7a..245900b98c8e 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test_main.c
@@ -30,7 +30,8 @@ static void init(void) | |||
30 | { | 30 | { |
31 | int i; | 31 | int i; |
32 | for (i = 0; i < NODES; i++) { | 32 | for (i = 0; i < NODES; i++) { |
33 | u32 a = prandom32(&rnd), b = prandom32(&rnd); | 33 | u32 a = prandom_u32_state(&rnd); |
34 | u32 b = prandom_u32_state(&rnd); | ||
34 | if (a <= b) { | 35 | if (a <= b) { |
35 | nodes[i].start = a; | 36 | nodes[i].start = a; |
36 | nodes[i].last = b; | 37 | nodes[i].last = b; |
@@ -40,7 +41,7 @@ static void init(void) | |||
40 | } | 41 | } |
41 | } | 42 | } |
42 | for (i = 0; i < SEARCHES; i++) | 43 | for (i = 0; i < SEARCHES; i++) |
43 | queries[i] = prandom32(&rnd); | 44 | queries[i] = prandom_u32_state(&rnd); |
44 | } | 45 | } |
45 | 46 | ||
46 | static int interval_tree_test_init(void) | 47 | static int interval_tree_test_init(void) |
@@ -51,7 +52,7 @@ static int interval_tree_test_init(void) | |||
51 | 52 | ||
52 | printk(KERN_ALERT "interval tree insert/remove"); | 53 | printk(KERN_ALERT "interval tree insert/remove"); |
53 | 54 | ||
54 | prandom32_seed(&rnd, 3141592653589793238ULL); | 55 | prandom_seed_state(&rnd, 3141592653589793238ULL); |
55 | init(); | 56 | init(); |
56 | 57 | ||
57 | time1 = get_cycles(); | 58 | time1 = get_cycles(); |
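The interval-tree test is adjusted for a PRNG API rename: prandom32_seed()/prandom32() became prandom_seed_state()/prandom_u32_state(), still operating on a caller-owned struct rnd_state so the sequence stays reproducible and independent of the global pool. A minimal kernel-style sketch of the renamed pairing (illustrative):

#include <linux/random.h>

static void __init reproducible_fill(u32 *out, int n)
{
	struct rnd_state rnd;
	int i;

	/* Same fixed seed => same sequence on every run. */
	prandom_seed_state(&rnd, 3141592653589793238ULL);
	for (i = 0; i < n; i++)
		out[i] = prandom_u32_state(&rnd);
}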
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index c3615eab0cc3..f78ae0c0c4e2 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) | |||
104 | return 0; | 104 | return 0; |
105 | } | 105 | } |
106 | 106 | ||
107 | /** | ||
108 | * kstrtoull - convert a string to an unsigned long long | ||
109 | * @s: The start of the string. The string must be null-terminated, and may also | ||
110 | * include a single newline before its terminating null. The first character | ||
111 | * may also be a plus sign, but not a minus sign. | ||
112 | * @base: The number base to use. The maximum supported base is 16. If base is | ||
113 | * given as 0, then the base of the string is automatically detected with the | ||
114 | * conventional semantics - If it begins with 0x the number will be parsed as a | ||
115 | * hexadecimal (case insensitive), if it otherwise begins with 0, it will be | ||
116 | * parsed as an octal number. Otherwise it will be parsed as a decimal. | ||
117 | * @res: Where to write the result of the conversion on success. | ||
118 | * | ||
119 | * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. | ||
120 | * Used as a replacement for the obsolete simple_strtoull. Return code must | ||
121 | * be checked. | ||
122 | */ | ||
107 | int kstrtoull(const char *s, unsigned int base, unsigned long long *res) | 123 | int kstrtoull(const char *s, unsigned int base, unsigned long long *res) |
108 | { | 124 | { |
109 | if (s[0] == '+') | 125 | if (s[0] == '+') |
@@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) | |||
112 | } | 128 | } |
113 | EXPORT_SYMBOL(kstrtoull); | 129 | EXPORT_SYMBOL(kstrtoull); |
114 | 130 | ||
131 | /** | ||
132 | * kstrtoll - convert a string to a long long | ||
133 | * @s: The start of the string. The string must be null-terminated, and may also | ||
134 | * include a single newline before its terminating null. The first character | ||
135 | * may also be a plus sign or a minus sign. | ||
136 | * @base: The number base to use. The maximum supported base is 16. If base is | ||
137 | * given as 0, then the base of the string is automatically detected with the | ||
138 | * conventional semantics - If it begins with 0x the number will be parsed as a | ||
139 | * hexadecimal (case insensitive), if it otherwise begins with 0, it will be | ||
140 | * parsed as an octal number. Otherwise it will be parsed as a decimal. | ||
141 | * @res: Where to write the result of the conversion on success. | ||
142 | * | ||
143 | * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. | ||
144 | * Used as a replacement for the obsolete simple_strtoull. Return code must | ||
145 | * be checked. | ||
146 | */ | ||
115 | int kstrtoll(const char *s, unsigned int base, long long *res) | 147 | int kstrtoll(const char *s, unsigned int base, long long *res) |
116 | { | 148 | { |
117 | unsigned long long tmp; | 149 | unsigned long long tmp; |
@@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res) | |||
168 | } | 200 | } |
169 | EXPORT_SYMBOL(_kstrtol); | 201 | EXPORT_SYMBOL(_kstrtol); |
170 | 202 | ||
203 | /** | ||
204 | * kstrtouint - convert a string to an unsigned int | ||
205 | * @s: The start of the string. The string must be null-terminated, and may also | ||
206 | * include a single newline before its terminating null. The first character | ||
207 | * may also be a plus sign, but not a minus sign. | ||
208 | * @base: The number base to use. The maximum supported base is 16. If base is | ||
209 | * given as 0, then the base of the string is automatically detected with the | ||
210 | * conventional semantics - If it begins with 0x the number will be parsed as a | ||
211 | * hexadecimal (case insensitive), if it otherwise begins with 0, it will be | ||
212 | * parsed as an octal number. Otherwise it will be parsed as a decimal. | ||
213 | * @res: Where to write the result of the conversion on success. | ||
214 | * | ||
215 | * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. | ||
216 | * Used as a replacement for the obsolete simple_strtoull. Return code must | ||
217 | * be checked. | ||
218 | */ | ||
171 | int kstrtouint(const char *s, unsigned int base, unsigned int *res) | 219 | int kstrtouint(const char *s, unsigned int base, unsigned int *res) |
172 | { | 220 | { |
173 | unsigned long long tmp; | 221 | unsigned long long tmp; |
@@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res) | |||
183 | } | 231 | } |
184 | EXPORT_SYMBOL(kstrtouint); | 232 | EXPORT_SYMBOL(kstrtouint); |
185 | 233 | ||
234 | /** | ||
235 | * kstrtoint - convert a string to an int | ||
236 | * @s: The start of the string. The string must be null-terminated, and may also | ||
237 | * include a single newline before its terminating null. The first character | ||
238 | * may also be a plus sign or a minus sign. | ||
239 | * @base: The number base to use. The maximum supported base is 16. If base is | ||
240 | * given as 0, then the base of the string is automatically detected with the | ||
241 | * conventional semantics - If it begins with 0x the number will be parsed as a | ||
242 | * hexadecimal (case insensitive), if it otherwise begins with 0, it will be | ||
243 | * parsed as an octal number. Otherwise it will be parsed as a decimal. | ||
244 | * @res: Where to write the result of the conversion on success. | ||
245 | * | ||
246 | * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. | ||
247 | * Used as a replacement for the obsolete simple_strtoull. Return code must | ||
248 | * be checked. | ||
249 | */ | ||
186 | int kstrtoint(const char *s, unsigned int base, int *res) | 250 | int kstrtoint(const char *s, unsigned int base, int *res) |
187 | { | 251 | { |
188 | long long tmp; | 252 | long long tmp; |
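The new kstrtox kerneldoc pins down the contract: a single trailing newline is tolerated, base 0 autodetects 0x/0 prefixes, and, unlike the obsolete simple_strtoull(), the return code must be checked. Typical use in a sysfs store handler, as a kernel-style sketch (illustrative, not buildable standalone):

#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t threshold_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	unsigned long long val;
	int ret;

	ret = kstrtoull(buf, 0, &val);	/* buf ends in '\n'; that's accepted */
	if (ret)
		return ret;		/* -EINVAL or -ERANGE */

	/* ... apply 'val' ... */
	return count;
}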
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index a07e7268d7ed..d71d89498943 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); | |||
44 | } while (0) | 44 | } while (0) |
45 | 45 | ||
46 | #define RETURN(x...) do { \ | 46 | #define RETURN(x...) do { \ |
47 | clear_bit(__LC_PARANOIA, &lc->flags); \ | 47 | clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ |
48 | smp_mb__after_clear_bit(); return x ; } while (0) | 48 | return x ; } while (0) |
49 | 49 | ||
50 | /* BUG() if e is not one of the elements tracked by lc */ | 50 | /* BUG() if e is not one of the elements tracked by lc */ |
51 | #define PARANOIA_LC_ELEMENT(lc, e) do { \ | 51 | #define PARANOIA_LC_ELEMENT(lc, e) do { \ |
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); | |||
55 | BUG_ON(i >= lc_->nr_elements); \ | 55 | BUG_ON(i >= lc_->nr_elements); \ |
56 | BUG_ON(lc_->lc_element[i] != e_); } while (0) | 56 | BUG_ON(lc_->lc_element[i] != e_); } while (0) |
57 | 57 | ||
58 | |||
59 | /* We need to atomically | ||
60 | * - try to grab the lock (set LC_LOCKED) | ||
61 | * - only if there is no pending transaction | ||
62 | * (neither LC_DIRTY nor LC_STARVING is set) | ||
63 | * Because of PARANOIA_ENTRY() above abusing lc->flags as well, | ||
64 | * it is not sufficient to just say | ||
65 | * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); | ||
66 | */ | ||
67 | int lc_try_lock(struct lru_cache *lc) | ||
68 | { | ||
69 | unsigned long val; | ||
70 | do { | ||
71 | val = cmpxchg(&lc->flags, 0, LC_LOCKED); | ||
72 | } while (unlikely (val == LC_PARANOIA)); | ||
73 | /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ | ||
74 | return 0 == val; | ||
75 | #if 0 | ||
76 | /* Alternative approach, spin in case someone enters or leaves a | ||
77 | * PARANOIA_ENTRY()/RETURN() section. */ | ||
78 | unsigned long old, new, val; | ||
79 | do { | ||
80 | old = lc->flags & LC_PARANOIA; | ||
81 | new = old | LC_LOCKED; | ||
82 | val = cmpxchg(&lc->flags, old, new); | ||
83 | } while (unlikely (val == (old ^ LC_PARANOIA))); | ||
84 | return old == val; | ||
85 | #endif | ||
86 | } | ||
87 | |||
58 | /** | 88 | /** |
59 | * lc_create - prepares to track objects in an active set | 89 | * lc_create - prepares to track objects in an active set |
60 | * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details | 90 | * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details |
91 | * @max_pending_changes: maximum changes to accumulate until a transaction is required | ||
61 | * @e_count: number of elements allowed to be active simultaneously | 92 | * @e_count: number of elements allowed to be active simultaneously |
62 | * @e_size: size of the tracked objects | 93 | * @e_size: size of the tracked objects |
63 | * @e_off: offset to the &struct lc_element member in a tracked object | 94 | * @e_off: offset to the &struct lc_element member in a tracked object |
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); | |||
66 | * or NULL on (allocation) failure. | 97 | * or NULL on (allocation) failure. |
67 | */ | 98 | */ |
68 | struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | 99 | struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, |
100 | unsigned max_pending_changes, | ||
69 | unsigned e_count, size_t e_size, size_t e_off) | 101 | unsigned e_count, size_t e_size, size_t e_off) |
70 | { | 102 | { |
71 | struct hlist_head *slot = NULL; | 103 | struct hlist_head *slot = NULL; |
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | |||
98 | INIT_LIST_HEAD(&lc->in_use); | 130 | INIT_LIST_HEAD(&lc->in_use); |
99 | INIT_LIST_HEAD(&lc->lru); | 131 | INIT_LIST_HEAD(&lc->lru); |
100 | INIT_LIST_HEAD(&lc->free); | 132 | INIT_LIST_HEAD(&lc->free); |
133 | INIT_LIST_HEAD(&lc->to_be_changed); | ||
101 | 134 | ||
102 | lc->name = name; | 135 | lc->name = name; |
103 | lc->element_size = e_size; | 136 | lc->element_size = e_size; |
104 | lc->element_off = e_off; | 137 | lc->element_off = e_off; |
105 | lc->nr_elements = e_count; | 138 | lc->nr_elements = e_count; |
106 | lc->new_number = LC_FREE; | 139 | lc->max_pending_changes = max_pending_changes; |
107 | lc->lc_cache = cache; | 140 | lc->lc_cache = cache; |
108 | lc->lc_element = element; | 141 | lc->lc_element = element; |
109 | lc->lc_slot = slot; | 142 | lc->lc_slot = slot; |
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | |||
117 | e = p + e_off; | 150 | e = p + e_off; |
118 | e->lc_index = i; | 151 | e->lc_index = i; |
119 | e->lc_number = LC_FREE; | 152 | e->lc_number = LC_FREE; |
153 | e->lc_new_number = LC_FREE; | ||
120 | list_add(&e->list, &lc->free); | 154 | list_add(&e->list, &lc->free); |
121 | element[i] = e; | 155 | element[i] = e; |
122 | } | 156 | } |
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) | |||
175 | INIT_LIST_HEAD(&lc->in_use); | 209 | INIT_LIST_HEAD(&lc->in_use); |
176 | INIT_LIST_HEAD(&lc->lru); | 210 | INIT_LIST_HEAD(&lc->lru); |
177 | INIT_LIST_HEAD(&lc->free); | 211 | INIT_LIST_HEAD(&lc->free); |
212 | INIT_LIST_HEAD(&lc->to_be_changed); | ||
178 | lc->used = 0; | 213 | lc->used = 0; |
179 | lc->hits = 0; | 214 | lc->hits = 0; |
180 | lc->misses = 0; | 215 | lc->misses = 0; |
181 | lc->starving = 0; | 216 | lc->starving = 0; |
182 | lc->dirty = 0; | 217 | lc->locked = 0; |
183 | lc->changed = 0; | 218 | lc->changed = 0; |
219 | lc->pending_changes = 0; | ||
184 | lc->flags = 0; | 220 | lc->flags = 0; |
185 | lc->changing_element = NULL; | ||
186 | lc->new_number = LC_FREE; | ||
187 | memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); | 221 | memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); |
188 | 222 | ||
189 | for (i = 0; i < lc->nr_elements; i++) { | 223 | for (i = 0; i < lc->nr_elements; i++) { |
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) | |||
194 | /* re-init it */ | 228 | /* re-init it */ |
195 | e->lc_index = i; | 229 | e->lc_index = i; |
196 | e->lc_number = LC_FREE; | 230 | e->lc_number = LC_FREE; |
231 | e->lc_new_number = LC_FREE; | ||
197 | list_add(&e->list, &lc->free); | 232 | list_add(&e->list, &lc->free); |
198 | } | 233 | } |
199 | } | 234 | } |
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) | |||
208 | /* NOTE: | 243 | /* NOTE: |
209 | * total calls to lc_get are | 244 | * total calls to lc_get are |
210 | * (starving + hits + misses) | 245 | * (starving + hits + misses) |
211 | * misses include "dirty" count (update from an other thread in | 246 | * misses include "locked" count (update from an other thread in |
212 | * progress) and "changed", when this in fact lead to an successful | 247 | * progress) and "changed", when this in fact lead to an successful |
213 | * update of the cache. | 248 | * update of the cache. |
214 | */ | 249 | */ |
215 | return seq_printf(seq, "\t%s: used:%u/%u " | 250 | return seq_printf(seq, "\t%s: used:%u/%u " |
216 | "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", | 251 | "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", |
217 | lc->name, lc->used, lc->nr_elements, | 252 | lc->name, lc->used, lc->nr_elements, |
218 | lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); | 253 | lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); |
219 | } | 254 | } |
220 | 255 | ||
221 | static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | 256 | static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) |
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | |||
224 | } | 259 | } |
225 | 260 | ||
226 | 261 | ||
227 | /** | 262 | static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, |
228 | * lc_find - find element by label, if present in the hash table | 263 | bool include_changing) |
229 | * @lc: The lru_cache object | ||
230 | * @enr: element number | ||
231 | * | ||
232 | * Returns the pointer to an element, if the element with the requested | ||
233 | * "label" or element number is present in the hash table, | ||
234 | * or NULL if not found. Does not change the refcnt. | ||
235 | */ | ||
236 | struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) | ||
237 | { | 264 | { |
238 | struct hlist_node *n; | 265 | struct hlist_node *n; |
239 | struct lc_element *e; | 266 | struct lc_element *e; |
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) | |||
241 | BUG_ON(!lc); | 268 | BUG_ON(!lc); |
242 | BUG_ON(!lc->nr_elements); | 269 | BUG_ON(!lc->nr_elements); |
243 | hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { | 270 | hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { |
244 | if (e->lc_number == enr) | 271 | /* "about to be changed" elements, pending transaction commit, |
272 | * are hashed by their "new number". "Normal" elements have | ||
273 | * lc_number == lc_new_number. */ | ||
274 | if (e->lc_new_number != enr) | ||
275 | continue; | ||
276 | if (e->lc_new_number == e->lc_number || include_changing) | ||
245 | return e; | 277 | return e; |
278 | break; | ||
246 | } | 279 | } |
247 | return NULL; | 280 | return NULL; |
248 | } | 281 | } |
249 | 282 | ||
250 | /* returned element will be "recycled" immediately */ | 283 | /** |
251 | static struct lc_element *lc_evict(struct lru_cache *lc) | 284 | * lc_find - find element by label, if present in the hash table |
285 | * @lc: The lru_cache object | ||
286 | * @enr: element number | ||
287 | * | ||
288 | * Returns the pointer to an element, if the element with the requested | ||
289 | * "label" or element number is present in the hash table, | ||
290 | * or NULL if not found. Does not change the refcnt. | ||
291 | * Ignores elements that are "about to be used", i.e. not yet in the active | ||
292 | * set, but still pending transaction commit. | ||
293 | */ | ||
294 | struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) | ||
252 | { | 295 | { |
253 | struct list_head *n; | 296 | return __lc_find(lc, enr, 0); |
254 | struct lc_element *e; | 297 | } |
255 | |||
256 | if (list_empty(&lc->lru)) | ||
257 | return NULL; | ||
258 | |||
259 | n = lc->lru.prev; | ||
260 | e = list_entry(n, struct lc_element, list); | ||
261 | |||
262 | PARANOIA_LC_ELEMENT(lc, e); | ||
263 | 298 | ||
264 | list_del(&e->list); | 299 | /** |
265 | hlist_del(&e->colision); | 300 | * lc_is_used - find element by label |
266 | return e; | 301 | * @lc: The lru_cache object |
302 | * @enr: element number | ||
303 | * | ||
304 | * Returns true, if the element with the requested "label" or element number is | ||
305 | * present in the hash table, and is used (refcnt > 0). | ||
306 | * Also finds elements that are not _currently_ used but only "about to be | ||
307 | * used", i.e. on the "to_be_changed" list, pending transaction commit. | ||
308 | */ | ||
309 | bool lc_is_used(struct lru_cache *lc, unsigned int enr) | ||
310 | { | ||
311 | struct lc_element *e = __lc_find(lc, enr, 1); | ||
312 | return e && e->refcnt; | ||
267 | } | 313 | } |
268 | 314 | ||
269 | /** | 315 | /** |
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) | |||
280 | PARANOIA_LC_ELEMENT(lc, e); | 326 | PARANOIA_LC_ELEMENT(lc, e); |
281 | BUG_ON(e->refcnt); | 327 | BUG_ON(e->refcnt); |
282 | 328 | ||
283 | e->lc_number = LC_FREE; | 329 | e->lc_number = e->lc_new_number = LC_FREE; |
284 | hlist_del_init(&e->colision); | 330 | hlist_del_init(&e->colision); |
285 | list_move(&e->list, &lc->free); | 331 | list_move(&e->list, &lc->free); |
286 | RETURN(); | 332 | RETURN(); |
287 | } | 333 | } |
288 | 334 | ||
289 | static struct lc_element *lc_get_unused_element(struct lru_cache *lc) | 335 | static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) |
290 | { | 336 | { |
291 | struct list_head *n; | 337 | struct list_head *n; |
338 | struct lc_element *e; | ||
339 | |||
340 | if (!list_empty(&lc->free)) | ||
341 | n = lc->free.next; | ||
342 | else if (!list_empty(&lc->lru)) | ||
343 | n = lc->lru.prev; | ||
344 | else | ||
345 | return NULL; | ||
346 | |||
347 | e = list_entry(n, struct lc_element, list); | ||
348 | PARANOIA_LC_ELEMENT(lc, e); | ||
292 | 349 | ||
293 | if (list_empty(&lc->free)) | 350 | e->lc_new_number = new_number; |
294 | return lc_evict(lc); | 351 | if (!hlist_unhashed(&e->colision)) |
352 | __hlist_del(&e->colision); | ||
353 | hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); | ||
354 | list_move(&e->list, &lc->to_be_changed); | ||
295 | 355 | ||
296 | n = lc->free.next; | 356 | return e; |
297 | list_del(n); | ||
298 | return list_entry(n, struct lc_element, list); | ||
299 | } | 357 | } |
300 | 358 | ||
301 | static int lc_unused_element_available(struct lru_cache *lc) | 359 | static int lc_unused_element_available(struct lru_cache *lc) |
@@ -308,45 +366,7 @@ static int lc_unused_element_available(struct lru_cache *lc) | |||
308 | return 0; | 366 | return 0; |
309 | } | 367 | } |
310 | 368 | ||
311 | 369 | static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) | |
312 | /** | ||
313 | * lc_get - get element by label, maybe change the active set | ||
314 | * @lc: the lru cache to operate on | ||
315 | * @enr: the label to look up | ||
316 | * | ||
317 | * Finds an element in the cache, increases its usage count, | ||
318 | * "touches" and returns it. | ||
319 | * | ||
320 | * In case the requested number is not present, it needs to be added to the | ||
321 | * cache. Therefore it is possible that an other element becomes evicted from | ||
322 | * the cache. In either case, the user is notified so he is able to e.g. keep | ||
323 | * a persistent log of the cache changes, and therefore the objects in use. | ||
324 | * | ||
325 | * Return values: | ||
326 | * NULL | ||
327 | * The cache was marked %LC_STARVING, | ||
328 | * or the requested label was not in the active set | ||
329 | * and a changing transaction is still pending (@lc was marked %LC_DIRTY). | ||
330 | * Or no unused or free element could be recycled (@lc will be marked as | ||
331 | * %LC_STARVING, blocking further lc_get() operations). | ||
332 | * | ||
333 | * pointer to the element with the REQUESTED element number. | ||
334 | * In this case, it can be used right away | ||
335 | * | ||
336 | * pointer to an UNUSED element with some different element number, | ||
337 | * where that different number may also be %LC_FREE. | ||
338 | * | ||
339 | * In this case, the cache is marked %LC_DIRTY (blocking further changes), | ||
340 | * and the returned element pointer is removed from the lru list and | ||
341 | * hash collision chains. The user now should do whatever housekeeping | ||
342 | * is necessary. | ||
343 | * Then he must call lc_changed(lc,element_pointer), to finish | ||
344 | * the change. | ||
345 | * | ||
346 | * NOTE: The user needs to check the lc_number on EACH use, so he recognizes | ||
347 | * any cache set change. | ||
348 | */ | ||
349 | struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | ||
350 | { | 370 | { |
351 | struct lc_element *e; | 371 | struct lc_element *e; |
352 | 372 | ||
@@ -356,8 +376,12 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | |||
356 | RETURN(NULL); | 376 | RETURN(NULL); |
357 | } | 377 | } |
358 | 378 | ||
359 | e = lc_find(lc, enr); | 379 | e = __lc_find(lc, enr, 1); |
360 | if (e) { | 380 | /* if lc_new_number != lc_number, |
381 | * this enr is currently being pulled in already, | ||
382 | * and will be available once the pending transaction | ||
383 | * has been committed. */ | ||
384 | if (e && e->lc_new_number == e->lc_number) { | ||
361 | ++lc->hits; | 385 | ++lc->hits; |
362 | if (e->refcnt++ == 0) | 386 | if (e->refcnt++ == 0) |
363 | lc->used++; | 387 | lc->used++; |
@@ -366,6 +390,26 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | |||
366 | } | 390 | } |
367 | 391 | ||
368 | ++lc->misses; | 392 | ++lc->misses; |
393 | if (!may_change) | ||
394 | RETURN(NULL); | ||
395 | |||
396 | /* It has been found above, but on the "to_be_changed" list, not yet | ||
397 | * committed. Don't pull it in twice, wait for the transaction, then | ||
398 | * try again */ | ||
399 | if (e) | ||
400 | RETURN(NULL); | ||
401 | |||
402 | /* To avoid races with lc_try_lock(), first, mark us dirty | ||
403 | * (using test_and_set_bit, as it implies memory barriers), ... */ | ||
404 | test_and_set_bit(__LC_DIRTY, &lc->flags); | ||
405 | |||
406 | /* ... only then check if it is locked anyways. If lc_unlock clears | ||
407 | * the dirty bit again, that's not a problem, we will come here again. | ||
408 | */ | ||
409 | if (test_bit(__LC_LOCKED, &lc->flags)) { | ||
410 | ++lc->locked; | ||
411 | RETURN(NULL); | ||
412 | } | ||
369 | 413 | ||
370 | /* In case there is nothing available and we can not kick out | 414 | /* In case there is nothing available and we can not kick out |
371 | * the LRU element, we have to wait ... | 415 | * the LRU element, we have to wait ... |
@@ -375,71 +419,109 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | |||
375 | RETURN(NULL); | 419 | RETURN(NULL); |
376 | } | 420 | } |
377 | 421 | ||
378 | /* it was not present in the active set. | 422 | /* It was not present in the active set. We are going to recycle an |
379 | * we are going to recycle an unused (or even "free") element. | 423 | * unused (or even "free") element, but we won't accumulate more than |
380 | * user may need to commit a transaction to record that change. | 424 | * max_pending_changes changes. */ |
381 | * we serialize on flags & TF_DIRTY */ | 425 | if (lc->pending_changes >= lc->max_pending_changes) |
382 | if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { | ||
383 | ++lc->dirty; | ||
384 | RETURN(NULL); | 426 | RETURN(NULL); |
385 | } | ||
386 | 427 | ||
387 | e = lc_get_unused_element(lc); | 428 | e = lc_prepare_for_change(lc, enr); |
388 | BUG_ON(!e); | 429 | BUG_ON(!e); |
389 | 430 | ||
390 | clear_bit(__LC_STARVING, &lc->flags); | 431 | clear_bit(__LC_STARVING, &lc->flags); |
391 | BUG_ON(++e->refcnt != 1); | 432 | BUG_ON(++e->refcnt != 1); |
392 | lc->used++; | 433 | lc->used++; |
393 | 434 | lc->pending_changes++; | |
394 | lc->changing_element = e; | ||
395 | lc->new_number = enr; | ||
396 | 435 | ||
397 | RETURN(e); | 436 | RETURN(e); |
398 | } | 437 | } |
399 | 438 | ||
400 | /* similar to lc_get, | 439 | /** |
401 | * but only gets a new reference on an existing element. | 440 | * lc_get - get element by label, maybe change the active set |
402 | * you either get the requested element, or NULL. | 441 | * @lc: the lru cache to operate on |
403 | * will be consolidated into one function. | 442 | * @enr: the label to look up |
443 | * | ||
444 | * Finds an element in the cache, increases its usage count, | ||
445 | * "touches" and returns it. | ||
446 | * | ||
447 | * In case the requested number is not present, it needs to be added to the | ||
448 | * cache. Therefore it is possible that another element is evicted from | ||
449 | * the cache. In either case, the user is notified so he can, e.g., keep | ||
450 | * a persistent log of the cache changes, and therefore the objects in use. | ||
451 | * | ||
452 | * Return values: | ||
453 | * NULL | ||
454 | * The cache was marked %LC_STARVING, | ||
455 | * or the requested label was not in the active set | ||
456 | * and a changing transaction is still pending (@lc was marked %LC_DIRTY). | ||
457 | * Or no unused or free element could be recycled (@lc will be marked as | ||
458 | * %LC_STARVING, blocking further lc_get() operations). | ||
459 | * | ||
460 | * pointer to the element with the REQUESTED element number. | ||
461 | * In this case, it can be used right away | ||
462 | * | ||
463 | * pointer to an UNUSED element with some different element number, | ||
464 | * where that different number may also be %LC_FREE. | ||
465 | * | ||
466 | * In this case, the cache is marked %LC_DIRTY, | ||
467 | * so lc_try_lock() will no longer succeed. | ||
468 | * The returned element pointer is moved to the "to_be_changed" list, | ||
469 | * and registered with the new element number on the hash collision chains, | ||
470 | * so it is possible to pick it up from lc_is_used(). | ||
471 | * Up to "max_pending_changes" (see lc_create()) can be accumulated. | ||
472 | * The user now should do whatever housekeeping is necessary, | ||
473 | * typically serialize on lc_try_lock_for_transaction(), then call | ||
474 | * lc_committed(lc) and lc_unlock(), to finish the change. | ||
475 | * | ||
476 | * NOTE: The user needs to check the lc_number on EACH use, so he recognizes | ||
477 | * any cache set change. | ||
404 | */ | 478 | */ |
405 | struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) | 479 | struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) |
406 | { | 480 | { |
407 | struct lc_element *e; | 481 | return __lc_get(lc, enr, 1); |
408 | 482 | } | |
409 | PARANOIA_ENTRY(); | ||
410 | if (lc->flags & LC_STARVING) { | ||
411 | ++lc->starving; | ||
412 | RETURN(NULL); | ||
413 | } | ||
414 | 483 | ||
415 | e = lc_find(lc, enr); | 484 | /** |
416 | if (e) { | 485 | * lc_try_get - get element by label, if present; do not change the active set |
417 | ++lc->hits; | 486 | * @lc: the lru cache to operate on |
418 | if (e->refcnt++ == 0) | 487 | * @enr: the label to look up |
419 | lc->used++; | 488 | * |
420 | list_move(&e->list, &lc->in_use); /* Not evictable... */ | 489 | * Finds an element in the cache, increases its usage count, |
421 | } | 490 | * "touches" and returns it. |
422 | RETURN(e); | 491 | * |
492 | * Return values: | ||
493 | * NULL | ||
494 | * The cache was marked %LC_STARVING, | ||
495 | * or the requested label was not in the active set | ||
496 | * | ||
497 | * pointer to the element with the REQUESTED element number. | ||
498 | * In this case, it can be used right away | ||
499 | */ | ||
500 | struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) | ||
501 | { | ||
502 | return __lc_get(lc, enr, 0); | ||
423 | } | 503 | } |
424 | 504 | ||
425 | /** | 505 | /** |
426 | * lc_changed - tell @lc that the change has been recorded | 506 | * lc_committed - tell @lc that pending changes have been recorded |
427 | * @lc: the lru cache to operate on | 507 | * @lc: the lru cache to operate on |
428 | * @e: the element pending label change | 508 | * |
509 | * User is expected to serialize on explicit lc_try_lock_for_transaction() | ||
510 | * before the transaction is started, and later needs to lc_unlock() explicitly | ||
511 | * as well. | ||
429 | */ | 512 | */ |
430 | void lc_changed(struct lru_cache *lc, struct lc_element *e) | 513 | void lc_committed(struct lru_cache *lc) |
431 | { | 514 | { |
515 | struct lc_element *e, *tmp; | ||
516 | |||
432 | PARANOIA_ENTRY(); | 517 | PARANOIA_ENTRY(); |
433 | BUG_ON(e != lc->changing_element); | 518 | list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { |
434 | PARANOIA_LC_ELEMENT(lc, e); | 519 | /* count number of changes, not number of transactions */ |
435 | ++lc->changed; | 520 | ++lc->changed; |
436 | e->lc_number = lc->new_number; | 521 | e->lc_number = e->lc_new_number; |
437 | list_add(&e->list, &lc->in_use); | 522 | list_move(&e->list, &lc->in_use); |
438 | hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); | 523 | } |
439 | lc->changing_element = NULL; | 524 | lc->pending_changes = 0; |
440 | lc->new_number = LC_FREE; | ||
441 | clear_bit(__LC_DIRTY, &lc->flags); | ||
442 | smp_mb__after_clear_bit(); | ||
443 | RETURN(); | 525 | RETURN(); |
444 | } | 526 | } |
445 | 527 | ||
@@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) | |||
458 | PARANOIA_ENTRY(); | 540 | PARANOIA_ENTRY(); |
459 | PARANOIA_LC_ELEMENT(lc, e); | 541 | PARANOIA_LC_ELEMENT(lc, e); |
460 | BUG_ON(e->refcnt == 0); | 542 | BUG_ON(e->refcnt == 0); |
461 | BUG_ON(e == lc->changing_element); | 543 | BUG_ON(e->lc_number != e->lc_new_number); |
462 | if (--e->refcnt == 0) { | 544 | if (--e->refcnt == 0) { |
463 | /* move it to the front of LRU. */ | 545 | /* move it to the front of LRU. */ |
464 | list_move(&e->list, &lc->lru); | 546 | list_move(&e->list, &lc->lru); |
465 | lc->used--; | 547 | lc->used--; |
466 | clear_bit(__LC_STARVING, &lc->flags); | 548 | clear_bit_unlock(__LC_STARVING, &lc->flags); |
467 | smp_mb__after_clear_bit(); | ||
468 | } | 549 | } |
469 | RETURN(e->refcnt); | 550 | RETURN(e->refcnt); |
470 | } | 551 | } |
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) | |||
504 | void lc_set(struct lru_cache *lc, unsigned int enr, int index) | 585 | void lc_set(struct lru_cache *lc, unsigned int enr, int index) |
505 | { | 586 | { |
506 | struct lc_element *e; | 587 | struct lc_element *e; |
588 | struct list_head *lh; | ||
507 | 589 | ||
508 | if (index < 0 || index >= lc->nr_elements) | 590 | if (index < 0 || index >= lc->nr_elements) |
509 | return; | 591 | return; |
510 | 592 | ||
511 | e = lc_element_by_index(lc, index); | 593 | e = lc_element_by_index(lc, index); |
512 | e->lc_number = enr; | 594 | BUG_ON(e->lc_number != e->lc_new_number); |
595 | BUG_ON(e->refcnt != 0); | ||
513 | 596 | ||
597 | e->lc_number = e->lc_new_number = enr; | ||
514 | hlist_del_init(&e->colision); | 598 | hlist_del_init(&e->colision); |
515 | hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); | 599 | if (enr == LC_FREE) |
516 | list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); | 600 | lh = &lc->free; |
601 | else { | ||
602 | hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); | ||
603 | lh = &lc->lru; | ||
604 | } | ||
605 | list_move(&e->list, lh); | ||
517 | } | 606 | } |
518 | 607 | ||
519 | /** | 608 | /** |
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); | |||
553 | EXPORT_SYMBOL(lc_find); | 642 | EXPORT_SYMBOL(lc_find); |
554 | EXPORT_SYMBOL(lc_get); | 643 | EXPORT_SYMBOL(lc_get); |
555 | EXPORT_SYMBOL(lc_put); | 644 | EXPORT_SYMBOL(lc_put); |
556 | EXPORT_SYMBOL(lc_changed); | 645 | EXPORT_SYMBOL(lc_committed); |
557 | EXPORT_SYMBOL(lc_element_by_index); | 646 | EXPORT_SYMBOL(lc_element_by_index); |
558 | EXPORT_SYMBOL(lc_index_of); | 647 | EXPORT_SYMBOL(lc_index_of); |
559 | EXPORT_SYMBOL(lc_seq_printf_stats); | 648 | EXPORT_SYMBOL(lc_seq_printf_stats); |
560 | EXPORT_SYMBOL(lc_seq_dump_details); | 649 | EXPORT_SYMBOL(lc_seq_dump_details); |
650 | EXPORT_SYMBOL(lc_try_lock); | ||
651 | EXPORT_SYMBOL(lc_is_used); | ||
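
Taken together, the lru_cache changes above replace the old one-element
lc_changed() protocol with batched transactions: lc_get() may queue up to
max_pending_changes label changes on the "to_be_changed" list, which the
caller records and then commits in one step. A minimal caller sketch, using
the lc_try_lock_for_transaction()/lc_unlock() pair referenced in the new
kernel-doc; write_transaction_to_disk() is a hypothetical stand-in for the
user's own housekeeping, not part of this patch:

	/* Sketch only: illustrates the documented lc_get()/lc_committed() flow. */
	static struct lc_element *activate_extent(struct lru_cache *lc, unsigned int enr)
	{
		struct lc_element *e = lc_get(lc, enr);

		if (!e)	/* starving, transaction still pending, or too many changes */
			return NULL;

		if (e->lc_number == enr)	/* hit: usable right away */
			return e;

		/* miss: e sits on the "to_be_changed" list under the new label */
		if (lc_try_lock_for_transaction(lc)) {
			write_transaction_to_disk(lc);	/* user's housekeeping */
			lc_committed(lc);
			lc_unlock(lc);
		}
		return e;
	}
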
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 678ce4f1e124..095ab157a521 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h | |||
@@ -641,7 +641,14 @@ do { \ | |||
641 | ************** MIPS ***************** | 641 | ************** MIPS ***************** |
642 | ***************************************/ | 642 | ***************************************/ |
643 | #if defined(__mips__) && W_TYPE_SIZE == 32 | 643 | #if defined(__mips__) && W_TYPE_SIZE == 32 |
644 | #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 | 644 | #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) |
645 | #define umul_ppmm(w1, w0, u, v) \ | ||
646 | do { \ | ||
647 | UDItype __ll = (UDItype)(u) * (v); \ | ||
648 | w1 = __ll >> 32; \ | ||
649 | w0 = __ll; \ | ||
650 | } while (0) | ||
651 | #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 | ||
645 | #define umul_ppmm(w1, w0, u, v) \ | 652 | #define umul_ppmm(w1, w0, u, v) \ |
646 | __asm__ ("multu %2,%3" \ | 653 | __asm__ ("multu %2,%3" \ |
647 | : "=l" ((USItype)(w0)), \ | 654 | : "=l" ((USItype)(w0)), \ |
@@ -666,7 +673,15 @@ do { \ | |||
666 | ************** MIPS/64 ************** | 673 | ************** MIPS/64 ************** |
667 | ***************************************/ | 674 | ***************************************/ |
668 | #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 | 675 | #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 |
669 | #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 | 676 | #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) |
677 | #define umul_ppmm(w1, w0, u, v) \ | ||
678 | do { \ | ||
679 | typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ | ||
680 | __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ | ||
681 | w1 = __ll >> 64; \ | ||
682 | w0 = __ll; \ | ||
683 | } while (0) | ||
684 | #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 | ||
670 | #define umul_ppmm(w1, w0, u, v) \ | 685 | #define umul_ppmm(w1, w0, u, v) \ |
671 | __asm__ ("dmultu %2,%3" \ | 686 | __asm__ ("dmultu %2,%3" \ |
672 | : "=l" ((UDItype)(w0)), \ | 687 | : "=l" ((UDItype)(w0)), \ |
diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/of-reconfig-notifier-error-inject.c index 7f7c98dcd5c4..8dc79861758a 100644 --- a/lib/pSeries-reconfig-notifier-error-inject.c +++ b/lib/of-reconfig-notifier-error-inject.c | |||
@@ -1,20 +1,20 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | 3 | #include <linux/of.h> | |
4 | #include <asm/pSeries_reconfig.h> | ||
5 | 4 | ||
6 | #include "notifier-error-inject.h" | 5 | #include "notifier-error-inject.h" |
7 | 6 | ||
8 | static int priority; | 7 | static int priority; |
9 | module_param(priority, int, 0); | 8 | module_param(priority, int, 0); |
10 | MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority"); | 9 | MODULE_PARM_DESC(priority, "specify OF reconfig notifier priority"); |
11 | 10 | ||
12 | static struct notifier_err_inject reconfig_err_inject = { | 11 | static struct notifier_err_inject reconfig_err_inject = { |
13 | .actions = { | 12 | .actions = { |
14 | { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_ADD) }, | 13 | { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ATTACH_NODE) }, |
15 | { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_REMOVE) }, | 14 | { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_DETACH_NODE) }, |
16 | { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_ADD) }, | 15 | { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ADD_PROPERTY) }, |
17 | { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_REMOVE) }, | 16 | { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_REMOVE_PROPERTY) }, |
17 | { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_UPDATE_PROPERTY) }, | ||
18 | {} | 18 | {} |
19 | } | 19 | } |
20 | }; | 20 | }; |
@@ -25,12 +25,12 @@ static int err_inject_init(void) | |||
25 | { | 25 | { |
26 | int err; | 26 | int err; |
27 | 27 | ||
28 | dir = notifier_err_inject_init("pSeries-reconfig", | 28 | dir = notifier_err_inject_init("OF-reconfig", |
29 | notifier_err_inject_dir, &reconfig_err_inject, priority); | 29 | notifier_err_inject_dir, &reconfig_err_inject, priority); |
30 | if (IS_ERR(dir)) | 30 | if (IS_ERR(dir)) |
31 | return PTR_ERR(dir); | 31 | return PTR_ERR(dir); |
32 | 32 | ||
33 | err = pSeries_reconfig_notifier_register(&reconfig_err_inject.nb); | 33 | err = of_reconfig_notifier_register(&reconfig_err_inject.nb); |
34 | if (err) | 34 | if (err) |
35 | debugfs_remove_recursive(dir); | 35 | debugfs_remove_recursive(dir); |
36 | 36 | ||
@@ -39,13 +39,13 @@ static int err_inject_init(void) | |||
39 | 39 | ||
40 | static void err_inject_exit(void) | 40 | static void err_inject_exit(void) |
41 | { | 41 | { |
42 | pSeries_reconfig_notifier_unregister(&reconfig_err_inject.nb); | 42 | of_reconfig_notifier_unregister(&reconfig_err_inject.nb); |
43 | debugfs_remove_recursive(dir); | 43 | debugfs_remove_recursive(dir); |
44 | } | 44 | } |
45 | 45 | ||
46 | module_init(err_inject_init); | 46 | module_init(err_inject_init); |
47 | module_exit(err_inject_exit); | 47 | module_exit(err_inject_exit); |
48 | 48 | ||
49 | MODULE_DESCRIPTION("pSeries reconfig notifier error injection module"); | 49 | MODULE_DESCRIPTION("OF reconfig notifier error injection module"); |
50 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
51 | MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); | 51 | MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); |
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c new file mode 100644 index 000000000000..652a8ee8efe9 --- /dev/null +++ b/lib/percpu-rwsem.c | |||
@@ -0,0 +1,165 @@ | |||
1 | #include <linux/atomic.h> | ||
2 | #include <linux/rwsem.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/wait.h> | ||
5 | #include <linux/lockdep.h> | ||
6 | #include <linux/percpu-rwsem.h> | ||
7 | #include <linux/rcupdate.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/errno.h> | ||
10 | |||
11 | int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, | ||
12 | const char *name, struct lock_class_key *rwsem_key) | ||
13 | { | ||
14 | brw->fast_read_ctr = alloc_percpu(int); | ||
15 | if (unlikely(!brw->fast_read_ctr)) | ||
16 | return -ENOMEM; | ||
17 | |||
18 | /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ | ||
19 | __init_rwsem(&brw->rw_sem, name, rwsem_key); | ||
20 | atomic_set(&brw->write_ctr, 0); | ||
21 | atomic_set(&brw->slow_read_ctr, 0); | ||
22 | init_waitqueue_head(&brw->write_waitq); | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | void percpu_free_rwsem(struct percpu_rw_semaphore *brw) | ||
27 | { | ||
28 | free_percpu(brw->fast_read_ctr); | ||
29 | brw->fast_read_ctr = NULL; /* catch use after free bugs */ | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * This is the fast-path for down_read/up_read, it only needs to ensure | ||
34 | * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the | ||
35 | * fast per-cpu counter. The writer uses synchronize_sched_expedited() to | ||
36 | * serialize with the preempt-disabled section below. | ||
37 | * | ||
38 | * The nontrivial part is that we should guarantee acquire/release semantics | ||
39 | * in the cases where | ||
40 | * | ||
41 | * R_W: down_write() comes after up_read(), the writer should see all | ||
42 | * changes done by the reader | ||
43 | * or | ||
44 | * W_R: down_read() comes after up_write(), the reader should see all | ||
45 | * changes done by the writer | ||
46 | * | ||
47 | * If this helper fails the callers rely on the normal rw_semaphore and | ||
48 | * atomic_dec_and_test(), so in this case we have the necessary barriers. | ||
49 | * | ||
50 | * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or | ||
51 | * __this_cpu_add() below can be reordered with any LOAD/STORE done by the | ||
52 | * reader inside the critical section. See the comments in down_write and | ||
53 | * up_write below. | ||
54 | */ | ||
55 | static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) | ||
56 | { | ||
57 | bool success = false; | ||
58 | |||
59 | preempt_disable(); | ||
60 | if (likely(!atomic_read(&brw->write_ctr))) { | ||
61 | __this_cpu_add(*brw->fast_read_ctr, val); | ||
62 | success = true; | ||
63 | } | ||
64 | preempt_enable(); | ||
65 | |||
66 | return success; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * Like the normal down_read() this is not recursive, the writer can | ||
71 | * come after the first percpu_down_read() and create a deadlock. | ||
72 | * | ||
73 | * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, | ||
74 | * percpu_up_read() does rwsem_release(). This pairs with the usage | ||
75 | * of ->rw_sem in percpu_down/up_write(). | ||
76 | */ | ||
77 | void percpu_down_read(struct percpu_rw_semaphore *brw) | ||
78 | { | ||
79 | might_sleep(); | ||
80 | if (likely(update_fast_ctr(brw, +1))) { | ||
81 | rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | down_read(&brw->rw_sem); | ||
86 | atomic_inc(&brw->slow_read_ctr); | ||
87 | /* avoid up_read()->rwsem_release() */ | ||
88 | __up_read(&brw->rw_sem); | ||
89 | } | ||
90 | |||
91 | void percpu_up_read(struct percpu_rw_semaphore *brw) | ||
92 | { | ||
93 | rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); | ||
94 | |||
95 | if (likely(update_fast_ctr(brw, -1))) | ||
96 | return; | ||
97 | |||
98 | /* false-positive is possible but harmless */ | ||
99 | if (atomic_dec_and_test(&brw->slow_read_ctr)) | ||
100 | wake_up_all(&brw->write_waitq); | ||
101 | } | ||
102 | |||
103 | static int clear_fast_ctr(struct percpu_rw_semaphore *brw) | ||
104 | { | ||
105 | unsigned int sum = 0; | ||
106 | int cpu; | ||
107 | |||
108 | for_each_possible_cpu(cpu) { | ||
109 | sum += per_cpu(*brw->fast_read_ctr, cpu); | ||
110 | per_cpu(*brw->fast_read_ctr, cpu) = 0; | ||
111 | } | ||
112 | |||
113 | return sum; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * A writer increments ->write_ctr to force the readers to switch to the | ||
118 | * slow mode; note the atomic_read() check in update_fast_ctr(). | ||
119 | * | ||
120 | * After that the readers can only inc/dec the slow ->slow_read_ctr counter; | ||
121 | * ->fast_read_ctr is stable. Once the writer moves its sum into the slow | ||
122 | * counter it represents the number of active readers. | ||
123 | * | ||
124 | * Finally the writer takes ->rw_sem for writing and blocks the new readers, | ||
125 | * then waits until the slow counter becomes zero. | ||
126 | */ | ||
127 | void percpu_down_write(struct percpu_rw_semaphore *brw) | ||
128 | { | ||
129 | /* tell update_fast_ctr() there is a pending writer */ | ||
130 | atomic_inc(&brw->write_ctr); | ||
131 | /* | ||
132 | * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read | ||
133 | * so that update_fast_ctr() can't succeed. | ||
134 | * | ||
135 | * 2. Ensures we see the result of every previous this_cpu_add() in | ||
136 | * update_fast_ctr(). | ||
137 | * | ||
138 | * 3. Ensures that if any reader has exited its critical section via | ||
139 | * fast-path, it executes a full memory barrier before we return. | ||
140 | * See R_W case in the comment above update_fast_ctr(). | ||
141 | */ | ||
142 | synchronize_sched_expedited(); | ||
143 | |||
144 | /* exclude other writers, and block the new readers completely */ | ||
145 | down_write(&brw->rw_sem); | ||
146 | |||
147 | /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ | ||
148 | atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); | ||
149 | |||
150 | /* wait for all readers to complete their percpu_up_read() */ | ||
151 | wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); | ||
152 | } | ||
153 | |||
154 | void percpu_up_write(struct percpu_rw_semaphore *brw) | ||
155 | { | ||
156 | /* release the lock, but the readers can't use the fast-path */ | ||
157 | up_write(&brw->rw_sem); | ||
158 | /* | ||
159 | * Insert the barrier before the next fast-path in down_read, | ||
160 | * see W_R case in the comment above update_fast_ctr(). | ||
161 | */ | ||
162 | synchronize_sched_expedited(); | ||
163 | /* the last writer unblocks update_fast_ctr() */ | ||
164 | atomic_dec(&brw->write_ctr); | ||
165 | } | ||
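
A usage sketch for the new primitive (the names config_sem/config_value are
illustrative; initialization goes through the percpu_init_rwsem() wrapper in
include/linux/percpu-rwsem.h, which supplies the lockdep key): readers pay
only a per-cpu increment on the fast path, while the rare writer eats two
synchronize_sched_expedited() calls.

	static struct percpu_rw_semaphore config_sem;	/* percpu_init_rwsem() at init */
	static int config_value;

	static int read_config(void)
	{
		int v;

		percpu_down_read(&config_sem);	/* usually just a per-cpu increment */
		v = config_value;
		percpu_up_read(&config_sem);
		return v;
	}

	static void update_config(int v)
	{
		percpu_down_write(&config_sem);	/* forces readers to the slow path */
		config_value = v;
		percpu_up_write(&config_sem);
	}
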
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index de06dfe165b8..9f7c184725d7 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile | |||
@@ -1,8 +1,11 @@ | |||
1 | obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | 1 | obj-$(CONFIG_RAID6_PQ) += raid6_pq.o |
2 | 2 | ||
3 | raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ | 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ |
4 | int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ | 4 | int8.o int16.o int32.o |
5 | altivec8.o mmx.o sse1.o sse2.o | 5 | |
6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o | ||
7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o | ||
8 | |||
6 | hostprogs-y += mktables | 9 | hostprogs-y += mktables |
7 | 10 | ||
8 | quiet_cmd_unroll = UNROLL $@ | 11 | quiet_cmd_unroll = UNROLL $@ |
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 589f5f50ad2e..6d7316fe9f30 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = { | |||
45 | &raid6_sse1x2, | 45 | &raid6_sse1x2, |
46 | &raid6_sse2x1, | 46 | &raid6_sse2x1, |
47 | &raid6_sse2x2, | 47 | &raid6_sse2x2, |
48 | #ifdef CONFIG_AS_AVX2 | ||
49 | &raid6_avx2x1, | ||
50 | &raid6_avx2x2, | ||
51 | #endif | ||
48 | #endif | 52 | #endif |
49 | #if defined(__x86_64__) && !defined(__arch_um__) | 53 | #if defined(__x86_64__) && !defined(__arch_um__) |
50 | &raid6_sse2x1, | 54 | &raid6_sse2x1, |
51 | &raid6_sse2x2, | 55 | &raid6_sse2x2, |
52 | &raid6_sse2x4, | 56 | &raid6_sse2x4, |
57 | #ifdef CONFIG_AS_AVX2 | ||
58 | &raid6_avx2x1, | ||
59 | &raid6_avx2x2, | ||
60 | &raid6_avx2x4, | ||
61 | #endif | ||
53 | #endif | 62 | #endif |
54 | #ifdef CONFIG_ALTIVEC | 63 | #ifdef CONFIG_ALTIVEC |
55 | &raid6_altivec1, | 64 | &raid6_altivec1, |
@@ -72,6 +81,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov); | |||
72 | 81 | ||
73 | const struct raid6_recov_calls *const raid6_recov_algos[] = { | 82 | const struct raid6_recov_calls *const raid6_recov_algos[] = { |
74 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | 83 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) |
84 | #ifdef CONFIG_AS_AVX2 | ||
85 | &raid6_recov_avx2, | ||
86 | #endif | ||
75 | &raid6_recov_ssse3, | 87 | &raid6_recov_ssse3, |
76 | #endif | 88 | #endif |
77 | &raid6_recov_intx1, | 89 | &raid6_recov_intx1, |
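
These table entries feed the existing boot-time selection: gen_syndrome()
candidates whose ->valid() passes are benchmarked and the fastest wins,
while recovery simply takes the valid entry with the highest ->priority
(hence .priority = 2 for AVX2 versus 1 for SSSE3). A sketch of the recovery
pick, mirroring the raid6_choose_recov() logic in this file:

	const struct raid6_recov_calls *const *algo;
	const struct raid6_recov_calls *best = NULL;

	for (algo = raid6_recov_algos; *algo; algo++)
		if (!best || (*algo)->priority > best->priority)
			if (!(*algo)->valid || (*algo)->valid())
				best = *algo;
	/* table order puts raid6_recov_avx2 ahead of raid6_recov_ssse3 */
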
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index b71012b756f4..7cc12b532e95 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc | |||
@@ -24,13 +24,10 @@ | |||
24 | 24 | ||
25 | #include <linux/raid/pq.h> | 25 | #include <linux/raid/pq.h> |
26 | 26 | ||
27 | #ifdef CONFIG_ALTIVEC | ||
28 | |||
29 | #include <altivec.h> | 27 | #include <altivec.h> |
30 | #ifdef __KERNEL__ | 28 | #ifdef __KERNEL__ |
31 | # include <asm/cputable.h> | 29 | # include <asm/cputable.h> |
32 | # include <asm/switch_to.h> | 30 | # include <asm/switch_to.h> |
33 | #endif | ||
34 | 31 | ||
35 | /* | 32 | /* |
36 | * This is the C data type to use. We use a vector of | 33 | * This is the C data type to use. We use a vector of |
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c new file mode 100644 index 000000000000..bc3b1dd436eb --- /dev/null +++ b/lib/raid6/avx2.c | |||
@@ -0,0 +1,251 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright (C) 2012 Intel Corporation | ||
4 | * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com> | ||
5 | * | ||
6 | * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
7 | * | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation, Inc., 59 Temple Place Ste 330, | ||
12 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
13 | * (at your option) any later version; incorporated herein by reference. | ||
14 | * | ||
15 | * ----------------------------------------------------------------------- */ | ||
16 | |||
17 | /* | ||
18 | * AVX2 implementation of RAID-6 syndrome functions | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #ifdef CONFIG_AS_AVX2 | ||
23 | |||
24 | #include <linux/raid/pq.h> | ||
25 | #include "x86.h" | ||
26 | |||
27 | static const struct raid6_avx2_constants { | ||
28 | u64 x1d[4]; | ||
29 | } raid6_avx2_constants __aligned(32) = { | ||
30 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
31 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, | ||
32 | }; | ||
33 | |||
34 | static int raid6_have_avx2(void) | ||
35 | { | ||
36 | return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX); | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * Plain AVX2 implementation | ||
41 | */ | ||
42 | static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
43 | { | ||
44 | u8 **dptr = (u8 **)ptrs; | ||
45 | u8 *p, *q; | ||
46 | int d, z, z0; | ||
47 | |||
48 | z0 = disks - 3; /* Highest data disk */ | ||
49 | p = dptr[z0+1]; /* XOR parity */ | ||
50 | q = dptr[z0+2]; /* RS syndrome */ | ||
51 | |||
52 | kernel_fpu_begin(); | ||
53 | |||
54 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
55 | asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ | ||
56 | |||
57 | for (d = 0; d < bytes; d += 32) { | ||
58 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
59 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ | ||
60 | asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | ||
61 | asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ | ||
62 | asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); | ||
63 | for (z = z0-2; z >= 0; z--) { | ||
64 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
65 | asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); | ||
66 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
67 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
68 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
69 | asm volatile("vpxor %ymm6,%ymm2,%ymm2"); | ||
70 | asm volatile("vpxor %ymm6,%ymm4,%ymm4"); | ||
71 | asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); | ||
72 | } | ||
73 | asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); | ||
74 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
75 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
76 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
77 | asm volatile("vpxor %ymm6,%ymm2,%ymm2"); | ||
78 | asm volatile("vpxor %ymm6,%ymm4,%ymm4"); | ||
79 | |||
80 | asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); | ||
81 | asm volatile("vpxor %ymm2,%ymm2,%ymm2"); | ||
82 | asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); | ||
83 | asm volatile("vpxor %ymm4,%ymm4,%ymm4"); | ||
84 | } | ||
85 | |||
86 | asm volatile("sfence" : : : "memory"); | ||
87 | kernel_fpu_end(); | ||
88 | } | ||
89 | |||
90 | const struct raid6_calls raid6_avx2x1 = { | ||
91 | raid6_avx21_gen_syndrome, | ||
92 | raid6_have_avx2, | ||
93 | "avx2x1", | ||
94 | 1 /* Has cache hints */ | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * Unrolled-by-2 AVX2 implementation | ||
99 | */ | ||
100 | static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
101 | { | ||
102 | u8 **dptr = (u8 **)ptrs; | ||
103 | u8 *p, *q; | ||
104 | int d, z, z0; | ||
105 | |||
106 | z0 = disks - 3; /* Highest data disk */ | ||
107 | p = dptr[z0+1]; /* XOR parity */ | ||
108 | q = dptr[z0+2]; /* RS syndrome */ | ||
109 | |||
110 | kernel_fpu_begin(); | ||
111 | |||
112 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
113 | asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ | ||
114 | |||
115 | /* We uniformly assume a single prefetch covers at least 32 bytes */ | ||
116 | for (d = 0; d < bytes; d += 64) { | ||
117 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
118 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); | ||
119 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ | ||
120 | asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ | ||
121 | asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ | ||
122 | asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ | ||
123 | for (z = z0-1; z >= 0; z--) { | ||
124 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
125 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); | ||
126 | asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); | ||
127 | asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); | ||
128 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
129 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
130 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
131 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
132 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
133 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
134 | asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); | ||
135 | asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); | ||
136 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
137 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
138 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
139 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
140 | } | ||
141 | asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); | ||
142 | asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); | ||
143 | asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); | ||
144 | asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); | ||
145 | } | ||
146 | |||
147 | asm volatile("sfence" : : : "memory"); | ||
148 | kernel_fpu_end(); | ||
149 | } | ||
150 | |||
151 | const struct raid6_calls raid6_avx2x2 = { | ||
152 | raid6_avx22_gen_syndrome, | ||
153 | raid6_have_avx2, | ||
154 | "avx2x2", | ||
155 | 1 /* Has cache hints */ | ||
156 | }; | ||
157 | |||
158 | #ifdef CONFIG_X86_64 | ||
159 | |||
160 | /* | ||
161 | * Unrolled-by-4 AVX2 implementation | ||
162 | */ | ||
163 | static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
164 | { | ||
165 | u8 **dptr = (u8 **)ptrs; | ||
166 | u8 *p, *q; | ||
167 | int d, z, z0; | ||
168 | |||
169 | z0 = disks - 3; /* Highest data disk */ | ||
170 | p = dptr[z0+1]; /* XOR parity */ | ||
171 | q = dptr[z0+2]; /* RS syndrome */ | ||
172 | |||
173 | kernel_fpu_begin(); | ||
174 | |||
175 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
176 | asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ | ||
177 | asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ | ||
178 | asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ | ||
179 | asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ | ||
180 | asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ | ||
181 | asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ | ||
182 | asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ | ||
183 | asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ | ||
184 | asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ | ||
185 | |||
186 | for (d = 0; d < bytes; d += 128) { | ||
187 | for (z = z0; z >= 0; z--) { | ||
188 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
189 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); | ||
190 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); | ||
191 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); | ||
192 | asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); | ||
193 | asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); | ||
194 | asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); | ||
195 | asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); | ||
196 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
197 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
198 | asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); | ||
199 | asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); | ||
200 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
201 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
202 | asm volatile("vpand %ymm0,%ymm13,%ymm13"); | ||
203 | asm volatile("vpand %ymm0,%ymm15,%ymm15"); | ||
204 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
205 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
206 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
207 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
208 | asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); | ||
209 | asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); | ||
210 | asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); | ||
211 | asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); | ||
212 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
213 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
214 | asm volatile("vpxor %ymm13,%ymm10,%ymm10"); | ||
215 | asm volatile("vpxor %ymm15,%ymm11,%ymm11"); | ||
216 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
217 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
218 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
219 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
220 | } | ||
221 | asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); | ||
222 | asm volatile("vpxor %ymm2,%ymm2,%ymm2"); | ||
223 | asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); | ||
224 | asm volatile("vpxor %ymm3,%ymm3,%ymm3"); | ||
225 | asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); | ||
226 | asm volatile("vpxor %ymm10,%ymm10,%ymm10"); | ||
227 | asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); | ||
228 | asm volatile("vpxor %ymm11,%ymm11,%ymm11"); | ||
229 | asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); | ||
230 | asm volatile("vpxor %ymm4,%ymm4,%ymm4"); | ||
231 | asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); | ||
232 | asm volatile("vpxor %ymm6,%ymm6,%ymm6"); | ||
233 | asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); | ||
234 | asm volatile("vpxor %ymm12,%ymm12,%ymm12"); | ||
235 | asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); | ||
236 | asm volatile("vpxor %ymm14,%ymm14,%ymm14"); | ||
237 | } | ||
238 | |||
239 | asm volatile("sfence" : : : "memory"); | ||
240 | kernel_fpu_end(); | ||
241 | } | ||
242 | |||
243 | const struct raid6_calls raid6_avx2x4 = { | ||
244 | raid6_avx24_gen_syndrome, | ||
245 | raid6_have_avx2, | ||
246 | "avx2x4", | ||
247 | 1 /* Has cache hints */ | ||
248 | }; | ||
249 | #endif | ||
250 | |||
251 | #endif /* CONFIG_AS_AVX2 */ | ||
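
The vpcmpgtb/vpaddb/vpand/vpxor sequence in each kernel above is a byte-wise
multiply-by-2 in GF(2^8) with the RAID-6 reduction polynomial 0x11d (hence
the 0x1d constant). A scalar sketch of the per-byte operation each
inner-loop step vectorizes (illustrative; the generic versions live in the
unrolled int*.c files):

	/* q = gf_mul2(q) ^ data implements Horner's rule for the Q syndrome;
	 * p ^= data is the plain XOR parity. */
	static inline unsigned char gf_mul2(unsigned char q)
	{
		unsigned char mask = (q & 0x80) ? 0xff : 0x00;	/* vpcmpgtb vs. zero */

		return (unsigned char)(q << 1) ^ (mask & 0x1d);	/* vpaddb/vpand/vpxor */
	}
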
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 279347f23094..590c71c9e200 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c | |||
@@ -16,7 +16,7 @@ | |||
16 | * MMX implementation of RAID-6 syndrome functions | 16 | * MMX implementation of RAID-6 syndrome functions |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #if defined(__i386__) && !defined(__arch_um__) | 19 | #ifdef CONFIG_X86_32 |
20 | 20 | ||
21 | #include <linux/raid/pq.h> | 21 | #include <linux/raid/pq.h> |
22 | #include "x86.h" | 22 | #include "x86.h" |
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c new file mode 100644 index 000000000000..e1eea433a493 --- /dev/null +++ b/lib/raid6/recov_avx2.c | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 Intel Corporation | ||
3 | * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | |||
11 | #ifdef CONFIG_AS_AVX2 | ||
12 | |||
13 | #include <linux/raid/pq.h> | ||
14 | #include "x86.h" | ||
15 | |||
16 | static int raid6_has_avx2(void) | ||
17 | { | ||
18 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
19 | boot_cpu_has(X86_FEATURE_AVX); | ||
20 | } | ||
21 | |||
22 | static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, | ||
23 | int failb, void **ptrs) | ||
24 | { | ||
25 | u8 *p, *q, *dp, *dq; | ||
26 | const u8 *pbmul; /* P multiplier table for B data */ | ||
27 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
28 | const u8 x0f = 0x0f; | ||
29 | |||
30 | p = (u8 *)ptrs[disks-2]; | ||
31 | q = (u8 *)ptrs[disks-1]; | ||
32 | |||
33 | /* Compute syndrome with zero for the missing data pages | ||
34 | Use the dead data pages as temporary storage for | ||
35 | delta p and delta q */ | ||
36 | dp = (u8 *)ptrs[faila]; | ||
37 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
38 | ptrs[disks-2] = dp; | ||
39 | dq = (u8 *)ptrs[failb]; | ||
40 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
41 | ptrs[disks-1] = dq; | ||
42 | |||
43 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
44 | |||
45 | /* Restore pointer table */ | ||
46 | ptrs[faila] = dp; | ||
47 | ptrs[failb] = dq; | ||
48 | ptrs[disks-2] = p; | ||
49 | ptrs[disks-1] = q; | ||
50 | |||
51 | /* Now, pick the proper data tables */ | ||
52 | pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; | ||
53 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ | ||
54 | raid6_gfexp[failb]]]; | ||
55 | |||
56 | kernel_fpu_begin(); | ||
57 | |||
58 | /* ymm7 = x0f[16] */ | ||
59 | asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); | ||
60 | |||
61 | while (bytes) { | ||
62 | #ifdef CONFIG_X86_64 | ||
63 | asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0])); | ||
64 | asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32])); | ||
65 | asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0])); | ||
66 | asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32])); | ||
67 | asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0])); | ||
68 | asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32])); | ||
69 | asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0])); | ||
70 | asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32])); | ||
71 | |||
72 | /* | ||
73 | * 1 = dq[0] ^ q[0] | ||
74 | * 9 = dq[32] ^ q[32] | ||
75 | * 0 = dp[0] ^ p[0] | ||
76 | * 8 = dp[32] ^ p[32] | ||
77 | */ | ||
78 | |||
79 | asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); | ||
80 | asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); | ||
81 | |||
82 | asm volatile("vpsraw $4, %ymm1, %ymm3"); | ||
83 | asm volatile("vpsraw $4, %ymm9, %ymm12"); | ||
84 | asm volatile("vpand %ymm7, %ymm1, %ymm1"); | ||
85 | asm volatile("vpand %ymm7, %ymm9, %ymm9"); | ||
86 | asm volatile("vpand %ymm7, %ymm3, %ymm3"); | ||
87 | asm volatile("vpand %ymm7, %ymm12, %ymm12"); | ||
88 | asm volatile("vpshufb %ymm9, %ymm4, %ymm14"); | ||
89 | asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); | ||
90 | asm volatile("vpshufb %ymm12, %ymm5, %ymm15"); | ||
91 | asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); | ||
92 | asm volatile("vpxor %ymm14, %ymm15, %ymm15"); | ||
93 | asm volatile("vpxor %ymm4, %ymm5, %ymm5"); | ||
94 | |||
95 | /* | ||
96 | * 5 = qx[0] | ||
97 | * 15 = qx[32] | ||
98 | */ | ||
99 | |||
100 | asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); | ||
101 | asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); | ||
102 | asm volatile("vpsraw $4, %ymm0, %ymm2"); | ||
103 | asm volatile("vpsraw $4, %ymm8, %ymm6"); | ||
104 | asm volatile("vpand %ymm7, %ymm0, %ymm3"); | ||
105 | asm volatile("vpand %ymm7, %ymm8, %ymm14"); | ||
106 | asm volatile("vpand %ymm7, %ymm2, %ymm2"); | ||
107 | asm volatile("vpand %ymm7, %ymm6, %ymm6"); | ||
108 | asm volatile("vpshufb %ymm14, %ymm4, %ymm12"); | ||
109 | asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); | ||
110 | asm volatile("vpshufb %ymm6, %ymm1, %ymm13"); | ||
111 | asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); | ||
112 | asm volatile("vpxor %ymm4, %ymm1, %ymm1"); | ||
113 | asm volatile("vpxor %ymm12, %ymm13, %ymm13"); | ||
114 | |||
115 | /* | ||
116 | * 1 = pbmul[px[0]] | ||
117 | * 13 = pbmul[px[32]] | ||
118 | */ | ||
119 | asm volatile("vpxor %ymm5, %ymm1, %ymm1"); | ||
120 | asm volatile("vpxor %ymm15, %ymm13, %ymm13"); | ||
121 | |||
122 | /* | ||
123 | * 1 = db = DQ | ||
124 | * 13 = db[32] = DQ[32] | ||
125 | */ | ||
126 | asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); | ||
127 | asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32])); | ||
128 | asm volatile("vpxor %ymm1, %ymm0, %ymm0"); | ||
129 | asm volatile("vpxor %ymm13, %ymm8, %ymm8"); | ||
130 | |||
131 | asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); | ||
132 | asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32])); | ||
133 | |||
134 | bytes -= 64; | ||
135 | p += 64; | ||
136 | q += 64; | ||
137 | dp += 64; | ||
138 | dq += 64; | ||
139 | #else | ||
140 | asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q)); | ||
141 | asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p)); | ||
142 | asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq)); | ||
143 | asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp)); | ||
144 | |||
145 | /* 1 = dq ^ q; 0 = dp ^ p */ | ||
146 | |||
147 | asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); | ||
148 | asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); | ||
149 | |||
150 | /* | ||
151 | * 1 = dq ^ q | ||
152 | * 3 = (dq ^ q) >> 4 | ||
153 | */ | ||
154 | asm volatile("vpsraw $4, %ymm1, %ymm3"); | ||
155 | asm volatile("vpand %ymm7, %ymm1, %ymm1"); | ||
156 | asm volatile("vpand %ymm7, %ymm3, %ymm3"); | ||
157 | asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); | ||
158 | asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); | ||
159 | asm volatile("vpxor %ymm4, %ymm5, %ymm5"); | ||
160 | |||
161 | /* 5 = qx */ | ||
162 | |||
163 | asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); | ||
164 | asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); | ||
165 | |||
166 | asm volatile("vpsraw $4, %ymm0, %ymm2"); | ||
167 | asm volatile("vpand %ymm7, %ymm0, %ymm3"); | ||
168 | asm volatile("vpand %ymm7, %ymm2, %ymm2"); | ||
169 | asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); | ||
170 | asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); | ||
171 | asm volatile("vpxor %ymm4, %ymm1, %ymm1"); | ||
172 | |||
173 | /* 1 = pbmul[px] */ | ||
174 | asm volatile("vpxor %ymm5, %ymm1, %ymm1"); | ||
175 | /* 1 = db = DQ */ | ||
176 | asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); | ||
177 | |||
178 | asm volatile("vpxor %ymm1, %ymm0, %ymm0"); | ||
179 | asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); | ||
180 | |||
181 | bytes -= 32; | ||
182 | p += 32; | ||
183 | q += 32; | ||
184 | dp += 32; | ||
185 | dq += 32; | ||
186 | #endif | ||
187 | } | ||
188 | |||
189 | kernel_fpu_end(); | ||
190 | } | ||
191 | |||
192 | static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, | ||
193 | void **ptrs) | ||
194 | { | ||
195 | u8 *p, *q, *dq; | ||
196 | const u8 *qmul; /* Q multiplier table */ | ||
197 | const u8 x0f = 0x0f; | ||
198 | |||
199 | p = (u8 *)ptrs[disks-2]; | ||
200 | q = (u8 *)ptrs[disks-1]; | ||
201 | |||
202 | /* Compute syndrome with zero for the missing data page | ||
203 | Use the dead data page as temporary storage for delta q */ | ||
204 | dq = (u8 *)ptrs[faila]; | ||
205 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
206 | ptrs[disks-1] = dq; | ||
207 | |||
208 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
209 | |||
210 | /* Restore pointer table */ | ||
211 | ptrs[faila] = dq; | ||
212 | ptrs[disks-1] = q; | ||
213 | |||
214 | /* Now, pick the proper data tables */ | ||
215 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
216 | |||
217 | kernel_fpu_begin(); | ||
218 | |||
219 | asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); | ||
220 | |||
221 | while (bytes) { | ||
222 | #ifdef CONFIG_X86_64 | ||
223 | asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); | ||
224 | asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32])); | ||
225 | asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); | ||
226 | asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32])); | ||
227 | |||
228 | /* | ||
229 | * 3 = q[0] ^ dq[0] | ||
230 | * 8 = q[32] ^ dq[32] | ||
231 | */ | ||
232 | asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); | ||
233 | asm volatile("vmovapd %ymm0, %ymm13"); | ||
234 | asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); | ||
235 | asm volatile("vmovapd %ymm1, %ymm14"); | ||
236 | |||
237 | asm volatile("vpsraw $4, %ymm3, %ymm6"); | ||
238 | asm volatile("vpsraw $4, %ymm8, %ymm12"); | ||
239 | asm volatile("vpand %ymm7, %ymm3, %ymm3"); | ||
240 | asm volatile("vpand %ymm7, %ymm8, %ymm8"); | ||
241 | asm volatile("vpand %ymm7, %ymm6, %ymm6"); | ||
242 | asm volatile("vpand %ymm7, %ymm12, %ymm12"); | ||
243 | asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); | ||
244 | asm volatile("vpshufb %ymm8, %ymm13, %ymm13"); | ||
245 | asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); | ||
246 | asm volatile("vpshufb %ymm12, %ymm14, %ymm14"); | ||
247 | asm volatile("vpxor %ymm0, %ymm1, %ymm1"); | ||
248 | asm volatile("vpxor %ymm13, %ymm14, %ymm14"); | ||
249 | |||
250 | /* | ||
251 | * 1 = qmul[q[0] ^ dq[0]] | ||
252 | * 14 = qmul[q[32] ^ dq[32]] | ||
253 | */ | ||
254 | asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); | ||
255 | asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32])); | ||
256 | asm volatile("vpxor %ymm1, %ymm2, %ymm2"); | ||
257 | asm volatile("vpxor %ymm14, %ymm12, %ymm12"); | ||
258 | |||
259 | /* | ||
260 | * 2 = p[0] ^ qmul[q[0] ^ dq[0]] | ||
261 | * 12 = p[32] ^ qmul[q[32] ^ dq[32]] | ||
262 | */ | ||
263 | |||
264 | asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); | ||
265 | asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32])); | ||
266 | asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); | ||
267 | asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32])); | ||
268 | |||
269 | bytes -= 64; | ||
270 | p += 64; | ||
271 | q += 64; | ||
272 | dq += 64; | ||
273 | #else | ||
274 | asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); | ||
275 | asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); | ||
276 | |||
277 | /* 3 = q ^ dq */ | ||
278 | |||
279 | asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); | ||
280 | asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); | ||
281 | |||
282 | asm volatile("vpsraw $4, %ymm3, %ymm6"); | ||
283 | asm volatile("vpand %ymm7, %ymm3, %ymm3"); | ||
284 | asm volatile("vpand %ymm7, %ymm6, %ymm6"); | ||
285 | asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); | ||
286 | asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); | ||
287 | asm volatile("vpxor %ymm0, %ymm1, %ymm1"); | ||
288 | |||
289 | /* 1 = qmul[q ^ dq] */ | ||
290 | |||
291 | asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); | ||
292 | asm volatile("vpxor %ymm1, %ymm2, %ymm2"); | ||
293 | |||
294 | /* 2 = p ^ qmul[q ^ dq] */ | ||
295 | |||
296 | asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); | ||
297 | asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); | ||
298 | |||
299 | bytes -= 32; | ||
300 | p += 32; | ||
301 | q += 32; | ||
302 | dq += 32; | ||
303 | #endif | ||
304 | } | ||
305 | |||
306 | kernel_fpu_end(); | ||
307 | } | ||
308 | |||
309 | const struct raid6_recov_calls raid6_recov_avx2 = { | ||
310 | .data2 = raid6_2data_recov_avx2, | ||
311 | .datap = raid6_datap_recov_avx2, | ||
312 | .valid = raid6_has_avx2, | ||
313 | #ifdef CONFIG_X86_64 | ||
314 | .name = "avx2x2", | ||
315 | #else | ||
316 | .name = "avx2x1", | ||
317 | #endif | ||
318 | .priority = 2, | ||
319 | }; | ||
320 | |||
321 | #else | ||
322 | #warning "your version of binutils lacks AVX2 support" | ||
323 | #endif | ||
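
For reference, the nibble-wise vpshufb lookups above implement the standard
two-failure reconstruction. A scalar per-byte sketch mirroring the generic
path in lib/raid6/recov.c (after the zero-page syndrome pass, dp/dq hold
P+Pxy and Q+Qxy; raid6_gfmul and friends are the existing GF(2^8) tables):

	const u8 *pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
	const u8 *qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila] ^
						  raid6_gfexp[failb]]];
	u8 px, qx, db;

	while (bytes--) {
		px    = *p ^ *dp;
		qx    = qmul[*q ^ *dq];
		*dq++ = db = pbmul[px] ^ qx;	/* reconstructed B */
		*dp++ = db ^ px;		/* reconstructed A */
		p++; q++;
	}
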
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index ecb710c0b4d9..a9168328f03b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c | |||
@@ -7,8 +7,6 @@ | |||
7 | * of the License. | 7 | * of the License. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
11 | |||
12 | #include <linux/raid/pq.h> | 10 | #include <linux/raid/pq.h> |
13 | #include "x86.h" | 11 | #include "x86.h" |
14 | 12 | ||
@@ -332,5 +330,3 @@ const struct raid6_recov_calls raid6_recov_ssse3 = { | |||
332 | #endif | 330 | #endif |
333 | .priority = 1, | 331 | .priority = 1, |
334 | }; | 332 | }; |
335 | |||
336 | #endif | ||
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index 10dd91948c07..f76297139445 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c | |||
@@ -21,7 +21,7 @@ | |||
21 | * worthwhile as a separate implementation. | 21 | * worthwhile as a separate implementation. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #if defined(__i386__) && !defined(__arch_um__) | 24 | #ifdef CONFIG_X86_32 |
25 | 25 | ||
26 | #include <linux/raid/pq.h> | 26 | #include <linux/raid/pq.h> |
27 | #include "x86.h" | 27 | #include "x86.h" |
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index bc2d57daa589..85b82c85f28e 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c | |||
@@ -17,8 +17,6 @@ | |||
17 | * | 17 | * |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
21 | |||
22 | #include <linux/raid/pq.h> | 20 | #include <linux/raid/pq.h> |
23 | #include "x86.h" | 21 | #include "x86.h" |
24 | 22 | ||
@@ -159,9 +157,7 @@ const struct raid6_calls raid6_sse2x2 = { | |||
159 | 1 /* Has cache hints */ | 157 | 1 /* Has cache hints */ |
160 | }; | 158 | }; |
161 | 159 | ||
162 | #endif | 160 | #ifdef CONFIG_X86_64 |
163 | |||
164 | #if defined(__x86_64__) && !defined(__arch_um__) | ||
165 | 161 | ||
166 | /* | 162 | /* |
167 | * Unrolled-by-4 SSE2 implementation | 163 | * Unrolled-by-4 SSE2 implementation |
@@ -259,4 +255,4 @@ const struct raid6_calls raid6_sse2x4 = { | |||
259 | 1 /* Has cache hints */ | 255 | 1 /* Has cache hints */ |
260 | }; | 256 | }; |
261 | 257 | ||
262 | #endif | 258 | #endif /* CONFIG_X86_64 */ |
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index c76151d94764..087332dbf8aa 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile | |||
@@ -10,6 +10,31 @@ LD = ld | |||
10 | AWK = awk -f | 10 | AWK = awk -f |
11 | AR = ar | 11 | AR = ar |
12 | RANLIB = ranlib | 12 | RANLIB = ranlib |
13 | OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o | ||
14 | |||
15 | ARCH := $(shell uname -m 2>/dev/null | sed -e s/i.86/i386/) | ||
16 | ifeq ($(ARCH),i386) | ||
17 | CFLAGS += -DCONFIG_X86_32 | ||
18 | IS_X86 = yes | ||
19 | endif | ||
20 | ifeq ($(ARCH),x86_64) | ||
21 | CFLAGS += -DCONFIG_X86_64 | ||
22 | IS_X86 = yes | ||
23 | endif | ||
24 | |||
25 | ifeq ($(IS_X86),yes) | ||
26 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o | ||
27 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ | ||
28 | gcc -c -x assembler - >&/dev/null && \ | ||
29 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) | ||
30 | else | ||
31 | HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ | ||
32 | gcc -c -x c - >&/dev/null && \ | ||
33 | rm ./-.o && echo yes) | ||
34 | ifeq ($(HAS_ALTIVEC),yes) | ||
35 | OBJS += altivec1.o altivec2.o altivec4.o altivec8.o | ||
36 | endif | ||
37 | endif | ||
13 | 38 | ||
14 | .c.o: | 39 | .c.o: |
15 | $(CC) $(CFLAGS) -c -o $@ $< | 40 | $(CC) $(CFLAGS) -c -o $@ $< |
@@ -22,9 +47,7 @@ RANLIB = ranlib | |||
22 | 47 | ||
23 | all: raid6.a raid6test | 48 | all: raid6.a raid6test |
24 | 49 | ||
25 | raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ | 50 | raid6.a: $(OBJS) |
26 | altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ | ||
27 | tables.o | ||
28 | rm -f $@ | 51 | rm -f $@ |
29 | $(AR) cq $@ $^ | 52 | $(AR) cq $@ $^ |
30 | $(RANLIB) $@ | 53 | $(RANLIB) $@ |
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index d55d63232c55..b7595484a815 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h | |||
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void) | |||
45 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 45 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ | 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ |
47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | ||
48 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | 49 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ |
49 | 50 | ||
50 | /* Should work well enough on modern CPUs for testing */ | 51 | /* Should work well enough on modern CPUs for testing */ |
51 | static inline int boot_cpu_has(int flag) | 52 | static inline int boot_cpu_has(int flag) |
52 | { | 53 | { |
53 | u32 eax = (flag & 0x20) ? 0x80000001 : 1; | 54 | u32 eax, ebx, ecx, edx; |
54 | u32 ecx, edx; | 55 | |
56 | eax = (flag & 0x100) ? 7 : | ||
57 | (flag & 0x20) ? 0x80000001 : 1; | ||
58 | ecx = 0; | ||
55 | 59 | ||
56 | asm volatile("cpuid" | 60 | asm volatile("cpuid" |
57 | : "+a" (eax), "=d" (edx), "=c" (ecx) | 61 | : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); |
58 | : : "ebx"); | ||
59 | 62 | ||
60 | return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; | 63 | return ((flag & 0x100 ? ebx : |
64 | (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1; | ||
61 | } | 65 | } |
62 | 66 | ||
63 | #endif /* ndef __KERNEL__ */ | 67 | #endif /* ndef __KERNEL__ */ |
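
The flag encoding packs (cpuid leaf, register, bit) into one integer: bit
0x20 selects leaf 0x80000001, the new bit 0x100 selects leaf 7 with its
feature word in ebx, and bit 0x80 picks ecx over edx. Worked through for the
new AVX2 bit in a small userspace sketch (test-harness only, matching the
boot_cpu_has() decode above):

	#include <stdio.h>

	int main(void)
	{
		int flag = 9*32 + 5;	/* X86_FEATURE_AVX2 = 0x125 */

		/* 0x125 & 0x100 -> leaf 7; word in ebx; bit 0x125 & 31 = 5 */
		printf("leaf %#x, reg %s, bit %d\n",
		       (flag & 0x100) ? 7 : (flag & 0x20) ? 0x80000001 : 1,
		       (flag & 0x100) ? "ebx" : (flag & 0x80) ? "ecx" : "edx",
		       flag & 31);
		return 0;
	}
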
diff --git a/lib/random32.c b/lib/random32.c index 938bde5876ac..52280d5526be 100644 --- a/lib/random32.c +++ b/lib/random32.c | |||
@@ -42,13 +42,13 @@ | |||
42 | static DEFINE_PER_CPU(struct rnd_state, net_rand_state); | 42 | static DEFINE_PER_CPU(struct rnd_state, net_rand_state); |
43 | 43 | ||
44 | /** | 44 | /** |
45 | * prandom32 - seeded pseudo-random number generator. | 45 | * prandom_u32_state - seeded pseudo-random number generator. |
46 | * @state: pointer to state structure holding seeded state. | 46 | * @state: pointer to state structure holding seeded state. |
47 | * | 47 | * |
48 | * This is used for pseudo-randomness with no outside seeding. | 48 | * This is used for pseudo-randomness with no outside seeding. |
49 | * For more random results, use random32(). | 49 | * For more random results, use prandom_u32(). |
50 | */ | 50 | */ |
51 | u32 prandom32(struct rnd_state *state) | 51 | u32 prandom_u32_state(struct rnd_state *state) |
52 | { | 52 | { |
53 | #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) | 53 | #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) |
54 | 54 | ||
@@ -58,32 +58,81 @@ u32 prandom32(struct rnd_state *state) | |||
58 | 58 | ||
59 | return (state->s1 ^ state->s2 ^ state->s3); | 59 | return (state->s1 ^ state->s2 ^ state->s3); |
60 | } | 60 | } |
61 | EXPORT_SYMBOL(prandom32); | 61 | EXPORT_SYMBOL(prandom_u32_state); |
62 | 62 | ||
63 | /** | 63 | /** |
64 | * random32 - pseudo random number generator | 64 | * prandom_u32 - pseudo random number generator |
65 | * | 65 | * |
66 | * A 32 bit pseudo-random number is generated using a fast | 66 | * A 32 bit pseudo-random number is generated using a fast |
67 | * algorithm suitable for simulation. This algorithm is NOT | 67 | * algorithm suitable for simulation. This algorithm is NOT |
68 | * considered safe for cryptographic use. | 68 | * considered safe for cryptographic use. |
69 | */ | 69 | */ |
70 | u32 random32(void) | 70 | u32 prandom_u32(void) |
71 | { | 71 | { |
72 | unsigned long r; | 72 | unsigned long r; |
73 | struct rnd_state *state = &get_cpu_var(net_rand_state); | 73 | struct rnd_state *state = &get_cpu_var(net_rand_state); |
74 | r = prandom32(state); | 74 | r = prandom_u32_state(state); |
75 | put_cpu_var(state); | 75 | put_cpu_var(state); |
76 | return r; | 76 | return r; |
77 | } | 77 | } |
78 | EXPORT_SYMBOL(random32); | 78 | EXPORT_SYMBOL(prandom_u32); |
79 | |||
80 | /** | ||
81 | * prandom_bytes_state - get the requested number of pseudo-random bytes | ||
82 | * | ||
83 | * @state: pointer to state structure holding seeded state. | ||
84 | * @buf: where to copy the pseudo-random bytes to | ||
85 | * @bytes: the requested number of bytes | ||
86 | * | ||
87 | * This is used for pseudo-randomness with no outside seeding. | ||
88 | * For more random results, use prandom_bytes(). | ||
89 | */ | ||
90 | void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) | ||
91 | { | ||
92 | unsigned char *p = buf; | ||
93 | int i; | ||
94 | |||
95 | for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { | ||
96 | u32 random = prandom_u32_state(state); | ||
97 | int j; | ||
98 | |||
99 | for (j = 0; j < sizeof(u32); j++) { | ||
100 | p[i + j] = random; | ||
101 | random >>= BITS_PER_BYTE; | ||
102 | } | ||
103 | } | ||
104 | if (i < bytes) { | ||
105 | u32 random = prandom_u32_state(state); | ||
106 | |||
107 | for (; i < bytes; i++) { | ||
108 | p[i] = random; | ||
109 | random >>= BITS_PER_BYTE; | ||
110 | } | ||
111 | } | ||
112 | } | ||
113 | EXPORT_SYMBOL(prandom_bytes_state); | ||
114 | |||
115 | /** | ||
116 | * prandom_bytes - get the requested number of pseudo-random bytes | ||
117 | * @buf: where to copy the pseudo-random bytes to | ||
118 | * @bytes: the requested number of bytes | ||
119 | */ | ||
120 | void prandom_bytes(void *buf, int bytes) | ||
121 | { | ||
122 | struct rnd_state *state = &get_cpu_var(net_rand_state); | ||
123 | |||
124 | prandom_bytes_state(state, buf, bytes); | ||
125 | put_cpu_var(state); | ||
126 | } | ||
127 | EXPORT_SYMBOL(prandom_bytes); | ||
79 | 128 | ||
80 | /** | 129 | /** |
81 | * srandom32 - add entropy to pseudo random number generator | 130 | * prandom_seed - add entropy to pseudo random number generator |
82 | * @seed: seed value | 131 | * @seed: seed value |
83 | * | 132 | * |
84 | * Add some additional seeding to the random32() pool. | 133 | * Add some additional seeding to the prandom pool. |
85 | */ | 134 | */ |
86 | void srandom32(u32 entropy) | 135 | void prandom_seed(u32 entropy) |
87 | { | 136 | { |
88 | int i; | 137 | int i; |
89 | /* | 138 | /* |
@@ -95,13 +144,13 @@ void srandom32(u32 entropy) | |||
95 | state->s1 = __seed(state->s1 ^ entropy, 1); | 144 | state->s1 = __seed(state->s1 ^ entropy, 1); |
96 | } | 145 | } |
97 | } | 146 | } |
98 | EXPORT_SYMBOL(srandom32); | 147 | EXPORT_SYMBOL(prandom_seed); |
99 | 148 | ||
100 | /* | 149 | /* |
101 | * Generate some initially weak seeding values to allow | 150 | * Generate some initially weak seeding values to allow |
102 | * to start the random32() engine. | 151 | * to start the prandom_u32() engine. |
103 | */ | 152 | */ |
104 | static int __init random32_init(void) | 153 | static int __init prandom_init(void) |
105 | { | 154 | { |
106 | int i; | 155 | int i; |
107 | 156 | ||
@@ -114,22 +163,22 @@ static int __init random32_init(void) | |||
114 | state->s3 = __seed(LCG(state->s2), 15); | 163 | state->s3 = __seed(LCG(state->s2), 15); |
115 | 164 | ||
116 | /* "warm it up" */ | 165 | /* "warm it up" */ |
117 | prandom32(state); | 166 | prandom_u32_state(state); |
118 | prandom32(state); | 167 | prandom_u32_state(state); |
119 | prandom32(state); | 168 | prandom_u32_state(state); |
120 | prandom32(state); | 169 | prandom_u32_state(state); |
121 | prandom32(state); | 170 | prandom_u32_state(state); |
122 | prandom32(state); | 171 | prandom_u32_state(state); |
123 | } | 172 | } |
124 | return 0; | 173 | return 0; |
125 | } | 174 | } |
126 | core_initcall(random32_init); | 175 | core_initcall(prandom_init); |
127 | 176 | ||
128 | /* | 177 | /* |
129 | * Generate better values after random number generator | 178 | * Generate better values after random number generator |
130 | * is fully initialized. | 179 | * is fully initialized. |
131 | */ | 180 | */ |
132 | static int __init random32_reseed(void) | 181 | static int __init prandom_reseed(void) |
133 | { | 182 | { |
134 | int i; | 183 | int i; |
135 | 184 | ||
@@ -143,8 +192,8 @@ static int __init random32_reseed(void) | |||
143 | state->s3 = __seed(seeds[2], 15); | 192 | state->s3 = __seed(seeds[2], 15); |
144 | 193 | ||
145 | /* mix it in */ | 194 | /* mix it in */ |
146 | prandom32(state); | 195 | prandom_u32_state(state); |
147 | } | 196 | } |
148 | return 0; | 197 | return 0; |
149 | } | 198 | } |
150 | late_initcall(random32_reseed); | 199 | late_initcall(prandom_reseed); |
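The random32.c hunks above rename the whole prandom API (prandom32 becomes prandom_u32_state, random32 becomes prandom_u32, srandom32 becomes prandom_seed) and add the prandom_bytes()/prandom_bytes_state() bulk helpers. A minimal sketch of a caller keeping a private, reproducible stream with the renamed interfaces; the module name, seed and buffer size below are illustrative, not part of this patch:

#include <linux/module.h>
#include <linux/random.h>

static struct rnd_state demo_rnd;

static int __init prandom_demo_init(void)
{
        u8 buf[16];

        /* fixed seed: the sequence is reproducible and independent of
         * other prandom_u32() users on the per-cpu state */
        prandom_seed_state(&demo_rnd, 42ULL);
        pr_info("first word: %u\n", prandom_u32_state(&demo_rnd));

        /* new bulk helper: fills buf one u32 at a time, then the tail */
        prandom_bytes_state(&demo_rnd, buf, sizeof(buf));
        return 0;
}
module_init(prandom_demo_init);
MODULE_LICENSE("GPL");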
diff --git a/lib/rbtree.c b/lib/rbtree.c index 4f56a11d67fa..c0e31fe2fabf 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c | |||
@@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root, | |||
194 | } | 194 | } |
195 | } | 195 | } |
196 | 196 | ||
197 | __always_inline void | 197 | /* |
198 | __rb_erase_color(struct rb_node *parent, struct rb_root *root, | 198 | * Inline version for rb_erase() use - we want to be able to inline |
199 | * and eliminate the dummy_rotate callback there | ||
200 | */ | ||
201 | static __always_inline void | ||
202 | ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | ||
199 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) | 203 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) |
200 | { | 204 | { |
201 | struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; | 205 | struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; |
@@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
355 | } | 359 | } |
356 | } | 360 | } |
357 | } | 361 | } |
362 | |||
363 | /* Non-inline version for rb_erase_augmented() use */ | ||
364 | void __rb_erase_color(struct rb_node *parent, struct rb_root *root, | ||
365 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) | ||
366 | { | ||
367 | ____rb_erase_color(parent, root, augment_rotate); | ||
368 | } | ||
358 | EXPORT_SYMBOL(__rb_erase_color); | 369 | EXPORT_SYMBOL(__rb_erase_color); |
359 | 370 | ||
360 | /* | 371 | /* |
@@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color); | |||
380 | 391 | ||
381 | void rb_erase(struct rb_node *node, struct rb_root *root) | 392 | void rb_erase(struct rb_node *node, struct rb_root *root) |
382 | { | 393 | { |
383 | rb_erase_augmented(node, root, &dummy_callbacks); | 394 | struct rb_node *rebalance; |
395 | rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); | ||
396 | if (rebalance) | ||
397 | ____rb_erase_color(rebalance, root, dummy_rotate); | ||
384 | } | 398 | } |
385 | EXPORT_SYMBOL(rb_erase); | 399 | EXPORT_SYMBOL(rb_erase); |
386 | 400 | ||
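The rbtree.c change above is a specialization pattern: one out-of-line __rb_erase_color() remains for rb_erase_augmented() users, while rb_erase() calls an __always_inline worker so the compiler can see that augment_rotate is the no-op dummy_rotate and drop the indirect call entirely. A generic sketch of the same pattern; every name here is invented for illustration:

#include <linux/compiler.h>

static __always_inline int __accumulate(int x, int (*hook)(int))
{
        /* when 'hook' is a compile-time constant this call can be folded */
        return hook(x) + 1;
}

static int nop_hook(int x)
{
        return x;
}

/* out-of-line variant for callers passing an arbitrary hook */
int accumulate(int x, int (*hook)(int))
{
        return __accumulate(x, hook);
}

/* specialized fast path: hook known statically, indirection optimized away */
int accumulate_fast(int x)
{
        return __accumulate(x, nop_hook);
}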
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 268b23951fec..af38aedbd874 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c | |||
@@ -96,8 +96,8 @@ static void init(void) | |||
96 | { | 96 | { |
97 | int i; | 97 | int i; |
98 | for (i = 0; i < NODES; i++) { | 98 | for (i = 0; i < NODES; i++) { |
99 | nodes[i].key = prandom32(&rnd); | 99 | nodes[i].key = prandom_u32_state(&rnd); |
100 | nodes[i].val = prandom32(&rnd); | 100 | nodes[i].val = prandom_u32_state(&rnd); |
101 | } | 101 | } |
102 | } | 102 | } |
103 | 103 | ||
@@ -118,7 +118,7 @@ static void check(int nr_nodes) | |||
118 | { | 118 | { |
119 | struct rb_node *rb; | 119 | struct rb_node *rb; |
120 | int count = 0; | 120 | int count = 0; |
121 | int blacks; | 121 | int blacks = 0; |
122 | u32 prev_key = 0; | 122 | u32 prev_key = 0; |
123 | 123 | ||
124 | for (rb = rb_first(&root); rb; rb = rb_next(rb)) { | 124 | for (rb = rb_first(&root); rb; rb = rb_next(rb)) { |
@@ -155,7 +155,7 @@ static int rbtree_test_init(void) | |||
155 | 155 | ||
156 | printk(KERN_ALERT "rbtree testing"); | 156 | printk(KERN_ALERT "rbtree testing"); |
157 | 157 | ||
158 | prandom32_seed(&rnd, 3141592653589793238ULL); | 158 | prandom_seed_state(&rnd, 3141592653589793238ULL); |
159 | init(); | 159 | init(); |
160 | 160 | ||
161 | time1 = get_cycles(); | 161 | time1 = get_cycles(); |
diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 3675452b23ca..7874b01e816e 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c | |||
@@ -248,7 +248,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, | |||
248 | unsigned int left; | 248 | unsigned int left; |
249 | 249 | ||
250 | #ifndef ARCH_HAS_SG_CHAIN | 250 | #ifndef ARCH_HAS_SG_CHAIN |
251 | BUG_ON(nents > max_ents); | 251 | if (WARN_ON_ONCE(nents > max_ents)) |
252 | return -EINVAL; | ||
252 | #endif | 253 | #endif |
253 | 254 | ||
254 | memset(table, 0, sizeof(*table)); | 255 | memset(table, 0, sizeof(*table)); |
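With the scatterlist.c change above, passing more entries than max_ents on a non-chaining configuration now fails __sg_alloc_table() with -EINVAL instead of crashing the machine via BUG_ON(). Illustrative caller-side effect through the sg_alloc_table() wrapper; the helper name is hypothetical:

#include <linux/scatterlist.h>

static int demo_build_sgt(struct sg_table *table, unsigned int nents)
{
        int ret;

        ret = sg_alloc_table(table, nents, GFP_KERNEL);
        if (ret)        /* now -EINVAL for oversized nents, or -ENOMEM */
                return ret;

        /* ... fill table->sgl and hand it to the device ... */

        sg_free_table(table);
        return 0;
}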
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index f114bf6a8e13..196b06984dec 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
@@ -57,7 +57,7 @@ int swiotlb_force; | |||
57 | * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this | 57 | * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this |
58 | * API. | 58 | * API. |
59 | */ | 59 | */ |
60 | static char *io_tlb_start, *io_tlb_end; | 60 | static phys_addr_t io_tlb_start, io_tlb_end; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * The number of IO TLB blocks (in groups of 64) between io_tlb_start and | 63 | * The number of IO TLB blocks (in groups of 64) between io_tlb_start and |
@@ -70,7 +70,7 @@ static unsigned long io_tlb_nslabs; | |||
70 | */ | 70 | */ |
71 | static unsigned long io_tlb_overflow = 32*1024; | 71 | static unsigned long io_tlb_overflow = 32*1024; |
72 | 72 | ||
73 | static void *io_tlb_overflow_buffer; | 73 | static phys_addr_t io_tlb_overflow_buffer; |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * This is a free list describing the number of free entries available from | 76 | * This is a free list describing the number of free entries available from |
@@ -125,27 +125,38 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, | |||
125 | void swiotlb_print_info(void) | 125 | void swiotlb_print_info(void) |
126 | { | 126 | { |
127 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; | 127 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
128 | phys_addr_t pstart, pend; | 128 | unsigned char *vstart, *vend; |
129 | 129 | ||
130 | pstart = virt_to_phys(io_tlb_start); | 130 | vstart = phys_to_virt(io_tlb_start); |
131 | pend = virt_to_phys(io_tlb_end); | 131 | vend = phys_to_virt(io_tlb_end); |
132 | 132 | ||
133 | printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", | 133 | printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", |
134 | (unsigned long long)pstart, (unsigned long long)pend - 1, | 134 | (unsigned long long)io_tlb_start, |
135 | bytes >> 20, io_tlb_start, io_tlb_end - 1); | 135 | (unsigned long long)io_tlb_end, |
136 | bytes >> 20, vstart, vend - 1); | ||
136 | } | 137 | } |
137 | 138 | ||
138 | void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | 139 | void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
139 | { | 140 | { |
141 | void *v_overflow_buffer; | ||
140 | unsigned long i, bytes; | 142 | unsigned long i, bytes; |
141 | 143 | ||
142 | bytes = nslabs << IO_TLB_SHIFT; | 144 | bytes = nslabs << IO_TLB_SHIFT; |
143 | 145 | ||
144 | io_tlb_nslabs = nslabs; | 146 | io_tlb_nslabs = nslabs; |
145 | io_tlb_start = tlb; | 147 | io_tlb_start = __pa(tlb); |
146 | io_tlb_end = io_tlb_start + bytes; | 148 | io_tlb_end = io_tlb_start + bytes; |
147 | 149 | ||
148 | /* | 150 | /* |
151 | * Get the overflow emergency buffer | ||
152 | */ | ||
153 | v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); | ||
154 | if (!v_overflow_buffer) | ||
155 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); | ||
156 | |||
157 | io_tlb_overflow_buffer = __pa(v_overflow_buffer); | ||
158 | |||
159 | /* | ||
149 | * Allocate and initialize the free list array. This array is used | 160 | * Allocate and initialize the free list array. This array is used |
150 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE | 161 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE |
151 | * between io_tlb_start and io_tlb_end. | 162 | * between io_tlb_start and io_tlb_end. |
@@ -156,12 +167,6 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | |||
156 | io_tlb_index = 0; | 167 | io_tlb_index = 0; |
157 | io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); | 168 | io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
158 | 169 | ||
159 | /* | ||
160 | * Get the overflow emergency buffer | ||
161 | */ | ||
162 | io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); | ||
163 | if (!io_tlb_overflow_buffer) | ||
164 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); | ||
165 | if (verbose) | 170 | if (verbose) |
166 | swiotlb_print_info(); | 171 | swiotlb_print_info(); |
167 | } | 172 | } |
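A pattern note for the swiotlb.c hunks above and below: the globals now hold physical addresses (phys_addr_t), and the code translates only at the boundaries, __pa()/virt_to_phys() when a buffer is recorded and phys_to_virt() when the CPU must touch it. The two hypothetical helpers below just restate that idiom:

static phys_addr_t demo_record(void *vstart)
{
        return virt_to_phys(vstart);    /* store the physical address */
}

static void *demo_access(phys_addr_t pstart)
{
        return phys_to_virt(pstart);    /* convert back for CPU access */
}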
@@ -173,6 +178,7 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | |||
173 | static void __init | 178 | static void __init |
174 | swiotlb_init_with_default_size(size_t default_size, int verbose) | 179 | swiotlb_init_with_default_size(size_t default_size, int verbose) |
175 | { | 180 | { |
181 | unsigned char *vstart; | ||
176 | unsigned long bytes; | 182 | unsigned long bytes; |
177 | 183 | ||
178 | if (!io_tlb_nslabs) { | 184 | if (!io_tlb_nslabs) { |
@@ -185,11 +191,11 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) | |||
185 | /* | 191 | /* |
186 | * Get IO TLB memory from the low pages | 192 | * Get IO TLB memory from the low pages |
187 | */ | 193 | */ |
188 | io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); | 194 | vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); |
189 | if (!io_tlb_start) | 195 | if (!vstart) |
190 | panic("Cannot allocate SWIOTLB buffer"); | 196 | panic("Cannot allocate SWIOTLB buffer"); |
191 | 197 | ||
192 | swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); | 198 | swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose); |
193 | } | 199 | } |
194 | 200 | ||
195 | void __init | 201 | void __init |
@@ -207,6 +213,7 @@ int | |||
207 | swiotlb_late_init_with_default_size(size_t default_size) | 213 | swiotlb_late_init_with_default_size(size_t default_size) |
208 | { | 214 | { |
209 | unsigned long bytes, req_nslabs = io_tlb_nslabs; | 215 | unsigned long bytes, req_nslabs = io_tlb_nslabs; |
216 | unsigned char *vstart = NULL; | ||
210 | unsigned int order; | 217 | unsigned int order; |
211 | int rc = 0; | 218 | int rc = 0; |
212 | 219 | ||
@@ -223,14 +230,14 @@ swiotlb_late_init_with_default_size(size_t default_size) | |||
223 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; | 230 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
224 | 231 | ||
225 | while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { | 232 | while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { |
226 | io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, | 233 | vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, |
227 | order); | 234 | order); |
228 | if (io_tlb_start) | 235 | if (vstart) |
229 | break; | 236 | break; |
230 | order--; | 237 | order--; |
231 | } | 238 | } |
232 | 239 | ||
233 | if (!io_tlb_start) { | 240 | if (!vstart) { |
234 | io_tlb_nslabs = req_nslabs; | 241 | io_tlb_nslabs = req_nslabs; |
235 | return -ENOMEM; | 242 | return -ENOMEM; |
236 | } | 243 | } |
@@ -239,9 +246,9 @@ swiotlb_late_init_with_default_size(size_t default_size) | |||
239 | "for software IO TLB\n", (PAGE_SIZE << order) >> 20); | 246 | "for software IO TLB\n", (PAGE_SIZE << order) >> 20); |
240 | io_tlb_nslabs = SLABS_PER_PAGE << order; | 247 | io_tlb_nslabs = SLABS_PER_PAGE << order; |
241 | } | 248 | } |
242 | rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs); | 249 | rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); |
243 | if (rc) | 250 | if (rc) |
244 | free_pages((unsigned long)io_tlb_start, order); | 251 | free_pages((unsigned long)vstart, order); |
245 | return rc; | 252 | return rc; |
246 | } | 253 | } |
247 | 254 | ||
@@ -249,14 +256,25 @@ int | |||
249 | swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | 256 | swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) |
250 | { | 257 | { |
251 | unsigned long i, bytes; | 258 | unsigned long i, bytes; |
259 | unsigned char *v_overflow_buffer; | ||
252 | 260 | ||
253 | bytes = nslabs << IO_TLB_SHIFT; | 261 | bytes = nslabs << IO_TLB_SHIFT; |
254 | 262 | ||
255 | io_tlb_nslabs = nslabs; | 263 | io_tlb_nslabs = nslabs; |
256 | io_tlb_start = tlb; | 264 | io_tlb_start = virt_to_phys(tlb); |
257 | io_tlb_end = io_tlb_start + bytes; | 265 | io_tlb_end = io_tlb_start + bytes; |
258 | 266 | ||
259 | memset(io_tlb_start, 0, bytes); | 267 | memset(tlb, 0, bytes); |
268 | |||
269 | /* | ||
270 | * Get the overflow emergency buffer | ||
271 | */ | ||
272 | v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, | ||
273 | get_order(io_tlb_overflow)); | ||
274 | if (!v_overflow_buffer) | ||
275 | goto cleanup2; | ||
276 | |||
277 | io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); | ||
260 | 278 | ||
261 | /* | 279 | /* |
262 | * Allocate and initialize the free list array. This array is used | 280 | * Allocate and initialize the free list array. This array is used |
@@ -266,7 +284,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | |||
266 | io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, | 284 | io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, |
267 | get_order(io_tlb_nslabs * sizeof(int))); | 285 | get_order(io_tlb_nslabs * sizeof(int))); |
268 | if (!io_tlb_list) | 286 | if (!io_tlb_list) |
269 | goto cleanup2; | 287 | goto cleanup3; |
270 | 288 | ||
271 | for (i = 0; i < io_tlb_nslabs; i++) | 289 | for (i = 0; i < io_tlb_nslabs; i++) |
272 | io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); | 290 | io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
@@ -277,18 +295,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | |||
277 | get_order(io_tlb_nslabs * | 295 | get_order(io_tlb_nslabs * |
278 | sizeof(phys_addr_t))); | 296 | sizeof(phys_addr_t))); |
279 | if (!io_tlb_orig_addr) | 297 | if (!io_tlb_orig_addr) |
280 | goto cleanup3; | 298 | goto cleanup4; |
281 | 299 | ||
282 | memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); | 300 | memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); |
283 | 301 | ||
284 | /* | ||
285 | * Get the overflow emergency buffer | ||
286 | */ | ||
287 | io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, | ||
288 | get_order(io_tlb_overflow)); | ||
289 | if (!io_tlb_overflow_buffer) | ||
290 | goto cleanup4; | ||
291 | |||
292 | swiotlb_print_info(); | 302 | swiotlb_print_info(); |
293 | 303 | ||
294 | late_alloc = 1; | 304 | late_alloc = 1; |
@@ -296,42 +306,42 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) | |||
296 | return 0; | 306 | return 0; |
297 | 307 | ||
298 | cleanup4: | 308 | cleanup4: |
299 | free_pages((unsigned long)io_tlb_orig_addr, | ||
300 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); | ||
301 | io_tlb_orig_addr = NULL; | ||
302 | cleanup3: | ||
303 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * | 309 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * |
304 | sizeof(int))); | 310 | sizeof(int))); |
305 | io_tlb_list = NULL; | 311 | io_tlb_list = NULL; |
312 | cleanup3: | ||
313 | free_pages((unsigned long)v_overflow_buffer, | ||
314 | get_order(io_tlb_overflow)); | ||
315 | io_tlb_overflow_buffer = 0; | ||
306 | cleanup2: | 316 | cleanup2: |
307 | io_tlb_end = NULL; | 317 | io_tlb_end = 0; |
308 | io_tlb_start = NULL; | 318 | io_tlb_start = 0; |
309 | io_tlb_nslabs = 0; | 319 | io_tlb_nslabs = 0; |
310 | return -ENOMEM; | 320 | return -ENOMEM; |
311 | } | 321 | } |
312 | 322 | ||
313 | void __init swiotlb_free(void) | 323 | void __init swiotlb_free(void) |
314 | { | 324 | { |
315 | if (!io_tlb_overflow_buffer) | 325 | if (!io_tlb_orig_addr) |
316 | return; | 326 | return; |
317 | 327 | ||
318 | if (late_alloc) { | 328 | if (late_alloc) { |
319 | free_pages((unsigned long)io_tlb_overflow_buffer, | 329 | free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), |
320 | get_order(io_tlb_overflow)); | 330 | get_order(io_tlb_overflow)); |
321 | free_pages((unsigned long)io_tlb_orig_addr, | 331 | free_pages((unsigned long)io_tlb_orig_addr, |
322 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); | 332 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); |
323 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * | 333 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * |
324 | sizeof(int))); | 334 | sizeof(int))); |
325 | free_pages((unsigned long)io_tlb_start, | 335 | free_pages((unsigned long)phys_to_virt(io_tlb_start), |
326 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); | 336 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); |
327 | } else { | 337 | } else { |
328 | free_bootmem_late(__pa(io_tlb_overflow_buffer), | 338 | free_bootmem_late(io_tlb_overflow_buffer, |
329 | PAGE_ALIGN(io_tlb_overflow)); | 339 | PAGE_ALIGN(io_tlb_overflow)); |
330 | free_bootmem_late(__pa(io_tlb_orig_addr), | 340 | free_bootmem_late(__pa(io_tlb_orig_addr), |
331 | PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); | 341 | PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
332 | free_bootmem_late(__pa(io_tlb_list), | 342 | free_bootmem_late(__pa(io_tlb_list), |
333 | PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); | 343 | PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); |
334 | free_bootmem_late(__pa(io_tlb_start), | 344 | free_bootmem_late(io_tlb_start, |
335 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); | 345 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
336 | } | 346 | } |
337 | io_tlb_nslabs = 0; | 347 | io_tlb_nslabs = 0; |
@@ -339,21 +349,21 @@ void __init swiotlb_free(void) | |||
339 | 349 | ||
340 | static int is_swiotlb_buffer(phys_addr_t paddr) | 350 | static int is_swiotlb_buffer(phys_addr_t paddr) |
341 | { | 351 | { |
342 | return paddr >= virt_to_phys(io_tlb_start) && | 352 | return paddr >= io_tlb_start && paddr < io_tlb_end; |
343 | paddr < virt_to_phys(io_tlb_end); | ||
344 | } | 353 | } |
345 | 354 | ||
346 | /* | 355 | /* |
347 | * Bounce: copy the swiotlb buffer back to the original dma location | 356 | * Bounce: copy the swiotlb buffer back to the original dma location |
348 | */ | 357 | */ |
349 | void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, | 358 | static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, |
350 | enum dma_data_direction dir) | 359 | size_t size, enum dma_data_direction dir) |
351 | { | 360 | { |
352 | unsigned long pfn = PFN_DOWN(phys); | 361 | unsigned long pfn = PFN_DOWN(orig_addr); |
362 | unsigned char *vaddr = phys_to_virt(tlb_addr); | ||
353 | 363 | ||
354 | if (PageHighMem(pfn_to_page(pfn))) { | 364 | if (PageHighMem(pfn_to_page(pfn))) { |
355 | /* The buffer does not have a mapping. Map it in and copy */ | 365 | /* The buffer does not have a mapping. Map it in and copy */ |
356 | unsigned int offset = phys & ~PAGE_MASK; | 366 | unsigned int offset = orig_addr & ~PAGE_MASK; |
357 | char *buffer; | 367 | char *buffer; |
358 | unsigned int sz = 0; | 368 | unsigned int sz = 0; |
359 | unsigned long flags; | 369 | unsigned long flags; |
@@ -364,32 +374,31 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, | |||
364 | local_irq_save(flags); | 374 | local_irq_save(flags); |
365 | buffer = kmap_atomic(pfn_to_page(pfn)); | 375 | buffer = kmap_atomic(pfn_to_page(pfn)); |
366 | if (dir == DMA_TO_DEVICE) | 376 | if (dir == DMA_TO_DEVICE) |
367 | memcpy(dma_addr, buffer + offset, sz); | 377 | memcpy(vaddr, buffer + offset, sz); |
368 | else | 378 | else |
369 | memcpy(buffer + offset, dma_addr, sz); | 379 | memcpy(buffer + offset, vaddr, sz); |
370 | kunmap_atomic(buffer); | 380 | kunmap_atomic(buffer); |
371 | local_irq_restore(flags); | 381 | local_irq_restore(flags); |
372 | 382 | ||
373 | size -= sz; | 383 | size -= sz; |
374 | pfn++; | 384 | pfn++; |
375 | dma_addr += sz; | 385 | vaddr += sz; |
376 | offset = 0; | 386 | offset = 0; |
377 | } | 387 | } |
388 | } else if (dir == DMA_TO_DEVICE) { | ||
389 | memcpy(vaddr, phys_to_virt(orig_addr), size); | ||
378 | } else { | 390 | } else { |
379 | if (dir == DMA_TO_DEVICE) | 391 | memcpy(phys_to_virt(orig_addr), vaddr, size); |
380 | memcpy(dma_addr, phys_to_virt(phys), size); | ||
381 | else | ||
382 | memcpy(phys_to_virt(phys), dma_addr, size); | ||
383 | } | 392 | } |
384 | } | 393 | } |
385 | EXPORT_SYMBOL_GPL(swiotlb_bounce); | ||
386 | 394 | ||
387 | void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, | 395 | phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, |
388 | phys_addr_t phys, size_t size, | 396 | dma_addr_t tbl_dma_addr, |
389 | enum dma_data_direction dir) | 397 | phys_addr_t orig_addr, size_t size, |
398 | enum dma_data_direction dir) | ||
390 | { | 399 | { |
391 | unsigned long flags; | 400 | unsigned long flags; |
392 | char *dma_addr; | 401 | phys_addr_t tlb_addr; |
393 | unsigned int nslots, stride, index, wrap; | 402 | unsigned int nslots, stride, index, wrap; |
394 | int i; | 403 | int i; |
395 | unsigned long mask; | 404 | unsigned long mask; |
@@ -453,7 +462,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, | |||
453 | io_tlb_list[i] = 0; | 462 | io_tlb_list[i] = 0; |
454 | for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) | 463 | for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) |
455 | io_tlb_list[i] = ++count; | 464 | io_tlb_list[i] = ++count; |
456 | dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); | 465 | tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); |
457 | 466 | ||
458 | /* | 467 | /* |
459 | * Update the indices to avoid searching in the next | 468 | * Update the indices to avoid searching in the next |
@@ -471,7 +480,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, | |||
471 | 480 | ||
472 | not_found: | 481 | not_found: |
473 | spin_unlock_irqrestore(&io_tlb_lock, flags); | 482 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
474 | return NULL; | 483 | return SWIOTLB_MAP_ERROR; |
475 | found: | 484 | found: |
476 | spin_unlock_irqrestore(&io_tlb_lock, flags); | 485 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
477 | 486 | ||
@@ -481,11 +490,11 @@ found: | |||
481 | * needed. | 490 | * needed. |
482 | */ | 491 | */ |
483 | for (i = 0; i < nslots; i++) | 492 | for (i = 0; i < nslots; i++) |
484 | io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); | 493 | io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); |
485 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) | 494 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) |
486 | swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); | 495 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); |
487 | 496 | ||
488 | return dma_addr; | 497 | return tlb_addr; |
489 | } | 498 | } |
490 | EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); | 499 | EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); |
491 | 500 | ||
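swiotlb_tbl_map_single() above now returns a phys_addr_t and reports exhaustion as SWIOTLB_MAP_ERROR instead of a NULL pointer. A sketch of the new calling convention, mirroring the map_single() wrapper further below; the helper is hypothetical, and io_tlb_start is only visible inside this file:

static dma_addr_t demo_map(struct device *hwdev, phys_addr_t phys,
                           size_t size, enum dma_data_direction dir)
{
        dma_addr_t tbl_dma = phys_to_dma(hwdev, io_tlb_start);
        phys_addr_t tlb;

        tlb = swiotlb_tbl_map_single(hwdev, tbl_dma, phys, size, dir);
        if (tlb == SWIOTLB_MAP_ERROR)   /* was: !map */
                return 0;               /* signal failure to the caller */
        return phys_to_dma(hwdev, tlb); /* device-visible bounce address */
}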
@@ -493,11 +502,10 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); | |||
493 | * Allocates bounce buffer and returns its kernel virtual address. | 502 | * Allocates bounce buffer and returns its kernel virtual address. |
494 | */ | 503 | */ |
495 | 504 | ||
496 | static void * | 505 | phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, |
497 | map_single(struct device *hwdev, phys_addr_t phys, size_t size, | 506 | enum dma_data_direction dir) |
498 | enum dma_data_direction dir) | ||
499 | { | 507 | { |
500 | dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); | 508 | dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); |
501 | 509 | ||
502 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); | 510 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); |
503 | } | 511 | } |
@@ -505,20 +513,19 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, | |||
505 | /* | 513 | /* |
506 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. | 514 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. |
507 | */ | 515 | */ |
508 | void | 516 | void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, |
509 | swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, | 517 | size_t size, enum dma_data_direction dir) |
510 | enum dma_data_direction dir) | ||
511 | { | 518 | { |
512 | unsigned long flags; | 519 | unsigned long flags; |
513 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; | 520 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
514 | int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; | 521 | int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; |
515 | phys_addr_t phys = io_tlb_orig_addr[index]; | 522 | phys_addr_t orig_addr = io_tlb_orig_addr[index]; |
516 | 523 | ||
517 | /* | 524 | /* |
518 | * First, sync the memory before unmapping the entry | 525 | * First, sync the memory before unmapping the entry |
519 | */ | 526 | */ |
520 | if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) | 527 | if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) |
521 | swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); | 528 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); |
522 | 529 | ||
523 | /* | 530 | /* |
524 | * Return the buffer to the free list by setting the corresponding | 531 | * Return the buffer to the free list by setting the corresponding |
@@ -547,26 +554,27 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, | |||
547 | } | 554 | } |
548 | EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); | 555 | EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); |
549 | 556 | ||
550 | void | 557 | void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, |
551 | swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, | 558 | size_t size, enum dma_data_direction dir, |
552 | enum dma_data_direction dir, | 559 | enum dma_sync_target target) |
553 | enum dma_sync_target target) | ||
554 | { | 560 | { |
555 | int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; | 561 | int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; |
556 | phys_addr_t phys = io_tlb_orig_addr[index]; | 562 | phys_addr_t orig_addr = io_tlb_orig_addr[index]; |
557 | 563 | ||
558 | phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); | 564 | orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); |
559 | 565 | ||
560 | switch (target) { | 566 | switch (target) { |
561 | case SYNC_FOR_CPU: | 567 | case SYNC_FOR_CPU: |
562 | if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) | 568 | if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) |
563 | swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); | 569 | swiotlb_bounce(orig_addr, tlb_addr, |
570 | size, DMA_FROM_DEVICE); | ||
564 | else | 571 | else |
565 | BUG_ON(dir != DMA_TO_DEVICE); | 572 | BUG_ON(dir != DMA_TO_DEVICE); |
566 | break; | 573 | break; |
567 | case SYNC_FOR_DEVICE: | 574 | case SYNC_FOR_DEVICE: |
568 | if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) | 575 | if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) |
569 | swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); | 576 | swiotlb_bounce(orig_addr, tlb_addr, |
577 | size, DMA_TO_DEVICE); | ||
570 | else | 578 | else |
571 | BUG_ON(dir != DMA_FROM_DEVICE); | 579 | BUG_ON(dir != DMA_FROM_DEVICE); |
572 | break; | 580 | break; |
@@ -589,12 +597,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
589 | dma_mask = hwdev->coherent_dma_mask; | 597 | dma_mask = hwdev->coherent_dma_mask; |
590 | 598 | ||
591 | ret = (void *)__get_free_pages(flags, order); | 599 | ret = (void *)__get_free_pages(flags, order); |
592 | if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { | 600 | if (ret) { |
593 | /* | 601 | dev_addr = swiotlb_virt_to_bus(hwdev, ret); |
594 | * The allocated memory isn't reachable by the device. | 602 | if (dev_addr + size - 1 > dma_mask) { |
595 | */ | 603 | /* |
596 | free_pages((unsigned long) ret, order); | 604 | * The allocated memory isn't reachable by the device. |
597 | ret = NULL; | 605 | */ |
606 | free_pages((unsigned long) ret, order); | ||
607 | ret = NULL; | ||
608 | } | ||
598 | } | 609 | } |
599 | if (!ret) { | 610 | if (!ret) { |
600 | /* | 611 | /* |
@@ -602,25 +613,29 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
602 | * GFP_DMA memory; fall back on map_single(), which | 613 | * GFP_DMA memory; fall back on map_single(), which |
603 | * will grab memory from the lowest available address range. | 614 | * will grab memory from the lowest available address range. |
604 | */ | 615 | */ |
605 | ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); | 616 | phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); |
606 | if (!ret) | 617 | if (paddr == SWIOTLB_MAP_ERROR) |
607 | return NULL; | 618 | return NULL; |
608 | } | ||
609 | 619 | ||
610 | memset(ret, 0, size); | 620 | ret = phys_to_virt(paddr); |
611 | dev_addr = swiotlb_virt_to_bus(hwdev, ret); | 621 | dev_addr = phys_to_dma(hwdev, paddr); |
612 | 622 | ||
613 | /* Confirm address can be DMA'd by device */ | 623 | /* Confirm address can be DMA'd by device */ |
614 | if (dev_addr + size - 1 > dma_mask) { | 624 | if (dev_addr + size - 1 > dma_mask) { |
615 | printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", | 625 | printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", |
616 | (unsigned long long)dma_mask, | 626 | (unsigned long long)dma_mask, |
617 | (unsigned long long)dev_addr); | 627 | (unsigned long long)dev_addr); |
618 | 628 | ||
619 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ | 629 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ |
620 | swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); | 630 | swiotlb_tbl_unmap_single(hwdev, paddr, |
621 | return NULL; | 631 | size, DMA_TO_DEVICE); |
632 | return NULL; | ||
633 | } | ||
622 | } | 634 | } |
635 | |||
623 | *dma_handle = dev_addr; | 636 | *dma_handle = dev_addr; |
637 | memset(ret, 0, size); | ||
638 | |||
624 | return ret; | 639 | return ret; |
625 | } | 640 | } |
626 | EXPORT_SYMBOL(swiotlb_alloc_coherent); | 641 | EXPORT_SYMBOL(swiotlb_alloc_coherent); |
@@ -636,7 +651,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, | |||
636 | free_pages((unsigned long)vaddr, get_order(size)); | 651 | free_pages((unsigned long)vaddr, get_order(size)); |
637 | else | 652 | else |
638 | /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ | 653 | /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ |
639 | swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); | 654 | swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); |
640 | } | 655 | } |
641 | EXPORT_SYMBOL(swiotlb_free_coherent); | 656 | EXPORT_SYMBOL(swiotlb_free_coherent); |
642 | 657 | ||
@@ -677,9 +692,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
677 | enum dma_data_direction dir, | 692 | enum dma_data_direction dir, |
678 | struct dma_attrs *attrs) | 693 | struct dma_attrs *attrs) |
679 | { | 694 | { |
680 | phys_addr_t phys = page_to_phys(page) + offset; | 695 | phys_addr_t map, phys = page_to_phys(page) + offset; |
681 | dma_addr_t dev_addr = phys_to_dma(dev, phys); | 696 | dma_addr_t dev_addr = phys_to_dma(dev, phys); |
682 | void *map; | ||
683 | 697 | ||
684 | BUG_ON(dir == DMA_NONE); | 698 | BUG_ON(dir == DMA_NONE); |
685 | /* | 699 | /* |
@@ -690,23 +704,19 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
690 | if (dma_capable(dev, dev_addr, size) && !swiotlb_force) | 704 | if (dma_capable(dev, dev_addr, size) && !swiotlb_force) |
691 | return dev_addr; | 705 | return dev_addr; |
692 | 706 | ||
693 | /* | 707 | /* Oh well, have to allocate and map a bounce buffer. */ |
694 | * Oh well, have to allocate and map a bounce buffer. | ||
695 | */ | ||
696 | map = map_single(dev, phys, size, dir); | 708 | map = map_single(dev, phys, size, dir); |
697 | if (!map) { | 709 | if (map == SWIOTLB_MAP_ERROR) { |
698 | swiotlb_full(dev, size, dir, 1); | 710 | swiotlb_full(dev, size, dir, 1); |
699 | map = io_tlb_overflow_buffer; | 711 | return phys_to_dma(dev, io_tlb_overflow_buffer); |
700 | } | 712 | } |
701 | 713 | ||
702 | dev_addr = swiotlb_virt_to_bus(dev, map); | 714 | dev_addr = phys_to_dma(dev, map); |
703 | 715 | ||
704 | /* | 716 | /* Ensure that the address returned is DMA'ble */ |
705 | * Ensure that the address returned is DMA'ble | ||
706 | */ | ||
707 | if (!dma_capable(dev, dev_addr, size)) { | 717 | if (!dma_capable(dev, dev_addr, size)) { |
708 | swiotlb_tbl_unmap_single(dev, map, size, dir); | 718 | swiotlb_tbl_unmap_single(dev, map, size, dir); |
709 | dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer); | 719 | return phys_to_dma(dev, io_tlb_overflow_buffer); |
710 | } | 720 | } |
711 | 721 | ||
712 | return dev_addr; | 722 | return dev_addr; |
@@ -729,7 +739,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, | |||
729 | BUG_ON(dir == DMA_NONE); | 739 | BUG_ON(dir == DMA_NONE); |
730 | 740 | ||
731 | if (is_swiotlb_buffer(paddr)) { | 741 | if (is_swiotlb_buffer(paddr)) { |
732 | swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); | 742 | swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); |
733 | return; | 743 | return; |
734 | } | 744 | } |
735 | 745 | ||
@@ -773,8 +783,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, | |||
773 | BUG_ON(dir == DMA_NONE); | 783 | BUG_ON(dir == DMA_NONE); |
774 | 784 | ||
775 | if (is_swiotlb_buffer(paddr)) { | 785 | if (is_swiotlb_buffer(paddr)) { |
776 | swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, | 786 | swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); |
777 | target); | ||
778 | return; | 787 | return; |
779 | } | 788 | } |
780 | 789 | ||
@@ -831,9 +840,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
831 | 840 | ||
832 | if (swiotlb_force || | 841 | if (swiotlb_force || |
833 | !dma_capable(hwdev, dev_addr, sg->length)) { | 842 | !dma_capable(hwdev, dev_addr, sg->length)) { |
834 | void *map = map_single(hwdev, sg_phys(sg), | 843 | phys_addr_t map = map_single(hwdev, sg_phys(sg), |
835 | sg->length, dir); | 844 | sg->length, dir); |
836 | if (!map) { | 845 | if (map == SWIOTLB_MAP_ERROR) { |
837 | /* Don't panic here, we expect map_sg users | 846 | /* Don't panic here, we expect map_sg users |
838 | to do proper error handling. */ | 847 | to do proper error handling. */ |
839 | swiotlb_full(hwdev, sg->length, dir, 0); | 848 | swiotlb_full(hwdev, sg->length, dir, 0); |
@@ -842,7 +851,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
842 | sgl[0].dma_length = 0; | 851 | sgl[0].dma_length = 0; |
843 | return 0; | 852 | return 0; |
844 | } | 853 | } |
845 | sg->dma_address = swiotlb_virt_to_bus(hwdev, map); | 854 | sg->dma_address = phys_to_dma(hwdev, map); |
846 | } else | 855 | } else |
847 | sg->dma_address = dev_addr; | 856 | sg->dma_address = dev_addr; |
848 | sg->dma_length = sg->length; | 857 | sg->dma_length = sg->length; |
@@ -925,7 +934,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_device); | |||
925 | int | 934 | int |
926 | swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) | 935 | swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) |
927 | { | 936 | { |
928 | return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer)); | 937 | return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer)); |
929 | } | 938 | } |
930 | EXPORT_SYMBOL(swiotlb_dma_mapping_error); | 939 | EXPORT_SYMBOL(swiotlb_dma_mapping_error); |
931 | 940 | ||
@@ -938,6 +947,6 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error); | |||
938 | int | 947 | int |
939 | swiotlb_dma_supported(struct device *hwdev, u64 mask) | 948 | swiotlb_dma_supported(struct device *hwdev, u64 mask) |
940 | { | 949 | { |
941 | return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask; | 950 | return phys_to_dma(hwdev, io_tlb_end - 1) <= mask; |
942 | } | 951 | } |
943 | EXPORT_SYMBOL(swiotlb_dma_supported); | 952 | EXPORT_SYMBOL(swiotlb_dma_supported); |
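Taken together, the overflow-buffer fallback stays transparent after the conversion: a mapping that could not be bounced comes back as the overflow buffer's DMA address, and swiotlb_dma_mapping_error() above is how a caller detects that. An illustrative driver-side check; the helper name is made up:

static int demo_map_page(struct device *dev, struct page *page,
                         unsigned long offset, size_t size)
{
        dma_addr_t dev_addr = swiotlb_map_page(dev, page, offset, size,
                                               DMA_TO_DEVICE, NULL);

        if (swiotlb_dma_mapping_error(dev, dev_addr))
                return -EIO;    /* bounce pool exhausted, nothing mapped */

        /* ... perform DMA, then swiotlb_unmap_page(dev, dev_addr, ...) ... */
        return 0;
}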
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 39c99fea7c03..fab33a9c5318 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
@@ -23,12 +23,12 @@ | |||
23 | #include <linux/ctype.h> | 23 | #include <linux/ctype.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/kallsyms.h> | 25 | #include <linux/kallsyms.h> |
26 | #include <linux/math64.h> | ||
26 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
27 | #include <linux/ioport.h> | 28 | #include <linux/ioport.h> |
28 | #include <net/addrconf.h> | 29 | #include <net/addrconf.h> |
29 | 30 | ||
30 | #include <asm/page.h> /* for PAGE_SIZE */ | 31 | #include <asm/page.h> /* for PAGE_SIZE */ |
31 | #include <asm/div64.h> | ||
32 | #include <asm/sections.h> /* for dereference_function_descriptor() */ | 32 | #include <asm/sections.h> /* for dereference_function_descriptor() */ |
33 | 33 | ||
34 | #include "kstrtox.h" | 34 | #include "kstrtox.h" |
@@ -38,6 +38,8 @@ | |||
38 | * @cp: The start of the string | 38 | * @cp: The start of the string |
39 | * @endp: A pointer to the end of the parsed string will be placed here | 39 | * @endp: A pointer to the end of the parsed string will be placed here |
40 | * @base: The number base to use | 40 | * @base: The number base to use |
41 | * | ||
42 | * This function is obsolete. Please use kstrtoull instead. | ||
41 | */ | 43 | */ |
42 | unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) | 44 | unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) |
43 | { | 45 | { |
@@ -61,6 +63,8 @@ EXPORT_SYMBOL(simple_strtoull); | |||
61 | * @cp: The start of the string | 63 | * @cp: The start of the string |
62 | * @endp: A pointer to the end of the parsed string will be placed here | 64 | * @endp: A pointer to the end of the parsed string will be placed here |
63 | * @base: The number base to use | 65 | * @base: The number base to use |
66 | * | ||
67 | * This function is obsolete. Please use kstrtoul instead. | ||
64 | */ | 68 | */ |
65 | unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) | 69 | unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) |
66 | { | 70 | { |
@@ -73,6 +77,8 @@ EXPORT_SYMBOL(simple_strtoul); | |||
73 | * @cp: The start of the string | 77 | * @cp: The start of the string |
74 | * @endp: A pointer to the end of the parsed string will be placed here | 78 | * @endp: A pointer to the end of the parsed string will be placed here |
75 | * @base: The number base to use | 79 | * @base: The number base to use |
80 | * | ||
81 | * This function is obsolete. Please use kstrtol instead. | ||
76 | */ | 82 | */ |
77 | long simple_strtol(const char *cp, char **endp, unsigned int base) | 83 | long simple_strtol(const char *cp, char **endp, unsigned int base) |
78 | { | 84 | { |
@@ -88,6 +94,8 @@ EXPORT_SYMBOL(simple_strtol); | |||
88 | * @cp: The start of the string | 94 | * @cp: The start of the string |
89 | * @endp: A pointer to the end of the parsed string will be placed here | 95 | * @endp: A pointer to the end of the parsed string will be placed here |
90 | * @base: The number base to use | 96 | * @base: The number base to use |
97 | * | ||
98 | * This function is obsolete. Please use kstrtoll instead. | ||
91 | */ | 99 | */ |
92 | long long simple_strtoll(const char *cp, char **endp, unsigned int base) | 100 | long long simple_strtoll(const char *cp, char **endp, unsigned int base) |
93 | { | 101 | { |
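The new kernel-doc notes above steer callers toward the kstrto*() family. The practical difference, sketched with an invented helper: kstrtoul() rejects trailing garbage and reports overflow with a -errno, where simple_strtoul() silently stops at the first bad character:

#include <linux/kernel.h>

static int demo_parse(const char *buf)
{
        unsigned long val;
        int ret;

        ret = kstrtoul(buf, 10, &val);  /* "123junk" => -EINVAL, "123" => 0 */
        if (ret)
                return ret;

        pr_info("parsed %lu\n", val);
        return 0;
}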
@@ -1485,7 +1493,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) | |||
1485 | num = va_arg(args, long); | 1493 | num = va_arg(args, long); |
1486 | break; | 1494 | break; |
1487 | case FORMAT_TYPE_SIZE_T: | 1495 | case FORMAT_TYPE_SIZE_T: |
1488 | num = va_arg(args, size_t); | 1496 | if (spec.flags & SIGN) |
1497 | num = va_arg(args, ssize_t); | ||
1498 | else | ||
1499 | num = va_arg(args, size_t); | ||
1489 | break; | 1500 | break; |
1490 | case FORMAT_TYPE_PTRDIFF: | 1501 | case FORMAT_TYPE_PTRDIFF: |
1491 | num = va_arg(args, ptrdiff_t); | 1502 | num = va_arg(args, ptrdiff_t); |
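The FORMAT_TYPE_SIZE_T fix above matters where size_t is narrower than long long: reading the argument as ssize_t sign-extends it into num, so a negative value printed with %zd no longer shows up as a huge positive number on 32-bit builds. A small illustration (values in the comment are assumed):

#include <linux/kernel.h>

static void demo_zd(void)
{
        ssize_t err = -EINVAL;  /* -22 */

        /* before the fix, a 32-bit kernel printed 4294967274 here */
        pr_info("read returned %zd\n", err);
}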
@@ -2013,7 +2024,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) | |||
2013 | char digit; | 2024 | char digit; |
2014 | int num = 0; | 2025 | int num = 0; |
2015 | u8 qualifier; | 2026 | u8 qualifier; |
2016 | u8 base; | 2027 | unsigned int base; |
2028 | union { | ||
2029 | long long s; | ||
2030 | unsigned long long u; | ||
2031 | } val; | ||
2017 | s16 field_width; | 2032 | s16 field_width; |
2018 | bool is_sign; | 2033 | bool is_sign; |
2019 | 2034 | ||
@@ -2053,8 +2068,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) | |||
2053 | 2068 | ||
2054 | /* get field width */ | 2069 | /* get field width */ |
2055 | field_width = -1; | 2070 | field_width = -1; |
2056 | if (isdigit(*fmt)) | 2071 | if (isdigit(*fmt)) { |
2057 | field_width = skip_atoi(&fmt); | 2072 | field_width = skip_atoi(&fmt); |
2073 | if (field_width <= 0) | ||
2074 | break; | ||
2075 | } | ||
2058 | 2076 | ||
2059 | /* get conversion qualifier */ | 2077 | /* get conversion qualifier */ |
2060 | qualifier = -1; | 2078 | qualifier = -1; |
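The new field_width guard above makes an explicit zero width (such as "%0d") abort the conversion, and the rewritten hunk below uses the width to trim an over-long number by repeated division. Effect sketched with a hypothetical caller:

#include <linux/kernel.h>

static void demo_width(void)
{
        int year = 0;

        /* only the first four digits are consumed: year == 2023,
         * and scanning continues at "1231" */
        sscanf("20231231", "%4d", &year);
}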
@@ -2154,58 +2172,61 @@ int vsscanf(const char *buf, const char *fmt, va_list args) | |||
2154 | || (base == 0 && !isdigit(digit))) | 2172 | || (base == 0 && !isdigit(digit))) |
2155 | break; | 2173 | break; |
2156 | 2174 | ||
2175 | if (is_sign) | ||
2176 | val.s = qualifier != 'L' ? | ||
2177 | simple_strtol(str, &next, base) : | ||
2178 | simple_strtoll(str, &next, base); | ||
2179 | else | ||
2180 | val.u = qualifier != 'L' ? | ||
2181 | simple_strtoul(str, &next, base) : | ||
2182 | simple_strtoull(str, &next, base); | ||
2183 | |||
2184 | if (field_width > 0 && next - str > field_width) { | ||
2185 | if (base == 0) | ||
2186 | _parse_integer_fixup_radix(str, &base); | ||
2187 | while (next - str > field_width) { | ||
2188 | if (is_sign) | ||
2189 | val.s = div_s64(val.s, base); | ||
2190 | else | ||
2191 | val.u = div_u64(val.u, base); | ||
2192 | --next; | ||
2193 | } | ||
2194 | } | ||
2195 | |||
2157 | switch (qualifier) { | 2196 | switch (qualifier) { |
2158 | case 'H': /* that's 'hh' in format */ | 2197 | case 'H': /* that's 'hh' in format */ |
2159 | if (is_sign) { | 2198 | if (is_sign) |
2160 | signed char *s = (signed char *)va_arg(args, signed char *); | 2199 | *va_arg(args, signed char *) = val.s; |
2161 | *s = (signed char)simple_strtol(str, &next, base); | 2200 | else |
2162 | } else { | 2201 | *va_arg(args, unsigned char *) = val.u; |
2163 | unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); | ||
2164 | *s = (unsigned char)simple_strtoul(str, &next, base); | ||
2165 | } | ||
2166 | break; | 2202 | break; |
2167 | case 'h': | 2203 | case 'h': |
2168 | if (is_sign) { | 2204 | if (is_sign) |
2169 | short *s = (short *)va_arg(args, short *); | 2205 | *va_arg(args, short *) = val.s; |
2170 | *s = (short)simple_strtol(str, &next, base); | 2206 | else |
2171 | } else { | 2207 | *va_arg(args, unsigned short *) = val.u; |
2172 | unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); | ||
2173 | *s = (unsigned short)simple_strtoul(str, &next, base); | ||
2174 | } | ||
2175 | break; | 2208 | break; |
2176 | case 'l': | 2209 | case 'l': |
2177 | if (is_sign) { | 2210 | if (is_sign) |
2178 | long *l = (long *)va_arg(args, long *); | 2211 | *va_arg(args, long *) = val.s; |
2179 | *l = simple_strtol(str, &next, base); | 2212 | else |
2180 | } else { | 2213 | *va_arg(args, unsigned long *) = val.u; |
2181 | unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); | ||
2182 | *l = simple_strtoul(str, &next, base); | ||
2183 | } | ||
2184 | break; | 2214 | break; |
2185 | case 'L': | 2215 | case 'L': |
2186 | if (is_sign) { | 2216 | if (is_sign) |
2187 | long long *l = (long long *)va_arg(args, long long *); | 2217 | *va_arg(args, long long *) = val.s; |
2188 | *l = simple_strtoll(str, &next, base); | 2218 | else |
2189 | } else { | 2219 | *va_arg(args, unsigned long long *) = val.u; |
2190 | unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); | ||
2191 | *l = simple_strtoull(str, &next, base); | ||
2192 | } | ||
2193 | break; | 2220 | break; |
2194 | case 'Z': | 2221 | case 'Z': |
2195 | case 'z': | 2222 | case 'z': |
2196 | { | 2223 | *va_arg(args, size_t *) = val.u; |
2197 | size_t *s = (size_t *)va_arg(args, size_t *); | 2224 | break; |
2198 | *s = (size_t)simple_strtoul(str, &next, base); | ||
2199 | } | ||
2200 | break; | ||
2201 | default: | 2225 | default: |
2202 | if (is_sign) { | 2226 | if (is_sign) |
2203 | int *i = (int *)va_arg(args, int *); | 2227 | *va_arg(args, int *) = val.s; |
2204 | *i = (int)simple_strtol(str, &next, base); | 2228 | else |
2205 | } else { | 2229 | *va_arg(args, unsigned int *) = val.u; |
2206 | unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); | ||
2207 | *i = (unsigned int)simple_strtoul(str, &next, base); | ||
2208 | } | ||
2209 | break; | 2230 | break; |
2210 | } | 2231 | } |
2211 | num++; | 2232 | num++; |