Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig              |   6
-rw-r--r-- | lib/Kconfig.debug        |  11
-rw-r--r-- | lib/Kconfig.ubsan        |  11
-rw-r--r-- | lib/Makefile             |   5
-rw-r--r-- | lib/atomic64_test.c      |   4
-rw-r--r-- | lib/bitmap.c             |  50
-rw-r--r-- | lib/dma-debug.c          |  52
-rw-r--r-- | lib/iov_iter.c           | 420
-rw-r--r-- | lib/irq_poll.c           |   2
-rw-r--r-- | lib/kstrtox.c            |   6
-rw-r--r-- | lib/nmi_backtrace.c      |  42
-rw-r--r-- | lib/percpu-refcount.c    | 169
-rw-r--r-- | lib/radix-tree.c         |  14
-rw-r--r-- | lib/raid6/.gitignore     |   1
-rw-r--r-- | lib/raid6/Makefile       |   8
-rw-r--r-- | lib/raid6/algos.c        |  18
-rw-r--r-- | lib/raid6/avx512.c       | 569
-rw-r--r-- | lib/raid6/recov_avx512.c | 388
-rw-r--r-- | lib/raid6/recov_s390xc.c | 116
-rw-r--r-- | lib/raid6/s390vx.uc      | 168
-rw-r--r-- | lib/raid6/test/Makefile  |   5
-rw-r--r-- | lib/raid6/test/test.c    |   7
-rw-r--r-- | lib/raid6/x86.h          |  10
-rw-r--r-- | lib/random32.c           |   6
-rw-r--r-- | lib/rhashtable.c         | 300
-rw-r--r-- | lib/sbitmap.c            | 347
-rw-r--r-- | lib/strncpy_from_user.c  |   2
-rw-r--r-- | lib/test_bpf.c           |   1
-rw-r--r-- | lib/win_minmax.c         |  98
29 files changed, 2620 insertions, 216 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index d79909dc01ec..260a80e313b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -457,9 +457,6 @@ config NLATTR | |||
457 | config GENERIC_ATOMIC64 | 457 | config GENERIC_ATOMIC64 |
458 | bool | 458 | bool |
459 | 459 | ||
460 | config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | ||
461 | def_bool y if GENERIC_ATOMIC64 | ||
462 | |||
463 | config LRU_CACHE | 460 | config LRU_CACHE |
464 | tristate | 461 | tristate |
465 | 462 | ||
@@ -550,4 +547,7 @@ config STACKDEPOT | |||
550 | bool | 547 | bool |
551 | select STACKTRACE | 548 | select STACKTRACE |
552 | 549 | ||
550 | config SBITMAP | ||
551 | bool | ||
552 | |||
553 | endmenu | 553 | endmenu |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cab7405f48d2..33bc56cf60d7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -305,7 +305,7 @@ config DEBUG_SECTION_MISMATCH | |||
305 | a larger kernel). | 305 | a larger kernel). |
306 | - Run the section mismatch analysis for each module/built-in.o file. | 306 | - Run the section mismatch analysis for each module/built-in.o file. |
307 | When we run the section mismatch analysis on vmlinux.o, we | 307 | When we run the section mismatch analysis on vmlinux.o, we |
308 | lose valueble information about where the mismatch was | 308 | lose valuable information about where the mismatch was |
309 | introduced. | 309 | introduced. |
310 | Running the analysis for each module/built-in.o file | 310 | Running the analysis for each module/built-in.o file |
311 | tells where the mismatch happens much closer to the | 311 | tells where the mismatch happens much closer to the |
@@ -1857,15 +1857,6 @@ config PROVIDE_OHCI1394_DMA_INIT | |||
1857 | 1857 | ||
1858 | See Documentation/debugging-via-ohci1394.txt for more information. | 1858 | See Documentation/debugging-via-ohci1394.txt for more information. |
1859 | 1859 | ||
1860 | config BUILD_DOCSRC | ||
1861 | bool "Build targets in Documentation/ tree" | ||
1862 | depends on HEADERS_CHECK | ||
1863 | help | ||
1864 | This option attempts to build objects from the source files in the | ||
1865 | kernel Documentation/ tree. | ||
1866 | |||
1867 | Say N if you are unsure. | ||
1868 | |||
1869 | config DMA_API_DEBUG | 1860 | config DMA_API_DEBUG |
1870 | bool "Enable debugging of DMA-API usage" | 1861 | bool "Enable debugging of DMA-API usage" |
1871 | depends on HAVE_DMA_API_DEBUG | 1862 | depends on HAVE_DMA_API_DEBUG |
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af9a84a..bc6e651df68c 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@ | |||
1 | config ARCH_HAS_UBSAN_SANITIZE_ALL | 1 | config ARCH_HAS_UBSAN_SANITIZE_ALL |
2 | bool | 2 | bool |
3 | 3 | ||
4 | config ARCH_WANTS_UBSAN_NO_NULL | ||
5 | def_bool n | ||
6 | |||
4 | config UBSAN | 7 | config UBSAN |
5 | bool "Undefined behaviour sanity checker" | 8 | bool "Undefined behaviour sanity checker" |
6 | help | 9 | help |
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT | |||
34 | This option enables detection of unaligned memory accesses. | 37 | This option enables detection of unaligned memory accesses. |
35 | Enabling this option on architectures that support unaligned | 38 | Enabling this option on architectures that support unaligned |
36 | accesses may produce a lot of false positives. | 39 | accesses may produce a lot of false positives. |
40 | |||
41 | config UBSAN_NULL | ||
42 | bool "Enable checking of null pointers" | ||
43 | depends on UBSAN | ||
44 | default y if !ARCH_WANTS_UBSAN_NO_NULL | ||
45 | help | ||
46 | This option enables detection of memory accesses via a | ||
47 | null pointer. | ||
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..50144a3aeebd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ | |||
22 | sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ | 22 | sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ |
23 | flex_proportions.o ratelimit.o show_mem.o \ | 23 | flex_proportions.o ratelimit.o show_mem.o \ |
24 | is_single_threaded.o plist.o decompress.o kobject_uevent.o \ | 24 | is_single_threaded.o plist.o decompress.o kobject_uevent.o \ |
25 | earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o | 25 | earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o |
26 | 26 | ||
27 | lib-$(CONFIG_MMU) += ioremap.o | 27 | lib-$(CONFIG_MMU) += ioremap.o |
28 | lib-$(CONFIG_SMP) += cpumask.o | 28 | lib-$(CONFIG_SMP) += cpumask.o |
@@ -180,6 +180,7 @@ obj-$(CONFIG_IRQ_POLL) += irq_poll.o | |||
180 | 180 | ||
181 | obj-$(CONFIG_STACKDEPOT) += stackdepot.o | 181 | obj-$(CONFIG_STACKDEPOT) += stackdepot.o |
182 | KASAN_SANITIZE_stackdepot.o := n | 182 | KASAN_SANITIZE_stackdepot.o := n |
183 | KCOV_INSTRUMENT_stackdepot.o := n | ||
183 | 184 | ||
184 | libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ | 185 | libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ |
185 | fdt_empty_tree.o | 186 | fdt_empty_tree.o |
@@ -227,3 +228,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o | |||
227 | obj-$(CONFIG_UBSAN) += ubsan.o | 228 | obj-$(CONFIG_UBSAN) += ubsan.o |
228 | 229 | ||
229 | UBSAN_SANITIZE_ubsan.o := n | 230 | UBSAN_SANITIZE_ubsan.o := n |
231 | |||
232 | obj-$(CONFIG_SBITMAP) += sbitmap.o | ||
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index dbb369145dda..46042901130f 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -213,7 +213,6 @@ static __init void test_atomic64(void) | |||
213 | r += one; | 213 | r += one; |
214 | BUG_ON(v.counter != r); | 214 | BUG_ON(v.counter != r); |
215 | 215 | ||
216 | #ifdef CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | ||
217 | INIT(onestwos); | 216 | INIT(onestwos); |
218 | BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); | 217 | BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); |
219 | r -= one; | 218 | r -= one; |
@@ -226,9 +225,6 @@ static __init void test_atomic64(void) | |||
226 | INIT(-one); | 225 | INIT(-one); |
227 | BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); | 226 | BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); |
228 | BUG_ON(v.counter != r); | 227 | BUG_ON(v.counter != r); |
229 | #else | ||
230 | #warning Please implement atomic64_dec_if_positive for your architecture and select the above Kconfig symbol | ||
231 | #endif | ||
232 | 228 | ||
233 | INIT(onestwos); | 229 | INIT(onestwos); |
234 | BUG_ON(!atomic64_inc_not_zero(&v)); | 230 | BUG_ON(!atomic64_inc_not_zero(&v)); |
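With the ifdef gone, the atomic64_dec_if_positive() tests above run on every architecture. As a plain-C reminder of the semantics being exercised (a sketch only, not an atomic implementation): the counter is decremented only if the result stays non-negative, and old - 1 is returned either way.

static long long dec_if_positive_sketch(long long *v)
{
	long long res = *v - 1;

	if (res >= 0)		/* store only when the result stays >= 0 */
		*v = res;
	return res;		/* a negative return means nothing was stored */
}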
diff --git a/lib/bitmap.c b/lib/bitmap.c
index eca88087fa8a..0b66f0e5eb6b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -496,6 +496,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf); | |||
496 | * ranges. Consecutively set bits are shown as two hyphen-separated | 496 | * ranges. Consecutively set bits are shown as two hyphen-separated |
497 | * decimal numbers, the smallest and largest bit numbers set in | 497 | * decimal numbers, the smallest and largest bit numbers set in |
498 | * the range. | 498 | * the range. |
499 | * Optionally each range can be postfixed to denote that only parts of it | ||
500 | * should be set. The range will divided to groups of specific size. | ||
501 | * From each group will be used only defined amount of bits. | ||
502 | * Syntax: range:used_size/group_size | ||
503 | * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769 | ||
499 | * | 504 | * |
500 | * Returns 0 on success, -errno on invalid input strings. | 505 | * Returns 0 on success, -errno on invalid input strings. |
501 | * Error values: | 506 | * Error values: |
@@ -507,16 +512,20 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
507 | int is_user, unsigned long *maskp, | 512 | int is_user, unsigned long *maskp, |
508 | int nmaskbits) | 513 | int nmaskbits) |
509 | { | 514 | { |
510 | unsigned a, b; | 515 | unsigned int a, b, old_a, old_b; |
516 | unsigned int group_size, used_size; | ||
511 | int c, old_c, totaldigits, ndigits; | 517 | int c, old_c, totaldigits, ndigits; |
512 | const char __user __force *ubuf = (const char __user __force *)buf; | 518 | const char __user __force *ubuf = (const char __user __force *)buf; |
513 | int at_start, in_range; | 519 | int at_start, in_range, in_partial_range; |
514 | 520 | ||
515 | totaldigits = c = 0; | 521 | totaldigits = c = 0; |
522 | old_a = old_b = 0; | ||
523 | group_size = used_size = 0; | ||
516 | bitmap_zero(maskp, nmaskbits); | 524 | bitmap_zero(maskp, nmaskbits); |
517 | do { | 525 | do { |
518 | at_start = 1; | 526 | at_start = 1; |
519 | in_range = 0; | 527 | in_range = 0; |
528 | in_partial_range = 0; | ||
520 | a = b = 0; | 529 | a = b = 0; |
521 | ndigits = totaldigits; | 530 | ndigits = totaldigits; |
522 | 531 | ||
@@ -547,6 +556,24 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
547 | if ((totaldigits != ndigits) && isspace(old_c)) | 556 | if ((totaldigits != ndigits) && isspace(old_c)) |
548 | return -EINVAL; | 557 | return -EINVAL; |
549 | 558 | ||
559 | if (c == '/') { | ||
560 | used_size = a; | ||
561 | at_start = 1; | ||
562 | in_range = 0; | ||
563 | a = b = 0; | ||
564 | continue; | ||
565 | } | ||
566 | |||
567 | if (c == ':') { | ||
568 | old_a = a; | ||
569 | old_b = b; | ||
570 | at_start = 1; | ||
571 | in_range = 0; | ||
572 | in_partial_range = 1; | ||
573 | a = b = 0; | ||
574 | continue; | ||
575 | } | ||
576 | |||
550 | if (c == '-') { | 577 | if (c == '-') { |
551 | if (at_start || in_range) | 578 | if (at_start || in_range) |
552 | return -EINVAL; | 579 | return -EINVAL; |
@@ -567,15 +594,30 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
567 | } | 594 | } |
568 | if (ndigits == totaldigits) | 595 | if (ndigits == totaldigits) |
569 | continue; | 596 | continue; |
597 | if (in_partial_range) { | ||
598 | group_size = a; | ||
599 | a = old_a; | ||
600 | b = old_b; | ||
601 | old_a = old_b = 0; | ||
602 | } | ||
570 | /* if no digit is after '-', it's wrong*/ | 603 | /* if no digit is after '-', it's wrong*/ |
571 | if (at_start && in_range) | 604 | if (at_start && in_range) |
572 | return -EINVAL; | 605 | return -EINVAL; |
573 | if (!(a <= b)) | 606 | if (!(a <= b) || !(used_size <= group_size)) |
574 | return -EINVAL; | 607 | return -EINVAL; |
575 | if (b >= nmaskbits) | 608 | if (b >= nmaskbits) |
576 | return -ERANGE; | 609 | return -ERANGE; |
577 | while (a <= b) { | 610 | while (a <= b) { |
578 | set_bit(a, maskp); | 611 | if (in_partial_range) { |
612 | static int pos_in_group = 1; | ||
613 | |||
614 | if (pos_in_group <= used_size) | ||
615 | set_bit(a, maskp); | ||
616 | |||
617 | if (a == b || ++pos_in_group > group_size) | ||
618 | pos_in_group = 1; | ||
619 | } else | ||
620 | set_bit(a, maskp); | ||
579 | a++; | 621 | a++; |
580 | } | 622 | } |
581 | } while (buflen && c == ','); | 623 | } while (buflen && c == ','); |
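For reference, the new range postfix accepted by __bitmap_parselist() can be pictured with a small userspace sketch of the documented semantics (this is an illustration, not the kernel parser): within each group_size-sized chunk of the range, only the first used_size bits are set.

#include <stdio.h>

/* Expand "a-b:used/group" the way the comment above describes it. */
static void expand_partial_range(unsigned int a, unsigned int b,
				 unsigned int used, unsigned int group)
{
	for (unsigned int bit = a; bit <= b; bit++)
		if ((bit - a) % group < used)
			printf("%u ", bit);
	printf("\n");
}

int main(void)
{
	/* "0-1023:2/256" => 0 1 256 257 512 513 768 769 */
	expand_partial_range(0, 1023, 2, 256);
	return 0;
}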
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 06f02f6aecd2..8971370bfb16 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -44,6 +44,7 @@ enum { | |||
44 | dma_debug_page, | 44 | dma_debug_page, |
45 | dma_debug_sg, | 45 | dma_debug_sg, |
46 | dma_debug_coherent, | 46 | dma_debug_coherent, |
47 | dma_debug_resource, | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | enum map_err_types { | 50 | enum map_err_types { |
@@ -151,8 +152,9 @@ static const char *const maperr2str[] = { | |||
151 | [MAP_ERR_CHECKED] = "dma map error checked", | 152 | [MAP_ERR_CHECKED] = "dma map error checked", |
152 | }; | 153 | }; |
153 | 154 | ||
154 | static const char *type2name[4] = { "single", "page", | 155 | static const char *type2name[5] = { "single", "page", |
155 | "scather-gather", "coherent" }; | 156 | "scather-gather", "coherent", |
157 | "resource" }; | ||
156 | 158 | ||
157 | static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", | 159 | static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", |
158 | "DMA_FROM_DEVICE", "DMA_NONE" }; | 160 | "DMA_FROM_DEVICE", "DMA_NONE" }; |
@@ -400,6 +402,9 @@ static void hash_bucket_del(struct dma_debug_entry *entry) | |||
400 | 402 | ||
401 | static unsigned long long phys_addr(struct dma_debug_entry *entry) | 403 | static unsigned long long phys_addr(struct dma_debug_entry *entry) |
402 | { | 404 | { |
405 | if (entry->type == dma_debug_resource) | ||
406 | return __pfn_to_phys(entry->pfn) + entry->offset; | ||
407 | |||
403 | return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; | 408 | return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; |
404 | } | 409 | } |
405 | 410 | ||
@@ -1519,6 +1524,49 @@ void debug_dma_free_coherent(struct device *dev, size_t size, | |||
1519 | } | 1524 | } |
1520 | EXPORT_SYMBOL(debug_dma_free_coherent); | 1525 | EXPORT_SYMBOL(debug_dma_free_coherent); |
1521 | 1526 | ||
1527 | void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, | ||
1528 | int direction, dma_addr_t dma_addr) | ||
1529 | { | ||
1530 | struct dma_debug_entry *entry; | ||
1531 | |||
1532 | if (unlikely(dma_debug_disabled())) | ||
1533 | return; | ||
1534 | |||
1535 | entry = dma_entry_alloc(); | ||
1536 | if (!entry) | ||
1537 | return; | ||
1538 | |||
1539 | entry->type = dma_debug_resource; | ||
1540 | entry->dev = dev; | ||
1541 | entry->pfn = PHYS_PFN(addr); | ||
1542 | entry->offset = offset_in_page(addr); | ||
1543 | entry->size = size; | ||
1544 | entry->dev_addr = dma_addr; | ||
1545 | entry->direction = direction; | ||
1546 | entry->map_err_type = MAP_ERR_NOT_CHECKED; | ||
1547 | |||
1548 | add_dma_entry(entry); | ||
1549 | } | ||
1550 | EXPORT_SYMBOL(debug_dma_map_resource); | ||
1551 | |||
1552 | void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, | ||
1553 | size_t size, int direction) | ||
1554 | { | ||
1555 | struct dma_debug_entry ref = { | ||
1556 | .type = dma_debug_resource, | ||
1557 | .dev = dev, | ||
1558 | .dev_addr = dma_addr, | ||
1559 | .size = size, | ||
1560 | .direction = direction, | ||
1561 | }; | ||
1562 | |||
1563 | if (unlikely(dma_debug_disabled())) | ||
1564 | return; | ||
1565 | |||
1566 | check_unmap(&ref); | ||
1567 | } | ||
1568 | EXPORT_SYMBOL(debug_dma_unmap_resource); | ||
1569 | |||
1522 | void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, | 1570 | void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, |
1523 | size_t size, int direction) | 1571 | size_t size, int direction) |
1524 | { | 1572 | { |
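These hooks shadow the resource-mapping DMA API introduced in the same series; a driver mapping an MMIO region for peer-to-peer DMA would look roughly like the sketch below. The dma_map_resource()/dma_unmap_resource() signatures are assumed from that series and may differ on other trees.

#include <linux/dma-mapping.h>

/* Sketch only: map a physical MMIO range, let the peer device DMA to it,
 * then unmap it.  With CONFIG_DMA_API_DEBUG the hooks above record and
 * later check a dma_debug_resource entry for this mapping. */
static int example_map_mmio(struct device *dev, phys_addr_t phys, size_t size)
{
	dma_addr_t dma;

	dma = dma_map_resource(dev, phys, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... hand 'dma' to the peer device and let it run ... */

	dma_unmap_resource(dev, dma, size, DMA_BIDIRECTIONAL, 0);
	return 0;
}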
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e3138cfc8c9..f0c7f1481bae 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,8 +3,11 @@ | |||
3 | #include <linux/pagemap.h> | 3 | #include <linux/pagemap.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/vmalloc.h> | 5 | #include <linux/vmalloc.h> |
6 | #include <linux/splice.h> | ||
6 | #include <net/checksum.h> | 7 | #include <net/checksum.h> |
7 | 8 | ||
9 | #define PIPE_PARANOIA /* for now */ | ||
10 | |||
8 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ | 11 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ |
9 | size_t left; \ | 12 | size_t left; \ |
10 | size_t wanted = n; \ | 13 | size_t wanted = n; \ |
@@ -290,6 +293,93 @@ done: | |||
290 | return wanted - bytes; | 293 | return wanted - bytes; |
291 | } | 294 | } |
292 | 295 | ||
296 | #ifdef PIPE_PARANOIA | ||
297 | static bool sanity(const struct iov_iter *i) | ||
298 | { | ||
299 | struct pipe_inode_info *pipe = i->pipe; | ||
300 | int idx = i->idx; | ||
301 | int next = pipe->curbuf + pipe->nrbufs; | ||
302 | if (i->iov_offset) { | ||
303 | struct pipe_buffer *p; | ||
304 | if (unlikely(!pipe->nrbufs)) | ||
305 | goto Bad; // pipe must be non-empty | ||
306 | if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) | ||
307 | goto Bad; // must be at the last buffer... | ||
308 | |||
309 | p = &pipe->bufs[idx]; | ||
310 | if (unlikely(p->offset + p->len != i->iov_offset)) | ||
311 | goto Bad; // ... at the end of segment | ||
312 | } else { | ||
313 | if (idx != (next & (pipe->buffers - 1))) | ||
314 | goto Bad; // must be right after the last buffer | ||
315 | } | ||
316 | return true; | ||
317 | Bad: | ||
318 | printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); | ||
319 | printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", | ||
320 | pipe->curbuf, pipe->nrbufs, pipe->buffers); | ||
321 | for (idx = 0; idx < pipe->buffers; idx++) | ||
322 | printk(KERN_ERR "[%p %p %d %d]\n", | ||
323 | pipe->bufs[idx].ops, | ||
324 | pipe->bufs[idx].page, | ||
325 | pipe->bufs[idx].offset, | ||
326 | pipe->bufs[idx].len); | ||
327 | WARN_ON(1); | ||
328 | return false; | ||
329 | } | ||
330 | #else | ||
331 | #define sanity(i) true | ||
332 | #endif | ||
333 | |||
334 | static inline int next_idx(int idx, struct pipe_inode_info *pipe) | ||
335 | { | ||
336 | return (idx + 1) & (pipe->buffers - 1); | ||
337 | } | ||
338 | |||
339 | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, | ||
340 | struct iov_iter *i) | ||
341 | { | ||
342 | struct pipe_inode_info *pipe = i->pipe; | ||
343 | struct pipe_buffer *buf; | ||
344 | size_t off; | ||
345 | int idx; | ||
346 | |||
347 | if (unlikely(bytes > i->count)) | ||
348 | bytes = i->count; | ||
349 | |||
350 | if (unlikely(!bytes)) | ||
351 | return 0; | ||
352 | |||
353 | if (!sanity(i)) | ||
354 | return 0; | ||
355 | |||
356 | off = i->iov_offset; | ||
357 | idx = i->idx; | ||
358 | buf = &pipe->bufs[idx]; | ||
359 | if (off) { | ||
360 | if (offset == off && buf->page == page) { | ||
361 | /* merge with the last one */ | ||
362 | buf->len += bytes; | ||
363 | i->iov_offset += bytes; | ||
364 | goto out; | ||
365 | } | ||
366 | idx = next_idx(idx, pipe); | ||
367 | buf = &pipe->bufs[idx]; | ||
368 | } | ||
369 | if (idx == pipe->curbuf && pipe->nrbufs) | ||
370 | return 0; | ||
371 | pipe->nrbufs++; | ||
372 | buf->ops = &page_cache_pipe_buf_ops; | ||
373 | get_page(buf->page = page); | ||
374 | buf->offset = offset; | ||
375 | buf->len = bytes; | ||
376 | i->iov_offset = offset + bytes; | ||
377 | i->idx = idx; | ||
378 | out: | ||
379 | i->count -= bytes; | ||
380 | return bytes; | ||
381 | } | ||
382 | |||
293 | /* | 383 | /* |
294 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of | 384 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of |
295 | * bytes. For each iovec, fault in each page that constitutes the iovec. | 385 | * bytes. For each iovec, fault in each page that constitutes the iovec. |
@@ -306,8 +396,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) | |||
306 | 396 | ||
307 | if (!(i->type & (ITER_BVEC|ITER_KVEC))) { | 397 | if (!(i->type & (ITER_BVEC|ITER_KVEC))) { |
308 | iterate_iovec(i, bytes, v, iov, skip, ({ | 398 | iterate_iovec(i, bytes, v, iov, skip, ({ |
309 | err = fault_in_multipages_readable(v.iov_base, | 399 | err = fault_in_pages_readable(v.iov_base, v.iov_len); |
310 | v.iov_len); | ||
311 | if (unlikely(err)) | 400 | if (unlikely(err)) |
312 | return err; | 401 | return err; |
313 | 0;})) | 402 | 0;})) |
@@ -356,9 +445,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len) | |||
356 | kunmap_atomic(addr); | 445 | kunmap_atomic(addr); |
357 | } | 446 | } |
358 | 447 | ||
448 | static inline bool allocated(struct pipe_buffer *buf) | ||
449 | { | ||
450 | return buf->ops == &default_pipe_buf_ops; | ||
451 | } | ||
452 | |||
453 | static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) | ||
454 | { | ||
455 | size_t off = i->iov_offset; | ||
456 | int idx = i->idx; | ||
457 | if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { | ||
458 | idx = next_idx(idx, i->pipe); | ||
459 | off = 0; | ||
460 | } | ||
461 | *idxp = idx; | ||
462 | *offp = off; | ||
463 | } | ||
464 | |||
465 | static size_t push_pipe(struct iov_iter *i, size_t size, | ||
466 | int *idxp, size_t *offp) | ||
467 | { | ||
468 | struct pipe_inode_info *pipe = i->pipe; | ||
469 | size_t off; | ||
470 | int idx; | ||
471 | ssize_t left; | ||
472 | |||
473 | if (unlikely(size > i->count)) | ||
474 | size = i->count; | ||
475 | if (unlikely(!size)) | ||
476 | return 0; | ||
477 | |||
478 | left = size; | ||
479 | data_start(i, &idx, &off); | ||
480 | *idxp = idx; | ||
481 | *offp = off; | ||
482 | if (off) { | ||
483 | left -= PAGE_SIZE - off; | ||
484 | if (left <= 0) { | ||
485 | pipe->bufs[idx].len += size; | ||
486 | return size; | ||
487 | } | ||
488 | pipe->bufs[idx].len = PAGE_SIZE; | ||
489 | idx = next_idx(idx, pipe); | ||
490 | } | ||
491 | while (idx != pipe->curbuf || !pipe->nrbufs) { | ||
492 | struct page *page = alloc_page(GFP_USER); | ||
493 | if (!page) | ||
494 | break; | ||
495 | pipe->nrbufs++; | ||
496 | pipe->bufs[idx].ops = &default_pipe_buf_ops; | ||
497 | pipe->bufs[idx].page = page; | ||
498 | pipe->bufs[idx].offset = 0; | ||
499 | if (left <= PAGE_SIZE) { | ||
500 | pipe->bufs[idx].len = left; | ||
501 | return size; | ||
502 | } | ||
503 | pipe->bufs[idx].len = PAGE_SIZE; | ||
504 | left -= PAGE_SIZE; | ||
505 | idx = next_idx(idx, pipe); | ||
506 | } | ||
507 | return size - left; | ||
508 | } | ||
509 | |||
510 | static size_t copy_pipe_to_iter(const void *addr, size_t bytes, | ||
511 | struct iov_iter *i) | ||
512 | { | ||
513 | struct pipe_inode_info *pipe = i->pipe; | ||
514 | size_t n, off; | ||
515 | int idx; | ||
516 | |||
517 | if (!sanity(i)) | ||
518 | return 0; | ||
519 | |||
520 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
521 | if (unlikely(!n)) | ||
522 | return 0; | ||
523 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
524 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
525 | memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); | ||
526 | i->idx = idx; | ||
527 | i->iov_offset = off + chunk; | ||
528 | n -= chunk; | ||
529 | addr += chunk; | ||
530 | } | ||
531 | i->count -= bytes; | ||
532 | return bytes; | ||
533 | } | ||
534 | |||
359 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) | 535 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
360 | { | 536 | { |
361 | const char *from = addr; | 537 | const char *from = addr; |
538 | if (unlikely(i->type & ITER_PIPE)) | ||
539 | return copy_pipe_to_iter(addr, bytes, i); | ||
362 | iterate_and_advance(i, bytes, v, | 540 | iterate_and_advance(i, bytes, v, |
363 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, | 541 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, |
364 | v.iov_len), | 542 | v.iov_len), |
@@ -374,6 +552,10 @@ EXPORT_SYMBOL(copy_to_iter); | |||
374 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | 552 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
375 | { | 553 | { |
376 | char *to = addr; | 554 | char *to = addr; |
555 | if (unlikely(i->type & ITER_PIPE)) { | ||
556 | WARN_ON(1); | ||
557 | return 0; | ||
558 | } | ||
377 | iterate_and_advance(i, bytes, v, | 559 | iterate_and_advance(i, bytes, v, |
378 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, | 560 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, |
379 | v.iov_len), | 561 | v.iov_len), |
@@ -389,6 +571,10 @@ EXPORT_SYMBOL(copy_from_iter); | |||
389 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | 571 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
390 | { | 572 | { |
391 | char *to = addr; | 573 | char *to = addr; |
574 | if (unlikely(i->type & ITER_PIPE)) { | ||
575 | WARN_ON(1); | ||
576 | return 0; | ||
577 | } | ||
392 | iterate_and_advance(i, bytes, v, | 578 | iterate_and_advance(i, bytes, v, |
393 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, | 579 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, |
394 | v.iov_base, v.iov_len), | 580 | v.iov_base, v.iov_len), |
@@ -409,14 +595,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | |||
409 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); | 595 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
410 | kunmap_atomic(kaddr); | 596 | kunmap_atomic(kaddr); |
411 | return wanted; | 597 | return wanted; |
412 | } else | 598 | } else if (likely(!(i->type & ITER_PIPE))) |
413 | return copy_page_to_iter_iovec(page, offset, bytes, i); | 599 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
600 | else | ||
601 | return copy_page_to_iter_pipe(page, offset, bytes, i); | ||
414 | } | 602 | } |
415 | EXPORT_SYMBOL(copy_page_to_iter); | 603 | EXPORT_SYMBOL(copy_page_to_iter); |
416 | 604 | ||
417 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | 605 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, |
418 | struct iov_iter *i) | 606 | struct iov_iter *i) |
419 | { | 607 | { |
608 | if (unlikely(i->type & ITER_PIPE)) { | ||
609 | WARN_ON(1); | ||
610 | return 0; | ||
611 | } | ||
420 | if (i->type & (ITER_BVEC|ITER_KVEC)) { | 612 | if (i->type & (ITER_BVEC|ITER_KVEC)) { |
421 | void *kaddr = kmap_atomic(page); | 613 | void *kaddr = kmap_atomic(page); |
422 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); | 614 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); |
@@ -427,8 +619,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | |||
427 | } | 619 | } |
428 | EXPORT_SYMBOL(copy_page_from_iter); | 620 | EXPORT_SYMBOL(copy_page_from_iter); |
429 | 621 | ||
622 | static size_t pipe_zero(size_t bytes, struct iov_iter *i) | ||
623 | { | ||
624 | struct pipe_inode_info *pipe = i->pipe; | ||
625 | size_t n, off; | ||
626 | int idx; | ||
627 | |||
628 | if (!sanity(i)) | ||
629 | return 0; | ||
630 | |||
631 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
632 | if (unlikely(!n)) | ||
633 | return 0; | ||
634 | |||
635 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
636 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
637 | memzero_page(pipe->bufs[idx].page, off, chunk); | ||
638 | i->idx = idx; | ||
639 | i->iov_offset = off + chunk; | ||
640 | n -= chunk; | ||
641 | } | ||
642 | i->count -= bytes; | ||
643 | return bytes; | ||
644 | } | ||
645 | |||
430 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | 646 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
431 | { | 647 | { |
648 | if (unlikely(i->type & ITER_PIPE)) | ||
649 | return pipe_zero(bytes, i); | ||
432 | iterate_and_advance(i, bytes, v, | 650 | iterate_and_advance(i, bytes, v, |
433 | __clear_user(v.iov_base, v.iov_len), | 651 | __clear_user(v.iov_base, v.iov_len), |
434 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), | 652 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), |
@@ -443,6 +661,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
443 | struct iov_iter *i, unsigned long offset, size_t bytes) | 661 | struct iov_iter *i, unsigned long offset, size_t bytes) |
444 | { | 662 | { |
445 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; | 663 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; |
664 | if (unlikely(i->type & ITER_PIPE)) { | ||
665 | kunmap_atomic(kaddr); | ||
666 | WARN_ON(1); | ||
667 | return 0; | ||
668 | } | ||
446 | iterate_all_kinds(i, bytes, v, | 669 | iterate_all_kinds(i, bytes, v, |
447 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, | 670 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, |
448 | v.iov_base, v.iov_len), | 671 | v.iov_base, v.iov_len), |
@@ -455,8 +678,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
455 | } | 678 | } |
456 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | 679 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); |
457 | 680 | ||
681 | static void pipe_advance(struct iov_iter *i, size_t size) | ||
682 | { | ||
683 | struct pipe_inode_info *pipe = i->pipe; | ||
684 | struct pipe_buffer *buf; | ||
685 | int idx = i->idx; | ||
686 | size_t off = i->iov_offset; | ||
687 | |||
688 | if (unlikely(i->count < size)) | ||
689 | size = i->count; | ||
690 | |||
691 | if (size) { | ||
692 | if (off) /* make it relative to the beginning of buffer */ | ||
693 | size += off - pipe->bufs[idx].offset; | ||
694 | while (1) { | ||
695 | buf = &pipe->bufs[idx]; | ||
696 | if (size <= buf->len) | ||
697 | break; | ||
698 | size -= buf->len; | ||
699 | idx = next_idx(idx, pipe); | ||
700 | } | ||
701 | buf->len = size; | ||
702 | i->idx = idx; | ||
703 | off = i->iov_offset = buf->offset + size; | ||
704 | } | ||
705 | if (off) | ||
706 | idx = next_idx(idx, pipe); | ||
707 | if (pipe->nrbufs) { | ||
708 | int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
709 | /* [curbuf,unused) is in use. Free [idx,unused) */ | ||
710 | while (idx != unused) { | ||
711 | pipe_buf_release(pipe, &pipe->bufs[idx]); | ||
712 | idx = next_idx(idx, pipe); | ||
713 | pipe->nrbufs--; | ||
714 | } | ||
715 | } | ||
716 | } | ||
717 | |||
458 | void iov_iter_advance(struct iov_iter *i, size_t size) | 718 | void iov_iter_advance(struct iov_iter *i, size_t size) |
459 | { | 719 | { |
720 | if (unlikely(i->type & ITER_PIPE)) { | ||
721 | pipe_advance(i, size); | ||
722 | return; | ||
723 | } | ||
460 | iterate_and_advance(i, size, v, 0, 0, 0) | 724 | iterate_and_advance(i, size, v, 0, 0, 0) |
461 | } | 725 | } |
462 | EXPORT_SYMBOL(iov_iter_advance); | 726 | EXPORT_SYMBOL(iov_iter_advance); |
@@ -466,6 +730,8 @@ EXPORT_SYMBOL(iov_iter_advance); | |||
466 | */ | 730 | */ |
467 | size_t iov_iter_single_seg_count(const struct iov_iter *i) | 731 | size_t iov_iter_single_seg_count(const struct iov_iter *i) |
468 | { | 732 | { |
733 | if (unlikely(i->type & ITER_PIPE)) | ||
734 | return i->count; // it is a silly place, anyway | ||
469 | if (i->nr_segs == 1) | 735 | if (i->nr_segs == 1) |
470 | return i->count; | 736 | return i->count; |
471 | else if (i->type & ITER_BVEC) | 737 | else if (i->type & ITER_BVEC) |
@@ -501,6 +767,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction, | |||
501 | } | 767 | } |
502 | EXPORT_SYMBOL(iov_iter_bvec); | 768 | EXPORT_SYMBOL(iov_iter_bvec); |
503 | 769 | ||
770 | void iov_iter_pipe(struct iov_iter *i, int direction, | ||
771 | struct pipe_inode_info *pipe, | ||
772 | size_t count) | ||
773 | { | ||
774 | BUG_ON(direction != ITER_PIPE); | ||
775 | i->type = direction; | ||
776 | i->pipe = pipe; | ||
777 | i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
778 | i->iov_offset = 0; | ||
779 | i->count = count; | ||
780 | } | ||
781 | EXPORT_SYMBOL(iov_iter_pipe); | ||
782 | |||
504 | unsigned long iov_iter_alignment(const struct iov_iter *i) | 783 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
505 | { | 784 | { |
506 | unsigned long res = 0; | 785 | unsigned long res = 0; |
@@ -509,6 +788,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
509 | if (!size) | 788 | if (!size) |
510 | return 0; | 789 | return 0; |
511 | 790 | ||
791 | if (unlikely(i->type & ITER_PIPE)) { | ||
792 | if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | ||
793 | return size | i->iov_offset; | ||
794 | return size; | ||
795 | } | ||
512 | iterate_all_kinds(i, size, v, | 796 | iterate_all_kinds(i, size, v, |
513 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), | 797 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), |
514 | res |= v.bv_offset | v.bv_len, | 798 | res |= v.bv_offset | v.bv_len, |
@@ -525,6 +809,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
525 | if (!size) | 809 | if (!size) |
526 | return 0; | 810 | return 0; |
527 | 811 | ||
812 | if (unlikely(i->type & ITER_PIPE)) { | ||
813 | WARN_ON(1); | ||
814 | return ~0U; | ||
815 | } | ||
816 | |||
528 | iterate_all_kinds(i, size, v, | 817 | iterate_all_kinds(i, size, v, |
529 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | | 818 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | |
530 | (size != v.iov_len ? size : 0), 0), | 819 | (size != v.iov_len ? size : 0), 0), |
@@ -537,6 +826,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
537 | } | 826 | } |
538 | EXPORT_SYMBOL(iov_iter_gap_alignment); | 827 | EXPORT_SYMBOL(iov_iter_gap_alignment); |
539 | 828 | ||
829 | static inline size_t __pipe_get_pages(struct iov_iter *i, | ||
830 | size_t maxsize, | ||
831 | struct page **pages, | ||
832 | int idx, | ||
833 | size_t *start) | ||
834 | { | ||
835 | struct pipe_inode_info *pipe = i->pipe; | ||
836 | ssize_t n = push_pipe(i, maxsize, &idx, start); | ||
837 | if (!n) | ||
838 | return -EFAULT; | ||
839 | |||
840 | maxsize = n; | ||
841 | n += *start; | ||
842 | while (n > 0) { | ||
843 | get_page(*pages++ = pipe->bufs[idx].page); | ||
844 | idx = next_idx(idx, pipe); | ||
845 | n -= PAGE_SIZE; | ||
846 | } | ||
847 | |||
848 | return maxsize; | ||
849 | } | ||
850 | |||
851 | static ssize_t pipe_get_pages(struct iov_iter *i, | ||
852 | struct page **pages, size_t maxsize, unsigned maxpages, | ||
853 | size_t *start) | ||
854 | { | ||
855 | unsigned npages; | ||
856 | size_t capacity; | ||
857 | int idx; | ||
858 | |||
859 | if (!sanity(i)) | ||
860 | return -EFAULT; | ||
861 | |||
862 | data_start(i, &idx, start); | ||
863 | /* some of this one + all after this one */ | ||
864 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
865 | capacity = min(npages,maxpages) * PAGE_SIZE - *start; | ||
866 | |||
867 | return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); | ||
868 | } | ||
869 | |||
540 | ssize_t iov_iter_get_pages(struct iov_iter *i, | 870 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
541 | struct page **pages, size_t maxsize, unsigned maxpages, | 871 | struct page **pages, size_t maxsize, unsigned maxpages, |
542 | size_t *start) | 872 | size_t *start) |
@@ -547,6 +877,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | |||
547 | if (!maxsize) | 877 | if (!maxsize) |
548 | return 0; | 878 | return 0; |
549 | 879 | ||
880 | if (unlikely(i->type & ITER_PIPE)) | ||
881 | return pipe_get_pages(i, pages, maxsize, maxpages, start); | ||
550 | iterate_all_kinds(i, maxsize, v, ({ | 882 | iterate_all_kinds(i, maxsize, v, ({ |
551 | unsigned long addr = (unsigned long)v.iov_base; | 883 | unsigned long addr = (unsigned long)v.iov_base; |
552 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 884 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -582,6 +914,37 @@ static struct page **get_pages_array(size_t n) | |||
582 | return p; | 914 | return p; |
583 | } | 915 | } |
584 | 916 | ||
917 | static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | ||
918 | struct page ***pages, size_t maxsize, | ||
919 | size_t *start) | ||
920 | { | ||
921 | struct page **p; | ||
922 | size_t n; | ||
923 | int idx; | ||
924 | int npages; | ||
925 | |||
926 | if (!sanity(i)) | ||
927 | return -EFAULT; | ||
928 | |||
929 | data_start(i, &idx, start); | ||
930 | /* some of this one + all after this one */ | ||
931 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
932 | n = npages * PAGE_SIZE - *start; | ||
933 | if (maxsize > n) | ||
934 | maxsize = n; | ||
935 | else | ||
936 | npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); | ||
937 | p = get_pages_array(npages); | ||
938 | if (!p) | ||
939 | return -ENOMEM; | ||
940 | n = __pipe_get_pages(i, maxsize, p, idx, start); | ||
941 | if (n > 0) | ||
942 | *pages = p; | ||
943 | else | ||
944 | kvfree(p); | ||
945 | return n; | ||
946 | } | ||
947 | |||
585 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | 948 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, |
586 | struct page ***pages, size_t maxsize, | 949 | struct page ***pages, size_t maxsize, |
587 | size_t *start) | 950 | size_t *start) |
@@ -594,6 +957,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | |||
594 | if (!maxsize) | 957 | if (!maxsize) |
595 | return 0; | 958 | return 0; |
596 | 959 | ||
960 | if (unlikely(i->type & ITER_PIPE)) | ||
961 | return pipe_get_pages_alloc(i, pages, maxsize, start); | ||
597 | iterate_all_kinds(i, maxsize, v, ({ | 962 | iterate_all_kinds(i, maxsize, v, ({ |
598 | unsigned long addr = (unsigned long)v.iov_base; | 963 | unsigned long addr = (unsigned long)v.iov_base; |
599 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 964 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -635,6 +1000,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
635 | __wsum sum, next; | 1000 | __wsum sum, next; |
636 | size_t off = 0; | 1001 | size_t off = 0; |
637 | sum = *csum; | 1002 | sum = *csum; |
1003 | if (unlikely(i->type & ITER_PIPE)) { | ||
1004 | WARN_ON(1); | ||
1005 | return 0; | ||
1006 | } | ||
638 | iterate_and_advance(i, bytes, v, ({ | 1007 | iterate_and_advance(i, bytes, v, ({ |
639 | int err = 0; | 1008 | int err = 0; |
640 | next = csum_and_copy_from_user(v.iov_base, | 1009 | next = csum_and_copy_from_user(v.iov_base, |
@@ -673,6 +1042,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | |||
673 | __wsum sum, next; | 1042 | __wsum sum, next; |
674 | size_t off = 0; | 1043 | size_t off = 0; |
675 | sum = *csum; | 1044 | sum = *csum; |
1045 | if (unlikely(i->type & ITER_PIPE)) { | ||
1046 | WARN_ON(1); /* for now */ | ||
1047 | return 0; | ||
1048 | } | ||
676 | iterate_and_advance(i, bytes, v, ({ | 1049 | iterate_and_advance(i, bytes, v, ({ |
677 | int err = 0; | 1050 | int err = 0; |
678 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 1051 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
@@ -712,7 +1085,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) | |||
712 | if (!size) | 1085 | if (!size) |
713 | return 0; | 1086 | return 0; |
714 | 1087 | ||
715 | iterate_all_kinds(i, size, v, ({ | 1088 | if (unlikely(i->type & ITER_PIPE)) { |
1089 | struct pipe_inode_info *pipe = i->pipe; | ||
1090 | size_t off; | ||
1091 | int idx; | ||
1092 | |||
1093 | if (!sanity(i)) | ||
1094 | return 0; | ||
1095 | |||
1096 | data_start(i, &idx, &off); | ||
1097 | /* some of this one + all after this one */ | ||
1098 | npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; | ||
1099 | if (npages >= maxpages) | ||
1100 | return maxpages; | ||
1101 | } else iterate_all_kinds(i, size, v, ({ | ||
716 | unsigned long p = (unsigned long)v.iov_base; | 1102 | unsigned long p = (unsigned long)v.iov_base; |
717 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | 1103 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) |
718 | - p / PAGE_SIZE; | 1104 | - p / PAGE_SIZE; |
@@ -737,6 +1123,10 @@ EXPORT_SYMBOL(iov_iter_npages); | |||
737 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | 1123 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) |
738 | { | 1124 | { |
739 | *new = *old; | 1125 | *new = *old; |
1126 | if (unlikely(new->type & ITER_PIPE)) { | ||
1127 | WARN_ON(1); | ||
1128 | return NULL; | ||
1129 | } | ||
740 | if (new->type & ITER_BVEC) | 1130 | if (new->type & ITER_BVEC) |
741 | return new->bvec = kmemdup(new->bvec, | 1131 | return new->bvec = kmemdup(new->bvec, |
742 | new->nr_segs * sizeof(struct bio_vec), | 1132 | new->nr_segs * sizeof(struct bio_vec), |
@@ -749,6 +1139,28 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | |||
749 | } | 1139 | } |
750 | EXPORT_SYMBOL(dup_iter); | 1140 | EXPORT_SYMBOL(dup_iter); |
751 | 1141 | ||
1142 | /** | ||
1143 | * import_iovec() - Copy an array of &struct iovec from userspace | ||
1144 | * into the kernel, check that it is valid, and initialize a new | ||
1145 | * &struct iov_iter iterator to access it. | ||
1146 | * | ||
1147 | * @type: One of %READ or %WRITE. | ||
1148 | * @uvector: Pointer to the userspace array. | ||
1149 | * @nr_segs: Number of elements in userspace array. | ||
1150 | * @fast_segs: Number of elements in @iov. | ||
1151 | * @iov: (input and output parameter) Pointer to pointer to (usually small | ||
1152 | * on-stack) kernel array. | ||
1153 | * @i: Pointer to iterator that will be initialized on success. | ||
1154 | * | ||
1155 | * If the array pointed to by *@iov is large enough to hold all @nr_segs, | ||
1156 | * then this function places %NULL in *@iov on return. Otherwise, a new | ||
1157 | * array will be allocated and the result placed in *@iov. This means that | ||
1158 | * the caller may call kfree() on *@iov regardless of whether the small | ||
1159 | * on-stack array was used or not (and regardless of whether this function | ||
1160 | * returns an error or not). | ||
1161 | * | ||
1162 | * Return: 0 on success or negative error code on error. | ||
1163 | */ | ||
752 | int import_iovec(int type, const struct iovec __user * uvector, | 1164 | int import_iovec(int type, const struct iovec __user * uvector, |
753 | unsigned nr_segs, unsigned fast_segs, | 1165 | unsigned nr_segs, unsigned fast_segs, |
754 | struct iovec **iov, struct iov_iter *i) | 1166 | struct iovec **iov, struct iov_iter *i) |
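The kernel-doc added for import_iovec() spells out the ownership rule for *@iov; a typical caller follows the pattern sketched below. vfs_iter_read() merely stands in for whatever consumes the iterator, and its exact signature here is an assumption.

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uio.h>

static ssize_t example_readv(struct file *file, const struct iovec __user *uvec,
			     unsigned long nr_segs, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = vfs_iter_read(file, &iter, pos);

	/* Safe whether or not the on-stack array was used (see the doc above). */
	kfree(iov);
	return ret;
}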
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 2be55692aa43..1d6565e81030 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop) | |||
74 | } | 74 | } |
75 | EXPORT_SYMBOL(irq_poll_complete); | 75 | EXPORT_SYMBOL(irq_poll_complete); |
76 | 76 | ||
77 | static void irq_poll_softirq(struct softirq_action *h) | 77 | static void __latent_entropy irq_poll_softirq(struct softirq_action *h) |
78 | { | 78 | { |
79 | struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); | 79 | struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); |
80 | int rearm = 0, budget = irq_poll_budget; | 80 | int rearm = 0, budget = irq_poll_budget; |
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index d8a5cf66c316..b8e2080c1a47 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -48,11 +48,9 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long | |||
48 | { | 48 | { |
49 | unsigned long long res; | 49 | unsigned long long res; |
50 | unsigned int rv; | 50 | unsigned int rv; |
51 | int overflow; | ||
52 | 51 | ||
53 | res = 0; | 52 | res = 0; |
54 | rv = 0; | 53 | rv = 0; |
55 | overflow = 0; | ||
56 | while (*s) { | 54 | while (*s) { |
57 | unsigned int val; | 55 | unsigned int val; |
58 | 56 | ||
@@ -71,15 +69,13 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long | |||
71 | */ | 69 | */ |
72 | if (unlikely(res & (~0ull << 60))) { | 70 | if (unlikely(res & (~0ull << 60))) { |
73 | if (res > div_u64(ULLONG_MAX - val, base)) | 71 | if (res > div_u64(ULLONG_MAX - val, base)) |
74 | overflow = 1; | 72 | rv |= KSTRTOX_OVERFLOW; |
75 | } | 73 | } |
76 | res = res * base + val; | 74 | res = res * base + val; |
77 | rv++; | 75 | rv++; |
78 | s++; | 76 | s++; |
79 | } | 77 | } |
80 | *p = res; | 78 | *p = res; |
81 | if (overflow) | ||
82 | rv |= KSTRTOX_OVERFLOW; | ||
83 | return rv; | 79 | return rv; |
84 | } | 80 | } |
85 | 81 | ||
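The overflow test that now sets KSTRTOX_OVERFLOW directly avoids computing res * base + val (which could itself wrap) by comparing res against (ULLONG_MAX - val) / base. A small userspace check of that identity:

#include <stdio.h>
#include <limits.h>

static int would_overflow(unsigned long long res, unsigned int base,
			  unsigned int val)
{
	return res > (ULLONG_MAX - val) / base;
}

int main(void)
{
	/* (ULLONG_MAX/16) * 16 + 15 == ULLONG_MAX: still representable */
	printf("%d\n", would_overflow(ULLONG_MAX / 16, 16, 15));	/* 0 */
	/* one more and the product alone no longer fits in 64 bits */
	printf("%d\n", would_overflow(ULLONG_MAX / 16 + 1, 16, 0));	/* 1 */
	return 0;
}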
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 26caf51cc238..75554754eadf 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,21 +16,23 @@ | |||
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/kprobes.h> | 17 | #include <linux/kprobes.h> |
18 | #include <linux/nmi.h> | 18 | #include <linux/nmi.h> |
19 | #include <linux/cpu.h> | ||
19 | 20 | ||
20 | #ifdef arch_trigger_all_cpu_backtrace | 21 | #ifdef arch_trigger_cpumask_backtrace |
21 | /* For reliability, we're prepared to waste bits here. */ | 22 | /* For reliability, we're prepared to waste bits here. */ |
22 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | 23 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; |
23 | 24 | ||
24 | /* "in progress" flag of arch_trigger_all_cpu_backtrace */ | 25 | /* "in progress" flag of arch_trigger_cpumask_backtrace */ |
25 | static unsigned long backtrace_flag; | 26 | static unsigned long backtrace_flag; |
26 | 27 | ||
27 | /* | 28 | /* |
28 | * When raise() is called it will be is passed a pointer to the | 29 | * When raise() is called it will be passed a pointer to the |
29 | * backtrace_mask. Architectures that call nmi_cpu_backtrace() | 30 | * backtrace_mask. Architectures that call nmi_cpu_backtrace() |
30 | * directly from their raise() functions may rely on the mask | 31 | * directly from their raise() functions may rely on the mask |
31 | * they are passed being updated as a side effect of this call. | 32 | * they are passed being updated as a side effect of this call. |
32 | */ | 33 | */ |
33 | void nmi_trigger_all_cpu_backtrace(bool include_self, | 34 | void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, |
35 | bool exclude_self, | ||
34 | void (*raise)(cpumask_t *mask)) | 36 | void (*raise)(cpumask_t *mask)) |
35 | { | 37 | { |
36 | int i, this_cpu = get_cpu(); | 38 | int i, this_cpu = get_cpu(); |
@@ -44,13 +46,22 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, | |||
44 | return; | 46 | return; |
45 | } | 47 | } |
46 | 48 | ||
47 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | 49 | cpumask_copy(to_cpumask(backtrace_mask), mask); |
48 | if (!include_self) | 50 | if (exclude_self) |
49 | cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); | 51 | cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); |
50 | 52 | ||
53 | /* | ||
54 | * Don't try to send an NMI to this cpu; it may work on some | ||
55 | * architectures, but on others it may not, and we'll get | ||
56 | * information at least as useful just by doing a dump_stack() here. | ||
57 | * Note that nmi_cpu_backtrace(NULL) will clear the cpu bit. | ||
58 | */ | ||
59 | if (cpumask_test_cpu(this_cpu, to_cpumask(backtrace_mask))) | ||
60 | nmi_cpu_backtrace(NULL); | ||
61 | |||
51 | if (!cpumask_empty(to_cpumask(backtrace_mask))) { | 62 | if (!cpumask_empty(to_cpumask(backtrace_mask))) { |
52 | pr_info("Sending NMI to %s CPUs:\n", | 63 | pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n", |
53 | (include_self ? "all" : "other")); | 64 | this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask)); |
54 | raise(to_cpumask(backtrace_mask)); | 65 | raise(to_cpumask(backtrace_mask)); |
55 | } | 66 | } |
56 | 67 | ||
@@ -77,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) | |||
77 | int cpu = smp_processor_id(); | 88 | int cpu = smp_processor_id(); |
78 | 89 | ||
79 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | 90 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
80 | pr_warn("NMI backtrace for cpu %d\n", cpu); | 91 | if (regs && cpu_in_idle(instruction_pointer(regs))) { |
81 | if (regs) | 92 | pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", |
82 | show_regs(regs); | 93 | cpu, instruction_pointer(regs)); |
83 | else | 94 | } else { |
84 | dump_stack(); | 95 | pr_warn("NMI backtrace for cpu %d\n", cpu); |
96 | if (regs) | ||
97 | show_regs(regs); | ||
98 | else | ||
99 | dump_stack(); | ||
100 | } | ||
85 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | 101 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
86 | return true; | 102 | return true; |
87 | } | 103 | } |
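With the interface now taking an explicit cpumask, an architecture wires it up along these lines (a hedged sketch modelled on the x86 APIC path; the raise helper and NMI_VECTOR delivery are assumptions for other architectures):

static void nmi_raise_cpu_backtrace(cpumask_t *mask)
{
	/* x86-style: deliver an NMI IPI to every CPU still set in the mask */
	apic->send_IPI_mask(mask, NMI_VECTOR);
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
	nmi_trigger_cpumask_backtrace(mask, exclude_self,
				      nmi_raise_cpu_backtrace);
}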
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 27fe74948882..9ac959ef4cae 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | #define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) | 34 | #define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) |
35 | 35 | ||
36 | static DEFINE_SPINLOCK(percpu_ref_switch_lock); | ||
36 | static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); | 37 | static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); |
37 | 38 | ||
38 | static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) | 39 | static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) |
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, | |||
82 | atomic_long_set(&ref->count, start_count); | 83 | atomic_long_set(&ref->count, start_count); |
83 | 84 | ||
84 | ref->release = release; | 85 | ref->release = release; |
86 | ref->confirm_switch = NULL; | ||
85 | return 0; | 87 | return 0; |
86 | } | 88 | } |
87 | EXPORT_SYMBOL_GPL(percpu_ref_init); | 89 | EXPORT_SYMBOL_GPL(percpu_ref_init); |
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref) | |||
101 | unsigned long __percpu *percpu_count = percpu_count_ptr(ref); | 103 | unsigned long __percpu *percpu_count = percpu_count_ptr(ref); |
102 | 104 | ||
103 | if (percpu_count) { | 105 | if (percpu_count) { |
106 | /* non-NULL confirm_switch indicates switching in progress */ | ||
107 | WARN_ON_ONCE(ref->confirm_switch); | ||
104 | free_percpu(percpu_count); | 108 | free_percpu(percpu_count); |
105 | ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; | 109 | ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; |
106 | } | 110 | } |
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref) | |||
161 | static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, | 165 | static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, |
162 | percpu_ref_func_t *confirm_switch) | 166 | percpu_ref_func_t *confirm_switch) |
163 | { | 167 | { |
164 | if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { | 168 | if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) { |
165 | /* switching from percpu to atomic */ | 169 | if (confirm_switch) |
166 | ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; | 170 | confirm_switch(ref); |
167 | 171 | return; | |
168 | /* | ||
169 | * Non-NULL ->confirm_switch is used to indicate that | ||
170 | * switching is in progress. Use noop one if unspecified. | ||
171 | */ | ||
172 | WARN_ON_ONCE(ref->confirm_switch); | ||
173 | ref->confirm_switch = | ||
174 | confirm_switch ?: percpu_ref_noop_confirm_switch; | ||
175 | |||
176 | percpu_ref_get(ref); /* put after confirmation */ | ||
177 | call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); | ||
178 | } else if (confirm_switch) { | ||
179 | /* | ||
180 | * Somebody already set ATOMIC. Switching may still be in | ||
181 | * progress. @confirm_switch must be invoked after the | ||
182 | * switching is complete and a full sched RCU grace period | ||
183 | * has passed. Wait synchronously for the previous | ||
184 | * switching and schedule @confirm_switch invocation. | ||
185 | */ | ||
186 | wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); | ||
187 | ref->confirm_switch = confirm_switch; | ||
188 | |||
189 | percpu_ref_get(ref); /* put after confirmation */ | ||
190 | call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu); | ||
191 | } | 172 | } |
192 | } | ||
193 | 173 | ||
194 | /** | 174 | /* switching from percpu to atomic */ |
195 | * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode | 175 | ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; |
196 | * @ref: percpu_ref to switch to atomic mode | 176 | |
197 | * @confirm_switch: optional confirmation callback | 177 | /* |
198 | * | 178 | * Non-NULL ->confirm_switch is used to indicate that switching is |
199 | * There's no reason to use this function for the usual reference counting. | 179 | * in progress. Use noop one if unspecified. |
200 | * Use percpu_ref_kill[_and_confirm](). | 180 | */ |
201 | * | 181 | ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch; |
202 | * Schedule switching of @ref to atomic mode. All its percpu counts will | 182 | |
203 | * be collected to the main atomic counter. On completion, when all CPUs | 183 | percpu_ref_get(ref); /* put after confirmation */ |
204 | * are guaraneed to be in atomic mode, @confirm_switch, which may not | 184 | call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); |
205 | * block, is invoked. This function may be invoked concurrently with all | ||
206 | * the get/put operations and can safely be mixed with kill and reinit | ||
207 | * operations. Note that @ref will stay in atomic mode across kill/reinit | ||
208 | * cycles until percpu_ref_switch_to_percpu() is called. | ||
209 | * | ||
210 | * This function normally doesn't block and can be called from any context | ||
211 | * but it may block if @confirm_kill is specified and @ref is already in | ||
212 | * the process of switching to atomic mode. In such cases, @confirm_switch | ||
213 | * will be invoked after the switching is complete. | ||
214 | * | ||
215 | * Due to the way percpu_ref is implemented, @confirm_switch will be called | ||
216 | * after at least one full sched RCU grace period has passed but this is an | ||
217 | * implementation detail and must not be depended upon. | ||
218 | */ | ||
219 | void percpu_ref_switch_to_atomic(struct percpu_ref *ref, | ||
220 | percpu_ref_func_t *confirm_switch) | ||
221 | { | ||
222 | ref->force_atomic = true; | ||
223 | __percpu_ref_switch_to_atomic(ref, confirm_switch); | ||
224 | } | 185 | } |
225 | 186 | ||
226 | static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) | 187 | static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) |
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) | |||
233 | if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) | 194 | if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) |
234 | return; | 195 | return; |
235 | 196 | ||
236 | wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); | ||
237 | |||
238 | atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); | 197 | atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); |
239 | 198 | ||
240 | /* | 199 | /* |
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) | |||
250 | ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); | 209 | ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); |
251 | } | 210 | } |
252 | 211 | ||
212 | static void __percpu_ref_switch_mode(struct percpu_ref *ref, | ||
213 | percpu_ref_func_t *confirm_switch) | ||
214 | { | ||
215 | lockdep_assert_held(&percpu_ref_switch_lock); | ||
216 | |||
217 | /* | ||
218 | * If the previous ATOMIC switching hasn't finished yet, wait for | ||
219 | * its completion. If the caller ensures that ATOMIC switching | ||
220 | * isn't in progress, this function can be called from any context. | ||
221 | */ | ||
222 | wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch, | ||
223 | percpu_ref_switch_lock); | ||
224 | |||
225 | if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD)) | ||
226 | __percpu_ref_switch_to_atomic(ref, confirm_switch); | ||
227 | else | ||
228 | __percpu_ref_switch_to_percpu(ref); | ||
229 | } | ||
230 | |||
231 | /** | ||
232 | * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode | ||
233 | * @ref: percpu_ref to switch to atomic mode | ||
234 | * @confirm_switch: optional confirmation callback | ||
235 | * | ||
236 | * There's no reason to use this function for the usual reference counting. | ||
237 | * Use percpu_ref_kill[_and_confirm](). | ||
238 | * | ||
239 | * Schedule switching of @ref to atomic mode. All its percpu counts will | ||
240 | * be collected to the main atomic counter. On completion, when all CPUs | ||
241 | * are guaraneed to be in atomic mode, @confirm_switch, which may not | ||
242 | * block, is invoked. This function may be invoked concurrently with all | ||
243 | * the get/put operations and can safely be mixed with kill and reinit | ||
244 | * operations. Note that @ref will stay in atomic mode across kill/reinit | ||
245 | * cycles until percpu_ref_switch_to_percpu() is called. | ||
246 | * | ||
247 | * This function may block if @ref is in the process of switching to atomic | ||
248 | * mode. If the caller ensures that @ref is not in the process of | ||
249 | * switching to atomic mode, this function can be called from any context. | ||
250 | */ | ||
251 | void percpu_ref_switch_to_atomic(struct percpu_ref *ref, | ||
252 | percpu_ref_func_t *confirm_switch) | ||
253 | { | ||
254 | unsigned long flags; | ||
255 | |||
256 | spin_lock_irqsave(&percpu_ref_switch_lock, flags); | ||
257 | |||
258 | ref->force_atomic = true; | ||
259 | __percpu_ref_switch_mode(ref, confirm_switch); | ||
260 | |||
261 | spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); | ||
262 | } | ||
263 | |||
253 | /** | 264 | /** |
254 | * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode | 265 | * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode |
255 | * @ref: percpu_ref to switch to percpu mode | 266 | * @ref: percpu_ref to switch to percpu mode |
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) | |||
264 | * dying or dead, the actual switching takes place on the following | 275 | * dying or dead, the actual switching takes place on the following |
265 | * percpu_ref_reinit(). | 276 | * percpu_ref_reinit(). |
266 | * | 277 | * |
267 | * This function normally doesn't block and can be called from any context | 278 | * This function may block if @ref is in the process of switching to atomic |
268 | * but it may block if @ref is in the process of switching to atomic mode | 279 | * mode. If the caller ensures that @ref is not in the process of |
269 | * by percpu_ref_switch_atomic(). | 280 | * switching to atomic mode, this function can be called from any context. |
270 | */ | 281 | */ |
271 | void percpu_ref_switch_to_percpu(struct percpu_ref *ref) | 282 | void percpu_ref_switch_to_percpu(struct percpu_ref *ref) |
272 | { | 283 | { |
284 | unsigned long flags; | ||
285 | |||
286 | spin_lock_irqsave(&percpu_ref_switch_lock, flags); | ||
287 | |||
273 | ref->force_atomic = false; | 288 | ref->force_atomic = false; |
289 | __percpu_ref_switch_mode(ref, NULL); | ||
274 | 290 | ||
275 | /* a dying or dead ref can't be switched to percpu mode w/o reinit */ | 291 | spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); |
276 | if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) | ||
277 | __percpu_ref_switch_to_percpu(ref); | ||
278 | } | 292 | } |
279 | 293 | ||
280 | /** | 294 | /** |
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref) | |||
290 | * | 304 | * |
291 | * This function normally doesn't block and can be called from any context | 305 | * This function normally doesn't block and can be called from any context |
292 | * but it may block if @confirm_kill is specified and @ref is in the | 306 | * but it may block if @confirm_kill is specified and @ref is in the |
293 | * process of switching to atomic mode by percpu_ref_switch_atomic(). | 307 | * process of switching to atomic mode by percpu_ref_switch_to_atomic(). |
294 | * | ||
295 | * Due to the way percpu_ref is implemented, @confirm_switch will be called | ||
296 | * after at least one full sched RCU grace period has passed but this is an | ||
297 | * implementation detail and must not be depended upon. | ||
298 | */ | 308 | */ |
299 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, | 309 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, |
300 | percpu_ref_func_t *confirm_kill) | 310 | percpu_ref_func_t *confirm_kill) |
301 | { | 311 | { |
312 | unsigned long flags; | ||
313 | |||
314 | spin_lock_irqsave(&percpu_ref_switch_lock, flags); | ||
315 | |||
302 | WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, | 316 | WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, |
303 | "%s called more than once on %pf!", __func__, ref->release); | 317 | "%s called more than once on %pf!", __func__, ref->release); |
304 | 318 | ||
305 | ref->percpu_count_ptr |= __PERCPU_REF_DEAD; | 319 | ref->percpu_count_ptr |= __PERCPU_REF_DEAD; |
306 | __percpu_ref_switch_to_atomic(ref, confirm_kill); | 320 | __percpu_ref_switch_mode(ref, confirm_kill); |
307 | percpu_ref_put(ref); | 321 | percpu_ref_put(ref); |
322 | |||
323 | spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); | ||
308 | } | 324 | } |
309 | EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); | 325 | EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); |
310 | 326 | ||
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); | |||
321 | */ | 337 | */ |
322 | void percpu_ref_reinit(struct percpu_ref *ref) | 338 | void percpu_ref_reinit(struct percpu_ref *ref) |
323 | { | 339 | { |
340 | unsigned long flags; | ||
341 | |||
342 | spin_lock_irqsave(&percpu_ref_switch_lock, flags); | ||
343 | |||
324 | WARN_ON_ONCE(!percpu_ref_is_zero(ref)); | 344 | WARN_ON_ONCE(!percpu_ref_is_zero(ref)); |
325 | 345 | ||
326 | ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; | 346 | ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; |
327 | percpu_ref_get(ref); | 347 | percpu_ref_get(ref); |
328 | if (!ref->force_atomic) | 348 | __percpu_ref_switch_mode(ref, NULL); |
329 | __percpu_ref_switch_to_percpu(ref); | 349 | |
350 | spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); | ||
330 | } | 351 | } |
331 | EXPORT_SYMBOL_GPL(percpu_ref_reinit); | 352 | EXPORT_SYMBOL_GPL(percpu_ref_reinit); |
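For orientation, a minimal caller of this API might look roughly like the sketch below; the my_obj/my_release names are purely illustrative (not part of this patch) and error handling is elided.

	#include <linux/percpu-refcount.h>
	#include <linux/slab.h>

	struct my_obj {
		struct percpu_ref ref;
		/* ... payload ... */
	};

	static void my_release(struct percpu_ref *ref)
	{
		struct my_obj *obj = container_of(ref, struct my_obj, ref);

		kfree(obj);
	}

	static int my_obj_init(struct my_obj *obj)
	{
		/* starts in percpu mode unless PERCPU_REF_INIT_ATOMIC is passed */
		return percpu_ref_init(&obj->ref, my_release, 0, GFP_KERNEL);
	}

	static void my_obj_shutdown(struct my_obj *obj)
	{
		/* drops the initial reference; my_release() runs once all
		 * remaining get/put pairs have drained
		 */
		percpu_ref_kill(&obj->ref);
	}

The explicit percpu_ref_switch_to_atomic()/_to_percpu() calls are only for callers that need to force a mode (for example to read an exact count), which is why the kerneldoc above steers ordinary users toward percpu_ref_kill[_and_confirm]().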
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 91f0727e3cad..8e6d552c40dd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -1583,15 +1583,10 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | |||
1583 | } | 1583 | } |
1584 | EXPORT_SYMBOL(radix_tree_delete); | 1584 | EXPORT_SYMBOL(radix_tree_delete); |
1585 | 1585 | ||
1586 | struct radix_tree_node *radix_tree_replace_clear_tags( | 1586 | void radix_tree_clear_tags(struct radix_tree_root *root, |
1587 | struct radix_tree_root *root, | 1587 | struct radix_tree_node *node, |
1588 | unsigned long index, void *entry) | 1588 | void **slot) |
1589 | { | 1589 | { |
1590 | struct radix_tree_node *node; | ||
1591 | void **slot; | ||
1592 | |||
1593 | __radix_tree_lookup(root, index, &node, &slot); | ||
1594 | |||
1595 | if (node) { | 1590 | if (node) { |
1596 | unsigned int tag, offset = get_slot_offset(node, slot); | 1591 | unsigned int tag, offset = get_slot_offset(node, slot); |
1597 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 1592 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
@@ -1600,9 +1595,6 @@ struct radix_tree_node *radix_tree_replace_clear_tags( | |||
1600 | /* Clear root node tags */ | 1595 | /* Clear root node tags */ |
1601 | root->gfp_mask &= __GFP_BITS_MASK; | 1596 | root->gfp_mask &= __GFP_BITS_MASK; |
1602 | } | 1597 | } |
1603 | |||
1604 | radix_tree_replace_slot(slot, entry); | ||
1605 | return node; | ||
1606 | } | 1598 | } |
1607 | 1599 | ||
1608 | /** | 1600 | /** |
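With radix_tree_replace_clear_tags() gone, a caller now performs the lookup and the slot replacement itself and only delegates the tag clearing. A rough sketch of the new calling pattern (variable names illustrative, assuming the two-argument radix_tree_replace_slot() shown in the removed code):

	void **slot;
	struct radix_tree_node *node;
	void *old;

	old = __radix_tree_lookup(root, index, &node, &slot);
	if (slot) {
		radix_tree_clear_tags(root, node, slot);
		radix_tree_replace_slot(slot, new_entry);
	}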
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 0a7e494b2bcd..f01b1cb04f91 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore | |||
@@ -3,3 +3,4 @@ altivec*.c | |||
3 | int*.c | 3 | int*.c |
4 | tables.c | 4 | tables.c |
5 | neon?.c | 5 | neon?.c |
6 | s390vx?.c | ||
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 3b10a48fa040..3057011f5599 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile | |||
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | |||
3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ | 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ |
4 | int8.o int16.o int32.o | 4 | int8.o int16.o int32.o |
5 | 5 | ||
6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o | 6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o |
7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o | 7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o |
8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o | 8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o |
9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o | 9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o |
10 | raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o | ||
10 | 11 | ||
11 | hostprogs-y += mktables | 12 | hostprogs-y += mktables |
12 | 13 | ||
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8 | |||
116 | $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE | 117 | $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE |
117 | $(call if_changed,unroll) | 118 | $(call if_changed,unroll) |
118 | 119 | ||
120 | targets += s390vx8.c | ||
121 | $(obj)/s390vx8.c: UNROLL := 8 | ||
122 | $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE | ||
123 | $(call if_changed,unroll) | ||
124 | |||
119 | quiet_cmd_mktable = TABLE $@ | 125 | quiet_cmd_mktable = TABLE $@ |
120 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | 126 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) |
121 | 127 | ||
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 975c6e0434bd..7857049fd7d3 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = { | |||
49 | &raid6_avx2x1, | 49 | &raid6_avx2x1, |
50 | &raid6_avx2x2, | 50 | &raid6_avx2x2, |
51 | #endif | 51 | #endif |
52 | #ifdef CONFIG_AS_AVX512 | ||
53 | &raid6_avx512x1, | ||
54 | &raid6_avx512x2, | ||
55 | #endif | ||
52 | #endif | 56 | #endif |
53 | #if defined(__x86_64__) && !defined(__arch_um__) | 57 | #if defined(__x86_64__) && !defined(__arch_um__) |
54 | &raid6_sse2x1, | 58 | &raid6_sse2x1, |
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = { | |||
59 | &raid6_avx2x2, | 63 | &raid6_avx2x2, |
60 | &raid6_avx2x4, | 64 | &raid6_avx2x4, |
61 | #endif | 65 | #endif |
66 | #ifdef CONFIG_AS_AVX512 | ||
67 | &raid6_avx512x1, | ||
68 | &raid6_avx512x2, | ||
69 | &raid6_avx512x4, | ||
70 | #endif | ||
62 | #endif | 71 | #endif |
63 | #ifdef CONFIG_ALTIVEC | 72 | #ifdef CONFIG_ALTIVEC |
64 | &raid6_altivec1, | 73 | &raid6_altivec1, |
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = { | |||
69 | #if defined(CONFIG_TILEGX) | 78 | #if defined(CONFIG_TILEGX) |
70 | &raid6_tilegx8, | 79 | &raid6_tilegx8, |
71 | #endif | 80 | #endif |
81 | #if defined(CONFIG_S390) | ||
82 | &raid6_s390vx8, | ||
83 | #endif | ||
72 | &raid6_intx1, | 84 | &raid6_intx1, |
73 | &raid6_intx2, | 85 | &raid6_intx2, |
74 | &raid6_intx4, | 86 | &raid6_intx4, |
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **); | |||
89 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | 101 | EXPORT_SYMBOL_GPL(raid6_datap_recov); |
90 | 102 | ||
91 | const struct raid6_recov_calls *const raid6_recov_algos[] = { | 103 | const struct raid6_recov_calls *const raid6_recov_algos[] = { |
104 | #ifdef CONFIG_AS_AVX512 | ||
105 | &raid6_recov_avx512, | ||
106 | #endif | ||
92 | #ifdef CONFIG_AS_AVX2 | 107 | #ifdef CONFIG_AS_AVX2 |
93 | &raid6_recov_avx2, | 108 | &raid6_recov_avx2, |
94 | #endif | 109 | #endif |
95 | #ifdef CONFIG_AS_SSSE3 | 110 | #ifdef CONFIG_AS_SSSE3 |
96 | &raid6_recov_ssse3, | 111 | &raid6_recov_ssse3, |
97 | #endif | 112 | #endif |
113 | #ifdef CONFIG_S390 | ||
114 | &raid6_recov_s390xc, | ||
115 | #endif | ||
98 | &raid6_recov_intx1, | 116 | &raid6_recov_intx1, |
99 | NULL | 117 | NULL |
100 | }; | 118 | }; |
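The arrays above are only registration tables; at init time the fastest usable implementation is chosen by benchmarking. A simplified sketch, loosely modelled on raid6_select_algo() — faster_than() is an illustrative stand-in for the jiffies-based benchmark, not a real helper:

	const struct raid6_calls *best = NULL;
	int i;

	for (i = 0; raid6_algos[i]; i++) {
		if (raid6_algos[i]->valid && !raid6_algos[i]->valid())
			continue;	/* e.g. no AVX-512, no s390 vector facility */
		if (!best || faster_than(raid6_algos[i], best))
			best = raid6_algos[i];
	}
	if (best)
		raid6_call = *best;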
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c new file mode 100644 index 000000000000..f524a7972006 --- /dev/null +++ b/lib/raid6/avx512.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /* -*- linux-c -*- -------------------------------------------------------- | ||
2 | * | ||
3 | * Copyright (C) 2016 Intel Corporation | ||
4 | * | ||
5 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
6 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
7 | * | ||
8 | * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved | ||
9 | * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
14 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
15 | * (at your option) any later version; incorporated herein by reference. | ||
16 | * | ||
17 | * ----------------------------------------------------------------------- | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * AVX512 implementation of RAID-6 syndrome functions | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifdef CONFIG_AS_AVX512 | ||
26 | |||
27 | #include <linux/raid/pq.h> | ||
28 | #include "x86.h" | ||
29 | |||
30 | static const struct raid6_avx512_constants { | ||
31 | u64 x1d[8]; | ||
32 | } raid6_avx512_constants __aligned(512) = { | ||
33 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
34 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
35 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
36 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, | ||
37 | }; | ||
38 | |||
39 | static int raid6_have_avx512(void) | ||
40 | { | ||
41 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
42 | boot_cpu_has(X86_FEATURE_AVX) && | ||
43 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
44 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
45 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
46 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
47 | } | ||
48 | |||
49 | static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
50 | { | ||
51 | u8 **dptr = (u8 **)ptrs; | ||
52 | u8 *p, *q; | ||
53 | int d, z, z0; | ||
54 | |||
55 | z0 = disks - 3; /* Highest data disk */ | ||
56 | p = dptr[z0+1]; /* XOR parity */ | ||
57 | q = dptr[z0+2]; /* RS syndrome */ | ||
58 | |||
59 | kernel_fpu_begin(); | ||
60 | |||
61 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
62 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
63 | : | ||
64 | : "m" (raid6_avx512_constants.x1d[0])); | ||
65 | |||
66 | for (d = 0; d < bytes; d += 64) { | ||
67 | asm volatile("prefetchnta %0\n\t" | ||
68 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
69 | "prefetchnta %1\n\t" | ||
70 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
71 | "vmovdqa64 %1,%%zmm6" | ||
72 | : | ||
73 | : "m" (dptr[z0][d]), "m" (dptr[z0-1][d])); | ||
74 | for (z = z0-2; z >= 0; z--) { | ||
75 | asm volatile("prefetchnta %0\n\t" | ||
76 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
77 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
78 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
79 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
80 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
81 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
82 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
83 | "vmovdqa64 %0,%%zmm6" | ||
84 | : | ||
85 | : "m" (dptr[z][d])); | ||
86 | } | ||
87 | asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
88 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
89 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
90 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
91 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
92 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
93 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
94 | "vmovntdq %%zmm2,%0\n\t" | ||
95 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
96 | "vmovntdq %%zmm4,%1\n\t" | ||
97 | "vpxorq %%zmm4,%%zmm4,%%zmm4" | ||
98 | : | ||
99 | : "m" (p[d]), "m" (q[d])); | ||
100 | } | ||
101 | |||
102 | asm volatile("sfence" : : : "memory"); | ||
103 | kernel_fpu_end(); | ||
104 | } | ||
105 | |||
106 | static void raid6_avx5121_xor_syndrome(int disks, int start, int stop, | ||
107 | size_t bytes, void **ptrs) | ||
108 | { | ||
109 | u8 **dptr = (u8 **)ptrs; | ||
110 | u8 *p, *q; | ||
111 | int d, z, z0; | ||
112 | |||
113 | z0 = stop; /* P/Q right side optimization */ | ||
114 | p = dptr[disks-2]; /* XOR parity */ | ||
115 | q = dptr[disks-1]; /* RS syndrome */ | ||
116 | |||
117 | kernel_fpu_begin(); | ||
118 | |||
119 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
120 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
121 | |||
122 | for (d = 0 ; d < bytes ; d += 64) { | ||
123 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
124 | "vmovdqa64 %1,%%zmm2\n\t" | ||
125 | "vpxorq %%zmm4,%%zmm2,%%zmm2" | ||
126 | : | ||
127 | : "m" (dptr[z0][d]), "m" (p[d])); | ||
128 | /* P/Q data pages */ | ||
129 | for (z = z0-1 ; z >= start ; z--) { | ||
130 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
131 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
132 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
133 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
134 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
135 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
136 | "vmovdqa64 %0,%%zmm5\n\t" | ||
137 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
138 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
139 | : | ||
140 | : "m" (dptr[z][d])); | ||
141 | } | ||
142 | /* P/Q left side optimization */ | ||
143 | for (z = start-1 ; z >= 0 ; z--) { | ||
144 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
145 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
146 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
147 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
148 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
149 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
150 | : | ||
151 | : ); | ||
152 | } | ||
153 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
154 | /* Don't use movntdq for r/w memory area < cache line */ | ||
155 | "vmovdqa64 %%zmm4,%0\n\t" | ||
156 | "vmovdqa64 %%zmm2,%1" | ||
157 | : | ||
158 | : "m" (q[d]), "m" (p[d])); | ||
159 | } | ||
160 | |||
161 | asm volatile("sfence" : : : "memory"); | ||
162 | kernel_fpu_end(); | ||
163 | } | ||
164 | |||
165 | const struct raid6_calls raid6_avx512x1 = { | ||
166 | raid6_avx5121_gen_syndrome, | ||
167 | raid6_avx5121_xor_syndrome, | ||
168 | raid6_have_avx512, | ||
169 | "avx512x1", | ||
170 | 1 /* Has cache hints */ | ||
171 | }; | ||
172 | |||
173 | /* | ||
174 | * Unrolled-by-2 AVX512 implementation | ||
175 | */ | ||
176 | static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
177 | { | ||
178 | u8 **dptr = (u8 **)ptrs; | ||
179 | u8 *p, *q; | ||
180 | int d, z, z0; | ||
181 | |||
182 | z0 = disks - 3; /* Highest data disk */ | ||
183 | p = dptr[z0+1]; /* XOR parity */ | ||
184 | q = dptr[z0+2]; /* RS syndrome */ | ||
185 | |||
186 | kernel_fpu_begin(); | ||
187 | |||
188 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
189 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
190 | : | ||
191 | : "m" (raid6_avx512_constants.x1d[0])); | ||
192 | |||
193 | /* We uniformly assume a single prefetch covers at least 64 bytes */ | ||
194 | for (d = 0; d < bytes; d += 128) { | ||
195 | asm volatile("prefetchnta %0\n\t" | ||
196 | "prefetchnta %1\n\t" | ||
197 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
198 | "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */ | ||
199 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
200 | "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */ | ||
201 | : | ||
202 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64])); | ||
203 | for (z = z0-1; z >= 0; z--) { | ||
204 | asm volatile("prefetchnta %0\n\t" | ||
205 | "prefetchnta %1\n\t" | ||
206 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
207 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
208 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
209 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
210 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
211 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
212 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
213 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
214 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
215 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
216 | "vmovdqa64 %0,%%zmm5\n\t" | ||
217 | "vmovdqa64 %1,%%zmm7\n\t" | ||
218 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
219 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
220 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
221 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
222 | : | ||
223 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
224 | } | ||
225 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
226 | "vmovntdq %%zmm3,%1\n\t" | ||
227 | "vmovntdq %%zmm4,%2\n\t" | ||
228 | "vmovntdq %%zmm6,%3" | ||
229 | : | ||
230 | : "m" (p[d]), "m" (p[d+64]), "m" (q[d]), | ||
231 | "m" (q[d+64])); | ||
232 | } | ||
233 | |||
234 | asm volatile("sfence" : : : "memory"); | ||
235 | kernel_fpu_end(); | ||
236 | } | ||
237 | |||
238 | static void raid6_avx5122_xor_syndrome(int disks, int start, int stop, | ||
239 | size_t bytes, void **ptrs) | ||
240 | { | ||
241 | u8 **dptr = (u8 **)ptrs; | ||
242 | u8 *p, *q; | ||
243 | int d, z, z0; | ||
244 | |||
245 | z0 = stop; /* P/Q right side optimization */ | ||
246 | p = dptr[disks-2]; /* XOR parity */ | ||
247 | q = dptr[disks-1]; /* RS syndrome */ | ||
248 | |||
249 | kernel_fpu_begin(); | ||
250 | |||
251 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
252 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
253 | |||
254 | for (d = 0 ; d < bytes ; d += 128) { | ||
255 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
256 | "vmovdqa64 %1,%%zmm6\n\t" | ||
257 | "vmovdqa64 %2,%%zmm2\n\t" | ||
258 | "vmovdqa64 %3,%%zmm3\n\t" | ||
259 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
260 | "vpxorq %%zmm6,%%zmm3,%%zmm3" | ||
261 | : | ||
262 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
263 | "m" (p[d]), "m" (p[d+64])); | ||
264 | /* P/Q data pages */ | ||
265 | for (z = z0-1 ; z >= start ; z--) { | ||
266 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
267 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
268 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
269 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
270 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
271 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
272 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
273 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
274 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
275 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
276 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
277 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
278 | "vmovdqa64 %0,%%zmm5\n\t" | ||
279 | "vmovdqa64 %1,%%zmm7\n\t" | ||
280 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
281 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
282 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
283 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
284 | : | ||
285 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
286 | } | ||
287 | /* P/Q left side optimization */ | ||
288 | for (z = start-1 ; z >= 0 ; z--) { | ||
289 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
290 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
291 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
292 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
293 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
294 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
295 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
296 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
297 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
298 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
299 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
300 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
301 | : | ||
302 | : ); | ||
303 | } | ||
304 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
305 | "vpxorq %1,%%zmm6,%%zmm6\n\t" | ||
306 | /* Don't use movntdq for r/w | ||
307 | * memory area < cache line | ||
308 | */ | ||
309 | "vmovdqa64 %%zmm4,%0\n\t" | ||
310 | "vmovdqa64 %%zmm6,%1\n\t" | ||
311 | "vmovdqa64 %%zmm2,%2\n\t" | ||
312 | "vmovdqa64 %%zmm3,%3" | ||
313 | : | ||
314 | : "m" (q[d]), "m" (q[d+64]), "m" (p[d]), | ||
315 | "m" (p[d+64])); | ||
316 | } | ||
317 | |||
318 | asm volatile("sfence" : : : "memory"); | ||
319 | kernel_fpu_end(); | ||
320 | } | ||
321 | |||
322 | const struct raid6_calls raid6_avx512x2 = { | ||
323 | raid6_avx5122_gen_syndrome, | ||
324 | raid6_avx5122_xor_syndrome, | ||
325 | raid6_have_avx512, | ||
326 | "avx512x2", | ||
327 | 1 /* Has cache hints */ | ||
328 | }; | ||
329 | |||
330 | #ifdef CONFIG_X86_64 | ||
331 | |||
332 | /* | ||
333 | * Unrolled-by-4 AVX512 implementation | ||
334 | */ | ||
335 | static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
336 | { | ||
337 | u8 **dptr = (u8 **)ptrs; | ||
338 | u8 *p, *q; | ||
339 | int d, z, z0; | ||
340 | |||
341 | z0 = disks - 3; /* Highest data disk */ | ||
342 | p = dptr[z0+1]; /* XOR parity */ | ||
343 | q = dptr[z0+2]; /* RS syndrome */ | ||
344 | |||
345 | kernel_fpu_begin(); | ||
346 | |||
347 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
348 | "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */ | ||
349 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */ | ||
350 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */ | ||
351 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */ | ||
352 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */ | ||
353 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */ | ||
354 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */ | ||
355 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */ | ||
356 | "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */ | ||
357 | : | ||
358 | : "m" (raid6_avx512_constants.x1d[0])); | ||
359 | |||
360 | for (d = 0; d < bytes; d += 256) { | ||
361 | for (z = z0; z >= 0; z--) { | ||
362 | asm volatile("prefetchnta %0\n\t" | ||
363 | "prefetchnta %1\n\t" | ||
364 | "prefetchnta %2\n\t" | ||
365 | "prefetchnta %3\n\t" | ||
366 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
367 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
368 | "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t" | ||
369 | "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t" | ||
370 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
371 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
372 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
373 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
374 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
375 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
376 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
377 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
378 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
379 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
380 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
381 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
382 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
383 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
384 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
385 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
386 | "vmovdqa64 %0,%%zmm5\n\t" | ||
387 | "vmovdqa64 %1,%%zmm7\n\t" | ||
388 | "vmovdqa64 %2,%%zmm13\n\t" | ||
389 | "vmovdqa64 %3,%%zmm15\n\t" | ||
390 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
391 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
392 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
393 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n" | ||
394 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
395 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
396 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
397 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
398 | : | ||
399 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
400 | "m" (dptr[z][d+128]), "m" (dptr[z][d+192])); | ||
401 | } | ||
402 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
403 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
404 | "vmovntdq %%zmm3,%1\n\t" | ||
405 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" | ||
406 | "vmovntdq %%zmm10,%2\n\t" | ||
407 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" | ||
408 | "vmovntdq %%zmm11,%3\n\t" | ||
409 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" | ||
410 | "vmovntdq %%zmm4,%4\n\t" | ||
411 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" | ||
412 | "vmovntdq %%zmm6,%5\n\t" | ||
413 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" | ||
414 | "vmovntdq %%zmm12,%6\n\t" | ||
415 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" | ||
416 | "vmovntdq %%zmm14,%7\n\t" | ||
417 | "vpxorq %%zmm14,%%zmm14,%%zmm14" | ||
418 | : | ||
419 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
420 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
421 | "m" (q[d+128]), "m" (q[d+192])); | ||
422 | } | ||
423 | |||
424 | asm volatile("sfence" : : : "memory"); | ||
425 | kernel_fpu_end(); | ||
426 | } | ||
427 | |||
428 | static void raid6_avx5124_xor_syndrome(int disks, int start, int stop, | ||
429 | size_t bytes, void **ptrs) | ||
430 | { | ||
431 | u8 **dptr = (u8 **)ptrs; | ||
432 | u8 *p, *q; | ||
433 | int d, z, z0; | ||
434 | |||
435 | z0 = stop; /* P/Q right side optimization */ | ||
436 | p = dptr[disks-2]; /* XOR parity */ | ||
437 | q = dptr[disks-1]; /* RS syndrome */ | ||
438 | |||
439 | kernel_fpu_begin(); | ||
440 | |||
441 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
442 | :: "m" (raid6_avx512_constants.x1d[0])); | ||
443 | |||
444 | for (d = 0 ; d < bytes ; d += 256) { | ||
445 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
446 | "vmovdqa64 %1,%%zmm6\n\t" | ||
447 | "vmovdqa64 %2,%%zmm12\n\t" | ||
448 | "vmovdqa64 %3,%%zmm14\n\t" | ||
449 | "vmovdqa64 %4,%%zmm2\n\t" | ||
450 | "vmovdqa64 %5,%%zmm3\n\t" | ||
451 | "vmovdqa64 %6,%%zmm10\n\t" | ||
452 | "vmovdqa64 %7,%%zmm11\n\t" | ||
453 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
454 | "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t" | ||
455 | "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t" | ||
456 | "vpxorq %%zmm14,%%zmm11,%%zmm11" | ||
457 | : | ||
458 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
459 | "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]), | ||
460 | "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
461 | "m" (p[d+192])); | ||
462 | /* P/Q data pages */ | ||
463 | for (z = z0-1 ; z >= start ; z--) { | ||
464 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
465 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
466 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
467 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
468 | "prefetchnta %0\n\t" | ||
469 | "prefetchnta %2\n\t" | ||
470 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
471 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
472 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
473 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
474 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
475 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
476 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
477 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
478 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
479 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
480 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
481 | "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t" | ||
482 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
483 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
484 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
485 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
486 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
487 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
488 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
489 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
490 | "vmovdqa64 %0,%%zmm5\n\t" | ||
491 | "vmovdqa64 %1,%%zmm7\n\t" | ||
492 | "vmovdqa64 %2,%%zmm13\n\t" | ||
493 | "vmovdqa64 %3,%%zmm15\n\t" | ||
494 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
495 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
496 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
497 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t" | ||
498 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
499 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
500 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
501 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
502 | : | ||
503 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
504 | "m" (dptr[z][d+128]), | ||
505 | "m" (dptr[z][d+192])); | ||
506 | } | ||
507 | asm volatile("prefetchnta %0\n\t" | ||
508 | "prefetchnta %1\n\t" | ||
509 | : | ||
510 | : "m" (q[d]), "m" (q[d+128])); | ||
511 | /* P/Q left side optimization */ | ||
512 | for (z = start-1 ; z >= 0 ; z--) { | ||
513 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
514 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
515 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
516 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
517 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
518 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
519 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
520 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
521 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
522 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
523 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
524 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
525 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
526 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
527 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
528 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
529 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
530 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
531 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
532 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
533 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
534 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
535 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
536 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
537 | : | ||
538 | : ); | ||
539 | } | ||
540 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
541 | "vmovntdq %%zmm3,%1\n\t" | ||
542 | "vmovntdq %%zmm10,%2\n\t" | ||
543 | "vmovntdq %%zmm11,%3\n\t" | ||
544 | "vpxorq %4,%%zmm4,%%zmm4\n\t" | ||
545 | "vpxorq %5,%%zmm6,%%zmm6\n\t" | ||
546 | "vpxorq %6,%%zmm12,%%zmm12\n\t" | ||
547 | "vpxorq %7,%%zmm14,%%zmm14\n\t" | ||
548 | "vmovntdq %%zmm4,%4\n\t" | ||
549 | "vmovntdq %%zmm6,%5\n\t" | ||
550 | "vmovntdq %%zmm12,%6\n\t" | ||
551 | "vmovntdq %%zmm14,%7" | ||
552 | : | ||
553 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
554 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
555 | "m" (q[d+128]), "m" (q[d+192])); | ||
556 | } | ||
557 | asm volatile("sfence" : : : "memory"); | ||
558 | kernel_fpu_end(); | ||
559 | } | ||
560 | const struct raid6_calls raid6_avx512x4 = { | ||
561 | raid6_avx5124_gen_syndrome, | ||
562 | raid6_avx5124_xor_syndrome, | ||
563 | raid6_have_avx512, | ||
564 | "avx512x4", | ||
565 | 1 /* Has cache hints */ | ||
566 | }; | ||
567 | #endif | ||
568 | |||
569 | #endif /* CONFIG_AS_AVX512 */ | ||
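Stripped of the vectorization, the inner step of the syndrome loops above is a GF(2^8) multiply-by-2 (generator polynomial 0x11d, hence the 0x1d constant) folded with the next data byte. A scalar sketch for one byte, for reference only:

	static inline u8 gf256_mul2(u8 v)
	{
		/* vpcmpgtb/vpandq build the 0x1d mask, vpaddb doubles, vpxorq folds */
		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
	}

	/* per data byte: p ^= data;  q = gf256_mul2(q) ^ data; */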
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c new file mode 100644 index 000000000000..625aafa33b61 --- /dev/null +++ b/lib/raid6/recov_avx512.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Intel Corporation | ||
3 | * | ||
4 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
5 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #ifdef CONFIG_AS_AVX512 | ||
15 | |||
16 | #include <linux/raid/pq.h> | ||
17 | #include "x86.h" | ||
18 | |||
19 | static int raid6_has_avx512(void) | ||
20 | { | ||
21 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
22 | boot_cpu_has(X86_FEATURE_AVX) && | ||
23 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
24 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
25 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
26 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
27 | } | ||
28 | |||
29 | static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, | ||
30 | int failb, void **ptrs) | ||
31 | { | ||
32 | u8 *p, *q, *dp, *dq; | ||
33 | const u8 *pbmul; /* P multiplier table for B data */ | ||
34 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
35 | const u8 x0f = 0x0f; | ||
36 | |||
37 | p = (u8 *)ptrs[disks-2]; | ||
38 | q = (u8 *)ptrs[disks-1]; | ||
39 | |||
40 | /* | ||
41 | * Compute syndrome with zero for the missing data pages | ||
42 | * Use the dead data pages as temporary storage for | ||
43 | * delta p and delta q | ||
44 | */ | ||
45 | |||
46 | dp = (u8 *)ptrs[faila]; | ||
47 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
48 | ptrs[disks-2] = dp; | ||
49 | dq = (u8 *)ptrs[failb]; | ||
50 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
51 | ptrs[disks-1] = dq; | ||
52 | |||
53 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
54 | |||
55 | /* Restore pointer table */ | ||
56 | ptrs[faila] = dp; | ||
57 | ptrs[failb] = dq; | ||
58 | ptrs[disks-2] = p; | ||
59 | ptrs[disks-1] = q; | ||
60 | |||
61 | /* Now, pick the proper data tables */ | ||
62 | pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; | ||
63 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ | ||
64 | raid6_gfexp[failb]]]; | ||
65 | |||
66 | kernel_fpu_begin(); | ||
67 | |||
68 | /* zmm7 = x0f[64] */ | ||
69 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
70 | |||
71 | while (bytes) { | ||
72 | #ifdef CONFIG_X86_64 | ||
73 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
74 | "vmovdqa64 %1, %%zmm9\n\t" | ||
75 | "vmovdqa64 %2, %%zmm0\n\t" | ||
76 | "vmovdqa64 %3, %%zmm8\n\t" | ||
77 | "vpxorq %4, %%zmm1, %%zmm1\n\t" | ||
78 | "vpxorq %5, %%zmm9, %%zmm9\n\t" | ||
79 | "vpxorq %6, %%zmm0, %%zmm0\n\t" | ||
80 | "vpxorq %7, %%zmm8, %%zmm8" | ||
81 | : | ||
82 | : "m" (q[0]), "m" (q[64]), "m" (p[0]), | ||
83 | "m" (p[64]), "m" (dq[0]), "m" (dq[64]), | ||
84 | "m" (dp[0]), "m" (dp[64])); | ||
85 | |||
86 | /* | ||
87 | * 1 = dq[0] ^ q[0] | ||
88 | * 9 = dq[64] ^ q[64] | ||
89 | * 0 = dp[0] ^ p[0] | ||
90 | * 8 = dp[64] ^ p[64] | ||
91 | */ | ||
92 | |||
93 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
94 | "vbroadcasti64x2 %1, %%zmm5" | ||
95 | : | ||
96 | : "m" (qmul[0]), "m" (qmul[16])); | ||
97 | |||
98 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
99 | "vpsraw $4, %%zmm9, %%zmm12\n\t" | ||
100 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
101 | "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" | ||
102 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
103 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
104 | "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" | ||
105 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
106 | "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" | ||
107 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
108 | "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" | ||
109 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
110 | : | ||
111 | : ); | ||
112 | |||
113 | /* | ||
114 | * 5 = qx[0] | ||
115 | * 15 = qx[64] | ||
116 | */ | ||
117 | |||
118 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
119 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
120 | "vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
121 | "vpsraw $4, %%zmm8, %%zmm6\n\t" | ||
122 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
123 | "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" | ||
124 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
125 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
126 | "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" | ||
127 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
128 | "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" | ||
129 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
130 | "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" | ||
131 | "vpxorq %%zmm12, %%zmm13, %%zmm13" | ||
132 | : | ||
133 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
134 | |||
135 | /* | ||
136 | * 1 = pbmul[px[0]] | ||
137 | * 13 = pbmul[px[64]] | ||
138 | */ | ||
139 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
140 | "vpxorq %%zmm15, %%zmm13, %%zmm13" | ||
141 | : | ||
142 | : ); | ||
143 | |||
144 | /* | ||
145 | * 1 = db = DQ | ||
146 | * 13 = db[64] = DQ[64] | ||
147 | */ | ||
148 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
149 | "vmovdqa64 %%zmm13,%1\n\t" | ||
150 | "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
151 | "vpxorq %%zmm13, %%zmm8, %%zmm8" | ||
152 | : | ||
153 | : "m" (dq[0]), "m" (dq[64])); | ||
154 | |||
155 | asm volatile("vmovdqa64 %%zmm0, %0\n\t" | ||
156 | "vmovdqa64 %%zmm8, %1" | ||
157 | : | ||
158 | : "m" (dp[0]), "m" (dp[64])); | ||
159 | |||
160 | bytes -= 128; | ||
161 | p += 128; | ||
162 | q += 128; | ||
163 | dp += 128; | ||
164 | dq += 128; | ||
165 | #else | ||
166 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
167 | "vmovdqa64 %1, %%zmm0\n\t" | ||
168 | "vpxorq %2, %%zmm1, %%zmm1\n\t" | ||
169 | "vpxorq %3, %%zmm0, %%zmm0" | ||
170 | : | ||
171 | : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); | ||
172 | |||
173 | /* 1 = dq ^ q; 0 = dp ^ p */ | ||
174 | |||
175 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
176 | "vbroadcasti64x2 %1, %%zmm5" | ||
177 | : | ||
178 | : "m" (qmul[0]), "m" (qmul[16])); | ||
179 | |||
180 | /* | ||
181 | * 1 = dq ^ q | ||
182 | * 3 = (dq ^ q) >> 4 | ||
183 | */ | ||
184 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
185 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
186 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
187 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
188 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
189 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
190 | : | ||
191 | : ); | ||
192 | |||
193 | /* 5 = qx */ | ||
194 | |||
195 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
196 | "vbroadcasti64x2 %1, %%zmm1" | ||
197 | : | ||
198 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
199 | |||
200 | asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
201 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
202 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
203 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
204 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
205 | "vpxorq %%zmm4, %%zmm1, %%zmm1" | ||
206 | : | ||
207 | : ); | ||
208 | |||
209 | /* 1 = pbmul[px] */ | ||
210 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
211 | /* 1 = db = DQ */ | ||
212 | "vmovdqa64 %%zmm1, %0\n\t" | ||
213 | : | ||
214 | : "m" (dq[0])); | ||
215 | |||
216 | asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
217 | "vmovdqa64 %%zmm0, %0" | ||
218 | : | ||
219 | : "m" (dp[0])); | ||
220 | |||
221 | bytes -= 64; | ||
222 | p += 64; | ||
223 | q += 64; | ||
224 | dp += 64; | ||
225 | dq += 64; | ||
226 | #endif | ||
227 | } | ||
228 | |||
229 | kernel_fpu_end(); | ||
230 | } | ||
231 | |||
232 | static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, | ||
233 | void **ptrs) | ||
234 | { | ||
235 | u8 *p, *q, *dq; | ||
236 | const u8 *qmul; /* Q multiplier table */ | ||
237 | const u8 x0f = 0x0f; | ||
238 | |||
239 | p = (u8 *)ptrs[disks-2]; | ||
240 | q = (u8 *)ptrs[disks-1]; | ||
241 | |||
242 | /* | ||
243 | * Compute syndrome with zero for the missing data page | ||
244 | * Use the dead data page as temporary storage for delta q | ||
245 | */ | ||
246 | |||
247 | dq = (u8 *)ptrs[faila]; | ||
248 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
249 | ptrs[disks-1] = dq; | ||
250 | |||
251 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
252 | |||
253 | /* Restore pointer table */ | ||
254 | ptrs[faila] = dq; | ||
255 | ptrs[disks-1] = q; | ||
256 | |||
257 | /* Now, pick the proper data tables */ | ||
258 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
259 | |||
260 | kernel_fpu_begin(); | ||
261 | |||
262 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
263 | |||
264 | while (bytes) { | ||
265 | #ifdef CONFIG_X86_64 | ||
266 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
267 | "vmovdqa64 %1, %%zmm8\n\t" | ||
268 | "vpxorq %2, %%zmm3, %%zmm3\n\t" | ||
269 | "vpxorq %3, %%zmm8, %%zmm8" | ||
270 | : | ||
271 | : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), | ||
272 | "m" (q[64])); | ||
273 | |||
274 | /* | ||
275 | * 3 = q[0] ^ dq[0] | ||
276 | * 8 = q[64] ^ dq[64] | ||
277 | */ | ||
278 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
279 | "vmovapd %%zmm0, %%zmm13\n\t" | ||
280 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
281 | "vmovapd %%zmm1, %%zmm14" | ||
282 | : | ||
283 | : "m" (qmul[0]), "m" (qmul[16])); | ||
284 | |||
285 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
286 | "vpsraw $4, %%zmm8, %%zmm12\n\t" | ||
287 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
288 | "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" | ||
289 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
290 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
291 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
292 | "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" | ||
293 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
294 | "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" | ||
295 | "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" | ||
296 | "vpxorq %%zmm13, %%zmm14, %%zmm14" | ||
297 | : | ||
298 | : ); | ||
299 | |||
300 | /* | ||
301 | * 1 = qmul[q[0] ^ dq[0]] | ||
302 | * 14 = qmul[q[64] ^ dq[64]] | ||
303 | */ | ||
304 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
305 | "vmovdqa64 %1, %%zmm12\n\t" | ||
306 | "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" | ||
307 | "vpxorq %%zmm14, %%zmm12, %%zmm12" | ||
308 | : | ||
309 | : "m" (p[0]), "m" (p[64])); | ||
310 | |||
311 | /* | ||
312 | * 2 = p[0] ^ qmul[q[0] ^ dq[0]] | ||
313 | * 12 = p[64] ^ qmul[q[64] ^ dq[64]] | ||
314 | */ | ||
315 | |||
316 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
317 | "vmovdqa64 %%zmm14, %1\n\t" | ||
318 | "vmovdqa64 %%zmm2, %2\n\t" | ||
319 | "vmovdqa64 %%zmm12,%3" | ||
320 | : | ||
321 | : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), | ||
322 | "m" (p[64])); | ||
323 | |||
324 | bytes -= 128; | ||
325 | p += 128; | ||
326 | q += 128; | ||
327 | dq += 128; | ||
328 | #else | ||
329 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
330 | "vpxorq %1, %%zmm3, %%zmm3" | ||
331 | : | ||
332 | : "m" (dq[0]), "m" (q[0])); | ||
333 | |||
334 | /* 3 = q ^ dq */ | ||
335 | |||
336 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
337 | "vbroadcasti64x2 %1, %%zmm1" | ||
338 | : | ||
339 | : "m" (qmul[0]), "m" (qmul[16])); | ||
340 | |||
341 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
342 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
343 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
344 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
345 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
346 | "vpxorq %%zmm0, %%zmm1, %%zmm1" | ||
347 | : | ||
348 | : ); | ||
349 | |||
350 | /* 1 = qmul[q ^ dq] */ | ||
351 | |||
352 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
353 | "vpxorq %%zmm1, %%zmm2, %%zmm2" | ||
354 | : | ||
355 | : "m" (p[0])); | ||
356 | |||
357 | /* 2 = p ^ qmul[q ^ dq] */ | ||
358 | |||
359 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
360 | "vmovdqa64 %%zmm2, %1" | ||
361 | : | ||
362 | : "m" (dq[0]), "m" (p[0])); | ||
363 | |||
364 | bytes -= 64; | ||
365 | p += 64; | ||
366 | q += 64; | ||
367 | dq += 64; | ||
368 | #endif | ||
369 | } | ||
370 | |||
371 | kernel_fpu_end(); | ||
372 | } | ||
373 | |||
374 | const struct raid6_recov_calls raid6_recov_avx512 = { | ||
375 | .data2 = raid6_2data_recov_avx512, | ||
376 | .datap = raid6_datap_recov_avx512, | ||
377 | .valid = raid6_has_avx512, | ||
378 | #ifdef CONFIG_X86_64 | ||
379 | .name = "avx512x2", | ||
380 | #else | ||
381 | .name = "avx512x1", | ||
382 | #endif | ||
383 | .priority = 3, | ||
384 | }; | ||
385 | |||
386 | #else | ||
387 | #warning "your version of binutils lacks AVX512 support" | ||
388 | #endif | ||
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c new file mode 100644 index 000000000000..b042dac826cc --- /dev/null +++ b/lib/raid6/recov_s390xc.c | |||
@@ -0,0 +1,116 @@ | |||
1 | /* | ||
2 | * RAID-6 data recovery in dual failure mode based on the XC instruction. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2016 | ||
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/export.h> | ||
9 | #include <linux/raid/pq.h> | ||
10 | |||
11 | static inline void xor_block(u8 *p1, u8 *p2) | ||
12 | { | ||
13 | typedef struct { u8 _[256]; } addrtype; | ||
14 | |||
15 | asm volatile( | ||
16 | " xc 0(256,%[p1]),0(%[p2])\n" | ||
17 | : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2), | ||
18 | [p1] "a" (p1), [p2] "a" (p2) : "cc"); | ||
19 | } | ||
20 | |||
21 | /* Recover two failed data blocks. */ | ||
22 | static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, | ||
23 | int failb, void **ptrs) | ||
24 | { | ||
25 | u8 *p, *q, *dp, *dq; | ||
26 | const u8 *pbmul; /* P multiplier table for B data */ | ||
27 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
28 | int i; | ||
29 | |||
30 | p = (u8 *)ptrs[disks-2]; | ||
31 | q = (u8 *)ptrs[disks-1]; | ||
32 | |||
33 | /* Compute syndrome with zero for the missing data pages | ||
34 | Use the dead data pages as temporary storage for | ||
35 | delta p and delta q */ | ||
36 | dp = (u8 *)ptrs[faila]; | ||
37 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
38 | ptrs[disks-2] = dp; | ||
39 | dq = (u8 *)ptrs[failb]; | ||
40 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
41 | ptrs[disks-1] = dq; | ||
42 | |||
43 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
44 | |||
45 | /* Restore pointer table */ | ||
46 | ptrs[faila] = dp; | ||
47 | ptrs[failb] = dq; | ||
48 | ptrs[disks-2] = p; | ||
49 | ptrs[disks-1] = q; | ||
50 | |||
51 | /* Now, pick the proper data tables */ | ||
52 | pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; | ||
53 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; | ||
54 | |||
55 | /* Now do it... */ | ||
56 | while (bytes) { | ||
57 | xor_block(dp, p); | ||
58 | xor_block(dq, q); | ||
59 | for (i = 0; i < 256; i++) | ||
60 | dq[i] = pbmul[dp[i]] ^ qmul[dq[i]]; | ||
61 | xor_block(dp, dq); | ||
62 | p += 256; | ||
63 | q += 256; | ||
64 | dp += 256; | ||
65 | dq += 256; | ||
66 | bytes -= 256; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* Recover failure of one data block plus the P block */ | ||
71 | static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, | ||
72 | void **ptrs) | ||
73 | { | ||
74 | u8 *p, *q, *dq; | ||
75 | const u8 *qmul; /* Q multiplier table */ | ||
76 | int i; | ||
77 | |||
78 | p = (u8 *)ptrs[disks-2]; | ||
79 | q = (u8 *)ptrs[disks-1]; | ||
80 | |||
81 | /* Compute syndrome with zero for the missing data page | ||
82 | Use the dead data page as temporary storage for delta q */ | ||
83 | dq = (u8 *)ptrs[faila]; | ||
84 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
85 | ptrs[disks-1] = dq; | ||
86 | |||
87 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
88 | |||
89 | /* Restore pointer table */ | ||
90 | ptrs[faila] = dq; | ||
91 | ptrs[disks-1] = q; | ||
92 | |||
93 | /* Now, pick the proper data tables */ | ||
94 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
95 | |||
96 | /* Now do it... */ | ||
97 | while (bytes) { | ||
98 | xor_block(dq, q); | ||
99 | for (i = 0; i < 256; i++) | ||
100 | dq[i] = qmul[dq[i]]; | ||
101 | xor_block(p, dq); | ||
102 | p += 256; | ||
103 | q += 256; | ||
104 | dq += 256; | ||
105 | bytes -= 256; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | const struct raid6_recov_calls raid6_recov_s390xc = { | ||
111 | .data2 = raid6_2data_recov_s390xc, | ||
112 | .datap = raid6_datap_recov_s390xc, | ||
113 | .valid = NULL, | ||
114 | .name = "s390xc", | ||
115 | .priority = 1, | ||
116 | }; | ||
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc new file mode 100644 index 000000000000..7b45191a655f --- /dev/null +++ b/lib/raid6/s390vx.uc | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * raid6_vx$#.c | ||
3 | * | ||
4 | * $#-way unrolled RAID6 gen/xor functions for s390 | ||
5 | * based on the vector facility | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
9 | * | ||
10 | * This file is postprocessed using unroll.awk. | ||
11 | */ | ||
12 | |||
13 | #include <linux/raid/pq.h> | ||
14 | #include <asm/fpu/api.h> | ||
15 | |||
16 | asm(".include \"asm/vx-insn.h\"\n"); | ||
17 | |||
18 | #define NSIZE 16 | ||
19 | |||
20 | static inline void LOAD_CONST(void) | ||
21 | { | ||
22 | asm volatile("VREPIB %v24,7"); | ||
23 | asm volatile("VREPIB %v25,0x1d"); | ||
24 | } | ||
25 | |||
26 | /* | ||
27 | * The SHLBYTE() operation shifts each of the 16 bytes in | ||
28 | * vector register y left by 1 bit and stores the result in | ||
29 | * vector register x. | ||
30 | */ | ||
31 | static inline void SHLBYTE(int x, int y) | ||
32 | { | ||
33 | asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); | ||
34 | } | ||
35 | |||
36 | /* | ||
37 | * For each of the 16 bytes in the vector register y the MASK() | ||
38 | * operation returns 0xFF if the high bit of the byte is 1, | ||
39 | * or 0x00 if the high bit is 0. The result is stored in vector | ||
40 | * register x. | ||
41 | */ | ||
42 | static inline void MASK(int x, int y) | ||
43 | { | ||
44 | asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); | ||
45 | } | ||
46 | |||
47 | static inline void AND(int x, int y, int z) | ||
48 | { | ||
49 | asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); | ||
50 | } | ||
51 | |||
52 | static inline void XOR(int x, int y, int z) | ||
53 | { | ||
54 | asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); | ||
55 | } | ||
56 | |||
57 | static inline void LOAD_DATA(int x, int n, u8 *ptr) | ||
58 | { | ||
59 | typedef struct { u8 _[16*n]; } addrtype; | ||
60 | register addrtype *__ptr asm("1") = (addrtype *) ptr; | ||
61 | |||
62 | asm volatile ("VLM %2,%3,0,%r1" | ||
63 | : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); | ||
64 | } | ||
65 | |||
66 | static inline void STORE_DATA(int x, int n, u8 *ptr) | ||
67 | { | ||
68 | typedef struct { u8 _[16*n]; } addrtype; | ||
69 | register addrtype *__ptr asm("1") = (addrtype *) ptr; | ||
70 | |||
71 | asm volatile ("VSTM %2,%3,0,1" | ||
72 | : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); | ||
73 | } | ||
74 | |||
75 | static inline void COPY_VEC(int x, int y) | ||
76 | { | ||
77 | asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); | ||
78 | } | ||
79 | |||
80 | static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
81 | { | ||
82 | struct kernel_fpu vxstate; | ||
83 | u8 **dptr, *p, *q; | ||
84 | int d, z, z0; | ||
85 | |||
86 | kernel_fpu_begin(&vxstate, KERNEL_VXR); | ||
87 | LOAD_CONST(); | ||
88 | |||
89 | dptr = (u8 **) ptrs; | ||
90 | z0 = disks - 3; /* Highest data disk */ | ||
91 | p = dptr[z0 + 1]; /* XOR parity */ | ||
92 | q = dptr[z0 + 2]; /* RS syndrome */ | ||
93 | |||
94 | for (d = 0; d < bytes; d += $#*NSIZE) { | ||
95 | LOAD_DATA(0,$#,&dptr[z0][d]); | ||
96 | COPY_VEC(8+$$,0+$$); | ||
97 | for (z = z0 - 1; z >= 0; z--) { | ||
98 | MASK(16+$$,8+$$); | ||
99 | AND(16+$$,16+$$,25); | ||
100 | SHLBYTE(8+$$,8+$$); | ||
101 | XOR(8+$$,8+$$,16+$$); | ||
102 | LOAD_DATA(16,$#,&dptr[z][d]); | ||
103 | XOR(0+$$,0+$$,16+$$); | ||
104 | XOR(8+$$,8+$$,16+$$); | ||
105 | } | ||
106 | STORE_DATA(0,$#,&p[d]); | ||
107 | STORE_DATA(8,$#,&q[d]); | ||
108 | } | ||
109 | kernel_fpu_end(&vxstate, KERNEL_VXR); | ||
110 | } | ||
111 | |||
112 | static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, | ||
113 | size_t bytes, void **ptrs) | ||
114 | { | ||
115 | struct kernel_fpu vxstate; | ||
116 | u8 **dptr, *p, *q; | ||
117 | int d, z, z0; | ||
118 | |||
119 | dptr = (u8 **) ptrs; | ||
120 | z0 = stop; /* P/Q right side optimization */ | ||
121 | p = dptr[disks - 2]; /* XOR parity */ | ||
122 | q = dptr[disks - 1]; /* RS syndrome */ | ||
123 | |||
124 | kernel_fpu_begin(&vxstate, KERNEL_VXR); | ||
125 | LOAD_CONST(); | ||
126 | |||
127 | for (d = 0; d < bytes; d += $#*NSIZE) { | ||
128 | /* P/Q data pages */ | ||
129 | LOAD_DATA(0,$#,&dptr[z0][d]); | ||
130 | COPY_VEC(8+$$,0+$$); | ||
131 | for (z = z0 - 1; z >= start; z--) { | ||
132 | MASK(16+$$,8+$$); | ||
133 | AND(16+$$,16+$$,25); | ||
134 | SHLBYTE(8+$$,8+$$); | ||
135 | XOR(8+$$,8+$$,16+$$); | ||
136 | LOAD_DATA(16,$#,&dptr[z][d]); | ||
137 | XOR(0+$$,0+$$,16+$$); | ||
138 | XOR(8+$$,8+$$,16+$$); | ||
139 | } | ||
140 | /* P/Q left side optimization */ | ||
141 | for (z = start - 1; z >= 0; z--) { | ||
142 | MASK(16+$$,8+$$); | ||
143 | AND(16+$$,16+$$,25); | ||
144 | SHLBYTE(8+$$,8+$$); | ||
145 | XOR(8+$$,8+$$,16+$$); | ||
146 | } | ||
147 | LOAD_DATA(16,$#,&p[d]); | ||
148 | XOR(16+$$,16+$$,0+$$); | ||
149 | STORE_DATA(16,$#,&p[d]); | ||
150 | LOAD_DATA(16,$#,&q[d]); | ||
151 | XOR(16+$$,16+$$,8+$$); | ||
152 | STORE_DATA(16,$#,&q[d]); | ||
153 | } | ||
154 | kernel_fpu_end(&vxstate, KERNEL_VXR); | ||
155 | } | ||
156 | |||
157 | static int raid6_s390vx$#_valid(void) | ||
158 | { | ||
159 | return MACHINE_HAS_VX; | ||
160 | } | ||
161 | |||
162 | const struct raid6_calls raid6_s390vx$# = { | ||
163 | raid6_s390vx$#_gen_syndrome, | ||
164 | raid6_s390vx$#_xor_syndrome, | ||
165 | raid6_s390vx$#_valid, | ||
166 | "vx128x$#", | ||
167 | 1 | ||
168 | }; | ||
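As with the other .uc sources, this file is expanded by unroll.awk per the Makefile rule added above: $# is replaced by the unroll factor and lines containing $$ are repeated with $$ = 0..factor-1. For UNROLL := 8, a line such as

	COPY_VEC(8+$$,0+$$);

expands (roughly) to

	COPY_VEC(8+0,0+0);
	COPY_VEC(8+1,0+1);
	...
	COPY_VEC(8+7,0+7);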
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 29090f3db677..2c7b60edea04 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile | |||
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64) | |||
32 | endif | 32 | endif |
33 | 33 | ||
34 | ifeq ($(IS_X86),yes) | 34 | ifeq ($(IS_X86),yes) |
35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o | 35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o |
36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ | 36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ |
37 | gcc -c -x assembler - >&/dev/null && \ | 37 | gcc -c -x assembler - >&/dev/null && \ |
38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) | 38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) |
39 | CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \ | ||
40 | gcc -c -x assembler - >&/dev/null && \ | ||
41 | rm ./-.o && echo -DCONFIG_AS_AVX512=1) | ||
39 | else ifeq ($(HAS_NEON),yes) | 42 | else ifeq ($(HAS_NEON),yes) |
40 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o | 43 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o |
41 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 | 44 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 |
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 3bebbabdb510..b07f4d8e6b03 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c | |||
@@ -21,12 +21,13 @@ | |||
21 | 21 | ||
22 | #define NDISKS 16 /* Including P and Q */ | 22 | #define NDISKS 16 /* Including P and Q */ |
23 | 23 | ||
24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
25 | struct raid6_calls raid6_call; | 25 | struct raid6_calls raid6_call; |
26 | 26 | ||
27 | char *dataptrs[NDISKS]; | 27 | char *dataptrs[NDISKS]; |
28 | char data[NDISKS][PAGE_SIZE]; | 28 | char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | 29 | char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
30 | char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); | ||
30 | 31 | ||
31 | static void makedata(int start, int stop) | 32 | static void makedata(int start, int stop) |
32 | { | 33 | { |
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index 8fe9d9662abb..834d268a4b05 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h | |||
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void) | |||
46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ | 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ |
47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | 48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ |
49 | #define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ | ||
50 | #define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular) | ||
51 | * Instructions | ||
52 | */ | ||
53 | #define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular) | ||
54 | * Instructions | ||
55 | */ | ||
56 | #define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length) | ||
57 | * Extensions | ||
58 | */ | ||
49 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | 59 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ |
50 | 60 | ||
51 | /* Should work well enough on modern CPUs for testing */ | 61 | /* Should work well enough on modern CPUs for testing */ |
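The new feature-bit definitions are what an AVX-512 RAID-6 routine's validity hook would test. Below is a minimal sketch of such a check, assuming the boot_cpu_has() cpuid helper that lib/raid6/x86.h provides for the user-space test build; the real checks live in the new avx512.c / recov_avx512.c files from the diffstat, so treat this as illustrative only.

#include "x86.h"	/* assumed to provide boot_cpu_has() in the test build */

/* Hypothetical validity check: AVX-512 foundation plus the byte/word and
 * vector-length extensions the gen/xor syndrome kernels would rely on. */
static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2)     &&
	       boot_cpu_has(X86_FEATURE_AVX512F)  &&
	       boot_cpu_has(X86_FEATURE_AVX512BW) &&
	       boot_cpu_has(X86_FEATURE_AVX512VL) &&
	       boot_cpu_has(X86_FEATURE_AVX512DQ);
}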
diff --git a/lib/random32.c b/lib/random32.c index 69ed593aab07..fa594b1140e6 100644 --- a/lib/random32.c +++ b/lib/random32.c | |||
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void) | |||
47 | } | 47 | } |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | static DEFINE_PER_CPU(struct rnd_state, net_rand_state); | 50 | static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; |
51 | 51 | ||
52 | /** | 52 | /** |
53 | * prandom_u32_state - seeded pseudo-random number generator. | 53 | * prandom_u32_state - seeded pseudo-random number generator. |
@@ -81,7 +81,7 @@ u32 prandom_u32(void) | |||
81 | u32 res; | 81 | u32 res; |
82 | 82 | ||
83 | res = prandom_u32_state(state); | 83 | res = prandom_u32_state(state); |
84 | put_cpu_var(state); | 84 | put_cpu_var(net_rand_state); |
85 | 85 | ||
86 | return res; | 86 | return res; |
87 | } | 87 | } |
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes) | |||
128 | struct rnd_state *state = &get_cpu_var(net_rand_state); | 128 | struct rnd_state *state = &get_cpu_var(net_rand_state); |
129 | 129 | ||
130 | prandom_bytes_state(state, buf, bytes); | 130 | prandom_bytes_state(state, buf, bytes); |
131 | put_cpu_var(state); | 131 | put_cpu_var(net_rand_state); |
132 | } | 132 | } |
133 | EXPORT_SYMBOL(prandom_bytes); | 133 | EXPORT_SYMBOL(prandom_bytes); |
134 | 134 | ||
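These random32.c hunks fix put_cpu_var() calls that named the local pointer instead of the per-cpu variable that get_cpu_var() returned a reference to. A minimal sketch of the intended pairing, using a hypothetical per-cpu counter:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static unsigned long demo_bump(void)
{
	/* get_cpu_var() disables preemption and yields this CPU's copy */
	unsigned long *cnt = &get_cpu_var(demo_counter);
	unsigned long val = ++*cnt;

	/* put_cpu_var() must name the per-cpu variable, not the local pointer */
	put_cpu_var(demo_counter);
	return val;
}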
diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 56054e541a0f..32d0ad058380 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c | |||
@@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work) | |||
378 | schedule_work(&ht->run_work); | 378 | schedule_work(&ht->run_work); |
379 | } | 379 | } |
380 | 380 | ||
381 | static bool rhashtable_check_elasticity(struct rhashtable *ht, | 381 | static int rhashtable_insert_rehash(struct rhashtable *ht, |
382 | struct bucket_table *tbl, | 382 | struct bucket_table *tbl) |
383 | unsigned int hash) | ||
384 | { | ||
385 | unsigned int elasticity = ht->elasticity; | ||
386 | struct rhash_head *head; | ||
387 | |||
388 | rht_for_each(head, tbl, hash) | ||
389 | if (!--elasticity) | ||
390 | return true; | ||
391 | |||
392 | return false; | ||
393 | } | ||
394 | |||
395 | int rhashtable_insert_rehash(struct rhashtable *ht, | ||
396 | struct bucket_table *tbl) | ||
397 | { | 383 | { |
398 | struct bucket_table *old_tbl; | 384 | struct bucket_table *old_tbl; |
399 | struct bucket_table *new_tbl; | 385 | struct bucket_table *new_tbl; |
@@ -439,61 +425,172 @@ fail: | |||
439 | 425 | ||
440 | return err; | 426 | return err; |
441 | } | 427 | } |
442 | EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); | ||
443 | 428 | ||
444 | struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, | 429 | static void *rhashtable_lookup_one(struct rhashtable *ht, |
445 | const void *key, | 430 | struct bucket_table *tbl, unsigned int hash, |
446 | struct rhash_head *obj, | 431 | const void *key, struct rhash_head *obj) |
447 | struct bucket_table *tbl) | ||
448 | { | 432 | { |
433 | struct rhashtable_compare_arg arg = { | ||
434 | .ht = ht, | ||
435 | .key = key, | ||
436 | }; | ||
437 | struct rhash_head __rcu **pprev; | ||
449 | struct rhash_head *head; | 438 | struct rhash_head *head; |
450 | unsigned int hash; | 439 | int elasticity; |
451 | int err; | ||
452 | 440 | ||
453 | tbl = rhashtable_last_table(ht, tbl); | 441 | elasticity = ht->elasticity; |
454 | hash = head_hashfn(ht, tbl, obj); | 442 | pprev = &tbl->buckets[hash]; |
455 | spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); | 443 | rht_for_each(head, tbl, hash) { |
444 | struct rhlist_head *list; | ||
445 | struct rhlist_head *plist; | ||
456 | 446 | ||
457 | err = -EEXIST; | 447 | elasticity--; |
458 | if (key && rhashtable_lookup_fast(ht, key, ht->p)) | 448 | if (!key || |
459 | goto exit; | 449 | (ht->p.obj_cmpfn ? |
450 | ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : | ||
451 | rhashtable_compare(&arg, rht_obj(ht, head)))) | ||
452 | continue; | ||
460 | 453 | ||
461 | err = -E2BIG; | 454 | if (!ht->rhlist) |
462 | if (unlikely(rht_grow_above_max(ht, tbl))) | 455 | return rht_obj(ht, head); |
463 | goto exit; | 456 | |
457 | list = container_of(obj, struct rhlist_head, rhead); | ||
458 | plist = container_of(head, struct rhlist_head, rhead); | ||
459 | |||
460 | RCU_INIT_POINTER(list->next, plist); | ||
461 | head = rht_dereference_bucket(head->next, tbl, hash); | ||
462 | RCU_INIT_POINTER(list->rhead.next, head); | ||
463 | rcu_assign_pointer(*pprev, obj); | ||
464 | |||
465 | return NULL; | ||
466 | } | ||
467 | |||
468 | if (elasticity <= 0) | ||
469 | return ERR_PTR(-EAGAIN); | ||
470 | |||
471 | return ERR_PTR(-ENOENT); | ||
472 | } | ||
473 | |||
474 | static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, | ||
475 | struct bucket_table *tbl, | ||
476 | unsigned int hash, | ||
477 | struct rhash_head *obj, | ||
478 | void *data) | ||
479 | { | ||
480 | struct bucket_table *new_tbl; | ||
481 | struct rhash_head *head; | ||
482 | |||
483 | if (!IS_ERR_OR_NULL(data)) | ||
484 | return ERR_PTR(-EEXIST); | ||
485 | |||
486 | if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) | ||
487 | return ERR_CAST(data); | ||
464 | 488 | ||
465 | err = -EAGAIN; | 489 | new_tbl = rcu_dereference(tbl->future_tbl); |
466 | if (rhashtable_check_elasticity(ht, tbl, hash) || | 490 | if (new_tbl) |
467 | rht_grow_above_100(ht, tbl)) | 491 | return new_tbl; |
468 | goto exit; | ||
469 | 492 | ||
470 | err = 0; | 493 | if (PTR_ERR(data) != -ENOENT) |
494 | return ERR_CAST(data); | ||
495 | |||
496 | if (unlikely(rht_grow_above_max(ht, tbl))) | ||
497 | return ERR_PTR(-E2BIG); | ||
498 | |||
499 | if (unlikely(rht_grow_above_100(ht, tbl))) | ||
500 | return ERR_PTR(-EAGAIN); | ||
471 | 501 | ||
472 | head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); | 502 | head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); |
473 | 503 | ||
474 | RCU_INIT_POINTER(obj->next, head); | 504 | RCU_INIT_POINTER(obj->next, head); |
505 | if (ht->rhlist) { | ||
506 | struct rhlist_head *list; | ||
507 | |||
508 | list = container_of(obj, struct rhlist_head, rhead); | ||
509 | RCU_INIT_POINTER(list->next, NULL); | ||
510 | } | ||
475 | 511 | ||
476 | rcu_assign_pointer(tbl->buckets[hash], obj); | 512 | rcu_assign_pointer(tbl->buckets[hash], obj); |
477 | 513 | ||
478 | atomic_inc(&ht->nelems); | 514 | atomic_inc(&ht->nelems); |
515 | if (rht_grow_above_75(ht, tbl)) | ||
516 | schedule_work(&ht->run_work); | ||
479 | 517 | ||
480 | exit: | 518 | return NULL; |
481 | spin_unlock(rht_bucket_lock(tbl, hash)); | 519 | } |
482 | 520 | ||
483 | if (err == 0) | 521 | static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, |
484 | return NULL; | 522 | struct rhash_head *obj) |
485 | else if (err == -EAGAIN) | 523 | { |
486 | return tbl; | 524 | struct bucket_table *new_tbl; |
487 | else | 525 | struct bucket_table *tbl; |
488 | return ERR_PTR(err); | 526 | unsigned int hash; |
527 | spinlock_t *lock; | ||
528 | void *data; | ||
529 | |||
530 | tbl = rcu_dereference(ht->tbl); | ||
531 | |||
532 | /* All insertions must grab the oldest table containing | ||
533 | * the hashed bucket that is yet to be rehashed. | ||
534 | */ | ||
535 | for (;;) { | ||
536 | hash = rht_head_hashfn(ht, tbl, obj, ht->p); | ||
537 | lock = rht_bucket_lock(tbl, hash); | ||
538 | spin_lock_bh(lock); | ||
539 | |||
540 | if (tbl->rehash <= hash) | ||
541 | break; | ||
542 | |||
543 | spin_unlock_bh(lock); | ||
544 | tbl = rcu_dereference(tbl->future_tbl); | ||
545 | } | ||
546 | |||
547 | data = rhashtable_lookup_one(ht, tbl, hash, key, obj); | ||
548 | new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); | ||
549 | if (PTR_ERR(new_tbl) != -EEXIST) | ||
550 | data = ERR_CAST(new_tbl); | ||
551 | |||
552 | while (!IS_ERR_OR_NULL(new_tbl)) { | ||
553 | tbl = new_tbl; | ||
554 | hash = rht_head_hashfn(ht, tbl, obj, ht->p); | ||
555 | spin_lock_nested(rht_bucket_lock(tbl, hash), | ||
556 | SINGLE_DEPTH_NESTING); | ||
557 | |||
558 | data = rhashtable_lookup_one(ht, tbl, hash, key, obj); | ||
559 | new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); | ||
560 | if (PTR_ERR(new_tbl) != -EEXIST) | ||
561 | data = ERR_CAST(new_tbl); | ||
562 | |||
563 | spin_unlock(rht_bucket_lock(tbl, hash)); | ||
564 | } | ||
565 | |||
566 | spin_unlock_bh(lock); | ||
567 | |||
568 | if (PTR_ERR(data) == -EAGAIN) | ||
569 | data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: | ||
570 | -EAGAIN); | ||
571 | |||
572 | return data; | ||
573 | } | ||
574 | |||
575 | void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, | ||
576 | struct rhash_head *obj) | ||
577 | { | ||
578 | void *data; | ||
579 | |||
580 | do { | ||
581 | rcu_read_lock(); | ||
582 | data = rhashtable_try_insert(ht, key, obj); | ||
583 | rcu_read_unlock(); | ||
584 | } while (PTR_ERR(data) == -EAGAIN); | ||
585 | |||
586 | return data; | ||
489 | } | 587 | } |
490 | EXPORT_SYMBOL_GPL(rhashtable_insert_slow); | 588 | EXPORT_SYMBOL_GPL(rhashtable_insert_slow); |
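With the rewrite above, rhashtable_insert_slow() returns NULL on success, a pointer to an existing element when the key is already present (for a plain rhashtable, not an rhlist), or an ERR_PTR() on failure. A hedged caller sketch with a hypothetical object type; real users normally reach this path through the inline fast paths in linux/rhashtable.h rather than calling it directly.

#include <linux/rhashtable.h>

struct demo_obj {
	u32 key;
	struct rhash_head node;
};

static int demo_insert(struct rhashtable *ht, struct demo_obj *obj)
{
	void *old = rhashtable_insert_slow(ht, &obj->key, &obj->node);

	if (old == NULL)
		return 0;		/* inserted */
	if (IS_ERR(old))
		return PTR_ERR(old);	/* e.g. -E2BIG, or rehash failure */
	return -EEXIST;			/* an element with this key exists */
}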
491 | 589 | ||
492 | /** | 590 | /** |
493 | * rhashtable_walk_init - Initialise an iterator | 591 | * rhashtable_walk_enter - Initialise an iterator |
494 | * @ht: Table to walk over | 592 | * @ht: Table to walk over |
495 | * @iter: Hash table Iterator | 593 | * @iter: Hash table Iterator |
496 | * @gfp: GFP flags for allocations | ||
497 | * | 594 | * |
498 | * This function prepares a hash table walk. | 595 | * This function prepares a hash table walk. |
499 | * | 596 | * |
@@ -508,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); | |||
508 | * This function may sleep so you must not call it from interrupt | 605 | * This function may sleep so you must not call it from interrupt |
509 | * context or with spin locks held. | 606 | * context or with spin locks held. |
510 | * | 607 | * |
511 | * You must call rhashtable_walk_exit if this function returns | 608 | * You must call rhashtable_walk_exit after this function returns. |
512 | * successfully. | ||
513 | */ | 609 | */ |
514 | int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, | 610 | void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) |
515 | gfp_t gfp) | ||
516 | { | 611 | { |
517 | iter->ht = ht; | 612 | iter->ht = ht; |
518 | iter->p = NULL; | 613 | iter->p = NULL; |
519 | iter->slot = 0; | 614 | iter->slot = 0; |
520 | iter->skip = 0; | 615 | iter->skip = 0; |
521 | 616 | ||
522 | iter->walker = kmalloc(sizeof(*iter->walker), gfp); | ||
523 | if (!iter->walker) | ||
524 | return -ENOMEM; | ||
525 | |||
526 | spin_lock(&ht->lock); | 617 | spin_lock(&ht->lock); |
527 | iter->walker->tbl = | 618 | iter->walker.tbl = |
528 | rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); | 619 | rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); |
529 | list_add(&iter->walker->list, &iter->walker->tbl->walkers); | 620 | list_add(&iter->walker.list, &iter->walker.tbl->walkers); |
530 | spin_unlock(&ht->lock); | 621 | spin_unlock(&ht->lock); |
531 | |||
532 | return 0; | ||
533 | } | 622 | } |
534 | EXPORT_SYMBOL_GPL(rhashtable_walk_init); | 623 | EXPORT_SYMBOL_GPL(rhashtable_walk_enter); |
535 | 624 | ||
536 | /** | 625 | /** |
537 | * rhashtable_walk_exit - Free an iterator | 626 | * rhashtable_walk_exit - Free an iterator |
@@ -542,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); | |||
542 | void rhashtable_walk_exit(struct rhashtable_iter *iter) | 631 | void rhashtable_walk_exit(struct rhashtable_iter *iter) |
543 | { | 632 | { |
544 | spin_lock(&iter->ht->lock); | 633 | spin_lock(&iter->ht->lock); |
545 | if (iter->walker->tbl) | 634 | if (iter->walker.tbl) |
546 | list_del(&iter->walker->list); | 635 | list_del(&iter->walker.list); |
547 | spin_unlock(&iter->ht->lock); | 636 | spin_unlock(&iter->ht->lock); |
548 | kfree(iter->walker); | ||
549 | } | 637 | } |
550 | EXPORT_SYMBOL_GPL(rhashtable_walk_exit); | 638 | EXPORT_SYMBOL_GPL(rhashtable_walk_exit); |
551 | 639 | ||
@@ -571,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) | |||
571 | rcu_read_lock(); | 659 | rcu_read_lock(); |
572 | 660 | ||
573 | spin_lock(&ht->lock); | 661 | spin_lock(&ht->lock); |
574 | if (iter->walker->tbl) | 662 | if (iter->walker.tbl) |
575 | list_del(&iter->walker->list); | 663 | list_del(&iter->walker.list); |
576 | spin_unlock(&ht->lock); | 664 | spin_unlock(&ht->lock); |
577 | 665 | ||
578 | if (!iter->walker->tbl) { | 666 | if (!iter->walker.tbl) { |
579 | iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); | 667 | iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); |
580 | return -EAGAIN; | 668 | return -EAGAIN; |
581 | } | 669 | } |
582 | 670 | ||
@@ -598,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); | |||
598 | */ | 686 | */ |
599 | void *rhashtable_walk_next(struct rhashtable_iter *iter) | 687 | void *rhashtable_walk_next(struct rhashtable_iter *iter) |
600 | { | 688 | { |
601 | struct bucket_table *tbl = iter->walker->tbl; | 689 | struct bucket_table *tbl = iter->walker.tbl; |
690 | struct rhlist_head *list = iter->list; | ||
602 | struct rhashtable *ht = iter->ht; | 691 | struct rhashtable *ht = iter->ht; |
603 | struct rhash_head *p = iter->p; | 692 | struct rhash_head *p = iter->p; |
693 | bool rhlist = ht->rhlist; | ||
604 | 694 | ||
605 | if (p) { | 695 | if (p) { |
606 | p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); | 696 | if (!rhlist || !(list = rcu_dereference(list->next))) { |
697 | p = rcu_dereference(p->next); | ||
698 | list = container_of(p, struct rhlist_head, rhead); | ||
699 | } | ||
607 | goto next; | 700 | goto next; |
608 | } | 701 | } |
609 | 702 | ||
@@ -611,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) | |||
611 | int skip = iter->skip; | 704 | int skip = iter->skip; |
612 | 705 | ||
613 | rht_for_each_rcu(p, tbl, iter->slot) { | 706 | rht_for_each_rcu(p, tbl, iter->slot) { |
707 | if (rhlist) { | ||
708 | list = container_of(p, struct rhlist_head, | ||
709 | rhead); | ||
710 | do { | ||
711 | if (!skip) | ||
712 | goto next; | ||
713 | skip--; | ||
714 | list = rcu_dereference(list->next); | ||
715 | } while (list); | ||
716 | |||
717 | continue; | ||
718 | } | ||
614 | if (!skip) | 719 | if (!skip) |
615 | break; | 720 | break; |
616 | skip--; | 721 | skip--; |
@@ -620,7 +725,8 @@ next: | |||
620 | if (!rht_is_a_nulls(p)) { | 725 | if (!rht_is_a_nulls(p)) { |
621 | iter->skip++; | 726 | iter->skip++; |
622 | iter->p = p; | 727 | iter->p = p; |
623 | return rht_obj(ht, p); | 728 | iter->list = list; |
729 | return rht_obj(ht, rhlist ? &list->rhead : p); | ||
624 | } | 730 | } |
625 | 731 | ||
626 | iter->skip = 0; | 732 | iter->skip = 0; |
@@ -631,8 +737,8 @@ next: | |||
631 | /* Ensure we see any new tables. */ | 737 | /* Ensure we see any new tables. */ |
632 | smp_rmb(); | 738 | smp_rmb(); |
633 | 739 | ||
634 | iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); | 740 | iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht); |
635 | if (iter->walker->tbl) { | 741 | if (iter->walker.tbl) { |
636 | iter->slot = 0; | 742 | iter->slot = 0; |
637 | iter->skip = 0; | 743 | iter->skip = 0; |
638 | return ERR_PTR(-EAGAIN); | 744 | return ERR_PTR(-EAGAIN); |
@@ -652,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) | |||
652 | __releases(RCU) | 758 | __releases(RCU) |
653 | { | 759 | { |
654 | struct rhashtable *ht; | 760 | struct rhashtable *ht; |
655 | struct bucket_table *tbl = iter->walker->tbl; | 761 | struct bucket_table *tbl = iter->walker.tbl; |
656 | 762 | ||
657 | if (!tbl) | 763 | if (!tbl) |
658 | goto out; | 764 | goto out; |
@@ -661,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) | |||
661 | 767 | ||
662 | spin_lock(&ht->lock); | 768 | spin_lock(&ht->lock); |
663 | if (tbl->rehash < tbl->size) | 769 | if (tbl->rehash < tbl->size) |
664 | list_add(&iter->walker->list, &tbl->walkers); | 770 | list_add(&iter->walker.list, &tbl->walkers); |
665 | else | 771 | else |
666 | iter->walker->tbl = NULL; | 772 | iter->walker.tbl = NULL; |
667 | spin_unlock(&ht->lock); | 773 | spin_unlock(&ht->lock); |
668 | 774 | ||
669 | iter->p = NULL; | 775 | iter->p = NULL; |
@@ -809,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht, | |||
809 | EXPORT_SYMBOL_GPL(rhashtable_init); | 915 | EXPORT_SYMBOL_GPL(rhashtable_init); |
810 | 916 | ||
811 | /** | 917 | /** |
918 | * rhltable_init - initialize a new hash list table | ||
919 | * @hlt: hash list table to be initialized | ||
920 | * @params: configuration parameters | ||
921 | * | ||
922 | * Initializes a new hash list table. | ||
923 | * | ||
924 | * See documentation for rhashtable_init. | ||
925 | */ | ||
926 | int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) | ||
927 | { | ||
928 | int err; | ||
929 | |||
930 | /* No rhlist NULLs marking for now. */ | ||
931 | if (params->nulls_base) | ||
932 | return -EINVAL; | ||
933 | |||
934 | err = rhashtable_init(&hlt->ht, params); | ||
935 | hlt->ht.rhlist = true; | ||
936 | return err; | ||
937 | } | ||
938 | EXPORT_SYMBOL_GPL(rhltable_init); | ||
939 | |||
940 | static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, | ||
941 | void (*free_fn)(void *ptr, void *arg), | ||
942 | void *arg) | ||
943 | { | ||
944 | struct rhlist_head *list; | ||
945 | |||
946 | if (!ht->rhlist) { | ||
947 | free_fn(rht_obj(ht, obj), arg); | ||
948 | return; | ||
949 | } | ||
950 | |||
951 | list = container_of(obj, struct rhlist_head, rhead); | ||
952 | do { | ||
953 | obj = &list->rhead; | ||
954 | list = rht_dereference(list->next, ht); | ||
955 | free_fn(rht_obj(ht, obj), arg); | ||
956 | } while (list); | ||
957 | } | ||
958 | |||
959 | /** | ||
812 | * rhashtable_free_and_destroy - free elements and destroy hash table | 960 | * rhashtable_free_and_destroy - free elements and destroy hash table |
813 | * @ht: the hash table to destroy | 961 | * @ht: the hash table to destroy |
814 | * @free_fn: callback to release resources of element | 962 | * @free_fn: callback to release resources of element |
@@ -845,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, | |||
845 | pos = next, | 993 | pos = next, |
846 | next = !rht_is_a_nulls(pos) ? | 994 | next = !rht_is_a_nulls(pos) ? |
847 | rht_dereference(pos->next, ht) : NULL) | 995 | rht_dereference(pos->next, ht) : NULL) |
848 | free_fn(rht_obj(ht, pos), arg); | 996 | rhashtable_free_one(ht, pos, free_fn, arg); |
849 | } | 997 | } |
850 | } | 998 | } |
851 | 999 | ||
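Beyond the rhlist support, the walker API changes shape: the walker is now embedded in the iterator, so rhashtable_walk_enter() replaces rhashtable_walk_init(), takes no GFP flags and cannot fail. A minimal iteration sketch against the functions shown above; object handling is elided, and the -EAGAIN handling relies on walk_next() restarting from slot 0 as its documentation describes.

#include <linux/rhashtable.h>

static void demo_walk(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;

	rhashtable_walk_enter(ht, &iter);	/* now void: cannot fail */

	/* -EAGAIN here would only mean a resize happened before the walk */
	rhashtable_walk_start(&iter);		/* takes the RCU read lock */

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj))		/* -EAGAIN: resize mid-walk, */
			continue;		/* walk_next() restarts the table */
		/* use obj under RCU here; must not sleep */
	}

	rhashtable_walk_stop(&iter);		/* drops the RCU read lock */
	rhashtable_walk_exit(&iter);
}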
diff --git a/lib/sbitmap.c b/lib/sbitmap.c new file mode 100644 index 000000000000..2cecf05c82fd --- /dev/null +++ b/lib/sbitmap.c | |||
@@ -0,0 +1,347 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Facebook | ||
3 | * Copyright (C) 2013-2014 Jens Axboe | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public | ||
7 | * License v2 as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/random.h> | ||
19 | #include <linux/sbitmap.h> | ||
20 | |||
21 | int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, | ||
22 | gfp_t flags, int node) | ||
23 | { | ||
24 | unsigned int bits_per_word; | ||
25 | unsigned int i; | ||
26 | |||
27 | if (shift < 0) { | ||
28 | shift = ilog2(BITS_PER_LONG); | ||
29 | /* | ||
30 | * If the bitmap is small, shrink the number of bits per word so | ||
31 | * we spread over a few cachelines, at least. If less than 4 | ||
32 | * bits, just forget about it, it's not going to work optimally | ||
33 | * anyway. | ||
34 | */ | ||
35 | if (depth >= 4) { | ||
36 | while ((4U << shift) > depth) | ||
37 | shift--; | ||
38 | } | ||
39 | } | ||
40 | bits_per_word = 1U << shift; | ||
41 | if (bits_per_word > BITS_PER_LONG) | ||
42 | return -EINVAL; | ||
43 | |||
44 | sb->shift = shift; | ||
45 | sb->depth = depth; | ||
46 | sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); | ||
47 | |||
48 | if (depth == 0) { | ||
49 | sb->map = NULL; | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); | ||
54 | if (!sb->map) | ||
55 | return -ENOMEM; | ||
56 | |||
57 | for (i = 0; i < sb->map_nr; i++) { | ||
58 | sb->map[i].depth = min(depth, bits_per_word); | ||
59 | depth -= sb->map[i].depth; | ||
60 | } | ||
61 | return 0; | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(sbitmap_init_node); | ||
64 | |||
65 | void sbitmap_resize(struct sbitmap *sb, unsigned int depth) | ||
66 | { | ||
67 | unsigned int bits_per_word = 1U << sb->shift; | ||
68 | unsigned int i; | ||
69 | |||
70 | sb->depth = depth; | ||
71 | sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); | ||
72 | |||
73 | for (i = 0; i < sb->map_nr; i++) { | ||
74 | sb->map[i].depth = min(depth, bits_per_word); | ||
75 | depth -= sb->map[i].depth; | ||
76 | } | ||
77 | } | ||
78 | EXPORT_SYMBOL_GPL(sbitmap_resize); | ||
79 | |||
80 | static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, | ||
81 | bool wrap) | ||
82 | { | ||
83 | unsigned int orig_hint = hint; | ||
84 | int nr; | ||
85 | |||
86 | while (1) { | ||
87 | nr = find_next_zero_bit(&word->word, word->depth, hint); | ||
88 | if (unlikely(nr >= word->depth)) { | ||
89 | /* | ||
90 | * We started with an offset, and we didn't reset the | ||
91 | * offset to 0 in a failure case, so start from 0 to | ||
92 | * exhaust the map. | ||
93 | */ | ||
94 | if (orig_hint && hint && wrap) { | ||
95 | hint = orig_hint = 0; | ||
96 | continue; | ||
97 | } | ||
98 | return -1; | ||
99 | } | ||
100 | |||
101 | if (!test_and_set_bit(nr, &word->word)) | ||
102 | break; | ||
103 | |||
104 | hint = nr + 1; | ||
105 | if (hint >= word->depth - 1) | ||
106 | hint = 0; | ||
107 | } | ||
108 | |||
109 | return nr; | ||
110 | } | ||
111 | |||
112 | int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) | ||
113 | { | ||
114 | unsigned int i, index; | ||
115 | int nr = -1; | ||
116 | |||
117 | index = SB_NR_TO_INDEX(sb, alloc_hint); | ||
118 | |||
119 | for (i = 0; i < sb->map_nr; i++) { | ||
120 | nr = __sbitmap_get_word(&sb->map[index], | ||
121 | SB_NR_TO_BIT(sb, alloc_hint), | ||
122 | !round_robin); | ||
123 | if (nr != -1) { | ||
124 | nr += index << sb->shift; | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | /* Jump to next index. */ | ||
129 | index++; | ||
130 | alloc_hint = index << sb->shift; | ||
131 | |||
132 | if (index >= sb->map_nr) { | ||
133 | index = 0; | ||
134 | alloc_hint = 0; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | return nr; | ||
139 | } | ||
140 | EXPORT_SYMBOL_GPL(sbitmap_get); | ||
141 | |||
142 | bool sbitmap_any_bit_set(const struct sbitmap *sb) | ||
143 | { | ||
144 | unsigned int i; | ||
145 | |||
146 | for (i = 0; i < sb->map_nr; i++) { | ||
147 | if (sb->map[i].word) | ||
148 | return true; | ||
149 | } | ||
150 | return false; | ||
151 | } | ||
152 | EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); | ||
153 | |||
154 | bool sbitmap_any_bit_clear(const struct sbitmap *sb) | ||
155 | { | ||
156 | unsigned int i; | ||
157 | |||
158 | for (i = 0; i < sb->map_nr; i++) { | ||
159 | const struct sbitmap_word *word = &sb->map[i]; | ||
160 | unsigned long ret; | ||
161 | |||
162 | ret = find_first_zero_bit(&word->word, word->depth); | ||
163 | if (ret < word->depth) | ||
164 | return true; | ||
165 | } | ||
166 | return false; | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); | ||
169 | |||
170 | unsigned int sbitmap_weight(const struct sbitmap *sb) | ||
171 | { | ||
172 | unsigned int i, weight = 0; | ||
173 | |||
174 | for (i = 0; i < sb->map_nr; i++) { | ||
175 | const struct sbitmap_word *word = &sb->map[i]; | ||
176 | |||
177 | weight += bitmap_weight(&word->word, word->depth); | ||
178 | } | ||
179 | return weight; | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(sbitmap_weight); | ||
182 | |||
183 | static unsigned int sbq_calc_wake_batch(unsigned int depth) | ||
184 | { | ||
185 | unsigned int wake_batch; | ||
186 | |||
187 | /* | ||
188 | * For each batch, we wake up one queue. We need to make sure that our | ||
189 | * batch size is small enough that the full depth of the bitmap is | ||
190 | * enough to wake up all of the queues. | ||
191 | */ | ||
192 | wake_batch = SBQ_WAKE_BATCH; | ||
193 | if (wake_batch > depth / SBQ_WAIT_QUEUES) | ||
194 | wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); | ||
195 | |||
196 | return wake_batch; | ||
197 | } | ||
198 | |||
199 | int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, | ||
200 | int shift, bool round_robin, gfp_t flags, int node) | ||
201 | { | ||
202 | int ret; | ||
203 | int i; | ||
204 | |||
205 | ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); | ||
206 | if (ret) | ||
207 | return ret; | ||
208 | |||
209 | sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); | ||
210 | if (!sbq->alloc_hint) { | ||
211 | sbitmap_free(&sbq->sb); | ||
212 | return -ENOMEM; | ||
213 | } | ||
214 | |||
215 | if (depth && !round_robin) { | ||
216 | for_each_possible_cpu(i) | ||
217 | *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; | ||
218 | } | ||
219 | |||
220 | sbq->wake_batch = sbq_calc_wake_batch(depth); | ||
221 | atomic_set(&sbq->wake_index, 0); | ||
222 | |||
223 | sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); | ||
224 | if (!sbq->ws) { | ||
225 | free_percpu(sbq->alloc_hint); | ||
226 | sbitmap_free(&sbq->sb); | ||
227 | return -ENOMEM; | ||
228 | } | ||
229 | |||
230 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { | ||
231 | init_waitqueue_head(&sbq->ws[i].wait); | ||
232 | atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); | ||
233 | } | ||
234 | |||
235 | sbq->round_robin = round_robin; | ||
236 | return 0; | ||
237 | } | ||
238 | EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); | ||
239 | |||
240 | void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) | ||
241 | { | ||
242 | sbq->wake_batch = sbq_calc_wake_batch(depth); | ||
243 | sbitmap_resize(&sbq->sb, depth); | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(sbitmap_queue_resize); | ||
246 | |||
247 | int __sbitmap_queue_get(struct sbitmap_queue *sbq) | ||
248 | { | ||
249 | unsigned int hint, depth; | ||
250 | int nr; | ||
251 | |||
252 | hint = this_cpu_read(*sbq->alloc_hint); | ||
253 | depth = READ_ONCE(sbq->sb.depth); | ||
254 | if (unlikely(hint >= depth)) { | ||
255 | hint = depth ? prandom_u32() % depth : 0; | ||
256 | this_cpu_write(*sbq->alloc_hint, hint); | ||
257 | } | ||
258 | nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); | ||
259 | |||
260 | if (nr == -1) { | ||
261 | /* If the map is full, a hint won't do us much good. */ | ||
262 | this_cpu_write(*sbq->alloc_hint, 0); | ||
263 | } else if (nr == hint || unlikely(sbq->round_robin)) { | ||
264 | /* Only update the hint if we used it. */ | ||
265 | hint = nr + 1; | ||
266 | if (hint >= depth - 1) | ||
267 | hint = 0; | ||
268 | this_cpu_write(*sbq->alloc_hint, hint); | ||
269 | } | ||
270 | |||
271 | return nr; | ||
272 | } | ||
273 | EXPORT_SYMBOL_GPL(__sbitmap_queue_get); | ||
274 | |||
275 | static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) | ||
276 | { | ||
277 | int i, wake_index; | ||
278 | |||
279 | wake_index = atomic_read(&sbq->wake_index); | ||
280 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { | ||
281 | struct sbq_wait_state *ws = &sbq->ws[wake_index]; | ||
282 | |||
283 | if (waitqueue_active(&ws->wait)) { | ||
284 | int o = atomic_read(&sbq->wake_index); | ||
285 | |||
286 | if (wake_index != o) | ||
287 | atomic_cmpxchg(&sbq->wake_index, o, wake_index); | ||
288 | return ws; | ||
289 | } | ||
290 | |||
291 | wake_index = sbq_index_inc(wake_index); | ||
292 | } | ||
293 | |||
294 | return NULL; | ||
295 | } | ||
296 | |||
297 | static void sbq_wake_up(struct sbitmap_queue *sbq) | ||
298 | { | ||
299 | struct sbq_wait_state *ws; | ||
300 | int wait_cnt; | ||
301 | |||
302 | /* Ensure that the wait list checks occur after clear_bit(). */ | ||
303 | smp_mb(); | ||
304 | |||
305 | ws = sbq_wake_ptr(sbq); | ||
306 | if (!ws) | ||
307 | return; | ||
308 | |||
309 | wait_cnt = atomic_dec_return(&ws->wait_cnt); | ||
310 | if (unlikely(wait_cnt < 0)) | ||
311 | wait_cnt = atomic_inc_return(&ws->wait_cnt); | ||
312 | if (wait_cnt == 0) { | ||
313 | atomic_add(sbq->wake_batch, &ws->wait_cnt); | ||
314 | sbq_index_atomic_inc(&sbq->wake_index); | ||
315 | wake_up(&ws->wait); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, | ||
320 | unsigned int cpu) | ||
321 | { | ||
322 | sbitmap_clear_bit(&sbq->sb, nr); | ||
323 | sbq_wake_up(sbq); | ||
324 | if (likely(!sbq->round_robin && nr < sbq->sb.depth)) | ||
325 | *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; | ||
326 | } | ||
327 | EXPORT_SYMBOL_GPL(sbitmap_queue_clear); | ||
328 | |||
329 | void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) | ||
330 | { | ||
331 | int i, wake_index; | ||
332 | |||
333 | /* | ||
334 | * Make sure all changes prior to this are visible from other CPUs. | ||
335 | */ | ||
336 | smp_mb(); | ||
337 | wake_index = atomic_read(&sbq->wake_index); | ||
338 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { | ||
339 | struct sbq_wait_state *ws = &sbq->ws[wake_index]; | ||
340 | |||
341 | if (waitqueue_active(&ws->wait)) | ||
342 | wake_up(&ws->wait); | ||
343 | |||
344 | wake_index = sbq_index_inc(wake_index); | ||
345 | } | ||
346 | } | ||
347 | EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); | ||
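The new sbitmap/sbitmap_queue code above is the scalable bitmap that blk-mq-style tag allocation builds on. A hedged usage sketch treating an sbitmap_queue as a simple tag pool, using only the functions defined in this file; teardown (via the matching free helper declared in linux/sbitmap.h) and the sleeping side (waiting on sbq->ws[] until sbq_wake_up() fires) are omitted for brevity.

#include <linux/gfp.h>
#include <linux/numa.h>
#include <linux/sbitmap.h>
#include <linux/smp.h>

static int demo_tag_pool(void)
{
	struct sbitmap_queue sbq;
	int tag, ret;

	/* 128 tags; shift = -1 lets sbitmap_init_node() pick bits-per-word */
	ret = sbitmap_queue_init_node(&sbq, 128, -1, false, GFP_KERNEL,
				      NUMA_NO_NODE);
	if (ret)
		return ret;

	tag = __sbitmap_queue_get(&sbq);	/* -1 when no bit is free */
	if (tag >= 0) {
		/* ... use the tag ... */
		/* clearing wakes one waiter batch and caches the per-cpu hint */
		sbitmap_queue_clear(&sbq, tag, smp_processor_id());
	}

	return 0;
}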
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 9c5fe8110413..7e35fc450c5b 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/compiler.h> | 1 | #include <linux/compiler.h> |
2 | #include <linux/export.h> | 2 | #include <linux/export.h> |
3 | #include <linux/kasan-checks.h> | 3 | #include <linux/kasan-checks.h> |
4 | #include <linux/thread_info.h> | ||
4 | #include <linux/uaccess.h> | 5 | #include <linux/uaccess.h> |
5 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
6 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
@@ -111,6 +112,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count) | |||
111 | long retval; | 112 | long retval; |
112 | 113 | ||
113 | kasan_check_write(dst, count); | 114 | kasan_check_write(dst, count); |
115 | check_object_size(dst, count, false); | ||
114 | user_access_begin(); | 116 | user_access_begin(); |
115 | retval = do_strncpy_from_user(dst, src, count, max); | 117 | retval = do_strncpy_from_user(dst, src, count, max); |
116 | user_access_end(); | 118 | user_access_end(); |
diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 93f45011a59d..94346b4d8984 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c | |||
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size) | |||
5485 | skb->hash = SKB_HASH; | 5485 | skb->hash = SKB_HASH; |
5486 | skb->queue_mapping = SKB_QUEUE_MAP; | 5486 | skb->queue_mapping = SKB_QUEUE_MAP; |
5487 | skb->vlan_tci = SKB_VLAN_TCI; | 5487 | skb->vlan_tci = SKB_VLAN_TCI; |
5488 | skb->vlan_proto = htons(ETH_P_IP); | ||
5488 | skb->dev = &dev; | 5489 | skb->dev = &dev; |
5489 | skb->dev->ifindex = SKB_DEV_IFINDEX; | 5490 | skb->dev->ifindex = SKB_DEV_IFINDEX; |
5490 | skb->dev->type = SKB_DEV_TYPE; | 5491 | skb->dev->type = SKB_DEV_TYPE; |
diff --git a/lib/win_minmax.c b/lib/win_minmax.c new file mode 100644 index 000000000000..c8420d404926 --- /dev/null +++ b/lib/win_minmax.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /** | ||
2 | * lib/win_minmax.c: windowed min/max tracker | ||
3 | * | ||
4 | * Kathleen Nichols' algorithm for tracking the minimum (or maximum) | ||
5 | * value of a data stream over some fixed time interval. (E.g., | ||
6 | * the minimum RTT over the past five minutes.) It uses constant | ||
7 | * space and constant time per update yet almost always delivers | ||
8 | * the same minimum as an implementation that has to keep all the | ||
9 | * data in the window. | ||
10 | * | ||
11 | * The algorithm keeps track of the best, 2nd best & 3rd best min | ||
12 | * values, maintaining an invariant that the measurement time of | ||
13 | * the n'th best >= n-1'th best. It also makes sure that the three | ||
14 | * values are widely separated in the time window since that bounds | ||
15 | * the worst case error when the data is monotonically increasing | ||
16 | * over the window. | ||
17 | * | ||
18 | * Upon getting a new min, we can forget everything earlier because | ||
19 | * it has no value - the new min is <= everything else in the window | ||
20 | * by definition and it's the most recent. So we restart fresh on | ||
21 | * every new min and overwrite the 2nd & 3rd choices. The same property | ||
22 | * holds for 2nd & 3rd best. | ||
23 | */ | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/win_minmax.h> | ||
26 | |||
27 | /* As time advances, update the 1st, 2nd, and 3rd choices. */ | ||
28 | static u32 minmax_subwin_update(struct minmax *m, u32 win, | ||
29 | const struct minmax_sample *val) | ||
30 | { | ||
31 | u32 dt = val->t - m->s[0].t; | ||
32 | |||
33 | if (unlikely(dt > win)) { | ||
34 | /* | ||
35 | * Passed entire window without a new val so make 2nd | ||
36 | * choice the new val & 3rd choice the new 2nd choice. | ||
37 | * We may have to iterate this since our 2nd choice | ||
38 | * may also be outside the window (we checked on entry | ||
39 | * that the third choice was in the window). | ||
40 | */ | ||
41 | m->s[0] = m->s[1]; | ||
42 | m->s[1] = m->s[2]; | ||
43 | m->s[2] = *val; | ||
44 | if (unlikely(val->t - m->s[0].t > win)) { | ||
45 | m->s[0] = m->s[1]; | ||
46 | m->s[1] = m->s[2]; | ||
47 | m->s[2] = *val; | ||
48 | } | ||
49 | } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { | ||
50 | /* | ||
51 | * We've passed a quarter of the window without a new val | ||
52 | * so take a 2nd choice from the 2nd quarter of the window. | ||
53 | */ | ||
54 | m->s[2] = m->s[1] = *val; | ||
55 | } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { | ||
56 | /* | ||
57 | * We've passed half the window without finding a new val | ||
58 | * so take a 3rd choice from the last half of the window | ||
59 | */ | ||
60 | m->s[2] = *val; | ||
61 | } | ||
62 | return m->s[0].v; | ||
63 | } | ||
64 | |||
65 | /* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ | ||
66 | u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) | ||
67 | { | ||
68 | struct minmax_sample val = { .t = t, .v = meas }; | ||
69 | |||
70 | if (unlikely(val.v >= m->s[0].v) || /* found new max? */ | ||
71 | unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ | ||
72 | return minmax_reset(m, t, meas); /* forget earlier samples */ | ||
73 | |||
74 | if (unlikely(val.v >= m->s[1].v)) | ||
75 | m->s[2] = m->s[1] = val; | ||
76 | else if (unlikely(val.v >= m->s[2].v)) | ||
77 | m->s[2] = val; | ||
78 | |||
79 | return minmax_subwin_update(m, win, &val); | ||
80 | } | ||
81 | EXPORT_SYMBOL(minmax_running_max); | ||
82 | |||
83 | /* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ | ||
84 | u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) | ||
85 | { | ||
86 | struct minmax_sample val = { .t = t, .v = meas }; | ||
87 | |||
88 | if (unlikely(val.v <= m->s[0].v) || /* found new min? */ | ||
89 | unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ | ||
90 | return minmax_reset(m, t, meas); /* forget earlier samples */ | ||
91 | |||
92 | if (unlikely(val.v <= m->s[1].v)) | ||
93 | m->s[2] = m->s[1] = val; | ||
94 | else if (unlikely(val.v <= m->s[2].v)) | ||
95 | m->s[2] = val; | ||
96 | |||
97 | return minmax_subwin_update(m, win, &val); | ||
98 | } | ||
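A hedged usage sketch for the windowed min tracker above, e.g. keeping a roughly 10-second minimum-RTT estimate the way a congestion-control module might. The tick source and window length here are arbitrary choices, not part of the patch; any u32 clock works as long as the window and the timestamps agree.

#include <linux/jiffies.h>
#include <linux/win_minmax.h>

static struct minmax demo_min_rtt;

static void demo_rtt_init(void)
{
	/* seed with "no sample yet": a huge value stamped at the current time */
	minmax_reset(&demo_min_rtt, (u32)jiffies, ~0U);
}

static void demo_rtt_sample(u32 rtt_us)
{
	u32 win = 10 * HZ;	/* ~10 second window, jiffies-based */
	u32 cur_min = minmax_running_min(&demo_min_rtt, win, (u32)jiffies,
					 rtt_us);

	(void)cur_min;	/* windowed minimum after folding in this sample */
}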