Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig                    6
-rw-r--r--  lib/Kconfig.debug             11
-rw-r--r--  lib/Kconfig.ubsan             11
-rw-r--r--  lib/Makefile                   5
-rw-r--r--  lib/atomic64_test.c            4
-rw-r--r--  lib/bitmap.c                  50
-rw-r--r--  lib/dma-debug.c               52
-rw-r--r--  lib/iov_iter.c               420
-rw-r--r--  lib/irq_poll.c                 2
-rw-r--r--  lib/kstrtox.c                  6
-rw-r--r--  lib/nmi_backtrace.c           42
-rw-r--r--  lib/percpu-refcount.c        169
-rw-r--r--  lib/radix-tree.c              14
-rw-r--r--  lib/raid6/.gitignore           1
-rw-r--r--  lib/raid6/Makefile             8
-rw-r--r--  lib/raid6/algos.c             18
-rw-r--r--  lib/raid6/avx512.c           569
-rw-r--r--  lib/raid6/recov_avx512.c     388
-rw-r--r--  lib/raid6/recov_s390xc.c     116
-rw-r--r--  lib/raid6/s390vx.uc          168
-rw-r--r--  lib/raid6/test/Makefile        5
-rw-r--r--  lib/raid6/test/test.c          7
-rw-r--r--  lib/raid6/x86.h               10
-rw-r--r--  lib/random32.c                 6
-rw-r--r--  lib/rhashtable.c             300
-rw-r--r--  lib/sbitmap.c                347
-rw-r--r--  lib/strncpy_from_user.c        2
-rw-r--r--  lib/test_bpf.c                 1
-rw-r--r--  lib/win_minmax.c              98
29 files changed, 2620 insertions(+), 216 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index d79909dc01ec..260a80e313b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -457,9 +457,6 @@ config NLATTR
457config GENERIC_ATOMIC64 457config GENERIC_ATOMIC64
458 bool 458 bool
459 459
460config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
461 def_bool y if GENERIC_ATOMIC64
462
463config LRU_CACHE 460config LRU_CACHE
464 tristate 461 tristate
465 462
@@ -550,4 +547,7 @@ config STACKDEPOT
550 bool 547 bool
551 select STACKTRACE 548 select STACKTRACE
552 549
550config SBITMAP
551 bool
552
553endmenu 553endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cab7405f48d2..33bc56cf60d7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -305,7 +305,7 @@ config DEBUG_SECTION_MISMATCH
305 a larger kernel). 305 a larger kernel).
306 - Run the section mismatch analysis for each module/built-in.o file. 306 - Run the section mismatch analysis for each module/built-in.o file.
307 When we run the section mismatch analysis on vmlinux.o, we 307 When we run the section mismatch analysis on vmlinux.o, we
308 lose valueble information about where the mismatch was 308 lose valuable information about where the mismatch was
309 introduced. 309 introduced.
310 Running the analysis for each module/built-in.o file 310 Running the analysis for each module/built-in.o file
311 tells where the mismatch happens much closer to the 311 tells where the mismatch happens much closer to the
@@ -1857,15 +1857,6 @@ config PROVIDE_OHCI1394_DMA_INIT
1857 1857
1858 See Documentation/debugging-via-ohci1394.txt for more information. 1858 See Documentation/debugging-via-ohci1394.txt for more information.
1859 1859
1860config BUILD_DOCSRC
1861 bool "Build targets in Documentation/ tree"
1862 depends on HEADERS_CHECK
1863 help
1864 This option attempts to build objects from the source files in the
1865 kernel Documentation/ tree.
1866
1867 Say N if you are unsure.
1868
1869config DMA_API_DEBUG 1860config DMA_API_DEBUG
1870 bool "Enable debugging of DMA-API usage" 1861 bool "Enable debugging of DMA-API usage"
1871 depends on HAVE_DMA_API_DEBUG 1862 depends on HAVE_DMA_API_DEBUG
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af9a84a..bc6e651df68c 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
1config ARCH_HAS_UBSAN_SANITIZE_ALL 1config ARCH_HAS_UBSAN_SANITIZE_ALL
2 bool 2 bool
3 3
4config ARCH_WANTS_UBSAN_NO_NULL
5 def_bool n
6
4config UBSAN 7config UBSAN
5 bool "Undefined behaviour sanity checker" 8 bool "Undefined behaviour sanity checker"
6 help 9 help
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT
34 This option enables detection of unaligned memory accesses. 37 This option enables detection of unaligned memory accesses.
35 Enabling this option on architectures that support unaligned 38 Enabling this option on architectures that support unaligned
36 accesses may produce a lot of false positives. 39 accesses may produce a lot of false positives.
40
41config UBSAN_NULL
42 bool "Enable checking of null pointers"
43 depends on UBSAN
44 default y if !ARCH_WANTS_UBSAN_NO_NULL
45 help
46 This option enables detection of memory accesses via a
47 null pointer.
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..50144a3aeebd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
22 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ 22 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
23 flex_proportions.o ratelimit.o show_mem.o \ 23 flex_proportions.o ratelimit.o show_mem.o \
24 is_single_threaded.o plist.o decompress.o kobject_uevent.o \ 24 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
25 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o 25 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
26 26
27lib-$(CONFIG_MMU) += ioremap.o 27lib-$(CONFIG_MMU) += ioremap.o
28lib-$(CONFIG_SMP) += cpumask.o 28lib-$(CONFIG_SMP) += cpumask.o
@@ -180,6 +180,7 @@ obj-$(CONFIG_IRQ_POLL) += irq_poll.o
180 180
181obj-$(CONFIG_STACKDEPOT) += stackdepot.o 181obj-$(CONFIG_STACKDEPOT) += stackdepot.o
182KASAN_SANITIZE_stackdepot.o := n 182KASAN_SANITIZE_stackdepot.o := n
183KCOV_INSTRUMENT_stackdepot.o := n
183 184
184libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ 185libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
185 fdt_empty_tree.o 186 fdt_empty_tree.o
@@ -227,3 +228,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
227obj-$(CONFIG_UBSAN) += ubsan.o 228obj-$(CONFIG_UBSAN) += ubsan.o
228 229
229UBSAN_SANITIZE_ubsan.o := n 230UBSAN_SANITIZE_ubsan.o := n
231
232obj-$(CONFIG_SBITMAP) += sbitmap.o
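
The Makefile hunks hook two new objects into the build: win_minmax.o (a windowed min/max estimator used by TCP BBR) and sbitmap.o (scalable bitmaps, gated by the new CONFIG_SBITMAP symbol above), and they disable KCOV instrumentation for stackdepot. A minimal sketch of the win_minmax API, assuming the helpers declared in include/linux/win_minmax.h; the variable names here are illustrative only:

#include <linux/win_minmax.h>

/* windowed max filter state */
static struct minmax bw;

static u32 update_bw(u32 now, u32 win, u32 sample)
{
        /* returns the maximum sample seen over the last "win" time units */
        return minmax_running_max(&bw, win, now, sample);
}
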
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index dbb369145dda..46042901130f 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -213,7 +213,6 @@ static __init void test_atomic64(void)
213 r += one; 213 r += one;
214 BUG_ON(v.counter != r); 214 BUG_ON(v.counter != r);
215 215
216#ifdef CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
217 INIT(onestwos); 216 INIT(onestwos);
218 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); 217 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
219 r -= one; 218 r -= one;
@@ -226,9 +225,6 @@ static __init void test_atomic64(void)
226 INIT(-one); 225 INIT(-one);
227 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); 226 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
228 BUG_ON(v.counter != r); 227 BUG_ON(v.counter != r);
229#else
230#warning Please implement atomic64_dec_if_positive for your architecture and select the above Kconfig symbol
231#endif
232 228
233 INIT(onestwos); 229 INIT(onestwos);
234 BUG_ON(!atomic64_inc_not_zero(&v)); 230 BUG_ON(!atomic64_inc_not_zero(&v));
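
With every architecture now providing atomic64_dec_if_positive(), the test drops the ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE guard. A sketch of the semantics the surviving assertions exercise, using an illustrative counter:

#include <linux/atomic.h>

static atomic64_t refs = ATOMIC64_INIT(1);

static bool put_ref(void)
{
        /*
         * atomic64_dec_if_positive() only stores the decrement when the
         * result stays >= 0 and returns old - 1 either way, so a zero
         * return means we just dropped the last reference.
         */
        return atomic64_dec_if_positive(&refs) == 0;
}
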
diff --git a/lib/bitmap.c b/lib/bitmap.c
index eca88087fa8a..0b66f0e5eb6b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -496,6 +496,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf);
496 * ranges. Consecutively set bits are shown as two hyphen-separated 496 * ranges. Consecutively set bits are shown as two hyphen-separated
497 * decimal numbers, the smallest and largest bit numbers set in 497 * decimal numbers, the smallest and largest bit numbers set in
498 * the range. 498 * the range.
499 * Optionally each range can be postfixed to denote that only parts of it
500 * should be set. The range will divided to groups of specific size.
501 * From each group will be used only defined amount of bits.
502 * Syntax: range:used_size/group_size
503 * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
499 * 504 *
500 * Returns 0 on success, -errno on invalid input strings. 505 * Returns 0 on success, -errno on invalid input strings.
501 * Error values: 506 * Error values:
@@ -507,16 +512,20 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
507 int is_user, unsigned long *maskp, 512 int is_user, unsigned long *maskp,
508 int nmaskbits) 513 int nmaskbits)
509{ 514{
510 unsigned a, b; 515 unsigned int a, b, old_a, old_b;
516 unsigned int group_size, used_size;
511 int c, old_c, totaldigits, ndigits; 517 int c, old_c, totaldigits, ndigits;
512 const char __user __force *ubuf = (const char __user __force *)buf; 518 const char __user __force *ubuf = (const char __user __force *)buf;
513 int at_start, in_range; 519 int at_start, in_range, in_partial_range;
514 520
515 totaldigits = c = 0; 521 totaldigits = c = 0;
522 old_a = old_b = 0;
523 group_size = used_size = 0;
516 bitmap_zero(maskp, nmaskbits); 524 bitmap_zero(maskp, nmaskbits);
517 do { 525 do {
518 at_start = 1; 526 at_start = 1;
519 in_range = 0; 527 in_range = 0;
528 in_partial_range = 0;
520 a = b = 0; 529 a = b = 0;
521 ndigits = totaldigits; 530 ndigits = totaldigits;
522 531
@@ -547,6 +556,24 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
547 if ((totaldigits != ndigits) && isspace(old_c)) 556 if ((totaldigits != ndigits) && isspace(old_c))
548 return -EINVAL; 557 return -EINVAL;
549 558
559 if (c == '/') {
560 used_size = a;
561 at_start = 1;
562 in_range = 0;
563 a = b = 0;
564 continue;
565 }
566
567 if (c == ':') {
568 old_a = a;
569 old_b = b;
570 at_start = 1;
571 in_range = 0;
572 in_partial_range = 1;
573 a = b = 0;
574 continue;
575 }
576
550 if (c == '-') { 577 if (c == '-') {
551 if (at_start || in_range) 578 if (at_start || in_range)
552 return -EINVAL; 579 return -EINVAL;
@@ -567,15 +594,30 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
567 } 594 }
568 if (ndigits == totaldigits) 595 if (ndigits == totaldigits)
569 continue; 596 continue;
597 if (in_partial_range) {
598 group_size = a;
599 a = old_a;
600 b = old_b;
601 old_a = old_b = 0;
602 }
570 /* if no digit is after '-', it's wrong*/ 603 /* if no digit is after '-', it's wrong*/
571 if (at_start && in_range) 604 if (at_start && in_range)
572 return -EINVAL; 605 return -EINVAL;
573 if (!(a <= b)) 606 if (!(a <= b) || !(used_size <= group_size))
574 return -EINVAL; 607 return -EINVAL;
575 if (b >= nmaskbits) 608 if (b >= nmaskbits)
576 return -ERANGE; 609 return -ERANGE;
577 while (a <= b) { 610 while (a <= b) {
578 set_bit(a, maskp); 611 if (in_partial_range) {
612 static int pos_in_group = 1;
613
614 if (pos_in_group <= used_size)
615 set_bit(a, maskp);
616
617 if (a == b || ++pos_in_group > group_size)
618 pos_in_group = 1;
619 } else
620 set_bit(a, maskp);
579 a++; 621 a++;
580 } 622 }
581 } while (buflen && c == ','); 623 } while (buflen && c == ',');
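
__bitmap_parselist() gains an optional ":used_size/group_size" suffix that sets only the first used_size bits of every group_size-bit group inside a range. A sketch of how a caller could exercise it through the exported bitmap_parselist(); the bitmap name and size are illustrative:

#include <linux/bitmap.h>

static DECLARE_BITMAP(mask, 1024);

static int demo_parse(void)
{
        /* sets bits 0,1,256,257,512,513,768,769 per the example above */
        return bitmap_parselist("0-1023:2/256", mask, 1024);
}
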
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 06f02f6aecd2..8971370bfb16 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -44,6 +44,7 @@ enum {
44 dma_debug_page, 44 dma_debug_page,
45 dma_debug_sg, 45 dma_debug_sg,
46 dma_debug_coherent, 46 dma_debug_coherent,
47 dma_debug_resource,
47}; 48};
48 49
49enum map_err_types { 50enum map_err_types {
@@ -151,8 +152,9 @@ static const char *const maperr2str[] = {
151 [MAP_ERR_CHECKED] = "dma map error checked", 152 [MAP_ERR_CHECKED] = "dma map error checked",
152}; 153};
153 154
154static const char *type2name[4] = { "single", "page", 155static const char *type2name[5] = { "single", "page",
155 "scather-gather", "coherent" }; 156 "scather-gather", "coherent",
157 "resource" };
156 158
157static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", 159static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
158 "DMA_FROM_DEVICE", "DMA_NONE" }; 160 "DMA_FROM_DEVICE", "DMA_NONE" };
@@ -400,6 +402,9 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
400 402
401static unsigned long long phys_addr(struct dma_debug_entry *entry) 403static unsigned long long phys_addr(struct dma_debug_entry *entry)
402{ 404{
405 if (entry->type == dma_debug_resource)
406 return __pfn_to_phys(entry->pfn) + entry->offset;
407
403 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; 408 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
404} 409}
405 410
@@ -1519,6 +1524,49 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
1519} 1524}
1520EXPORT_SYMBOL(debug_dma_free_coherent); 1525EXPORT_SYMBOL(debug_dma_free_coherent);
1521 1526
1527void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
1528 int direction, dma_addr_t dma_addr)
1529{
1530 struct dma_debug_entry *entry;
1531
1532 if (unlikely(dma_debug_disabled()))
1533 return;
1534
1535 entry = dma_entry_alloc();
1536 if (!entry)
1537 return;
1538
1539 entry->type = dma_debug_resource;
1540 entry->dev = dev;
1541 entry->pfn = PHYS_PFN(addr);
1542 entry->offset = offset_in_page(addr);
1543 entry->size = size;
1544 entry->dev_addr = dma_addr;
1545 entry->direction = direction;
1546 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1547
1548 add_dma_entry(entry);
1549}
1550EXPORT_SYMBOL(debug_dma_map_resource);
1551
1552void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
1553 size_t size, int direction)
1554{
1555 struct dma_debug_entry ref = {
1556 .type = dma_debug_resource,
1557 .dev = dev,
1558 .dev_addr = dma_addr,
1559 .size = size,
1560 .direction = direction,
1561 };
1562
1563 if (unlikely(dma_debug_disabled()))
1564 return;
1565
1566 check_unmap(&ref);
1567}
1568EXPORT_SYMBOL(debug_dma_unmap_resource);
1569
1522void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 1570void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
1523 size_t size, int direction) 1571 size_t size, int direction)
1524{ 1572{
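
The new debug_dma_map_resource()/debug_dma_unmap_resource() hooks let DMA-debug track dma_map_resource() mappings, which target MMIO ranges and therefore have no struct page behind them (hence the __pfn_to_phys() path in phys_addr()). A sketch of the driver-side pairing these hooks shadow; dev, phys and len are illustrative:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int map_device_window(struct device *dev, phys_addr_t phys, size_t len)
{
        dma_addr_t dma;

        /* tracked by debug_dma_map_resource() when DMA_API_DEBUG is on */
        dma = dma_map_resource(dev, phys, len, DMA_BIDIRECTIONAL, 0);
        if (dma_mapping_error(dev, dma))
                return -ENOMEM;

        /* ... hand "dma" to the peer device ... */

        dma_unmap_resource(dev, dma, len, DMA_BIDIRECTIONAL, 0);
        return 0;
}
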
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e3138cfc8c9..f0c7f1481bae 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,8 +3,11 @@
3#include <linux/pagemap.h> 3#include <linux/pagemap.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
6#include <linux/splice.h>
6#include <net/checksum.h> 7#include <net/checksum.h>
7 8
9#define PIPE_PARANOIA /* for now */
10
8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ 11#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t left; \ 12 size_t left; \
10 size_t wanted = n; \ 13 size_t wanted = n; \
@@ -290,6 +293,93 @@ done:
290 return wanted - bytes; 293 return wanted - bytes;
291} 294}
292 295
296#ifdef PIPE_PARANOIA
297static bool sanity(const struct iov_iter *i)
298{
299 struct pipe_inode_info *pipe = i->pipe;
300 int idx = i->idx;
301 int next = pipe->curbuf + pipe->nrbufs;
302 if (i->iov_offset) {
303 struct pipe_buffer *p;
304 if (unlikely(!pipe->nrbufs))
305 goto Bad; // pipe must be non-empty
306 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
307 goto Bad; // must be at the last buffer...
308
309 p = &pipe->bufs[idx];
310 if (unlikely(p->offset + p->len != i->iov_offset))
311 goto Bad; // ... at the end of segment
312 } else {
313 if (idx != (next & (pipe->buffers - 1)))
314 goto Bad; // must be right after the last buffer
315 }
316 return true;
317Bad:
318 printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
319 printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
320 pipe->curbuf, pipe->nrbufs, pipe->buffers);
321 for (idx = 0; idx < pipe->buffers; idx++)
322 printk(KERN_ERR "[%p %p %d %d]\n",
323 pipe->bufs[idx].ops,
324 pipe->bufs[idx].page,
325 pipe->bufs[idx].offset,
326 pipe->bufs[idx].len);
327 WARN_ON(1);
328 return false;
329}
330#else
331#define sanity(i) true
332#endif
333
334static inline int next_idx(int idx, struct pipe_inode_info *pipe)
335{
336 return (idx + 1) & (pipe->buffers - 1);
337}
338
339static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
340 struct iov_iter *i)
341{
342 struct pipe_inode_info *pipe = i->pipe;
343 struct pipe_buffer *buf;
344 size_t off;
345 int idx;
346
347 if (unlikely(bytes > i->count))
348 bytes = i->count;
349
350 if (unlikely(!bytes))
351 return 0;
352
353 if (!sanity(i))
354 return 0;
355
356 off = i->iov_offset;
357 idx = i->idx;
358 buf = &pipe->bufs[idx];
359 if (off) {
360 if (offset == off && buf->page == page) {
361 /* merge with the last one */
362 buf->len += bytes;
363 i->iov_offset += bytes;
364 goto out;
365 }
366 idx = next_idx(idx, pipe);
367 buf = &pipe->bufs[idx];
368 }
369 if (idx == pipe->curbuf && pipe->nrbufs)
370 return 0;
371 pipe->nrbufs++;
372 buf->ops = &page_cache_pipe_buf_ops;
373 get_page(buf->page = page);
374 buf->offset = offset;
375 buf->len = bytes;
376 i->iov_offset = offset + bytes;
377 i->idx = idx;
378out:
379 i->count -= bytes;
380 return bytes;
381}
382
293/* 383/*
294 * Fault in one or more iovecs of the given iov_iter, to a maximum length of 384 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
295 * bytes. For each iovec, fault in each page that constitutes the iovec. 385 * bytes. For each iovec, fault in each page that constitutes the iovec.
@@ -306,8 +396,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
306 396
307 if (!(i->type & (ITER_BVEC|ITER_KVEC))) { 397 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
308 iterate_iovec(i, bytes, v, iov, skip, ({ 398 iterate_iovec(i, bytes, v, iov, skip, ({
309 err = fault_in_multipages_readable(v.iov_base, 399 err = fault_in_pages_readable(v.iov_base, v.iov_len);
310 v.iov_len);
311 if (unlikely(err)) 400 if (unlikely(err))
312 return err; 401 return err;
313 0;})) 402 0;}))
@@ -356,9 +445,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
356 kunmap_atomic(addr); 445 kunmap_atomic(addr);
357} 446}
358 447
448static inline bool allocated(struct pipe_buffer *buf)
449{
450 return buf->ops == &default_pipe_buf_ops;
451}
452
453static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
454{
455 size_t off = i->iov_offset;
456 int idx = i->idx;
457 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
458 idx = next_idx(idx, i->pipe);
459 off = 0;
460 }
461 *idxp = idx;
462 *offp = off;
463}
464
465static size_t push_pipe(struct iov_iter *i, size_t size,
466 int *idxp, size_t *offp)
467{
468 struct pipe_inode_info *pipe = i->pipe;
469 size_t off;
470 int idx;
471 ssize_t left;
472
473 if (unlikely(size > i->count))
474 size = i->count;
475 if (unlikely(!size))
476 return 0;
477
478 left = size;
479 data_start(i, &idx, &off);
480 *idxp = idx;
481 *offp = off;
482 if (off) {
483 left -= PAGE_SIZE - off;
484 if (left <= 0) {
485 pipe->bufs[idx].len += size;
486 return size;
487 }
488 pipe->bufs[idx].len = PAGE_SIZE;
489 idx = next_idx(idx, pipe);
490 }
491 while (idx != pipe->curbuf || !pipe->nrbufs) {
492 struct page *page = alloc_page(GFP_USER);
493 if (!page)
494 break;
495 pipe->nrbufs++;
496 pipe->bufs[idx].ops = &default_pipe_buf_ops;
497 pipe->bufs[idx].page = page;
498 pipe->bufs[idx].offset = 0;
499 if (left <= PAGE_SIZE) {
500 pipe->bufs[idx].len = left;
501 return size;
502 }
503 pipe->bufs[idx].len = PAGE_SIZE;
504 left -= PAGE_SIZE;
505 idx = next_idx(idx, pipe);
506 }
507 return size - left;
508}
509
510static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
511 struct iov_iter *i)
512{
513 struct pipe_inode_info *pipe = i->pipe;
514 size_t n, off;
515 int idx;
516
517 if (!sanity(i))
518 return 0;
519
520 bytes = n = push_pipe(i, bytes, &idx, &off);
521 if (unlikely(!n))
522 return 0;
523 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
524 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
525 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
526 i->idx = idx;
527 i->iov_offset = off + chunk;
528 n -= chunk;
529 addr += chunk;
530 }
531 i->count -= bytes;
532 return bytes;
533}
534
359size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 535size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
360{ 536{
361 const char *from = addr; 537 const char *from = addr;
538 if (unlikely(i->type & ITER_PIPE))
539 return copy_pipe_to_iter(addr, bytes, i);
362 iterate_and_advance(i, bytes, v, 540 iterate_and_advance(i, bytes, v,
363 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, 541 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
364 v.iov_len), 542 v.iov_len),
@@ -374,6 +552,10 @@ EXPORT_SYMBOL(copy_to_iter);
374size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 552size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
375{ 553{
376 char *to = addr; 554 char *to = addr;
555 if (unlikely(i->type & ITER_PIPE)) {
556 WARN_ON(1);
557 return 0;
558 }
377 iterate_and_advance(i, bytes, v, 559 iterate_and_advance(i, bytes, v,
378 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, 560 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
379 v.iov_len), 561 v.iov_len),
@@ -389,6 +571,10 @@ EXPORT_SYMBOL(copy_from_iter);
389size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 571size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
390{ 572{
391 char *to = addr; 573 char *to = addr;
574 if (unlikely(i->type & ITER_PIPE)) {
575 WARN_ON(1);
576 return 0;
577 }
392 iterate_and_advance(i, bytes, v, 578 iterate_and_advance(i, bytes, v,
393 __copy_from_user_nocache((to += v.iov_len) - v.iov_len, 579 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
394 v.iov_base, v.iov_len), 580 v.iov_base, v.iov_len),
@@ -409,14 +595,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
409 size_t wanted = copy_to_iter(kaddr + offset, bytes, i); 595 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
410 kunmap_atomic(kaddr); 596 kunmap_atomic(kaddr);
411 return wanted; 597 return wanted;
412 } else 598 } else if (likely(!(i->type & ITER_PIPE)))
413 return copy_page_to_iter_iovec(page, offset, bytes, i); 599 return copy_page_to_iter_iovec(page, offset, bytes, i);
600 else
601 return copy_page_to_iter_pipe(page, offset, bytes, i);
414} 602}
415EXPORT_SYMBOL(copy_page_to_iter); 603EXPORT_SYMBOL(copy_page_to_iter);
416 604
417size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 605size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
418 struct iov_iter *i) 606 struct iov_iter *i)
419{ 607{
608 if (unlikely(i->type & ITER_PIPE)) {
609 WARN_ON(1);
610 return 0;
611 }
420 if (i->type & (ITER_BVEC|ITER_KVEC)) { 612 if (i->type & (ITER_BVEC|ITER_KVEC)) {
421 void *kaddr = kmap_atomic(page); 613 void *kaddr = kmap_atomic(page);
422 size_t wanted = copy_from_iter(kaddr + offset, bytes, i); 614 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -427,8 +619,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
427} 619}
428EXPORT_SYMBOL(copy_page_from_iter); 620EXPORT_SYMBOL(copy_page_from_iter);
429 621
622static size_t pipe_zero(size_t bytes, struct iov_iter *i)
623{
624 struct pipe_inode_info *pipe = i->pipe;
625 size_t n, off;
626 int idx;
627
628 if (!sanity(i))
629 return 0;
630
631 bytes = n = push_pipe(i, bytes, &idx, &off);
632 if (unlikely(!n))
633 return 0;
634
635 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
636 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
637 memzero_page(pipe->bufs[idx].page, off, chunk);
638 i->idx = idx;
639 i->iov_offset = off + chunk;
640 n -= chunk;
641 }
642 i->count -= bytes;
643 return bytes;
644}
645
430size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 646size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
431{ 647{
648 if (unlikely(i->type & ITER_PIPE))
649 return pipe_zero(bytes, i);
432 iterate_and_advance(i, bytes, v, 650 iterate_and_advance(i, bytes, v,
433 __clear_user(v.iov_base, v.iov_len), 651 __clear_user(v.iov_base, v.iov_len),
434 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 652 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -443,6 +661,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
443 struct iov_iter *i, unsigned long offset, size_t bytes) 661 struct iov_iter *i, unsigned long offset, size_t bytes)
444{ 662{
445 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 663 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
664 if (unlikely(i->type & ITER_PIPE)) {
665 kunmap_atomic(kaddr);
666 WARN_ON(1);
667 return 0;
668 }
446 iterate_all_kinds(i, bytes, v, 669 iterate_all_kinds(i, bytes, v,
447 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, 670 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
448 v.iov_base, v.iov_len), 671 v.iov_base, v.iov_len),
@@ -455,8 +678,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
455} 678}
456EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 679EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
457 680
681static void pipe_advance(struct iov_iter *i, size_t size)
682{
683 struct pipe_inode_info *pipe = i->pipe;
684 struct pipe_buffer *buf;
685 int idx = i->idx;
686 size_t off = i->iov_offset;
687
688 if (unlikely(i->count < size))
689 size = i->count;
690
691 if (size) {
692 if (off) /* make it relative to the beginning of buffer */
693 size += off - pipe->bufs[idx].offset;
694 while (1) {
695 buf = &pipe->bufs[idx];
696 if (size <= buf->len)
697 break;
698 size -= buf->len;
699 idx = next_idx(idx, pipe);
700 }
701 buf->len = size;
702 i->idx = idx;
703 off = i->iov_offset = buf->offset + size;
704 }
705 if (off)
706 idx = next_idx(idx, pipe);
707 if (pipe->nrbufs) {
708 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
709 /* [curbuf,unused) is in use. Free [idx,unused) */
710 while (idx != unused) {
711 pipe_buf_release(pipe, &pipe->bufs[idx]);
712 idx = next_idx(idx, pipe);
713 pipe->nrbufs--;
714 }
715 }
716}
717
458void iov_iter_advance(struct iov_iter *i, size_t size) 718void iov_iter_advance(struct iov_iter *i, size_t size)
459{ 719{
720 if (unlikely(i->type & ITER_PIPE)) {
721 pipe_advance(i, size);
722 return;
723 }
460 iterate_and_advance(i, size, v, 0, 0, 0) 724 iterate_and_advance(i, size, v, 0, 0, 0)
461} 725}
462EXPORT_SYMBOL(iov_iter_advance); 726EXPORT_SYMBOL(iov_iter_advance);
@@ -466,6 +730,8 @@ EXPORT_SYMBOL(iov_iter_advance);
466 */ 730 */
467size_t iov_iter_single_seg_count(const struct iov_iter *i) 731size_t iov_iter_single_seg_count(const struct iov_iter *i)
468{ 732{
733 if (unlikely(i->type & ITER_PIPE))
734 return i->count; // it is a silly place, anyway
469 if (i->nr_segs == 1) 735 if (i->nr_segs == 1)
470 return i->count; 736 return i->count;
471 else if (i->type & ITER_BVEC) 737 else if (i->type & ITER_BVEC)
@@ -501,6 +767,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
501} 767}
502EXPORT_SYMBOL(iov_iter_bvec); 768EXPORT_SYMBOL(iov_iter_bvec);
503 769
770void iov_iter_pipe(struct iov_iter *i, int direction,
771 struct pipe_inode_info *pipe,
772 size_t count)
773{
774 BUG_ON(direction != ITER_PIPE);
775 i->type = direction;
776 i->pipe = pipe;
777 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
778 i->iov_offset = 0;
779 i->count = count;
780}
781EXPORT_SYMBOL(iov_iter_pipe);
782
504unsigned long iov_iter_alignment(const struct iov_iter *i) 783unsigned long iov_iter_alignment(const struct iov_iter *i)
505{ 784{
506 unsigned long res = 0; 785 unsigned long res = 0;
@@ -509,6 +788,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
509 if (!size) 788 if (!size)
510 return 0; 789 return 0;
511 790
791 if (unlikely(i->type & ITER_PIPE)) {
792 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
793 return size | i->iov_offset;
794 return size;
795 }
512 iterate_all_kinds(i, size, v, 796 iterate_all_kinds(i, size, v,
513 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 797 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
514 res |= v.bv_offset | v.bv_len, 798 res |= v.bv_offset | v.bv_len,
@@ -525,6 +809,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
525 if (!size) 809 if (!size)
526 return 0; 810 return 0;
527 811
812 if (unlikely(i->type & ITER_PIPE)) {
813 WARN_ON(1);
814 return ~0U;
815 }
816
528 iterate_all_kinds(i, size, v, 817 iterate_all_kinds(i, size, v,
529 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 818 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
530 (size != v.iov_len ? size : 0), 0), 819 (size != v.iov_len ? size : 0), 0),
@@ -537,6 +826,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
537} 826}
538EXPORT_SYMBOL(iov_iter_gap_alignment); 827EXPORT_SYMBOL(iov_iter_gap_alignment);
539 828
829static inline size_t __pipe_get_pages(struct iov_iter *i,
830 size_t maxsize,
831 struct page **pages,
832 int idx,
833 size_t *start)
834{
835 struct pipe_inode_info *pipe = i->pipe;
836 ssize_t n = push_pipe(i, maxsize, &idx, start);
837 if (!n)
838 return -EFAULT;
839
840 maxsize = n;
841 n += *start;
842 while (n > 0) {
843 get_page(*pages++ = pipe->bufs[idx].page);
844 idx = next_idx(idx, pipe);
845 n -= PAGE_SIZE;
846 }
847
848 return maxsize;
849}
850
851static ssize_t pipe_get_pages(struct iov_iter *i,
852 struct page **pages, size_t maxsize, unsigned maxpages,
853 size_t *start)
854{
855 unsigned npages;
856 size_t capacity;
857 int idx;
858
859 if (!sanity(i))
860 return -EFAULT;
861
862 data_start(i, &idx, start);
863 /* some of this one + all after this one */
864 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
865 capacity = min(npages,maxpages) * PAGE_SIZE - *start;
866
867 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
868}
869
540ssize_t iov_iter_get_pages(struct iov_iter *i, 870ssize_t iov_iter_get_pages(struct iov_iter *i,
541 struct page **pages, size_t maxsize, unsigned maxpages, 871 struct page **pages, size_t maxsize, unsigned maxpages,
542 size_t *start) 872 size_t *start)
@@ -547,6 +877,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
547 if (!maxsize) 877 if (!maxsize)
548 return 0; 878 return 0;
549 879
880 if (unlikely(i->type & ITER_PIPE))
881 return pipe_get_pages(i, pages, maxsize, maxpages, start);
550 iterate_all_kinds(i, maxsize, v, ({ 882 iterate_all_kinds(i, maxsize, v, ({
551 unsigned long addr = (unsigned long)v.iov_base; 883 unsigned long addr = (unsigned long)v.iov_base;
552 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 884 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -582,6 +914,37 @@ static struct page **get_pages_array(size_t n)
582 return p; 914 return p;
583} 915}
584 916
917static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
918 struct page ***pages, size_t maxsize,
919 size_t *start)
920{
921 struct page **p;
922 size_t n;
923 int idx;
924 int npages;
925
926 if (!sanity(i))
927 return -EFAULT;
928
929 data_start(i, &idx, start);
930 /* some of this one + all after this one */
931 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
932 n = npages * PAGE_SIZE - *start;
933 if (maxsize > n)
934 maxsize = n;
935 else
936 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
937 p = get_pages_array(npages);
938 if (!p)
939 return -ENOMEM;
940 n = __pipe_get_pages(i, maxsize, p, idx, start);
941 if (n > 0)
942 *pages = p;
943 else
944 kvfree(p);
945 return n;
946}
947
585ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 948ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
586 struct page ***pages, size_t maxsize, 949 struct page ***pages, size_t maxsize,
587 size_t *start) 950 size_t *start)
@@ -594,6 +957,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
594 if (!maxsize) 957 if (!maxsize)
595 return 0; 958 return 0;
596 959
960 if (unlikely(i->type & ITER_PIPE))
961 return pipe_get_pages_alloc(i, pages, maxsize, start);
597 iterate_all_kinds(i, maxsize, v, ({ 962 iterate_all_kinds(i, maxsize, v, ({
598 unsigned long addr = (unsigned long)v.iov_base; 963 unsigned long addr = (unsigned long)v.iov_base;
599 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 964 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -635,6 +1000,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
635 __wsum sum, next; 1000 __wsum sum, next;
636 size_t off = 0; 1001 size_t off = 0;
637 sum = *csum; 1002 sum = *csum;
1003 if (unlikely(i->type & ITER_PIPE)) {
1004 WARN_ON(1);
1005 return 0;
1006 }
638 iterate_and_advance(i, bytes, v, ({ 1007 iterate_and_advance(i, bytes, v, ({
639 int err = 0; 1008 int err = 0;
640 next = csum_and_copy_from_user(v.iov_base, 1009 next = csum_and_copy_from_user(v.iov_base,
@@ -673,6 +1042,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
673 __wsum sum, next; 1042 __wsum sum, next;
674 size_t off = 0; 1043 size_t off = 0;
675 sum = *csum; 1044 sum = *csum;
1045 if (unlikely(i->type & ITER_PIPE)) {
1046 WARN_ON(1); /* for now */
1047 return 0;
1048 }
676 iterate_and_advance(i, bytes, v, ({ 1049 iterate_and_advance(i, bytes, v, ({
677 int err = 0; 1050 int err = 0;
678 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1051 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -712,7 +1085,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
712 if (!size) 1085 if (!size)
713 return 0; 1086 return 0;
714 1087
715 iterate_all_kinds(i, size, v, ({ 1088 if (unlikely(i->type & ITER_PIPE)) {
1089 struct pipe_inode_info *pipe = i->pipe;
1090 size_t off;
1091 int idx;
1092
1093 if (!sanity(i))
1094 return 0;
1095
1096 data_start(i, &idx, &off);
1097 /* some of this one + all after this one */
1098 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1099 if (npages >= maxpages)
1100 return maxpages;
1101 } else iterate_all_kinds(i, size, v, ({
716 unsigned long p = (unsigned long)v.iov_base; 1102 unsigned long p = (unsigned long)v.iov_base;
717 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1103 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
718 - p / PAGE_SIZE; 1104 - p / PAGE_SIZE;
@@ -737,6 +1123,10 @@ EXPORT_SYMBOL(iov_iter_npages);
737const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1123const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
738{ 1124{
739 *new = *old; 1125 *new = *old;
1126 if (unlikely(new->type & ITER_PIPE)) {
1127 WARN_ON(1);
1128 return NULL;
1129 }
740 if (new->type & ITER_BVEC) 1130 if (new->type & ITER_BVEC)
741 return new->bvec = kmemdup(new->bvec, 1131 return new->bvec = kmemdup(new->bvec,
742 new->nr_segs * sizeof(struct bio_vec), 1132 new->nr_segs * sizeof(struct bio_vec),
@@ -749,6 +1139,28 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
749} 1139}
750EXPORT_SYMBOL(dup_iter); 1140EXPORT_SYMBOL(dup_iter);
751 1141
1142/**
1143 * import_iovec() - Copy an array of &struct iovec from userspace
1144 * into the kernel, check that it is valid, and initialize a new
1145 * &struct iov_iter iterator to access it.
1146 *
1147 * @type: One of %READ or %WRITE.
1148 * @uvector: Pointer to the userspace array.
1149 * @nr_segs: Number of elements in userspace array.
1150 * @fast_segs: Number of elements in @iov.
1151 * @iov: (input and output parameter) Pointer to pointer to (usually small
1152 * on-stack) kernel array.
1153 * @i: Pointer to iterator that will be initialized on success.
1154 *
1155 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1156 * then this function places %NULL in *@iov on return. Otherwise, a new
1157 * array will be allocated and the result placed in *@iov. This means that
1158 * the caller may call kfree() on *@iov regardless of whether the small
1159 * on-stack array was used or not (and regardless of whether this function
1160 * returns an error or not).
1161 *
1162 * Return: 0 on success or negative error code on error.
1163 */
752int import_iovec(int type, const struct iovec __user * uvector, 1164int import_iovec(int type, const struct iovec __user * uvector,
753 unsigned nr_segs, unsigned fast_segs, 1165 unsigned nr_segs, unsigned fast_segs,
754 struct iovec **iov, struct iov_iter *i) 1166 struct iovec **iov, struct iov_iter *i)
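
The iov_iter changes introduce the ITER_PIPE flavour: an iterator that lands data directly in a pipe's buffer ring, so splice reads can be served by the regular ->read_iter() path, with push_pipe() allocating pages and pipe_advance() trimming on short reads. A sketch of how a splice-read path might drive it, loosely modelled on generic_file_splice_read(); error handling and pipe-space accounting are omitted:

#include <linux/fs.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>

static ssize_t splice_read_sketch(struct file *in, loff_t *ppos,
                                  struct pipe_inode_info *pipe, size_t len)
{
        struct iov_iter to;
        struct kiocb kiocb;

        iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
        init_sync_kiocb(&kiocb, in);
        kiocb.ki_pos = *ppos;

        /* ->read_iter() fills pipe buffers via copy_page_to_iter_pipe() */
        return in->f_op->read_iter(&kiocb, &to);
}
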
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 2be55692aa43..1d6565e81030 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop)
74} 74}
75EXPORT_SYMBOL(irq_poll_complete); 75EXPORT_SYMBOL(irq_poll_complete);
76 76
77static void irq_poll_softirq(struct softirq_action *h) 77static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
78{ 78{
79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); 79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
80 int rearm = 0, budget = irq_poll_budget; 80 int rearm = 0, budget = irq_poll_budget;
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index d8a5cf66c316..b8e2080c1a47 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -48,11 +48,9 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
48{ 48{
49 unsigned long long res; 49 unsigned long long res;
50 unsigned int rv; 50 unsigned int rv;
51 int overflow;
52 51
53 res = 0; 52 res = 0;
54 rv = 0; 53 rv = 0;
55 overflow = 0;
56 while (*s) { 54 while (*s) {
57 unsigned int val; 55 unsigned int val;
58 56
@@ -71,15 +69,13 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
71 */ 69 */
72 if (unlikely(res & (~0ull << 60))) { 70 if (unlikely(res & (~0ull << 60))) {
73 if (res > div_u64(ULLONG_MAX - val, base)) 71 if (res > div_u64(ULLONG_MAX - val, base))
74 overflow = 1; 72 rv |= KSTRTOX_OVERFLOW;
75 } 73 }
76 res = res * base + val; 74 res = res * base + val;
77 rv++; 75 rv++;
78 s++; 76 s++;
79 } 77 }
80 *p = res; 78 *p = res;
81 if (overflow)
82 rv |= KSTRTOX_OVERFLOW;
83 return rv; 79 return rv;
84} 80}
85 81
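
_parse_integer() now folds the overflow indication straight into its return value instead of carrying a separate local. A sketch of how a caller consumes that bit, mirroring the existing _kstrtoull() pattern; the wrapper name is illustrative:

#include <linux/errno.h>

#include "kstrtox.h"

static int parse_sketch(const char *s, unsigned int base,
                        unsigned long long *res)
{
        unsigned long long v;
        unsigned int rv = _parse_integer(s, base, &v);

        if (rv & KSTRTOX_OVERFLOW)
                return -ERANGE;
        if (rv == 0)            /* no digits consumed at all */
                return -EINVAL;
        *res = v;
        return 0;
}
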
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 26caf51cc238..75554754eadf 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,21 +16,23 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/cpu.h>
19 20
20#ifdef arch_trigger_all_cpu_backtrace 21#ifdef arch_trigger_cpumask_backtrace
21/* For reliability, we're prepared to waste bits here. */ 22/* For reliability, we're prepared to waste bits here. */
22static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 23static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
23 24
24/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 25/* "in progress" flag of arch_trigger_cpumask_backtrace */
25static unsigned long backtrace_flag; 26static unsigned long backtrace_flag;
26 27
27/* 28/*
28 * When raise() is called it will be is passed a pointer to the 29 * When raise() is called it will be passed a pointer to the
29 * backtrace_mask. Architectures that call nmi_cpu_backtrace() 30 * backtrace_mask. Architectures that call nmi_cpu_backtrace()
30 * directly from their raise() functions may rely on the mask 31 * directly from their raise() functions may rely on the mask
31 * they are passed being updated as a side effect of this call. 32 * they are passed being updated as a side effect of this call.
32 */ 33 */
33void nmi_trigger_all_cpu_backtrace(bool include_self, 34void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
35 bool exclude_self,
34 void (*raise)(cpumask_t *mask)) 36 void (*raise)(cpumask_t *mask))
35{ 37{
36 int i, this_cpu = get_cpu(); 38 int i, this_cpu = get_cpu();
@@ -44,13 +46,22 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
44 return; 46 return;
45 } 47 }
46 48
47 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 49 cpumask_copy(to_cpumask(backtrace_mask), mask);
48 if (!include_self) 50 if (exclude_self)
49 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); 51 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
50 52
53 /*
54 * Don't try to send an NMI to this cpu; it may work on some
55 * architectures, but on others it may not, and we'll get
56 * information at least as useful just by doing a dump_stack() here.
57 * Note that nmi_cpu_backtrace(NULL) will clear the cpu bit.
58 */
59 if (cpumask_test_cpu(this_cpu, to_cpumask(backtrace_mask)))
60 nmi_cpu_backtrace(NULL);
61
51 if (!cpumask_empty(to_cpumask(backtrace_mask))) { 62 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
52 pr_info("Sending NMI to %s CPUs:\n", 63 pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
53 (include_self ? "all" : "other")); 64 this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
54 raise(to_cpumask(backtrace_mask)); 65 raise(to_cpumask(backtrace_mask));
55 } 66 }
56 67
@@ -77,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
77 int cpu = smp_processor_id(); 88 int cpu = smp_processor_id();
78 89
79 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 90 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
80 pr_warn("NMI backtrace for cpu %d\n", cpu); 91 if (regs && cpu_in_idle(instruction_pointer(regs))) {
81 if (regs) 92 pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n",
82 show_regs(regs); 93 cpu, instruction_pointer(regs));
83 else 94 } else {
84 dump_stack(); 95 pr_warn("NMI backtrace for cpu %d\n", cpu);
96 if (regs)
97 show_regs(regs);
98 else
99 dump_stack();
100 }
85 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 101 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
86 return true; 102 return true;
87 } 103 }
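
nmi_trigger_all_cpu_backtrace() becomes nmi_trigger_cpumask_backtrace(): it takes an explicit cpumask plus an exclude_self flag, and the requesting CPU now dumps its own stack directly rather than NMI-ing itself. A sketch of the per-architecture glue that feeds it, loosely based on the x86 wiring; nmi_raise_cpus() stands in for the arch's "send NMI to this mask" primitive:

#include <linux/cpumask.h>
#include <linux/nmi.h>

static void nmi_raise_cpus(cpumask_t *mask)
{
        /* arch-specific: deliver an NMI (or IPI) to every CPU in @mask */
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
        nmi_trigger_cpumask_backtrace(mask, exclude_self, nmi_raise_cpus);
}
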
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 27fe74948882..9ac959ef4cae 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -33,6 +33,7 @@
33 33
34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) 34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
35 35
36static DEFINE_SPINLOCK(percpu_ref_switch_lock);
36static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); 37static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
37 38
38static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) 39static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
82 atomic_long_set(&ref->count, start_count); 83 atomic_long_set(&ref->count, start_count);
83 84
84 ref->release = release; 85 ref->release = release;
86 ref->confirm_switch = NULL;
85 return 0; 87 return 0;
86} 88}
87EXPORT_SYMBOL_GPL(percpu_ref_init); 89EXPORT_SYMBOL_GPL(percpu_ref_init);
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref)
101 unsigned long __percpu *percpu_count = percpu_count_ptr(ref); 103 unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
102 104
103 if (percpu_count) { 105 if (percpu_count) {
106 /* non-NULL confirm_switch indicates switching in progress */
107 WARN_ON_ONCE(ref->confirm_switch);
104 free_percpu(percpu_count); 108 free_percpu(percpu_count);
105 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; 109 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
106 } 110 }
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
161static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, 165static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
162 percpu_ref_func_t *confirm_switch) 166 percpu_ref_func_t *confirm_switch)
163{ 167{
164 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { 168 if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
165 /* switching from percpu to atomic */ 169 if (confirm_switch)
166 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; 170 confirm_switch(ref);
167 171 return;
168 /*
169 * Non-NULL ->confirm_switch is used to indicate that
170 * switching is in progress. Use noop one if unspecified.
171 */
172 WARN_ON_ONCE(ref->confirm_switch);
173 ref->confirm_switch =
174 confirm_switch ?: percpu_ref_noop_confirm_switch;
175
176 percpu_ref_get(ref); /* put after confirmation */
177 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
178 } else if (confirm_switch) {
179 /*
180 * Somebody already set ATOMIC. Switching may still be in
181 * progress. @confirm_switch must be invoked after the
182 * switching is complete and a full sched RCU grace period
183 * has passed. Wait synchronously for the previous
184 * switching and schedule @confirm_switch invocation.
185 */
186 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
187 ref->confirm_switch = confirm_switch;
188
189 percpu_ref_get(ref); /* put after confirmation */
190 call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
191 } 172 }
192}
193 173
194/** 174 /* switching from percpu to atomic */
195 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode 175 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
196 * @ref: percpu_ref to switch to atomic mode 176
197 * @confirm_switch: optional confirmation callback 177 /*
198 * 178 * Non-NULL ->confirm_switch is used to indicate that switching is
199 * There's no reason to use this function for the usual reference counting. 179 * in progress. Use noop one if unspecified.
200 * Use percpu_ref_kill[_and_confirm](). 180 */
201 * 181 ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
202 * Schedule switching of @ref to atomic mode. All its percpu counts will 182
203 * be collected to the main atomic counter. On completion, when all CPUs 183 percpu_ref_get(ref); /* put after confirmation */
204 * are guaraneed to be in atomic mode, @confirm_switch, which may not 184 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
205 * block, is invoked. This function may be invoked concurrently with all
206 * the get/put operations and can safely be mixed with kill and reinit
207 * operations. Note that @ref will stay in atomic mode across kill/reinit
208 * cycles until percpu_ref_switch_to_percpu() is called.
209 *
210 * This function normally doesn't block and can be called from any context
211 * but it may block if @confirm_kill is specified and @ref is already in
212 * the process of switching to atomic mode. In such cases, @confirm_switch
213 * will be invoked after the switching is complete.
214 *
215 * Due to the way percpu_ref is implemented, @confirm_switch will be called
216 * after at least one full sched RCU grace period has passed but this is an
217 * implementation detail and must not be depended upon.
218 */
219void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
220 percpu_ref_func_t *confirm_switch)
221{
222 ref->force_atomic = true;
223 __percpu_ref_switch_to_atomic(ref, confirm_switch);
224} 185}
225 186
226static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) 187static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
233 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) 194 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
234 return; 195 return;
235 196
236 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
237
238 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); 197 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
239 198
240 /* 199 /*
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
250 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); 209 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
251} 210}
252 211
212static void __percpu_ref_switch_mode(struct percpu_ref *ref,
213 percpu_ref_func_t *confirm_switch)
214{
215 lockdep_assert_held(&percpu_ref_switch_lock);
216
217 /*
218 * If the previous ATOMIC switching hasn't finished yet, wait for
219 * its completion. If the caller ensures that ATOMIC switching
220 * isn't in progress, this function can be called from any context.
221 */
222 wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
223 percpu_ref_switch_lock);
224
225 if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
226 __percpu_ref_switch_to_atomic(ref, confirm_switch);
227 else
228 __percpu_ref_switch_to_percpu(ref);
229}
230
231/**
232 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
233 * @ref: percpu_ref to switch to atomic mode
234 * @confirm_switch: optional confirmation callback
235 *
236 * There's no reason to use this function for the usual reference counting.
237 * Use percpu_ref_kill[_and_confirm]().
238 *
239 * Schedule switching of @ref to atomic mode. All its percpu counts will
240 * be collected to the main atomic counter. On completion, when all CPUs
241 * are guaraneed to be in atomic mode, @confirm_switch, which may not
242 * block, is invoked. This function may be invoked concurrently with all
243 * the get/put operations and can safely be mixed with kill and reinit
244 * operations. Note that @ref will stay in atomic mode across kill/reinit
245 * cycles until percpu_ref_switch_to_percpu() is called.
246 *
247 * This function may block if @ref is in the process of switching to atomic
248 * mode. If the caller ensures that @ref is not in the process of
249 * switching to atomic mode, this function can be called from any context.
250 */
251void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
252 percpu_ref_func_t *confirm_switch)
253{
254 unsigned long flags;
255
256 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
257
258 ref->force_atomic = true;
259 __percpu_ref_switch_mode(ref, confirm_switch);
260
261 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
262}
263
253/** 264/**
254 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode 265 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
255 * @ref: percpu_ref to switch to percpu mode 266 * @ref: percpu_ref to switch to percpu mode
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
264 * dying or dead, the actual switching takes place on the following 275 * dying or dead, the actual switching takes place on the following
265 * percpu_ref_reinit(). 276 * percpu_ref_reinit().
266 * 277 *
267 * This function normally doesn't block and can be called from any context 278 * This function may block if @ref is in the process of switching to atomic
268 * but it may block if @ref is in the process of switching to atomic mode 279 * mode. If the caller ensures that @ref is not in the process of
269 * by percpu_ref_switch_atomic(). 280 * switching to atomic mode, this function can be called from any context.
270 */ 281 */
271void percpu_ref_switch_to_percpu(struct percpu_ref *ref) 282void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
272{ 283{
284 unsigned long flags;
285
286 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
287
273 ref->force_atomic = false; 288 ref->force_atomic = false;
289 __percpu_ref_switch_mode(ref, NULL);
274 290
275 /* a dying or dead ref can't be switched to percpu mode w/o reinit */ 291 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
276 if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
277 __percpu_ref_switch_to_percpu(ref);
278} 292}
279 293
280/** 294/**
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
290 * 304 *
291 * This function normally doesn't block and can be called from any context 305 * This function normally doesn't block and can be called from any context
292 * but it may block if @confirm_kill is specified and @ref is in the 306 * but it may block if @confirm_kill is specified and @ref is in the
293 * process of switching to atomic mode by percpu_ref_switch_atomic(). 307 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
294 *
295 * Due to the way percpu_ref is implemented, @confirm_switch will be called
296 * after at least one full sched RCU grace period has passed but this is an
297 * implementation detail and must not be depended upon.
298 */ 308 */
299void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 309void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
300 percpu_ref_func_t *confirm_kill) 310 percpu_ref_func_t *confirm_kill)
301{ 311{
312 unsigned long flags;
313
314 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
315
302 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, 316 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
303 "%s called more than once on %pf!", __func__, ref->release); 317 "%s called more than once on %pf!", __func__, ref->release);
304 318
305 ref->percpu_count_ptr |= __PERCPU_REF_DEAD; 319 ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
306 __percpu_ref_switch_to_atomic(ref, confirm_kill); 320 __percpu_ref_switch_mode(ref, confirm_kill);
307 percpu_ref_put(ref); 321 percpu_ref_put(ref);
322
323 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
308} 324}
309EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); 325EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
310 326
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
321 */ 337 */
322void percpu_ref_reinit(struct percpu_ref *ref) 338void percpu_ref_reinit(struct percpu_ref *ref)
323{ 339{
340 unsigned long flags;
341
342 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
343
324 WARN_ON_ONCE(!percpu_ref_is_zero(ref)); 344 WARN_ON_ONCE(!percpu_ref_is_zero(ref));
325 345
326 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; 346 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
327 percpu_ref_get(ref); 347 percpu_ref_get(ref);
328 if (!ref->force_atomic) 348 __percpu_ref_switch_mode(ref, NULL);
329 __percpu_ref_switch_to_percpu(ref); 349
350 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
330} 351}
331EXPORT_SYMBOL_GPL(percpu_ref_reinit); 352EXPORT_SYMBOL_GPL(percpu_ref_reinit);
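
The percpu-refcount rework serializes every mode switch behind percpu_ref_switch_lock and funnels switch-to-atomic, switch-to-percpu, kill and reinit through __percpu_ref_switch_mode(), so a caller can no longer race an in-flight confirm_switch. A sketch of the caller-visible usage these paths serve, loosely following the blk-mq freeze/unfreeze pattern; the names are illustrative:

#include <linux/gfp.h>
#include <linux/percpu-refcount.h>

static struct percpu_ref q_usage;

static void q_usage_release(struct percpu_ref *ref)
{
        /* last reference gone: safe to tear the object down */
}

static int q_init(void)
{
        return percpu_ref_init(&q_usage, q_usage_release, 0, GFP_KERNEL);
}

static void q_freeze(void)
{
        /* takes percpu_ref_switch_lock internally and goes atomic */
        percpu_ref_kill(&q_usage);
}

static void q_unfreeze(void)
{
        /* only valid once the killed ref has dropped to zero */
        percpu_ref_reinit(&q_usage);
}
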
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 91f0727e3cad..8e6d552c40dd 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1583,15 +1583,10 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1583} 1583}
1584EXPORT_SYMBOL(radix_tree_delete); 1584EXPORT_SYMBOL(radix_tree_delete);
1585 1585
1586struct radix_tree_node *radix_tree_replace_clear_tags( 1586void radix_tree_clear_tags(struct radix_tree_root *root,
1587 struct radix_tree_root *root, 1587 struct radix_tree_node *node,
1588 unsigned long index, void *entry) 1588 void **slot)
1589{ 1589{
1590 struct radix_tree_node *node;
1591 void **slot;
1592
1593 __radix_tree_lookup(root, index, &node, &slot);
1594
1595 if (node) { 1590 if (node) {
1596 unsigned int tag, offset = get_slot_offset(node, slot); 1591 unsigned int tag, offset = get_slot_offset(node, slot);
1597 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1592 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
@@ -1600,9 +1595,6 @@ struct radix_tree_node *radix_tree_replace_clear_tags(
1600 /* Clear root node tags */ 1595 /* Clear root node tags */
1601 root->gfp_mask &= __GFP_BITS_MASK; 1596 root->gfp_mask &= __GFP_BITS_MASK;
1602 } 1597 }
1603
1604 radix_tree_replace_slot(slot, entry);
1605 return node;
1606} 1598}
1607 1599
1608/** 1600/**
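
radix_tree_replace_clear_tags() is split up: the lookup and the slot replacement move out to the caller, and radix_tree_clear_tags() keeps only the tag clearing. A sketch of the resulting caller pattern, loosely following the page-cache replacement path; replace_entry() is illustrative:

#include <linux/radix-tree.h>

static void replace_entry(struct radix_tree_root *root, unsigned long index,
                          void *new)
{
        struct radix_tree_node *node;
        void **slot;

        __radix_tree_lookup(root, index, &node, &slot);
        radix_tree_clear_tags(root, node, slot);
        radix_tree_replace_slot(slot, new);
}
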
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494b2bcd..f01b1cb04f91 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
3int*.c 3int*.c
4tables.c 4tables.c
5neon?.c 5neon?.c
6s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48fa040..3057011f5599 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ 3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o 4 int8.o int16.o int32.o
5 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o 8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o 9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
10raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
10 11
11hostprogs-y += mktables 12hostprogs-y += mktables
12 13
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8
116$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE 117$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
117 $(call if_changed,unroll) 118 $(call if_changed,unroll)
118 119
120targets += s390vx8.c
121$(obj)/s390vx8.c: UNROLL := 8
122$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
123 $(call if_changed,unroll)
124
119quiet_cmd_mktable = TABLE $@ 125quiet_cmd_mktable = TABLE $@
120 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 126 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
121 127
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0434bd..7857049fd7d3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = {
49 &raid6_avx2x1, 49 &raid6_avx2x1,
50 &raid6_avx2x2, 50 &raid6_avx2x2,
51#endif 51#endif
52#ifdef CONFIG_AS_AVX512
53 &raid6_avx512x1,
54 &raid6_avx512x2,
55#endif
52#endif 56#endif
53#if defined(__x86_64__) && !defined(__arch_um__) 57#if defined(__x86_64__) && !defined(__arch_um__)
54 &raid6_sse2x1, 58 &raid6_sse2x1,
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = {
59 &raid6_avx2x2, 63 &raid6_avx2x2,
60 &raid6_avx2x4, 64 &raid6_avx2x4,
61#endif 65#endif
66#ifdef CONFIG_AS_AVX512
67 &raid6_avx512x1,
68 &raid6_avx512x2,
69 &raid6_avx512x4,
70#endif
62#endif 71#endif
63#ifdef CONFIG_ALTIVEC 72#ifdef CONFIG_ALTIVEC
64 &raid6_altivec1, 73 &raid6_altivec1,
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = {
69#if defined(CONFIG_TILEGX) 78#if defined(CONFIG_TILEGX)
70 &raid6_tilegx8, 79 &raid6_tilegx8,
71#endif 80#endif
81#if defined(CONFIG_S390)
82 &raid6_s390vx8,
83#endif
72 &raid6_intx1, 84 &raid6_intx1,
73 &raid6_intx2, 85 &raid6_intx2,
74 &raid6_intx4, 86 &raid6_intx4,
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
89EXPORT_SYMBOL_GPL(raid6_datap_recov); 101EXPORT_SYMBOL_GPL(raid6_datap_recov);
90 102
91const struct raid6_recov_calls *const raid6_recov_algos[] = { 103const struct raid6_recov_calls *const raid6_recov_algos[] = {
104#ifdef CONFIG_AS_AVX512
105 &raid6_recov_avx512,
106#endif
92#ifdef CONFIG_AS_AVX2 107#ifdef CONFIG_AS_AVX2
93 &raid6_recov_avx2, 108 &raid6_recov_avx2,
94#endif 109#endif
95#ifdef CONFIG_AS_SSSE3 110#ifdef CONFIG_AS_SSSE3
96 &raid6_recov_ssse3, 111 &raid6_recov_ssse3,
97#endif 112#endif
113#ifdef CONFIG_S390
114 &raid6_recov_s390xc,
115#endif
98 &raid6_recov_intx1, 116 &raid6_recov_intx1,
99 NULL 117 NULL
100}; 118};
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
new file mode 100644
index 000000000000..f524a7972006
--- /dev/null
+++ b/lib/raid6/avx512.c
@@ -0,0 +1,569 @@
1/* -*- linux-c -*- --------------------------------------------------------
2 *
3 * Copyright (C) 2016 Intel Corporation
4 *
5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
6 * Author: Megha Dey <megha.dey@linux.intel.com>
7 *
8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
14 * Boston MA 02111-1307, USA; either version 2 of the License, or
15 * (at your option) any later version; incorporated herein by reference.
16 *
17 * -----------------------------------------------------------------------
18 */
19
20/*
21 * AVX512 implementation of RAID-6 syndrome functions
22 *
23 */
24
25#ifdef CONFIG_AS_AVX512
26
27#include <linux/raid/pq.h>
28#include "x86.h"
29
30static const struct raid6_avx512_constants {
31 u64 x1d[8];
32} raid6_avx512_constants __aligned(512) = {
33 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
34 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
35 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
36 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
37};
38
39static int raid6_have_avx512(void)
40{
41 return boot_cpu_has(X86_FEATURE_AVX2) &&
42 boot_cpu_has(X86_FEATURE_AVX) &&
43 boot_cpu_has(X86_FEATURE_AVX512F) &&
44 boot_cpu_has(X86_FEATURE_AVX512BW) &&
45 boot_cpu_has(X86_FEATURE_AVX512VL) &&
46 boot_cpu_has(X86_FEATURE_AVX512DQ);
47}
48
49static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
50{
51 u8 **dptr = (u8 **)ptrs;
52 u8 *p, *q;
53 int d, z, z0;
54
55 z0 = disks - 3; /* Highest data disk */
56 p = dptr[z0+1]; /* XOR parity */
57 q = dptr[z0+2]; /* RS syndrome */
58
59 kernel_fpu_begin();
60
61 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
62 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
63 :
64 : "m" (raid6_avx512_constants.x1d[0]));
65
66 for (d = 0; d < bytes; d += 64) {
67 asm volatile("prefetchnta %0\n\t"
68 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
69 "prefetchnta %1\n\t"
70 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
71 "vmovdqa64 %1,%%zmm6"
72 :
73 : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
74 for (z = z0-2; z >= 0; z--) {
75 asm volatile("prefetchnta %0\n\t"
76 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
77 "vpmovm2b %%k1,%%zmm5\n\t"
78 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
79 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
80 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
81 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
82 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
83 "vmovdqa64 %0,%%zmm6"
84 :
85 : "m" (dptr[z][d]));
86 }
87 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
88 "vpmovm2b %%k1,%%zmm5\n\t"
89 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
90 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
91 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
92 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
93 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
94 "vmovntdq %%zmm2,%0\n\t"
95 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
96 "vmovntdq %%zmm4,%1\n\t"
97 "vpxorq %%zmm4,%%zmm4,%%zmm4"
98 :
99 : "m" (p[d]), "m" (q[d]));
100 }
101
102 asm volatile("sfence" : : : "memory");
103 kernel_fpu_end();
104}
105
106static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
107 size_t bytes, void **ptrs)
108{
109 u8 **dptr = (u8 **)ptrs;
110 u8 *p, *q;
111 int d, z, z0;
112
113 z0 = stop; /* P/Q right side optimization */
114 p = dptr[disks-2]; /* XOR parity */
115 q = dptr[disks-1]; /* RS syndrome */
116
117 kernel_fpu_begin();
118
119 asm volatile("vmovdqa64 %0,%%zmm0"
120 : : "m" (raid6_avx512_constants.x1d[0]));
121
122 for (d = 0 ; d < bytes ; d += 64) {
123 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
124 "vmovdqa64 %1,%%zmm2\n\t"
125 "vpxorq %%zmm4,%%zmm2,%%zmm2"
126 :
127 : "m" (dptr[z0][d]), "m" (p[d]));
128 /* P/Q data pages */
129 for (z = z0-1 ; z >= start ; z--) {
130 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
131 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
132 "vpmovm2b %%k1,%%zmm5\n\t"
133 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
134 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
135 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
136 "vmovdqa64 %0,%%zmm5\n\t"
137 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
138 "vpxorq %%zmm5,%%zmm4,%%zmm4"
139 :
140 : "m" (dptr[z][d]));
141 }
142 /* P/Q left side optimization */
143 for (z = start-1 ; z >= 0 ; z--) {
144 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
145 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
146 "vpmovm2b %%k1,%%zmm5\n\t"
147 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
148 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
149 "vpxorq %%zmm5,%%zmm4,%%zmm4"
150 :
151 : );
152 }
153 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
154 /* Don't use movntdq for r/w memory area < cache line */
155 "vmovdqa64 %%zmm4,%0\n\t"
156 "vmovdqa64 %%zmm2,%1"
157 :
158 : "m" (q[d]), "m" (p[d]));
159 }
160
161 asm volatile("sfence" : : : "memory");
162 kernel_fpu_end();
163}
164
165const struct raid6_calls raid6_avx512x1 = {
166 raid6_avx5121_gen_syndrome,
167 raid6_avx5121_xor_syndrome,
168 raid6_have_avx512,
169 "avx512x1",
170 1 /* Has cache hints */
171};
172
173/*
174 * Unrolled-by-2 AVX512 implementation
175 */
176static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
177{
178 u8 **dptr = (u8 **)ptrs;
179 u8 *p, *q;
180 int d, z, z0;
181
182 z0 = disks - 3; /* Highest data disk */
183 p = dptr[z0+1]; /* XOR parity */
184 q = dptr[z0+2]; /* RS syndrome */
185
186 kernel_fpu_begin();
187
188 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
189 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
190 :
191 : "m" (raid6_avx512_constants.x1d[0]));
192
193 /* We uniformly assume a single prefetch covers at least 64 bytes */
194 for (d = 0; d < bytes; d += 128) {
195 asm volatile("prefetchnta %0\n\t"
196 "prefetchnta %1\n\t"
197 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
198 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
199 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
200 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
201 :
202 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
203 for (z = z0-1; z >= 0; z--) {
204 asm volatile("prefetchnta %0\n\t"
205 "prefetchnta %1\n\t"
206 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
207 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
208 "vpmovm2b %%k1,%%zmm5\n\t"
209 "vpmovm2b %%k2,%%zmm7\n\t"
210 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
211 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
212 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
213 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
214 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
215 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
216 "vmovdqa64 %0,%%zmm5\n\t"
217 "vmovdqa64 %1,%%zmm7\n\t"
218 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
219 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
220 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
221 "vpxorq %%zmm7,%%zmm6,%%zmm6"
222 :
223 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
224 }
225 asm volatile("vmovntdq %%zmm2,%0\n\t"
226 "vmovntdq %%zmm3,%1\n\t"
227 "vmovntdq %%zmm4,%2\n\t"
228 "vmovntdq %%zmm6,%3"
229 :
230 : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
231 "m" (q[d+64]));
232 }
233
234 asm volatile("sfence" : : : "memory");
235 kernel_fpu_end();
236}
237
238static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
239 size_t bytes, void **ptrs)
240{
241 u8 **dptr = (u8 **)ptrs;
242 u8 *p, *q;
243 int d, z, z0;
244
245 z0 = stop; /* P/Q right side optimization */
246 p = dptr[disks-2]; /* XOR parity */
247 q = dptr[disks-1]; /* RS syndrome */
248
249 kernel_fpu_begin();
250
251 asm volatile("vmovdqa64 %0,%%zmm0"
252 : : "m" (raid6_avx512_constants.x1d[0]));
253
254 for (d = 0 ; d < bytes ; d += 128) {
255 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
256 "vmovdqa64 %1,%%zmm6\n\t"
257 "vmovdqa64 %2,%%zmm2\n\t"
258 "vmovdqa64 %3,%%zmm3\n\t"
259 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
260 "vpxorq %%zmm6,%%zmm3,%%zmm3"
261 :
262 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
263 "m" (p[d]), "m" (p[d+64]));
264 /* P/Q data pages */
265 for (z = z0-1 ; z >= start ; z--) {
266 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
267 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
268 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
269 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
270 "vpmovm2b %%k1,%%zmm5\n\t"
271 "vpmovm2b %%k2,%%zmm7\n\t"
272 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
273 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
274 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
275 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
276 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
277 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
278 "vmovdqa64 %0,%%zmm5\n\t"
279 "vmovdqa64 %1,%%zmm7\n\t"
280 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
281 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
282 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
283 "vpxorq %%zmm7,%%zmm6,%%zmm6"
284 :
285 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
286 }
287 /* P/Q left side optimization */
288 for (z = start-1 ; z >= 0 ; z--) {
289 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
290 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
291 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
292 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
293 "vpmovm2b %%k1,%%zmm5\n\t"
294 "vpmovm2b %%k2,%%zmm7\n\t"
295 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
296 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
297 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
298 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
299 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
300 "vpxorq %%zmm7,%%zmm6,%%zmm6"
301 :
302 : );
303 }
304 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
305 "vpxorq %1,%%zmm6,%%zmm6\n\t"
306 /* Don't use movntdq for r/w
307 * memory area < cache line
308 */
309 "vmovdqa64 %%zmm4,%0\n\t"
310 "vmovdqa64 %%zmm6,%1\n\t"
311 "vmovdqa64 %%zmm2,%2\n\t"
312 "vmovdqa64 %%zmm3,%3"
313 :
314 : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
315 "m" (p[d+64]));
316 }
317
318 asm volatile("sfence" : : : "memory");
319 kernel_fpu_end();
320}
321
322const struct raid6_calls raid6_avx512x2 = {
323 raid6_avx5122_gen_syndrome,
324 raid6_avx5122_xor_syndrome,
325 raid6_have_avx512,
326 "avx512x2",
327 1 /* Has cache hints */
328};
329
330#ifdef CONFIG_X86_64
331
332/*
333 * Unrolled-by-4 AVX512 implementation
334 */
335static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
336{
337 u8 **dptr = (u8 **)ptrs;
338 u8 *p, *q;
339 int d, z, z0;
340
341 z0 = disks - 3; /* Highest data disk */
342 p = dptr[z0+1]; /* XOR parity */
343 q = dptr[z0+2]; /* RS syndrome */
344
345 kernel_fpu_begin();
346
347 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
348 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
349 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
350 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
351 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
352 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
353 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
354 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
355 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
356 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
357 :
358 : "m" (raid6_avx512_constants.x1d[0]));
359
360 for (d = 0; d < bytes; d += 256) {
361 for (z = z0; z >= 0; z--) {
362 asm volatile("prefetchnta %0\n\t"
363 "prefetchnta %1\n\t"
364 "prefetchnta %2\n\t"
365 "prefetchnta %3\n\t"
366 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
367 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
368 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
369 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
370 "vpmovm2b %%k1,%%zmm5\n\t"
371 "vpmovm2b %%k2,%%zmm7\n\t"
372 "vpmovm2b %%k3,%%zmm13\n\t"
373 "vpmovm2b %%k4,%%zmm15\n\t"
374 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
375 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
376 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
377 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
378 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
379 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
380 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
381 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
382 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
383 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
384 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
385 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
386 "vmovdqa64 %0,%%zmm5\n\t"
387 "vmovdqa64 %1,%%zmm7\n\t"
388 "vmovdqa64 %2,%%zmm13\n\t"
389 "vmovdqa64 %3,%%zmm15\n\t"
390 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
391 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
392 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
393 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
394 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
395 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
396 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
397 "vpxorq %%zmm15,%%zmm14,%%zmm14"
398 :
399 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
400 "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
401 }
402 asm volatile("vmovntdq %%zmm2,%0\n\t"
403 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
404 "vmovntdq %%zmm3,%1\n\t"
405 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
406 "vmovntdq %%zmm10,%2\n\t"
407 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
408 "vmovntdq %%zmm11,%3\n\t"
409 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
410 "vmovntdq %%zmm4,%4\n\t"
411 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
412 "vmovntdq %%zmm6,%5\n\t"
413 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
414 "vmovntdq %%zmm12,%6\n\t"
415 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
416 "vmovntdq %%zmm14,%7\n\t"
417 "vpxorq %%zmm14,%%zmm14,%%zmm14"
418 :
419 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
420 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
421 "m" (q[d+128]), "m" (q[d+192]));
422 }
423
424 asm volatile("sfence" : : : "memory");
425 kernel_fpu_end();
426}
427
428static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
429 size_t bytes, void **ptrs)
430{
431 u8 **dptr = (u8 **)ptrs;
432 u8 *p, *q;
433 int d, z, z0;
434
435 z0 = stop; /* P/Q right side optimization */
436 p = dptr[disks-2]; /* XOR parity */
437 q = dptr[disks-1]; /* RS syndrome */
438
439 kernel_fpu_begin();
440
441 asm volatile("vmovdqa64 %0,%%zmm0"
442 :: "m" (raid6_avx512_constants.x1d[0]));
443
444 for (d = 0 ; d < bytes ; d += 256) {
445 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
446 "vmovdqa64 %1,%%zmm6\n\t"
447 "vmovdqa64 %2,%%zmm12\n\t"
448 "vmovdqa64 %3,%%zmm14\n\t"
449 "vmovdqa64 %4,%%zmm2\n\t"
450 "vmovdqa64 %5,%%zmm3\n\t"
451 "vmovdqa64 %6,%%zmm10\n\t"
452 "vmovdqa64 %7,%%zmm11\n\t"
453 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
454 "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
455 "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
456 "vpxorq %%zmm14,%%zmm11,%%zmm11"
457 :
458 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
459 "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
460 "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
461 "m" (p[d+192]));
462 /* P/Q data pages */
463 for (z = z0-1 ; z >= start ; z--) {
464 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
465 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
466 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
467 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
468 "prefetchnta %0\n\t"
469 "prefetchnta %2\n\t"
470 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
471 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
472 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
473 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
474 "vpmovm2b %%k1,%%zmm5\n\t"
475 "vpmovm2b %%k2,%%zmm7\n\t"
476 "vpmovm2b %%k3,%%zmm13\n\t"
477 "vpmovm2b %%k4,%%zmm15\n\t"
478 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
479 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
480 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
481 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
482 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
483 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
484 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
485 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
486 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
487 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
488 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
489 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
490 "vmovdqa64 %0,%%zmm5\n\t"
491 "vmovdqa64 %1,%%zmm7\n\t"
492 "vmovdqa64 %2,%%zmm13\n\t"
493 "vmovdqa64 %3,%%zmm15\n\t"
494 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
495 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
496 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
497 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
498 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
499 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
500 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
501 "vpxorq %%zmm15,%%zmm14,%%zmm14"
502 :
503 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
504 "m" (dptr[z][d+128]),
505 "m" (dptr[z][d+192]));
506 }
507 asm volatile("prefetchnta %0\n\t"
508 "prefetchnta %1\n\t"
509 :
510 : "m" (q[d]), "m" (q[d+128]));
511 /* P/Q left side optimization */
512 for (z = start-1 ; z >= 0 ; z--) {
513 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
514 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
515 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
516 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
517 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
518 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
519 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
520 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
521 "vpmovm2b %%k1,%%zmm5\n\t"
522 "vpmovm2b %%k2,%%zmm7\n\t"
523 "vpmovm2b %%k3,%%zmm13\n\t"
524 "vpmovm2b %%k4,%%zmm15\n\t"
525 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
526 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
527 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
528 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
529 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
530 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
531 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
532 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
533 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
534 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
535 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
536 "vpxorq %%zmm15,%%zmm14,%%zmm14"
537 :
538 : );
539 }
540 asm volatile("vmovntdq %%zmm2,%0\n\t"
541 "vmovntdq %%zmm3,%1\n\t"
542 "vmovntdq %%zmm10,%2\n\t"
543 "vmovntdq %%zmm11,%3\n\t"
544 "vpxorq %4,%%zmm4,%%zmm4\n\t"
545 "vpxorq %5,%%zmm6,%%zmm6\n\t"
546 "vpxorq %6,%%zmm12,%%zmm12\n\t"
547 "vpxorq %7,%%zmm14,%%zmm14\n\t"
548 "vmovntdq %%zmm4,%4\n\t"
549 "vmovntdq %%zmm6,%5\n\t"
550 "vmovntdq %%zmm12,%6\n\t"
551 "vmovntdq %%zmm14,%7"
552 :
553 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
554 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
555 "m" (q[d+128]), "m" (q[d+192]));
556 }
557 asm volatile("sfence" : : : "memory");
558 kernel_fpu_end();
559}
560const struct raid6_calls raid6_avx512x4 = {
561 raid6_avx5124_gen_syndrome,
562 raid6_avx5124_xor_syndrome,
563 raid6_have_avx512,
564 "avx512x4",
565 1 /* Has cache hints */
566};
567#endif
568
569#endif /* CONFIG_AS_AVX512 */
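The vpcmpgtb/vpmovm2b/vpaddb/vpandq/vpxorq sequences above all implement the same step: multiply the running Q value by 2 in GF(2^8) (generator polynomial 0x11d, hence the 0x1d constant) and fold in the next data block, while P is a plain XOR accumulation. A scalar sketch of that recurrence, for orientation only (the helper names are illustrative and not part of this patch):

	#include <stddef.h>

	/* Double a byte in GF(2^8) with the RAID-6 polynomial 0x11d:
	 * shift left by one and XOR in 0x1d when the top bit was set.
	 * This is what the compare/add/and/xor register dance computes,
	 * 64 bytes per zmm register at a time.
	 */
	static inline unsigned char gf2_mul2(unsigned char v)
	{
		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
	}

	/* Scalar equivalent of gen_syndrome: P is the XOR of all data
	 * bytes, Q is the Horner-style sum of data bytes weighted by
	 * powers of 2 in GF(2^8).
	 */
	static void gen_syndrome_scalar(int disks, size_t bytes, unsigned char **d)
	{
		unsigned char *p = d[disks - 2], *q = d[disks - 1];
		size_t i;
		int z;

		for (i = 0; i < bytes; i++) {
			unsigned char wp = 0, wq = 0;

			for (z = disks - 3; z >= 0; z--) {
				wq = gf2_mul2(wq) ^ d[z][i];
				wp ^= d[z][i];
			}
			p[i] = wp;
			q[i] = wq;
		}
	}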
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c
new file mode 100644
index 000000000000..625aafa33b61
--- /dev/null
+++ b/lib/raid6/recov_avx512.c
@@ -0,0 +1,388 @@
1/*
2 * Copyright (C) 2016 Intel Corporation
3 *
4 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
5 * Author: Megha Dey <megha.dey@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 */
13
14#ifdef CONFIG_AS_AVX512
15
16#include <linux/raid/pq.h>
17#include "x86.h"
18
19static int raid6_has_avx512(void)
20{
21 return boot_cpu_has(X86_FEATURE_AVX2) &&
22 boot_cpu_has(X86_FEATURE_AVX) &&
23 boot_cpu_has(X86_FEATURE_AVX512F) &&
24 boot_cpu_has(X86_FEATURE_AVX512BW) &&
25 boot_cpu_has(X86_FEATURE_AVX512VL) &&
26 boot_cpu_has(X86_FEATURE_AVX512DQ);
27}
28
29static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
30 int failb, void **ptrs)
31{
32 u8 *p, *q, *dp, *dq;
33 const u8 *pbmul; /* P multiplier table for B data */
34 const u8 *qmul; /* Q multiplier table (for both) */
35 const u8 x0f = 0x0f;
36
37 p = (u8 *)ptrs[disks-2];
38 q = (u8 *)ptrs[disks-1];
39
40 /*
41 * Compute syndrome with zero for the missing data pages
42 * Use the dead data pages as temporary storage for
43 * delta p and delta q
44 */
45
46 dp = (u8 *)ptrs[faila];
47 ptrs[faila] = (void *)raid6_empty_zero_page;
48 ptrs[disks-2] = dp;
49 dq = (u8 *)ptrs[failb];
50 ptrs[failb] = (void *)raid6_empty_zero_page;
51 ptrs[disks-1] = dq;
52
53 raid6_call.gen_syndrome(disks, bytes, ptrs);
54
55 /* Restore pointer table */
56 ptrs[faila] = dp;
57 ptrs[failb] = dq;
58 ptrs[disks-2] = p;
59 ptrs[disks-1] = q;
60
61 /* Now, pick the proper data tables */
62 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
63 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
64 raid6_gfexp[failb]]];
65
66 kernel_fpu_begin();
67
68 /* zmm0 = x0f[16] */
69 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
70
71 while (bytes) {
72#ifdef CONFIG_X86_64
73 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
74 "vmovdqa64 %1, %%zmm9\n\t"
75 "vmovdqa64 %2, %%zmm0\n\t"
76 "vmovdqa64 %3, %%zmm8\n\t"
77 "vpxorq %4, %%zmm1, %%zmm1\n\t"
78 "vpxorq %5, %%zmm9, %%zmm9\n\t"
79 "vpxorq %6, %%zmm0, %%zmm0\n\t"
80 "vpxorq %7, %%zmm8, %%zmm8"
81 :
82 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
83 "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
84 "m" (dp[0]), "m" (dp[64]));
85
86 /*
87 * 1 = dq[0] ^ q[0]
88 * 9 = dq[64] ^ q[64]
89 * 0 = dp[0] ^ p[0]
90 * 8 = dp[64] ^ p[64]
91 */
92
93 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
94 "vbroadcasti64x2 %1, %%zmm5"
95 :
96 : "m" (qmul[0]), "m" (qmul[16]));
97
98 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
99 "vpsraw $4, %%zmm9, %%zmm12\n\t"
100 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
101 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
102 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
103 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
104 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
105 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
106 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
107 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
108 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
109 "vpxorq %%zmm4, %%zmm5, %%zmm5"
110 :
111 : );
112
113 /*
114 * 5 = qx[0]
115 * 15 = qx[64]
116 */
117
118 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
119 "vbroadcasti64x2 %1, %%zmm1\n\t"
120 "vpsraw $4, %%zmm0, %%zmm2\n\t"
121 "vpsraw $4, %%zmm8, %%zmm6\n\t"
122 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
123 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
124 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
125 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
126 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
127 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
128 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
129 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
130 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
131 "vpxorq %%zmm12, %%zmm13, %%zmm13"
132 :
133 : "m" (pbmul[0]), "m" (pbmul[16]));
134
135 /*
136 * 1 = pbmul[px[0]]
137 * 13 = pbmul[px[64]]
138 */
139 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
140 "vpxorq %%zmm15, %%zmm13, %%zmm13"
141 :
142 : );
143
144 /*
145 * 1 = db = DQ
146 * 13 = db[64] = DQ[64]
147 */
148 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
149 "vmovdqa64 %%zmm13,%1\n\t"
150 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
151 "vpxorq %%zmm13, %%zmm8, %%zmm8"
152 :
153 : "m" (dq[0]), "m" (dq[64]));
154
155 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
156 "vmovdqa64 %%zmm8, %1"
157 :
158 : "m" (dp[0]), "m" (dp[64]));
159
160 bytes -= 128;
161 p += 128;
162 q += 128;
163 dp += 128;
164 dq += 128;
165#else
166 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
167 "vmovdqa64 %1, %%zmm0\n\t"
168 "vpxorq %2, %%zmm1, %%zmm1\n\t"
169 "vpxorq %3, %%zmm0, %%zmm0"
170 :
171 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
172
173 /* 1 = dq ^ q; 0 = dp ^ p */
174
175 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
176 "vbroadcasti64x2 %1, %%zmm5"
177 :
178 : "m" (qmul[0]), "m" (qmul[16]));
179
180 /*
181 * 1 = dq ^ q
182 * 3 = dq ^ p >> 4
183 */
184 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
185 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
186 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
187 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
188 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
189 "vpxorq %%zmm4, %%zmm5, %%zmm5"
190 :
191 : );
192
193 /* 5 = qx */
194
195 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
196 "vbroadcasti64x2 %1, %%zmm1"
197 :
198 : "m" (pbmul[0]), "m" (pbmul[16]));
199
200 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
201 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
202 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
203 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
204 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
205 "vpxorq %%zmm4, %%zmm1, %%zmm1"
206 :
207 : );
208
209 /* 1 = pbmul[px] */
210 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
211 /* 1 = db = DQ */
212 "vmovdqa64 %%zmm1, %0\n\t"
213 :
214 : "m" (dq[0]));
215
216 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
217 "vmovdqa64 %%zmm0, %0"
218 :
219 : "m" (dp[0]));
220
221 bytes -= 64;
222 p += 64;
223 q += 64;
224 dp += 64;
225 dq += 64;
226#endif
227 }
228
229 kernel_fpu_end();
230}
231
232static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
233 void **ptrs)
234{
235 u8 *p, *q, *dq;
236 const u8 *qmul; /* Q multiplier table */
237 const u8 x0f = 0x0f;
238
239 p = (u8 *)ptrs[disks-2];
240 q = (u8 *)ptrs[disks-1];
241
242 /*
243 * Compute syndrome with zero for the missing data page
244 * Use the dead data page as temporary storage for delta q
245 */
246
247 dq = (u8 *)ptrs[faila];
248 ptrs[faila] = (void *)raid6_empty_zero_page;
249 ptrs[disks-1] = dq;
250
251 raid6_call.gen_syndrome(disks, bytes, ptrs);
252
253 /* Restore pointer table */
254 ptrs[faila] = dq;
255 ptrs[disks-1] = q;
256
257 /* Now, pick the proper data tables */
258 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
259
260 kernel_fpu_begin();
261
262 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
263
264 while (bytes) {
265#ifdef CONFIG_X86_64
266 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
267 "vmovdqa64 %1, %%zmm8\n\t"
268 "vpxorq %2, %%zmm3, %%zmm3\n\t"
269 "vpxorq %3, %%zmm8, %%zmm8"
270 :
271 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
272 "m" (q[64]));
273
274 /*
275 * 3 = q[0] ^ dq[0]
276 * 8 = q[64] ^ dq[64]
277 */
278 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
279 "vmovapd %%zmm0, %%zmm13\n\t"
280 "vbroadcasti64x2 %1, %%zmm1\n\t"
281 "vmovapd %%zmm1, %%zmm14"
282 :
283 : "m" (qmul[0]), "m" (qmul[16]));
284
285 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
286 "vpsraw $4, %%zmm8, %%zmm12\n\t"
287 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
288 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
289 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
290 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
291 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
292 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
293 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
294 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
295 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
296 "vpxorq %%zmm13, %%zmm14, %%zmm14"
297 :
298 : );
299
300 /*
301 * 1 = qmul[q[0] ^ dq[0]]
302 * 14 = qmul[q[64] ^ dq[64]]
303 */
304 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
305 "vmovdqa64 %1, %%zmm12\n\t"
306 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
307 "vpxorq %%zmm14, %%zmm12, %%zmm12"
308 :
309 : "m" (p[0]), "m" (p[64]));
310
311 /*
312 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
313 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
314 */
315
316 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
317 "vmovdqa64 %%zmm14, %1\n\t"
318 "vmovdqa64 %%zmm2, %2\n\t"
319 "vmovdqa64 %%zmm12,%3"
320 :
321 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
322 "m" (p[64]));
323
324 bytes -= 128;
325 p += 128;
326 q += 128;
327 dq += 128;
328#else
329 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
330 "vpxorq %1, %%zmm3, %%zmm3"
331 :
332 : "m" (dq[0]), "m" (q[0]));
333
334 /* 3 = q ^ dq */
335
336 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
337 "vbroadcasti64x2 %1, %%zmm1"
338 :
339 : "m" (qmul[0]), "m" (qmul[16]));
340
341 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
342 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
343 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
344 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
345 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
346 "vpxorq %%zmm0, %%zmm1, %%zmm1"
347 :
348 : );
349
350 /* 1 = qmul[q ^ dq] */
351
352 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
353 "vpxorq %%zmm1, %%zmm2, %%zmm2"
354 :
355 : "m" (p[0]));
356
357 /* 2 = p ^ qmul[q ^ dq] */
358
359 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
360 "vmovdqa64 %%zmm2, %1"
361 :
362 : "m" (dq[0]), "m" (p[0]));
363
364 bytes -= 64;
365 p += 64;
366 q += 64;
367 dq += 64;
368#endif
369 }
370
371 kernel_fpu_end();
372}
373
374const struct raid6_recov_calls raid6_recov_avx512 = {
375 .data2 = raid6_2data_recov_avx512,
376 .datap = raid6_datap_recov_avx512,
377 .valid = raid6_has_avx512,
378#ifdef CONFIG_X86_64
379 .name = "avx512x2",
380#else
381 .name = "avx512x1",
382#endif
383 .priority = 3,
384};
385
386#else
387#warning "your version of binutils lacks AVX512 support"
388#endif
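The register-mapping comments in the loops above ("1 = dq[0] ^ q[0]", "1 = pbmul[px[0]]", "1 = db = DQ", ...) track the standard two-failure recovery algebra. With the two dead blocks replaced by the zero page and the syndrome regenerated, the per-byte computation reduces to the following (a summary of what the assembly computes; the scalar form of the same steps appears in recov_s390xc.c just below):

	dp = P ^ P'                    /* delta p: XOR of the two missing blocks  */
	dq = Q ^ Q'                    /* delta q: their GF(2^8)-weighted XOR     */
	Db = pbmul[dp] ^ qmul[dq]      /* table lookups selected from faila/failb */
	Da = dp ^ Db

raid6_datap_recov_avx512() is the degenerate single-data-plus-P case: only delta q is formed, the missing block is qmul[dq], and P is then repaired by XORing the recovered block back into it.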
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 000000000000..b042dac826cc
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
1/*
2 * RAID-6 data recovery in dual failure mode based on the XC instruction.
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/export.h>
9#include <linux/raid/pq.h>
10
11static inline void xor_block(u8 *p1, u8 *p2)
12{
13 typedef struct { u8 _[256]; } addrtype;
14
15 asm volatile(
16 " xc 0(256,%[p1]),0(%[p2])\n"
17 : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
18 [p1] "a" (p1), [p2] "a" (p2) : "cc");
19}
20
21/* Recover two failed data blocks. */
22static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
23 int failb, void **ptrs)
24{
25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */
27 const u8 *qmul; /* Q multiplier table (for both) */
28 int i;
29
30 p = (u8 *)ptrs[disks-2];
31 q = (u8 *)ptrs[disks-1];
32
33 /* Compute syndrome with zero for the missing data pages
34 Use the dead data pages as temporary storage for
35 delta p and delta q */
36 dp = (u8 *)ptrs[faila];
37 ptrs[faila] = (void *)raid6_empty_zero_page;
38 ptrs[disks-2] = dp;
39 dq = (u8 *)ptrs[failb];
40 ptrs[failb] = (void *)raid6_empty_zero_page;
41 ptrs[disks-1] = dq;
42
43 raid6_call.gen_syndrome(disks, bytes, ptrs);
44
45 /* Restore pointer table */
46 ptrs[faila] = dp;
47 ptrs[failb] = dq;
48 ptrs[disks-2] = p;
49 ptrs[disks-1] = q;
50
51 /* Now, pick the proper data tables */
52 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
53 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
54
55 /* Now do it... */
56 while (bytes) {
57 xor_block(dp, p);
58 xor_block(dq, q);
59 for (i = 0; i < 256; i++)
60 dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
61 xor_block(dp, dq);
62 p += 256;
63 q += 256;
64 dp += 256;
65 dq += 256;
66 bytes -= 256;
67 }
68}
69
70/* Recover failure of one data block plus the P block */
71static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
72 void **ptrs)
73{
74 u8 *p, *q, *dq;
75 const u8 *qmul; /* Q multiplier table */
76 int i;
77
78 p = (u8 *)ptrs[disks-2];
79 q = (u8 *)ptrs[disks-1];
80
81 /* Compute syndrome with zero for the missing data page
82 Use the dead data page as temporary storage for delta q */
83 dq = (u8 *)ptrs[faila];
84 ptrs[faila] = (void *)raid6_empty_zero_page;
85 ptrs[disks-1] = dq;
86
87 raid6_call.gen_syndrome(disks, bytes, ptrs);
88
89 /* Restore pointer table */
90 ptrs[faila] = dq;
91 ptrs[disks-1] = q;
92
93 /* Now, pick the proper data tables */
94 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
95
96 /* Now do it... */
97 while (bytes) {
98 xor_block(dq, q);
99 for (i = 0; i < 256; i++)
100 dq[i] = qmul[dq[i]];
101 xor_block(p, dq);
102 p += 256;
103 q += 256;
104 dq += 256;
105 bytes -= 256;
106 }
107}
108
109
110const struct raid6_recov_calls raid6_recov_s390xc = {
111 .data2 = raid6_2data_recov_s390xc,
112 .datap = raid6_datap_recov_s390xc,
113 .valid = NULL,
114 .name = "s390xc",
115 .priority = 1,
116};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 000000000000..7b45191a655f
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
1/*
2 * raid6_vx$#.c
3 *
4 * $#-way unrolled RAID6 gen/xor functions for s390
5 * based on the vector facility
6 *
7 * Copyright IBM Corp. 2016
8 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
9 *
10 * This file is postprocessed using unroll.awk.
11 */
12
13#include <linux/raid/pq.h>
14#include <asm/fpu/api.h>
15
16asm(".include \"asm/vx-insn.h\"\n");
17
18#define NSIZE 16
19
20static inline void LOAD_CONST(void)
21{
22 asm volatile("VREPIB %v24,7");
23 asm volatile("VREPIB %v25,0x1d");
24}
25
26/*
27 * The SHLBYTE() operation shifts each of the 16 bytes in
28 * vector register y left by 1 bit and stores the result in
29 * vector register x.
30 */
31static inline void SHLBYTE(int x, int y)
32{
33 asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
34}
35
36/*
37 * For each of the 16 bytes in the vector register y the MASK()
38 * operation returns 0xFF if the high bit of the byte is 1,
39 * or 0x00 if the high bit is 0. The result is stored in vector
40 * register x.
41 */
42static inline void MASK(int x, int y)
43{
44 asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
45}
46
47static inline void AND(int x, int y, int z)
48{
49 asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
50}
51
52static inline void XOR(int x, int y, int z)
53{
54 asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
55}
56
57static inline void LOAD_DATA(int x, int n, u8 *ptr)
58{
59 typedef struct { u8 _[16*n]; } addrtype;
60 register addrtype *__ptr asm("1") = (addrtype *) ptr;
61
62 asm volatile ("VLM %2,%3,0,%r1"
63 : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
64}
65
66static inline void STORE_DATA(int x, int n, u8 *ptr)
67{
68 typedef struct { u8 _[16*n]; } addrtype;
69 register addrtype *__ptr asm("1") = (addrtype *) ptr;
70
71 asm volatile ("VSTM %2,%3,0,1"
72 : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
73}
74
75static inline void COPY_VEC(int x, int y)
76{
77 asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
78}
79
80static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
81{
82 struct kernel_fpu vxstate;
83 u8 **dptr, *p, *q;
84 int d, z, z0;
85
86 kernel_fpu_begin(&vxstate, KERNEL_VXR);
87 LOAD_CONST();
88
89 dptr = (u8 **) ptrs;
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0 + 1]; /* XOR parity */
92 q = dptr[z0 + 2]; /* RS syndrome */
93
94 for (d = 0; d < bytes; d += $#*NSIZE) {
95 LOAD_DATA(0,$#,&dptr[z0][d]);
96 COPY_VEC(8+$$,0+$$);
97 for (z = z0 - 1; z >= 0; z--) {
98 MASK(16+$$,8+$$);
99 AND(16+$$,16+$$,25);
100 SHLBYTE(8+$$,8+$$);
101 XOR(8+$$,8+$$,16+$$);
102 LOAD_DATA(16,$#,&dptr[z][d]);
103 XOR(0+$$,0+$$,16+$$);
104 XOR(8+$$,8+$$,16+$$);
105 }
106 STORE_DATA(0,$#,&p[d]);
107 STORE_DATA(8,$#,&q[d]);
108 }
109 kernel_fpu_end(&vxstate, KERNEL_VXR);
110}
111
112static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
113 size_t bytes, void **ptrs)
114{
115 struct kernel_fpu vxstate;
116 u8 **dptr, *p, *q;
117 int d, z, z0;
118
119 dptr = (u8 **) ptrs;
120 z0 = stop; /* P/Q right side optimization */
121 p = dptr[disks - 2]; /* XOR parity */
122 q = dptr[disks - 1]; /* RS syndrome */
123
124 kernel_fpu_begin(&vxstate, KERNEL_VXR);
125 LOAD_CONST();
126
127 for (d = 0; d < bytes; d += $#*NSIZE) {
128 /* P/Q data pages */
129 LOAD_DATA(0,$#,&dptr[z0][d]);
130 COPY_VEC(8+$$,0+$$);
131 for (z = z0 - 1; z >= start; z--) {
132 MASK(16+$$,8+$$);
133 AND(16+$$,16+$$,25);
134 SHLBYTE(8+$$,8+$$);
135 XOR(8+$$,8+$$,16+$$);
136 LOAD_DATA(16,$#,&dptr[z][d]);
137 XOR(0+$$,0+$$,16+$$);
138 XOR(8+$$,8+$$,16+$$);
139 }
140 /* P/Q left side optimization */
141 for (z = start - 1; z >= 0; z--) {
142 MASK(16+$$,8+$$);
143 AND(16+$$,16+$$,25);
144 SHLBYTE(8+$$,8+$$);
145 XOR(8+$$,8+$$,16+$$);
146 }
147 LOAD_DATA(16,$#,&p[d]);
148 XOR(16+$$,16+$$,0+$$);
149 STORE_DATA(16,$#,&p[d]);
150 LOAD_DATA(16,$#,&q[d]);
151 XOR(16+$$,16+$$,8+$$);
152 STORE_DATA(16,$#,&q[d]);
153 }
154 kernel_fpu_end(&vxstate, KERNEL_VXR);
155}
156
157static int raid6_s390vx$#_valid(void)
158{
159 return MACHINE_HAS_VX;
160}
161
162const struct raid6_calls raid6_s390vx$# = {
163 raid6_s390vx$#_gen_syndrome,
164 raid6_s390vx$#_xor_syndrome,
165 raid6_s390vx$#_valid,
166 "vx128x$#",
167 1
168};
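As the Makefile hunk earlier in this diff shows, s390vx.uc is not compiled directly: s390vx8.c is generated from it by unroll.awk with UNROLL := 8. To a first approximation (a sketch of that preprocessing step, not code from this patch), $# is replaced by the unroll factor and every line containing $$ is emitted once for each value 0 .. $#-1, so a single source line expands into a straight-line block:

	/* source line in s390vx.uc */
	COPY_VEC(8+$$,0+$$);

	/* roughly what lands in the generated s390vx8.c */
	COPY_VEC(8+0,0+0);
	COPY_VEC(8+1,0+1);
	/* ... */
	COPY_VEC(8+7,0+7);

That yields the 8-way unrolled raid6_s390vx8_gen_syndrome()/raid6_s390vx8_xor_syndrome() pair that algos.c registers as raid6_s390vx8.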
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 29090f3db677..2c7b60edea04 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64)
32endif 32endif
33 33
34ifeq ($(IS_X86),yes) 34ifeq ($(IS_X86),yes)
35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
37 gcc -c -x assembler - >&/dev/null && \ 37 gcc -c -x assembler - >&/dev/null && \
38 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1)
39 CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
40 gcc -c -x assembler - >&/dev/null && \
41 rm ./-.o && echo -DCONFIG_AS_AVX512=1)
39else ifeq ($(HAS_NEON),yes) 42else ifeq ($(HAS_NEON),yes)
40 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o 43 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
41 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 44 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 3bebbabdb510..b07f4d8e6b03 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -21,12 +21,13 @@
21 21
22#define NDISKS 16 /* Including P and Q */ 22#define NDISKS 16 /* Including P and Q */
23 23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
25struct raid6_calls raid6_call; 25struct raid6_calls raid6_call;
26 26
27char *dataptrs[NDISKS]; 27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30 31
31static void makedata(int start, int stop) 32static void makedata(int start, int stop)
32{ 33{
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
49#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
50#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
51 * Instructions
52 */
53#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
54 * Instructions
55 */
56#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
57 * Extensions
58 */
49#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 59#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
50 60
51/* Should work well enough on modern CPUs for testing */ 61/* Should work well enough on modern CPUs for testing */
diff --git a/lib/random32.c b/lib/random32.c
index 69ed593aab07..fa594b1140e6 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void)
47} 47}
48#endif 48#endif
49 49
50static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 50static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
51 51
52/** 52/**
53 * prandom_u32_state - seeded pseudo-random number generator. 53 * prandom_u32_state - seeded pseudo-random number generator.
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
81 u32 res; 81 u32 res;
82 82
83 res = prandom_u32_state(state); 83 res = prandom_u32_state(state);
84 put_cpu_var(state); 84 put_cpu_var(net_rand_state);
85 85
86 return res; 86 return res;
87} 87}
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
128 struct rnd_state *state = &get_cpu_var(net_rand_state); 128 struct rnd_state *state = &get_cpu_var(net_rand_state);
129 129
130 prandom_bytes_state(state, buf, bytes); 130 prandom_bytes_state(state, buf, bytes);
131 put_cpu_var(state); 131 put_cpu_var(net_rand_state);
132} 132}
133EXPORT_SYMBOL(prandom_bytes); 133EXPORT_SYMBOL(prandom_bytes);
134 134
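The two random32.c hunks are a pairing fix: get_cpu_var() and put_cpu_var() are meant to name the same per-CPU variable (get yields this CPU's instance with preemption disabled, put re-enables preemption), so the put side now names net_rand_state rather than the local pointer it happened to be handed. A minimal sketch of the intended idiom, assuming only the standard percpu accessors (the wrapper name is illustrative):

	DEFINE_PER_CPU(struct rnd_state, net_rand_state);

	u32 example_prandom_u32(void)
	{
		/* get_cpu_var() disables preemption and yields this CPU's copy */
		struct rnd_state *state = &get_cpu_var(net_rand_state);
		u32 res = prandom_u32_state(state);

		/* put_cpu_var() takes the per-CPU variable, not the local pointer */
		put_cpu_var(net_rand_state);
		return res;
	}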
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 56054e541a0f..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
378 schedule_work(&ht->run_work); 378 schedule_work(&ht->run_work);
379} 379}
380 380
381static bool rhashtable_check_elasticity(struct rhashtable *ht, 381static int rhashtable_insert_rehash(struct rhashtable *ht,
382 struct bucket_table *tbl, 382 struct bucket_table *tbl)
383 unsigned int hash)
384{
385 unsigned int elasticity = ht->elasticity;
386 struct rhash_head *head;
387
388 rht_for_each(head, tbl, hash)
389 if (!--elasticity)
390 return true;
391
392 return false;
393}
394
395int rhashtable_insert_rehash(struct rhashtable *ht,
396 struct bucket_table *tbl)
397{ 383{
398 struct bucket_table *old_tbl; 384 struct bucket_table *old_tbl;
399 struct bucket_table *new_tbl; 385 struct bucket_table *new_tbl;
@@ -439,61 +425,172 @@ fail:
439 425
440 return err; 426 return err;
441} 427}
442EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
443 428
444struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, 429static void *rhashtable_lookup_one(struct rhashtable *ht,
445 const void *key, 430 struct bucket_table *tbl, unsigned int hash,
446 struct rhash_head *obj, 431 const void *key, struct rhash_head *obj)
447 struct bucket_table *tbl)
448{ 432{
433 struct rhashtable_compare_arg arg = {
434 .ht = ht,
435 .key = key,
436 };
437 struct rhash_head __rcu **pprev;
449 struct rhash_head *head; 438 struct rhash_head *head;
450 unsigned int hash; 439 int elasticity;
451 int err;
452 440
453 tbl = rhashtable_last_table(ht, tbl); 441 elasticity = ht->elasticity;
454 hash = head_hashfn(ht, tbl, obj); 442 pprev = &tbl->buckets[hash];
455 spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); 443 rht_for_each(head, tbl, hash) {
444 struct rhlist_head *list;
445 struct rhlist_head *plist;
456 446
457 err = -EEXIST; 447 elasticity--;
458 if (key && rhashtable_lookup_fast(ht, key, ht->p)) 448 if (!key ||
459 goto exit; 449 (ht->p.obj_cmpfn ?
450 ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
451 rhashtable_compare(&arg, rht_obj(ht, head))))
452 continue;
460 453
461 err = -E2BIG; 454 if (!ht->rhlist)
462 if (unlikely(rht_grow_above_max(ht, tbl))) 455 return rht_obj(ht, head);
463 goto exit; 456
457 list = container_of(obj, struct rhlist_head, rhead);
458 plist = container_of(head, struct rhlist_head, rhead);
459
460 RCU_INIT_POINTER(list->next, plist);
461 head = rht_dereference_bucket(head->next, tbl, hash);
462 RCU_INIT_POINTER(list->rhead.next, head);
463 rcu_assign_pointer(*pprev, obj);
464
465 return NULL;
466 }
467
468 if (elasticity <= 0)
469 return ERR_PTR(-EAGAIN);
470
471 return ERR_PTR(-ENOENT);
472}
473
474static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
475 struct bucket_table *tbl,
476 unsigned int hash,
477 struct rhash_head *obj,
478 void *data)
479{
480 struct bucket_table *new_tbl;
481 struct rhash_head *head;
482
483 if (!IS_ERR_OR_NULL(data))
484 return ERR_PTR(-EEXIST);
485
486 if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
487 return ERR_CAST(data);
464 488
465 err = -EAGAIN; 489 new_tbl = rcu_dereference(tbl->future_tbl);
466 if (rhashtable_check_elasticity(ht, tbl, hash) || 490 if (new_tbl)
467 rht_grow_above_100(ht, tbl)) 491 return new_tbl;
468 goto exit;
469 492
470 err = 0; 493 if (PTR_ERR(data) != -ENOENT)
494 return ERR_CAST(data);
495
496 if (unlikely(rht_grow_above_max(ht, tbl)))
497 return ERR_PTR(-E2BIG);
498
499 if (unlikely(rht_grow_above_100(ht, tbl)))
500 return ERR_PTR(-EAGAIN);
471 501
472 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); 502 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
473 503
474 RCU_INIT_POINTER(obj->next, head); 504 RCU_INIT_POINTER(obj->next, head);
505 if (ht->rhlist) {
506 struct rhlist_head *list;
507
508 list = container_of(obj, struct rhlist_head, rhead);
509 RCU_INIT_POINTER(list->next, NULL);
510 }
475 511
476 rcu_assign_pointer(tbl->buckets[hash], obj); 512 rcu_assign_pointer(tbl->buckets[hash], obj);
477 513
478 atomic_inc(&ht->nelems); 514 atomic_inc(&ht->nelems);
515 if (rht_grow_above_75(ht, tbl))
516 schedule_work(&ht->run_work);
479 517
480exit: 518 return NULL;
481 spin_unlock(rht_bucket_lock(tbl, hash)); 519}
482 520
483 if (err == 0) 521static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
484 return NULL; 522 struct rhash_head *obj)
485 else if (err == -EAGAIN) 523{
486 return tbl; 524 struct bucket_table *new_tbl;
487 else 525 struct bucket_table *tbl;
488 return ERR_PTR(err); 526 unsigned int hash;
527 spinlock_t *lock;
528 void *data;
529
530 tbl = rcu_dereference(ht->tbl);
531
532 /* All insertions must grab the oldest table containing
533 * the hashed bucket that is yet to be rehashed.
534 */
535 for (;;) {
536 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
537 lock = rht_bucket_lock(tbl, hash);
538 spin_lock_bh(lock);
539
540 if (tbl->rehash <= hash)
541 break;
542
543 spin_unlock_bh(lock);
544 tbl = rcu_dereference(tbl->future_tbl);
545 }
546
547 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
548 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
549 if (PTR_ERR(new_tbl) != -EEXIST)
550 data = ERR_CAST(new_tbl);
551
552 while (!IS_ERR_OR_NULL(new_tbl)) {
553 tbl = new_tbl;
554 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
555 spin_lock_nested(rht_bucket_lock(tbl, hash),
556 SINGLE_DEPTH_NESTING);
557
558 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
559 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
560 if (PTR_ERR(new_tbl) != -EEXIST)
561 data = ERR_CAST(new_tbl);
562
563 spin_unlock(rht_bucket_lock(tbl, hash));
564 }
565
566 spin_unlock_bh(lock);
567
568 if (PTR_ERR(data) == -EAGAIN)
569 data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
570 -EAGAIN);
571
572 return data;
573}
574
575void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
576 struct rhash_head *obj)
577{
578 void *data;
579
580 do {
581 rcu_read_lock();
582 data = rhashtable_try_insert(ht, key, obj);
583 rcu_read_unlock();
584 } while (PTR_ERR(data) == -EAGAIN);
585
586 return data;
489} 587}
490EXPORT_SYMBOL_GPL(rhashtable_insert_slow); 588EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
491 589
492/** 590/**
493 * rhashtable_walk_init - Initialise an iterator 591 * rhashtable_walk_enter - Initialise an iterator
494 * @ht: Table to walk over 592 * @ht: Table to walk over
495 * @iter: Hash table Iterator 593 * @iter: Hash table Iterator
496 * @gfp: GFP flags for allocations
497 * 594 *
498 * This function prepares a hash table walk. 595 * This function prepares a hash table walk.
499 * 596 *
@@ -508,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
508 * This function may sleep so you must not call it from interrupt 605 * This function may sleep so you must not call it from interrupt
509 * context or with spin locks held. 606 * context or with spin locks held.
510 * 607 *
511 * You must call rhashtable_walk_exit if this function returns 608 * You must call rhashtable_walk_exit after this function returns.
512 * successfully.
513 */ 609 */
514int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, 610void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
515 gfp_t gfp)
516{ 611{
517 iter->ht = ht; 612 iter->ht = ht;
518 iter->p = NULL; 613 iter->p = NULL;
519 iter->slot = 0; 614 iter->slot = 0;
520 iter->skip = 0; 615 iter->skip = 0;
521 616
522 iter->walker = kmalloc(sizeof(*iter->walker), gfp);
523 if (!iter->walker)
524 return -ENOMEM;
525
526 spin_lock(&ht->lock); 617 spin_lock(&ht->lock);
527 iter->walker->tbl = 618 iter->walker.tbl =
528 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); 619 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
529 list_add(&iter->walker->list, &iter->walker->tbl->walkers); 620 list_add(&iter->walker.list, &iter->walker.tbl->walkers);
530 spin_unlock(&ht->lock); 621 spin_unlock(&ht->lock);
531
532 return 0;
533} 622}
534EXPORT_SYMBOL_GPL(rhashtable_walk_init); 623EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
535 624
536/** 625/**
537 * rhashtable_walk_exit - Free an iterator 626 * rhashtable_walk_exit - Free an iterator
@@ -542,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
542void rhashtable_walk_exit(struct rhashtable_iter *iter) 631void rhashtable_walk_exit(struct rhashtable_iter *iter)
543{ 632{
544 spin_lock(&iter->ht->lock); 633 spin_lock(&iter->ht->lock);
545 if (iter->walker->tbl) 634 if (iter->walker.tbl)
546 list_del(&iter->walker->list); 635 list_del(&iter->walker.list);
547 spin_unlock(&iter->ht->lock); 636 spin_unlock(&iter->ht->lock);
548 kfree(iter->walker);
549} 637}
550EXPORT_SYMBOL_GPL(rhashtable_walk_exit); 638EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
551 639
@@ -571,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
571 rcu_read_lock(); 659 rcu_read_lock();
572 660
573 spin_lock(&ht->lock); 661 spin_lock(&ht->lock);
574 if (iter->walker->tbl) 662 if (iter->walker.tbl)
575 list_del(&iter->walker->list); 663 list_del(&iter->walker.list);
576 spin_unlock(&ht->lock); 664 spin_unlock(&ht->lock);
577 665
578 if (!iter->walker->tbl) { 666 if (!iter->walker.tbl) {
579 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); 667 iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
580 return -EAGAIN; 668 return -EAGAIN;
581 } 669 }
582 670
@@ -598,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
598 */ 686 */
599void *rhashtable_walk_next(struct rhashtable_iter *iter) 687void *rhashtable_walk_next(struct rhashtable_iter *iter)
600{ 688{
601 struct bucket_table *tbl = iter->walker->tbl; 689 struct bucket_table *tbl = iter->walker.tbl;
690 struct rhlist_head *list = iter->list;
602 struct rhashtable *ht = iter->ht; 691 struct rhashtable *ht = iter->ht;
603 struct rhash_head *p = iter->p; 692 struct rhash_head *p = iter->p;
693 bool rhlist = ht->rhlist;
604 694
605 if (p) { 695 if (p) {
606 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 696 if (!rhlist || !(list = rcu_dereference(list->next))) {
697 p = rcu_dereference(p->next);
698 list = container_of(p, struct rhlist_head, rhead);
699 }
607 goto next; 700 goto next;
608 } 701 }
609 702
@@ -611,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
611 int skip = iter->skip; 704 int skip = iter->skip;
612 705
613 rht_for_each_rcu(p, tbl, iter->slot) { 706 rht_for_each_rcu(p, tbl, iter->slot) {
707 if (rhlist) {
708 list = container_of(p, struct rhlist_head,
709 rhead);
710 do {
711 if (!skip)
712 goto next;
713 skip--;
714 list = rcu_dereference(list->next);
715 } while (list);
716
717 continue;
718 }
614 if (!skip) 719 if (!skip)
615 break; 720 break;
616 skip--; 721 skip--;
@@ -620,7 +725,8 @@ next:
620 if (!rht_is_a_nulls(p)) { 725 if (!rht_is_a_nulls(p)) {
621 iter->skip++; 726 iter->skip++;
622 iter->p = p; 727 iter->p = p;
623 return rht_obj(ht, p); 728 iter->list = list;
729 return rht_obj(ht, rhlist ? &list->rhead : p);
624 } 730 }
625 731
626 iter->skip = 0; 732 iter->skip = 0;
@@ -631,8 +737,8 @@ next:
631 /* Ensure we see any new tables. */ 737 /* Ensure we see any new tables. */
632 smp_rmb(); 738 smp_rmb();
633 739
634 iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); 740 iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
635 if (iter->walker->tbl) { 741 if (iter->walker.tbl) {
636 iter->slot = 0; 742 iter->slot = 0;
637 iter->skip = 0; 743 iter->skip = 0;
638 return ERR_PTR(-EAGAIN); 744 return ERR_PTR(-EAGAIN);
@@ -652,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
652 __releases(RCU) 758 __releases(RCU)
653{ 759{
654 struct rhashtable *ht; 760 struct rhashtable *ht;
655 struct bucket_table *tbl = iter->walker->tbl; 761 struct bucket_table *tbl = iter->walker.tbl;
656 762
657 if (!tbl) 763 if (!tbl)
658 goto out; 764 goto out;
@@ -661,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
661 767
662 spin_lock(&ht->lock); 768 spin_lock(&ht->lock);
663 if (tbl->rehash < tbl->size) 769 if (tbl->rehash < tbl->size)
664 list_add(&iter->walker->list, &tbl->walkers); 770 list_add(&iter->walker.list, &tbl->walkers);
665 else 771 else
666 iter->walker->tbl = NULL; 772 iter->walker.tbl = NULL;
667 spin_unlock(&ht->lock); 773 spin_unlock(&ht->lock);
668 774
669 iter->p = NULL; 775 iter->p = NULL;
@@ -809,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
809EXPORT_SYMBOL_GPL(rhashtable_init); 915EXPORT_SYMBOL_GPL(rhashtable_init);
810 916
811/** 917/**
918 * rhltable_init - initialize a new hash list table
919 * @hlt: hash list table to be initialized
920 * @params: configuration parameters
921 *
922 * Initializes a new hash list table.
923 *
924 * See documentation for rhashtable_init.
925 */
926int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
927{
928 int err;
929
930 /* No rhlist NULLs marking for now. */
931 if (params->nulls_base)
932 return -EINVAL;
933
934 err = rhashtable_init(&hlt->ht, params);
935 hlt->ht.rhlist = true;
936 return err;
937}
938EXPORT_SYMBOL_GPL(rhltable_init);
939
940static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
941 void (*free_fn)(void *ptr, void *arg),
942 void *arg)
943{
944 struct rhlist_head *list;
945
946 if (!ht->rhlist) {
947 free_fn(rht_obj(ht, obj), arg);
948 return;
949 }
950
951 list = container_of(obj, struct rhlist_head, rhead);
952 do {
953 obj = &list->rhead;
954 list = rht_dereference(list->next, ht);
955 free_fn(rht_obj(ht, obj), arg);
956 } while (list);
957}
958
959/**
812 * rhashtable_free_and_destroy - free elements and destroy hash table 960 * rhashtable_free_and_destroy - free elements and destroy hash table
813 * @ht: the hash table to destroy 961 * @ht: the hash table to destroy
814 * @free_fn: callback to release resources of element 962 * @free_fn: callback to release resources of element
@@ -845,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
845 pos = next, 993 pos = next,
846 next = !rht_is_a_nulls(pos) ? 994 next = !rht_is_a_nulls(pos) ?
847 rht_dereference(pos->next, ht) : NULL) 995 rht_dereference(pos->next, ht) : NULL)
848 free_fn(rht_obj(ht, pos), arg); 996 rhashtable_free_one(ht, pos, free_fn, arg);
849 } 997 }
850 } 998 }
851 999
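Two user-visible changes run through these rhashtable.c hunks: the walker state moves from a separately kmalloc'ed object into the iterator itself (iter->walker. instead of iter->walker->), which is why rhashtable_walk_init() becomes the non-failing rhashtable_walk_enter(), and duplicate-key "rhlist" tables chain their entries through struct rhlist_head. A hedged sketch of the walk pattern after this change (error handling trimmed, caller-side names illustrative):

	struct rhashtable_iter iter;
	void *obj;

	rhashtable_walk_enter(&ht, &iter);	/* no allocation, cannot fail */

	rhashtable_walk_start(&iter);		/* may report -EAGAIN after a resize */
	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table changed under us, keep going */
			break;
		}
		/* ... use obj ... */
	}
	rhashtable_walk_stop(&iter);

	rhashtable_walk_exit(&iter);		/* unhook the embedded walker */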
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
new file mode 100644
index 000000000000..2cecf05c82fd
--- /dev/null
+++ b/lib/sbitmap.c
@@ -0,0 +1,347 @@
1/*
2 * Copyright (C) 2016 Facebook
3 * Copyright (C) 2013-2014 Jens Axboe
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18#include <linux/random.h>
19#include <linux/sbitmap.h>
20
21int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
22 gfp_t flags, int node)
23{
24 unsigned int bits_per_word;
25 unsigned int i;
26
27 if (shift < 0) {
28 shift = ilog2(BITS_PER_LONG);
29 /*
30 * If the bitmap is small, shrink the number of bits per word so
31 * we spread over a few cachelines, at least. If less than 4
32 * bits, just forget about it, it's not going to work optimally
33 * anyway.
34 */
35 if (depth >= 4) {
36 while ((4U << shift) > depth)
37 shift--;
38 }
39 }
40 bits_per_word = 1U << shift;
41 if (bits_per_word > BITS_PER_LONG)
42 return -EINVAL;
43
44 sb->shift = shift;
45 sb->depth = depth;
46 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
47
48 if (depth == 0) {
49 sb->map = NULL;
50 return 0;
51 }
52
53 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
54 if (!sb->map)
55 return -ENOMEM;
56
57 for (i = 0; i < sb->map_nr; i++) {
58 sb->map[i].depth = min(depth, bits_per_word);
59 depth -= sb->map[i].depth;
60 }
61 return 0;
62}
63EXPORT_SYMBOL_GPL(sbitmap_init_node);
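To make the shift heuristic above concrete (a worked example, not part of the patch): on a 64-bit build shift starts at ilog2(64) = 6; for depth = 128 the loop sees 4 << 6 = 256 > 128 and drops shift to 5, then 4 << 5 = 128 is not greater than 128 and stops. That gives 32 bits per word and map_nr = DIV_ROUND_UP(128, 32) = 4, so the 128 bits are spread over four sbitmap_word entries rather than packed into two unsigned longs, which is exactly the "spread over a few cachelines" goal the comment describes.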
64
65void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
66{
67 unsigned int bits_per_word = 1U << sb->shift;
68 unsigned int i;
69
70 sb->depth = depth;
71 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
72
73 for (i = 0; i < sb->map_nr; i++) {
74 sb->map[i].depth = min(depth, bits_per_word);
75 depth -= sb->map[i].depth;
76 }
77}
78EXPORT_SYMBOL_GPL(sbitmap_resize);
79
80static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
81 bool wrap)
82{
83 unsigned int orig_hint = hint;
84 int nr;
85
86 while (1) {
87 nr = find_next_zero_bit(&word->word, word->depth, hint);
88 if (unlikely(nr >= word->depth)) {
89 /*
90 * We started with an offset, and we didn't reset the
91 * offset to 0 in a failure case, so start from 0 to
92 * exhaust the map.
93 */
94 if (orig_hint && hint && wrap) {
95 hint = orig_hint = 0;
96 continue;
97 }
98 return -1;
99 }
100
101 if (!test_and_set_bit(nr, &word->word))
102 break;
103
104 hint = nr + 1;
105 if (hint >= word->depth - 1)
106 hint = 0;
107 }
108
109 return nr;
110}
111
112int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
113{
114 unsigned int i, index;
115 int nr = -1;
116
117 index = SB_NR_TO_INDEX(sb, alloc_hint);
118
119 for (i = 0; i < sb->map_nr; i++) {
120 nr = __sbitmap_get_word(&sb->map[index],
121 SB_NR_TO_BIT(sb, alloc_hint),
122 !round_robin);
123 if (nr != -1) {
124 nr += index << sb->shift;
125 break;
126 }
127
128 /* Jump to next index. */
129 index++;
130 alloc_hint = index << sb->shift;
131
132 if (index >= sb->map_nr) {
133 index = 0;
134 alloc_hint = 0;
135 }
136 }
137
138 return nr;
139}
140EXPORT_SYMBOL_GPL(sbitmap_get);
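/*
 * Editorial illustration (assuming SB_NR_TO_INDEX() shifts right by
 * sb->shift and SB_NR_TO_BIT() masks off the low bits, as in the header
 * added alongside this file): with shift == 5, alloc_hint == 70 starts the
 * scan at index == 70 >> 5 == 2, bit == 70 & 31 == 6, and a free bit b
 * found in word 2 is returned as b + (2 << 5).
 */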
141
142bool sbitmap_any_bit_set(const struct sbitmap *sb)
143{
144 unsigned int i;
145
146 for (i = 0; i < sb->map_nr; i++) {
147 if (sb->map[i].word)
148 return true;
149 }
150 return false;
151}
152EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
153
154bool sbitmap_any_bit_clear(const struct sbitmap *sb)
155{
156 unsigned int i;
157
158 for (i = 0; i < sb->map_nr; i++) {
159 const struct sbitmap_word *word = &sb->map[i];
160 unsigned long ret;
161
162 ret = find_first_zero_bit(&word->word, word->depth);
163 if (ret < word->depth)
164 return true;
165 }
166 return false;
167}
168EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
169
170unsigned int sbitmap_weight(const struct sbitmap *sb)
171{
172 unsigned int i, weight = 0;
173
174 for (i = 0; i < sb->map_nr; i++) {
175 const struct sbitmap_word *word = &sb->map[i];
176
177 weight += bitmap_weight(&word->word, word->depth);
178 }
179 return weight;
180}
181EXPORT_SYMBOL_GPL(sbitmap_weight);
182
183static unsigned int sbq_calc_wake_batch(unsigned int depth)
184{
185 unsigned int wake_batch;
186
187 /*
188 * For each batch, we wake up one queue. We need to make sure that our
189 * batch size is small enough that the full depth of the bitmap is
190 * enough to wake up all of the queues.
191 */
192 wake_batch = SBQ_WAKE_BATCH;
193 if (wake_batch > depth / SBQ_WAIT_QUEUES)
194 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
195
196 return wake_batch;
197}
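/*
 * Worked example (editorial; assumes SBQ_WAIT_QUEUES == 8 and
 * SBQ_WAKE_BATCH == 8 as in the accompanying sbitmap.h):
 *   depth 256: 256 / 8 == 32 >= 8, so wake_batch stays 8
 *   depth  32:  32 / 8 ==  4,      so wake_batch becomes 4
 *   depth   4:   4 / 8 ==  0,      so wake_batch clamps to 1
 */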
198
199int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
200 int shift, bool round_robin, gfp_t flags, int node)
201{
202 int ret;
203 int i;
204
205 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
206 if (ret)
207 return ret;
208
209 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
210 if (!sbq->alloc_hint) {
211 sbitmap_free(&sbq->sb);
212 return -ENOMEM;
213 }
214
215 if (depth && !round_robin) {
216 for_each_possible_cpu(i)
217 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
218 }
219
220 sbq->wake_batch = sbq_calc_wake_batch(depth);
221 atomic_set(&sbq->wake_index, 0);
222
223 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
224 if (!sbq->ws) {
225 free_percpu(sbq->alloc_hint);
226 sbitmap_free(&sbq->sb);
227 return -ENOMEM;
228 }
229
230 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
231 init_waitqueue_head(&sbq->ws[i].wait);
232 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
233 }
234
235 sbq->round_robin = round_robin;
236 return 0;
237}
238EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
239
240void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
241{
242 sbq->wake_batch = sbq_calc_wake_batch(depth);
243 sbitmap_resize(&sbq->sb, depth);
244}
245EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
246
247int __sbitmap_queue_get(struct sbitmap_queue *sbq)
248{
249 unsigned int hint, depth;
250 int nr;
251
252 hint = this_cpu_read(*sbq->alloc_hint);
253 depth = READ_ONCE(sbq->sb.depth);
254 if (unlikely(hint >= depth)) {
255 hint = depth ? prandom_u32() % depth : 0;
256 this_cpu_write(*sbq->alloc_hint, hint);
257 }
258 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
259
260 if (nr == -1) {
261 /* If the map is full, a hint won't do us much good. */
262 this_cpu_write(*sbq->alloc_hint, 0);
263 } else if (nr == hint || unlikely(sbq->round_robin)) {
264 /* Only update the hint if we used it. */
265 hint = nr + 1;
266 if (hint >= depth - 1)
267 hint = 0;
268 this_cpu_write(*sbq->alloc_hint, hint);
269 }
270
271 return nr;
272}
273EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
274
275static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
276{
277 int i, wake_index;
278
279 wake_index = atomic_read(&sbq->wake_index);
280 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
281 struct sbq_wait_state *ws = &sbq->ws[wake_index];
282
283 if (waitqueue_active(&ws->wait)) {
284 int o = atomic_read(&sbq->wake_index);
285
286 if (wake_index != o)
287 atomic_cmpxchg(&sbq->wake_index, o, wake_index);
288 return ws;
289 }
290
291 wake_index = sbq_index_inc(wake_index);
292 }
293
294 return NULL;
295}
296
297static void sbq_wake_up(struct sbitmap_queue *sbq)
298{
299 struct sbq_wait_state *ws;
300 int wait_cnt;
301
302 /* Ensure that the wait list checks occur after clear_bit(). */
303 smp_mb();
304
305 ws = sbq_wake_ptr(sbq);
306 if (!ws)
307 return;
308
309 wait_cnt = atomic_dec_return(&ws->wait_cnt);
310 if (unlikely(wait_cnt < 0))
311 wait_cnt = atomic_inc_return(&ws->wait_cnt);
312 if (wait_cnt == 0) {
313 atomic_add(sbq->wake_batch, &ws->wait_cnt);
314 sbq_index_atomic_inc(&sbq->wake_index);
315 wake_up(&ws->wait);
316 }
317}
318
319void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
320 unsigned int cpu)
321{
322 sbitmap_clear_bit(&sbq->sb, nr);
323 sbq_wake_up(sbq);
324 if (likely(!sbq->round_robin && nr < sbq->sb.depth))
325 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
326}
327EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
328
329void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
330{
331 int i, wake_index;
332
333 /*
334 * Make sure all changes prior to this are visible from other CPUs.
335 */
336 smp_mb();
337 wake_index = atomic_read(&sbq->wake_index);
338 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
339 struct sbq_wait_state *ws = &sbq->ws[wake_index];
340
341 if (waitqueue_active(&ws->wait))
342 wake_up(&ws->wait);
343
344 wake_index = sbq_index_inc(wake_index);
345 }
346}
347EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
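
Taken together, the sbitmap_queue functions above hand out and recycle small integer "tags". A hedged usage sketch (editorial; the surrounding tag consumer is hypothetical, error handling is trimmed, and teardown helpers such as sbitmap_queue_free() are assumed to live in the accompanying header):

struct sbitmap_queue tags;
unsigned int cpu;
int tag;

if (sbitmap_queue_init_node(&tags, 128, -1, false, GFP_KERNEL, NUMA_NO_NODE))
	return -ENOMEM;

cpu = get_cpu();
tag = __sbitmap_queue_get(&tags);	/* -1 when all 128 bits are in use */
put_cpu();

if (tag >= 0) {
	/* ... use the tag ... */
	sbitmap_queue_clear(&tags, tag, cpu);	/* clears the bit, wakes a batched waiter */
}
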
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 9c5fe8110413..7e35fc450c5b 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,6 +1,7 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/export.h> 2#include <linux/export.h>
3#include <linux/kasan-checks.h> 3#include <linux/kasan-checks.h>
4#include <linux/thread_info.h>
4#include <linux/uaccess.h> 5#include <linux/uaccess.h>
5#include <linux/kernel.h> 6#include <linux/kernel.h>
6#include <linux/errno.h> 7#include <linux/errno.h>
@@ -111,6 +112,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
111 long retval; 112 long retval;
112 113
113 kasan_check_write(dst, count); 114 kasan_check_write(dst, count);
115 check_object_size(dst, count, false);
114 user_access_begin(); 116 user_access_begin();
115 retval = do_strncpy_from_user(dst, src, count, max); 117 retval = do_strncpy_from_user(dst, src, count, max);
116 user_access_end(); 118 user_access_end();
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f45011a59d..94346b4d8984 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
5485 skb->hash = SKB_HASH; 5485 skb->hash = SKB_HASH;
5486 skb->queue_mapping = SKB_QUEUE_MAP; 5486 skb->queue_mapping = SKB_QUEUE_MAP;
5487 skb->vlan_tci = SKB_VLAN_TCI; 5487 skb->vlan_tci = SKB_VLAN_TCI;
5488 skb->vlan_proto = htons(ETH_P_IP);
5488 skb->dev = &dev; 5489 skb->dev = &dev;
5489 skb->dev->ifindex = SKB_DEV_IFINDEX; 5490 skb->dev->ifindex = SKB_DEV_IFINDEX;
5490 skb->dev->type = SKB_DEV_TYPE; 5491 skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
1/**
2 * lib/win_minmax.c: windowed min/max tracker
3 *
4 * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
5 * value of a data stream over some fixed time interval. (E.g.,
6 * the minimum RTT over the past five minutes.) It uses constant
7 * space and constant time per update yet almost always delivers
8 * the same minimum as an implementation that has to keep all the
9 * data in the window.
10 *
11 * The algorithm keeps track of the best, 2nd best & 3rd best min
12 * values, maintaining an invariant that the measurement time of
13 * the n'th best >= n-1'th best. It also makes sure that the three
14 * values are widely separated in the time window since that bounds
15 * the worst-case error when the data is monotonically increasing
16 * over the window.
17 *
18 * Upon getting a new min, we can forget everything earlier because
19 * it has no value - the new min is <= everything else in the window
20 * by definition and it's the most recent. So we restart fresh on
21 * every new min and overwrite the 2nd & 3rd choices. The same property
22 * holds for 2nd & 3rd best.
23 */
24#include <linux/module.h>
25#include <linux/win_minmax.h>
26
27/* As time advances, update the 1st, 2nd, and 3rd choices. */
28static u32 minmax_subwin_update(struct minmax *m, u32 win,
29 const struct minmax_sample *val)
30{
31 u32 dt = val->t - m->s[0].t;
32
33 if (unlikely(dt > win)) {
34 /*
35 * Passed entire window without a new val so make 2nd
36 * choice the new val & 3rd choice the new 2nd choice.
37 * We may have to iterate this since our 2nd choice
38 * may also be outside the window (we checked on entry
39 * that the third choice was in the window).
40 */
41 m->s[0] = m->s[1];
42 m->s[1] = m->s[2];
43 m->s[2] = *val;
44 if (unlikely(val->t - m->s[0].t > win)) {
45 m->s[0] = m->s[1];
46 m->s[1] = m->s[2];
47 m->s[2] = *val;
48 }
49 } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
50 /*
51 * We've passed a quarter of the window without a new val
52 * so take a 2nd choice from the 2nd quarter of the window.
53 */
54 m->s[2] = m->s[1] = *val;
55 } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
56 /*
57 * We've passed half the window without finding a new val
58 * so take a 3rd choice from the last half of the window.
59 */
60 m->s[2] = *val;
61 }
62 return m->s[0].v;
63}
64
65/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
66u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
67{
68 struct minmax_sample val = { .t = t, .v = meas };
69
70 if (unlikely(val.v >= m->s[0].v) || /* found new max? */
71 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
72 return minmax_reset(m, t, meas); /* forget earlier samples */
73
74 if (unlikely(val.v >= m->s[1].v))
75 m->s[2] = m->s[1] = val;
76 else if (unlikely(val.v >= m->s[2].v))
77 m->s[2] = val;
78
79 return minmax_subwin_update(m, win, &val);
80}
81EXPORT_SYMBOL(minmax_running_max);
82
83/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
84u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
85{
86 struct minmax_sample val = { .t = t, .v = meas };
87
88 if (unlikely(val.v <= m->s[0].v) || /* found new min? */
89 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
90 return minmax_reset(m, t, meas); /* forget earlier samples */
91
92 if (unlikely(val.v <= m->s[1].v))
93 m->s[2] = m->s[1] = val;
94 else if (unlikely(val.v <= m->s[2].v))
95 m->s[2] = val;
96
97 return minmax_subwin_update(m, win, &val);
98}
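
As a usage sketch of the tracker above (editorial; it assumes the minmax_reset() and minmax_get() helpers from the win_minmax.h header added alongside this file, and the now/rtt_us variables and jiffies-based window are hypothetical):

struct minmax min_rtt;
u32 cur_min;
u32 win = 10 * HZ;	/* window length, in the same units as the timestamps */

minmax_reset(&min_rtt, now, rtt_us);		/* seed with the first sample */

/* on every new measurement: */
minmax_running_min(&min_rtt, win, now, rtt_us);

/* current windowed minimum at any point: */
cur_min = minmax_get(&min_rtt);
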